# Source code for microprobe.utils.asm

# Copyright 2011-2021 IBM Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""":mod:`microprobe.utils.asm` module

This module implements the required features to interpret assembly statements
and translate them into Microprobe internal representation of instruction,
operands, labels and addresses.

The main elements of this module are the following:

- :class:`~.MicroprobeAsmInstructionDefinition` objects to represent
  assembly statements (i.e. instruction definitions)
- :func:`~.interpret_asm` function validates the assembly statements and
  translates them into internal Microprobe representation of instructions and
  operands.
"""

# Futures
from __future__ import absolute_import, division, print_function

# Built-in modules
import atexit
import copy
import collections
import itertools
import multiprocessing as mp
import os
import re
import string


# Third party modules
import cachetools

# Own modules
import microprobe.code.ins
from microprobe import MICROPROBE_RC
from microprobe.code.address import Address, InstructionAddress
from microprobe.exceptions import MicroprobeAsmError, \
    MicroprobeCodeGenerationError, MicroprobeDuplicatedValueError, \
    MicroprobeValueError, MicroprobeCacheError
from microprobe.target.isa.operand import InstructionAddressRelativeOperand, \
    OperandConst, OperandImmRange, OperandValueSet
from microprobe.utils.bin import interpret_bin
from microprobe.utils.cache import read_default_cache_data, \
    write_default_cache_data_silent
from microprobe.utils.logger import get_logger
from microprobe.utils.misc import Progress, RejectingDict, twocs_to_int, \
    range_to_sequence


# Constants
# Module-level logger used by every function in this module.
LOG = get_logger(__name__)

# Assembly interpretation cache.
# _ASM_CACHE_ENABLED: when True, interpreted statements are looked up in and
#   stored into _ASM_CACHE, and the cache is loaded from _ASM_CACHE_FILE.
# _ASM_CACHE_FILE: path handed to the cache read/write helpers.
# _ASM_CACHE: the cache object itself; created lazily by interpret_asm().
# _ASM_CACHE_SIZE: maximum size given to the LRU cache when it is created.
# _ASM_CACHE_USED: set to True once an entry has been stored in the cache.
# _ASM_CACHE_SAVED: set to True once the at-exit cache writer is registered,
#   so that it is registered only once.
_ASM_CACHE_ENABLED = True
_ASM_CACHE_FILE = __file__ + ".asm"
_ASM_CACHE = None
_ASM_CACHE_SIZE = 16*1024
_ASM_CACHE_USED = False
_ASM_CACHE_SAVED = False

# Cache of interpreted decorators, indexed by the raw decorator string.
_DECORATOR_CACHE = RejectingDict()
_DECORATOR_CACHE_ENABLED = True

# Public names of this module.
__all__ = [
    "interpret_asm", "MicroprobeAsmInstructionDefinition",
    "instruction_to_asm_definition"
]


# Functions

# [docs]
def interpret_asm(code, target, labels, log=True, show_progress=False,
                  parallel=True, queue=None):
    """
    Return the list of :class:`~.MicroprobeInstructionDefinition` objects
    that results from interpreting the *code* (list of assembly statements).
    The *target* object is used to validate the existence of the instruction
    and operands in the target and the *labels* are needed to validate the
    correctness of the symbolic labels used in the assembly statements.

    When *parallel* is set and the number of statements is above the
    ``parallel_threshold`` configuration value, the statements are split in
    chunks and every chunk is interpreted by a separate process.

    :param code: Assembly to interpret
    :type code: :class:`list` of :class:`~.MicroprobeAsmInstructionDefinition`
                or string/s to interpret
    :param target: Target definition
    :type target: :class:`~.Target` object
    :param labels: Labels available
    :type labels: :class:`~.list` of :class:`~.str`
    :param log: Dump the assembly provided to the log if the interpretation
                fails
    :type log: :class:`~.bool`
    :param show_progress: Show progress indicators while parsing
    :type show_progress: :class:`~.bool`
    :param parallel: Allow splitting the work among several processes
    :type parallel: :class:`~.bool`
    :param queue: If provided, the resulting list is also put into this
                  queue (used to collect the results of worker processes)
    :type queue: queue-like object providing a ``put`` method
    :return: A list of instructions, operands, labels, etc. resulting from
            interpreting the assembly
    :rtype: :class:`~.list` of :class:`~.MicroprobeInstructionDefinition`
    :raise microprobe.exceptions.MicroprobeAsmError: if something is wrong
        during the interpretation
    """

    global _ASM_CACHE
    global _ASM_CACHE_SIZE

    if _ASM_CACHE is None:
        # Lazy cache set up: start with an empty cache and replace it by the
        # one stored on disk if caching is enabled and it can be read.
        try:
            _ASM_CACHE = cachetools.LRUCache(_ASM_CACHE_SIZE, getsizeof=None)
            if _ASM_CACHE_ENABLED:
                _ASM_CACHE = read_default_cache_data(_ASM_CACHE_FILE)
        except MicroprobeCacheError:
            _ASM_CACHE = cachetools.LRUCache(_ASM_CACHE_SIZE, getsizeof=None)

    LOG.debug("Start interpret_asm")
    instructions_and_params = collections.deque()

    LOG.debug("Extract defined labels")

    # def_labels keeps the labels in order; def_labels_dict is only used for
    # fast duplicate detection while the labels are being collected.
    def_labels = collections.deque()
    def_labels_dict = {}
    if labels is not None:
        for label in labels:
            def_labels.append(label)
            def_labels_dict[label] = None

    if isinstance(code, str):
        code = [code]

    if len(code) > MICROPROBE_RC["parallel_threshold"] and parallel:
        # Do parallel parsing
        processes = []
        worker_queues = []

        chunksize = max(len(code) // MICROPROBE_RC['cpus'], 1)

        extra_args = {}
        extra_args['log'] = log
        extra_args['show_progress'] = show_progress
        extra_args['parallel'] = False

        for chunk in [code[i:i + chunksize]
                      for i in range(0, len(code), chunksize)]:
            # Use a dedicated name for the per-worker queue so that the
            # 'queue' parameter provided by the caller is not overwritten.
            worker_queue = mp.Queue()
            extra_args['queue'] = worker_queue
            proc = mp.Process(target=interpret_asm,
                              args=(chunk, target, def_labels),
                              kwargs=extra_args)
            processes.append(proc)
            worker_queues.append(worker_queue)
            proc.start()
            # Only the first worker reports progress
            extra_args['show_progress'] = False

        instructions_and_params = []
        for worker_queue in worker_queues:
            instructions_and_params += worker_queue.get()
            worker_queue.close()
            worker_queue.join_thread()

        for process in processes:
            process.join()
            process.terminate()

        # Honor the 'queue' parameter also in the parallel path
        if queue is not None:
            queue.put(instructions_and_params)

        return instructions_and_params

    try:

        # First pass: collect the labels defined by the statements
        if show_progress:
            progress = Progress(len(code), msg="Labels parsed:")

        for instr_def in code:

            if isinstance(instr_def, str):
                instr_def = _str_to_asmdef(instr_def)

            if instr_def.label is not None:
                if instr_def.label.upper() in def_labels_dict:
                    raise MicroprobeAsmError(
                        "Label '%s' defined twice!" % instr_def.label
                    )
                def_labels.append(instr_def.label.upper())
                def_labels_dict[instr_def.label.upper()] = None

            if show_progress:
                progress()

        del def_labels_dict

        # Second pass: interpret the statements
        if show_progress:
            progress = Progress(len(code), msg="Instructions parsed:")

        for instr_def in code:

            if show_progress:
                progress()

            if isinstance(instr_def, str):

                if instr_def.strip() == "":
                    # empty, string, continue
                    continue
                instr_def = _str_to_asmdef(instr_def)

            # Statements prefixed by 'RAW:' are interpreted in safe mode
            # after removing the prefix
            safe = None
            if instr_def.assembly.split(" ")[0].upper() == "RAW:":
                instr_def.assembly = instr_def.assembly.split(" ")[1]
                safe = True

            intr_asm = _interpret_instr_def(
                instr_def, target, def_labels, safe=safe
            )
            instructions_and_params.append(intr_asm)

            LOG.debug("Instruction: '%s' interpreted", instr_def.assembly)

    except MicroprobeAsmError:

        if log:

            LOG.critical("Assembly provided:")
            LOG.critical(
                "%20s\t%20s\t%25s\t%s", "-" * 20, "-" * 20, "-" * 25, "-" * 20
            )
            LOG.critical(
                "%20s\t%20s\t%25s\t%s", "label", "address", "instruction",
                "decorators"
            )
            LOG.critical(
                "%20s\t%20s\t%25s\t%s", "-" * 20, "-" * 20, "-" * 25, "-" * 20
            )
            for instr in code:
                if isinstance(instr, str):
                    instr = _str_to_asmdef(instr)

                address = "--"
                if instr.address is not None:
                    address = "0x%016x" % instr.address

                LOG.critical(
                    "%20s\t%20s\t%25s\t%s", instr.label, address,
                    instr.assembly, instr.decorators
                )
            LOG.critical(
                "%20s\t%20s\t%25s\t%s", "-" * 20, "-" * 20, "-" * 25, "-" * 20
            )

            LOG.critical(
                "If the previous assembly is not correct, "
                "check the format of the assembly file provided"
            )
        # Bare raise: propagate the error with its original traceback
        raise

    LOG.debug("End interpret_asm")

    instructions_and_params = list(instructions_and_params)

    if queue is not None:
        queue.put(instructions_and_params)

    return instructions_and_params




# [docs]
def instruction_to_asm_definition(instr):
    """
    Return the :class:`~.MicroprobeAsmInstructionDefinition` that corresponds
    to the *instr* provided.

    The label of *instr* is reset to ``None`` before its assembly is
    generated, and it is not restored afterwards. The definition returned
    carries the original label, the displacement of the instruction address
    and the instruction comments. No decorators are set.

    :param instr: Instruction to convert
    :type instr: instruction object providing ``label``, ``set_label``,
                 ``assembly``, ``address`` and ``comments``
    :return: Assembly definition of the instruction
    :rtype: :class:`~.MicroprobeAsmInstructionDefinition`
    """
    original_label = instr.label

    # NOTE(review): presumably the label is cleared so that it does not show
    # up in the assembly string generated below -- confirm.
    instr.set_label(None)

    assembly_text = instr.assembly()
    displacement = instr.address.displacement

    return MicroprobeAsmInstructionDefinition(
        assembly_text, original_label, displacement, None, instr.comments
    )



def _interpret_instr_def(instr_def, target, labels, safe=None):
    """
    Interpret a single assembly definition.

    The statement is resolved, in this order, from the assembly cache, as a
    hexadecimal codification (``0x`` prefix), as a binary codification
    (``0b`` prefix) or as a regular assembly statement. Results that are not
    served from the cache are stored into it.

    :param instr_def: Assembly definition to interpret
    :type instr_def: :class:`~.MicroprobeAsmInstructionDefinition`
    :param target: Target definition
    :type target: :class:`~.Target` object
    :param labels: Labels available
    :type labels: sequence of :class:`~.str`
    :param safe: Value forwarded to the binary interpreter for hexadecimal
                 statements
    :return: The interpreted instruction definition
    :rtype: :class:`~.MicroprobeInstructionDefinition`
    :raise microprobe.exceptions.MicroprobeAsmError: if the statement can
        not be interpreted
    """
    LOG.debug("Start interpret_asm: '%s'", instr_def)
    key = (target.name, target.isa.path, instr_def.assembly)
    if _ASM_CACHE_ENABLED and key in _ASM_CACHE:
        instruction_type, operands = _ASM_CACHE[key]
        # Hand out a copy, the cached list must not be shared
        operands = operands[:]
    elif instr_def.assembly.upper().startswith("0X"):
        binary_def = interpret_bin(
            instr_def.assembly[2:], target, fmt="hex", single=True, safe=safe
        )
        if len(binary_def) > 1:
            raise MicroprobeAsmError("More than one instruction parsed.")

        instruction_type = binary_def[0].instruction_type
        operands = binary_def[0].operands
        _store_in_asm_cache(
            key, instruction_type, operands, data_reload=True
        )

    elif instr_def.assembly.upper().startswith("0B"):
        binary_def = interpret_bin(instr_def.assembly[2:], target, fmt="bin")
        if len(binary_def) > 1:
            raise MicroprobeAsmError("More than one instruction parsed.")

        instruction_type = binary_def[0].instruction_type
        operands = binary_def[0].operands
        _store_in_asm_cache(key, instruction_type, operands)

    else:

        asm_mnemonic = _extract_asm_mnemonic(instr_def.assembly)
        LOG.debug("Mnemonic: '%s'", asm_mnemonic)

        asm_operands = _extract_asm_operands(instr_def.assembly)
        LOG.debug("Operands: '%s'", asm_operands)

        # This is a work-around for RISC-V objdump implementation.
        # It does not dump negative numbers and the corresponding
        # absolute value is printed. Also a weird +1 needs to be added

        for idx, asm_operand in enumerate(asm_operands):
            if asm_operand.startswith("0XF") and len(asm_operand) == 18:
                nval = hex(
                    int(twocs_to_int(int(asm_operand, 16), 64)) + 1).upper()

                # Rebuild the definition with the operand replaced in the
                # assembly string (first element of the definition)
                instr_def = list(instr_def)
                instr_def[0] = instr_def[0].upper().replace(
                    asm_operand, nval)
                instr_def = MicroprobeAsmInstructionDefinition(*instr_def)
                asm_operands[idx] = nval

        # End work-around

        asm_mnemonic, asm_operands = target.normalize_asm(asm_mnemonic,
                                                          asm_operands)

        LOG.debug("Norm. mnemonic: '%s'", asm_mnemonic)
        LOG.debug("Norm. Operands: '%s'", asm_operands)

        instruction_types, asm_operands = _find_instr_with_mnemonic(
            asm_mnemonic, asm_operands, target
        )

        LOG.debug(
            "Types detected: '%s'", [
                type_ins.name for type_ins in instruction_types
            ]
        )

        instruction_type, operands = _extract_operands(
            instr_def.assembly, asm_operands, instruction_types, target, labels
        )

        _store_in_asm_cache(key, instruction_type, operands)

    address = _extract_address(instr_def.address)

    decorator_key = instr_def.decorators
    if _DECORATOR_CACHE_ENABLED and decorator_key in _DECORATOR_CACHE:
        decorators = copy.deepcopy(_DECORATOR_CACHE[decorator_key])
    else:
        decorators = _interpret_decorators(decorator_key)
        if _DECORATOR_CACHE_ENABLED:
            # Cache a private copy. The object returned to the caller must
            # not be the one kept in the cache, otherwise changes done by
            # the caller would leak into later cache hits.
            _DECORATOR_CACHE[decorator_key] = copy.deepcopy(decorators)

    label = None
    if instr_def.label is not None:
        label = instr_def.label.upper()

    LOG.debug("End interpret_asm: '%s'", instr_def)
    return microprobe.code.ins.MicroprobeInstructionDefinition(
        instruction_type, operands, label, address, instr_def.assembly,
        decorators, instr_def.comments
    )


def _store_in_asm_cache(key, instruction_type, operands, **writer_kwargs):
    """
    Store an interpreted statement into the assembly cache.

    Does nothing if the assembly cache is disabled. The first time an entry
    is stored, the function that writes the cache to disk is registered to
    run at interpreter exit.

    :param key: Cache key of the statement
    :type key: :class:`~.tuple`
    :param instruction_type: Instruction type of the statement
    :param operands: Operand values of the statement
    :param writer_kwargs: Extra keyword arguments for the cache writer
    """
    global _ASM_CACHE_USED
    global _ASM_CACHE_SAVED

    if not _ASM_CACHE_ENABLED:
        return

    # Keep a shallow copy: the caller returns 'operands' to its own caller,
    # which should not be able to modify the cached entry through it.
    _ASM_CACHE[key] = (instruction_type, copy.copy(operands))
    _ASM_CACHE_USED = True

    if not _ASM_CACHE_SAVED:
        atexit.register(
            write_default_cache_data_silent,
            _ASM_CACHE_FILE, _ASM_CACHE,
            **writer_kwargs
        )
        _ASM_CACHE_SAVED = True


def _extract_address(address):
    """

    :param address:
    :type address:
    """
    if address is not None:
        return InstructionAddress(base_address="code", displacement=address)
    return address


def _extract_asm_mnemonic(asm):
    """

    :param asm:
    :type asm:
    """
    return asm.split()[0].strip().upper()


def _extract_asm_operands(asm):
    """

    :param asm:
    :type asm:
    """
    operands = " ".join(asm.strip().upper().split()[1:])
    operands = operands.replace(",", " ")
    return re.findall(r"[.a-zA-Z0-9_+-@]+", operands)


def _interpret_decorators(str_decorators):
    """
    Interpret the decorators of an assembly statement.

    Decorators are provided as space separated ``KEY=VALUE`` definitions,
    where ``VALUE`` can be a comma separated list of values. Keys are
    converted to upper case. A definition with a single value is stored as
    that value, while a definition with several comma separated values is
    stored as a list. A definition without any value (``KEY`` or ``KEY=``)
    is stored as ``None``.

    :param str_decorators: Decorators string or ``None``
    :type str_decorators: :class:`~.str`
    :return: Decorator values indexed by decorator key
    :rtype: :class:`~.RejectingDict`
    :raise microprobe.exceptions.MicroprobeAsmError: if a decorator key is
        specified more than once
    """

    decorators = RejectingDict()

    if str_decorators is None:
        return decorators

    for decorator_def in str_decorators.split(" "):
        if decorator_def == '':
            continue

        key = decorator_def.split("=")[0].upper()
        # The extra '=' guarantees that there is always a (possibly empty)
        # value part, even for definitions without '='
        lvalue = (decorator_def + "=").split("=")[1].split(",")

        nvalue = []

        for value in lvalue:

            if value == '':
                continue

            if os.path.isfile(value):
                LOG.warning(
                    "Decorator with references to files "
                    "not yet implemented"
                )

            value = _convert_decorator_value(value)

            if isinstance(value, list):
                nvalue.extend(value)
            else:
                nvalue.append(value)

        if len(lvalue) != 1:
            decorator_value = nvalue
        elif nvalue:
            # NOTE(review): if a single value expands to a list, only its
            # first element is kept -- confirm this is intended.
            decorator_value = nvalue[0]
        else:
            # Decorator without value. It used to fail with an IndexError.
            decorator_value = None

        try:
            decorators[key] = decorator_value
        except MicroprobeDuplicatedValueError:
            raise MicroprobeAsmError(
                "Decorator with key '%s' specified twice for the same "
                "instruction." % key
            )

    return decorators


def _convert_decorator_value(text):
    """
    Convert the string value of a decorator.

    The comparison is case insensitive. ``ON``/``TRUE`` and ``OFF``/``FALSE``
    are converted to booleans, strings of digits to integers and strings
    starting by ``0x`` to integers (base 16) or, if they contain more than
    one ``-``, to the result of :func:`~.range_to_sequence`. Any other value
    is returned unmodified.

    :param text: Non-empty value to convert
    :type text: :class:`~.str`
    :return: Converted value
    """
    upper_text = text.upper()

    if upper_text in ('ON', 'TRUE'):
        return True

    if upper_text in ('OFF', 'FALSE'):
        return False

    if upper_text.isdigit():
        return int(upper_text)

    if upper_text.startswith("0X"):
        # NOTE(review): a value with a single '-' (e.g. 0x10-0x20) is not
        # handled as a range and ends up returned as a string -- confirm.
        if upper_text.count("-") > 1:
            return range_to_sequence(*upper_text.split("-"))

        try:
            return int(upper_text, 16)
        except ValueError:
            return text

    return text


def _find_instr_with_mnemonic(mnemonic, asm_operands, target):
    """
    Find the instructions of the target that match the mnemonic and the
    number of operands provided.

    If instructions with the mnemonic exist but none of them has the number
    of operands provided, and any of them codifies a field of its assembly
    format using several format fields (see :func:`_split_field_hack`),
    ``"0x0"`` operands are appended to *asm_operands* until a match is
    found. No more operands are appended once the largest number of
    operands of the candidate instructions is reached.

    :param mnemonic: Normalized mnemonic (or instruction name)
    :type mnemonic: :class:`~.str`
    :param asm_operands: Operand strings. The list is extended in place if
                         padding operands are needed
    :type asm_operands: :class:`~.list` of :class:`~.str`
    :param target: Target definition
    :type target: :class:`~.Target` object
    :return: Matching instruction types and the (possibly extended) operands
    :rtype: :class:`~.tuple`
    :raise microprobe.exceptions.MicroprobeAsmError: if no instruction
        matches
    """

    # First, look for default instructions

    base_instructions = [
        instr
        for instr in target.instructions.values()
        if instr.mnemonic == mnemonic or instr.name == mnemonic
    ]

    LOG.debug(
        "Instruction found with same mnemonic: %s",
        [instr.name for instr in base_instructions]
    )

    # Number of operands of every candidate, computed only once
    operand_counts = [
        len(target.new_instruction(instr.name).operands())
        for instr in base_instructions
    ]
    max_operands = max(operand_counts) if operand_counts else 0

    while True:

        instructions = [
            instr
            for instr, count in zip(base_instructions, operand_counts)
            if count == len(asm_operands)
        ]

        LOG.debug(
            "Instruction found with same operands: %s", [
                instr.name for instr in instructions
            ]
        )

        if instructions:
            break

        if len(asm_operands) >= max_operands:
            # Adding more operands can not produce a match. Checking this
            # guarantees termination when too many operands are provided.
            break

        # TODO: Hack for Z and RISC-V, needs to be
        # TODO: removed from here, it should be in the target backend

        # There are instructions but for some reason the number of operands
        # is not correct. Check if it is our fault or user's fault.
        hack = None
        for instruction in base_instructions:
            hack = _split_field_hack(instruction)
            if hack is not None:
                break

        if hack is None:
            break

        asm_operands.append("0x0")
        if hack == "riscv":
            LOG.debug("Fixing in ASM")

    # If no instructions found, extended mnemonics should be checked here.
    # TODO: Implement

    if not instructions:

        if any(instr.mnemonic == mnemonic for instr in base_instructions):
            raise MicroprobeAsmError(
                "Unable to interpret asm mnemonic '%s'. Number of operands "
                "is not correct" % mnemonic
            )

        raise MicroprobeAsmError(
            "Unable to interpret_asm mnemonic '%s'. Either the mnemonic is "
            "invalid, the instruction is not supported or this is a not "
            "supported extended mnemonic" % mnemonic
        )

    return instructions, asm_operands


# RISC-V hacks
# One fix per member. The first element of each tuple contains all the
# fields which together represent the full codification of the field in the
# second element. Therefore, each field of the first element is present in
# the instruction format definition, while the second element is in the
# assembly format string.
_RISCV_SPLIT_FIELD_FIXES = (
    (("s_imm5", "s_imm7"), "s_imm12"),
    (("sb_imm5", "sb_imm7"), "sb_imm12"),
    (("cd_imm3", "c_imm2"), "c_imm5"),
    (("cw_imm3", "c_imm2"), "c_imm5"),
    (("cb_imm5", "c_imm3"), "c_imm8"),
    (("cw_imm5", "c_imm1"), "c_imm6"),
    (("cd_imm5", "c_imm1"), "c_imm6"),
    (("ci_imm5", "c_imm1"), "c_imm6"),
    (("cu_imm5", "c_imm1"), "c_imm6"),
    (("cs_imm5", "c_imm1"), "c_imm6"),
    (("cls_imm5", "c_imm1"), "c_imm6"),
)


def _split_field_hack(instruction):
    """
    Check if a field of the assembly format of the instruction is codified
    using several fields of the instruction format.

    :param instruction: Instruction type to check
    :return: ``"z"`` or ``"riscv"`` depending on the kind of fix that
             applies, ``None`` if no fix applies
    """
    fnames = [field.name for field in instruction.format.fields]
    insfmt = instruction.format.assembly_format

    # Z hacks
    if "DH1" in fnames and "DL1" in fnames and "D1" in insfmt:
        return "z"

    if "DH2" in fnames and "DL2" in fnames and "D2" in insfmt:
        return "z"

    for parts, combined in _RISCV_SPLIT_FIELD_FIXES:
        if all(part in fnames for part in parts) and combined in insfmt:
            return "riscv"

    return None


def _extract_operands(base_asm, asm_operands, intr_types, target, labels):
    """
    Find the instruction type and the operand values that reproduce the
    assembly statement provided.

    For each candidate instruction type, the possible operand values are
    generated and only the combinations whose generated assembly matches
    *base_asm* are kept.

    :param base_asm: Original assembly statement
    :type base_asm: :class:`~.str`
    :param asm_operands: Operand strings of the statement
    :type asm_operands: :class:`~.list` of :class:`~.str`
    :param intr_types: Candidate instruction types
    :type intr_types: :class:`~.list` of instruction types
    :param target: Target definition
    :type target: :class:`~.Target` object
    :param labels: Labels available
    :type labels: sequence of :class:`~.str`
    :return: Instruction type and list of operand values
    :rtype: :class:`~.tuple`
    :raise microprobe.exceptions.MicroprobeAsmError: if the number of
        operands does not match, if no valid operands are found or if
        more than one operand combination is valid
    """

    LOG.debug("Start extracting operands")

    operand_dict = {}

    for instr_type in intr_types:

        operands = []
        instruction = target.new_instruction(instr_type.name)
        LOG.debug("Instruction: '%s'", instruction.name)
        LOG.debug("Operands: '%s'", instruction.operands())

        if len(asm_operands) != len(instruction.operands()):
            # The message has to be formatted explicitly, exceptions do not
            # apply format arguments as logging calls do
            raise MicroprobeAsmError(
                "Mismatch in number of operands: '%s' !="
                " '%s'. Base asm: '%s'" % (
                    asm_operands, instruction.operands(), base_asm
                )
            )

        sorted_asm_operands = _sort_asm_operands_by_intr_type(
            asm_operands, instruction
        )

        LOG.debug("Sorted asm operands: '%s'", sorted_asm_operands)

        operand_candidates = _generate_operand_candidates(
            sorted_asm_operands, target, instruction, labels
        )

        LOG.debug("Operand candidates: '%s'", operand_candidates)

        operand_candidates = _filter_operands_by_type(
            operand_candidates, instruction
        )

        LOG.debug("Filtered operand candidates: '%s'", operand_candidates)

        for operand_candidate in _find_operand_candidates(
            operand_candidates, instruction
        ):
            LOG.debug("Checking ASM for %s", operand_candidate)
            if _check_assembly_string(
                base_asm, instr_type, target, operand_candidate
            ):
                LOG.debug("ASM OK!")
                operands.append(operand_candidate)

        if operands:
            operand_dict[instr_type] = operands

    if not operand_dict:
        raise MicroprobeAsmError(
            "Unable to find operands for assembly: '%s'. If the instruction "
            "contains labels or symbols, make sure they are declared."
            " Otherwise, check the rest of operands" % base_asm.strip()
        )

    if len(operand_dict) > 1:

        LOG.warning(
            "Operands can be valid for multiple instruction definitions. "
            "Check your architecture definition files and specify a particular"
            " instruction variant. Base assembly: '%s'", base_asm.strip()
        )

        for key in operand_dict:
            LOG.warning("Possible instructions: %s", key)

    # If several instruction types are valid, the first one found is used
    instr_type = next(iter(operand_dict))
    operands = operand_dict[instr_type]

    if len(operands) > 1:
        raise MicroprobeAsmError(
            "Multiple operand possibilities for "
            "instruction '%s'. Possibilities: '%s'" % (
                base_asm, operands
            )
        )

    LOG.debug("End extracting operands")

    return instr_type, list(operands[0])


def _sort_asm_operands_by_intr_type(asm_operands, instruction):
    """
    Reorder the operands of an assembly statement.

    The operands are provided in the order they appear in the assembly
    statement, which is assumed to be the order of the fields in the
    assembly format of the instruction. The returned list contains the same
    operands following the order of the fields in the instruction format
    definition. If the operands can not be mapped to format fields, they are
    returned in the original order.

    :param asm_operands: Operands in assembly statement order
    :type asm_operands: :class:`~.list` of :class:`~.str`
    :param instruction: Instruction the operands belong to
    :type instruction: instruction object providing ``architecture_type``
    :return: Reordered operands
    :rtype: :class:`~.list` of :class:`~.str`
    :raise microprobe.exceptions.MicroprobeAsmError: if the number of
        operands changes while reordering
    """
    LOG.debug("Start: asm_operands: %s", asm_operands)
    new_operands = []

    # Make every element of the assembly format space separated (including
    # the last one), so that field names can be searched as ' NAME '
    asm_fmt = instruction.architecture_type.format.assembly_format + " "
    asm_fmt = asm_fmt.replace("(", " ")
    asm_fmt = asm_fmt.replace(")", " ")
    asm_fmt = asm_fmt.replace(",", " ")

    # TODO: Hack for Z and RISC-V needs to be
    # TODO: removed from here, it should be in the target backend

    fnames = [
        field.name for field in instruction.architecture_type.format.fields
    ]

    # Z hacks
    # The Dx element of the assembly format is codified by the DLx and DHx
    # fields: DLx takes the place of Dx and DHx is placed at the end.
    if "DH1" in fnames and "DL1" in fnames and " D1 " in asm_fmt:
        asm_fmt = asm_fmt.replace(" D1 ", " DL1 ")
        asm_fmt += " DH1 "

    if "DH2" in fnames and "DL2" in fnames and " D2 " in asm_fmt:
        asm_fmt = asm_fmt.replace(" D2 ", " DL2 ")
        asm_fmt += " DH2 "

    # RISC-V hacks
    # The first element of the tuple is the field containing the actual value.
    # The second element is an array containing dummy fields (i.e. fields with
    # a zero value).
    # The last element is the field which contains the value in the assembly
    # format string.
    riscv_fixes = [
        ("s_imm7", ["s_imm5"], "s_imm12"),
        ("sb_imm7", ["sb_imm5"], "sb_imm12"),
        ("cd_imm3", ["c_imm2"], "c_imm5"),
        ("cw_imm3", ["c_imm2"], "c_imm5"),
        ('cb_imm5', ['c_imm3'], "c_imm8"),
        ('cw_imm5', ['c_imm1'], "c_imm6"),
        ('cd_imm5', ['c_imm1'], "c_imm6"),
        ('ci_imm5', ['c_imm1'], "c_imm6"),
        ('cu_imm5', ['c_imm1'], "c_imm6"),
        ('cs_imm5', ['c_imm1'], "c_imm6"),
        ('cls_imm5', ['c_imm1'], "c_imm6"),
    ]

    # 'fix' is only bound (and only used below) when 'fix_found' is True
    fix_found = False

    # Only the first fix that applies is used
    for current_fix in riscv_fixes:
        if (current_fix[0] in fnames
                and all(item in fnames for item in current_fix[1])
                and current_fix[2] in asm_fmt):
            fix = current_fix
            fix_found = True
            break

    # TODO: removed from here, it should be in the target backend
    # TODO: removed from here, it should be in the target backend

    # Collect [field name, position in the assembly format] for the fields
    # that take an operand, following the format definition order
    fields = []
    for field in instruction.architecture_type.format.fields:
        if " %s " % field.name in asm_fmt:
            fields.append([field.name, asm_fmt.find(" %s " % field.name)])
        elif fix_found:
            if field.name == fix[0]:
                # Field with the actual value: use the position of the
                # combined field in the assembly format
                fields.append([field.name, asm_fmt.find(" %s " % fix[2])])
            elif field.name in fix[1]:
                # Dummy fields are placed at the end
                fields.append([field.name, len(asm_fmt)])

    sorted_fields = sorted(fields, key=lambda x: x[1])

    # Assign the operands to the fields in assembly format order. The inner
    # lists are shared between 'fields' and 'sorted_fields', so the operand
    # appended here is also visible through 'fields'.
    for asm_operand, sorted_field in zip(asm_operands, sorted_fields):
        sorted_field.append(asm_operand)

    # Emit the operands in format definition order. If a field name appears
    # more than once, the operand of the first one (in assembly format
    # order) is used.
    for field in fields:

        field_name = field[0]
        new_operands.append(
            [
                elem[2] for elem in sorted_fields if elem[0] == field_name
            ][0]
        )

    # Fall back to the original order if not all the operands were mapped
    if len(asm_operands) != len(new_operands):
        new_operands = []
        for asm_operand in asm_operands:
            # if asm_operand not in new_operands:
            new_operands.append(asm_operand)

    LOG.debug("Asm operands: %s", asm_operands)
    LOG.debug("New operands: %s", new_operands)
    # Sanity check. After the fall back above both lengths are equal.
    if len(asm_operands) != len(new_operands):
        raise MicroprobeAsmError("Unable to interpret assembly operands")

    return new_operands


def _filter_operands_by_type_pos(operands, instruction):
    """
    Discard the operand values that are not valid for the operand they are
    assigned to.

    :param operands: Candidate values for each operand position
    :type operands: :class:`~.list` of :class:`~.list`
    :param instruction: Instruction providing the operands to check against
    :type instruction: instruction object providing ``operands()``
    :return: For each operand position, the candidate values accepted by the
             type of the operand in that position
    :rtype: :class:`~.list` of :class:`~.list`
    """
    LOG.debug("Start")

    def _is_accepted(operand, candidate):
        """ Check if the operand type accepts the candidate value. """
        try:
            operand.type.check(candidate)
        except MicroprobeValueError:
            return False
        return True

    filtered_operands = [
        [
            candidate for candidate in candidates
            if _is_accepted(operand, candidate)
        ]
        for operand, candidates in zip(instruction.operands(), operands)
    ]

    LOG.debug("New operands: %s", filtered_operands)
    LOG.debug("End")

    return filtered_operands


def _check_assembly_string(base_asm, instr_type, target, operands):
    """
    Check if an instruction with the operands provided generates the
    assembly statement provided.

    String operands without ``@`` are converted to addresses based on that
    string. String operands containing ``@`` enable the relocation mode, in
    which operands are set one by one and the value check is skipped for
    the immediate operands that receive such a string.

    :param base_asm: Original assembly statement
    :type base_asm: :class:`~.str`
    :param instr_type: Instruction type to check
    :type instr_type: instruction type providing ``name`` and ``mnemonic``
    :param target: Target definition
    :type target: :class:`~.Target` object
    :param operands: Operand values to check
    :type operands: sequence of operand values
    :return: ``True`` if the normalized assembly of the instruction is equal
             to the normalized *base_asm*, ``False`` otherwise (also if the
             operands are not valid for the instruction)
    :rtype: :class:`~.bool`
    """

    LOG.debug("Start checking assembly string: %s", base_asm)

    operands = list(operands)

    relocation_mode = False
    for idx, operand in enumerate(operands):
        if not isinstance(operand, str):
            continue

        if "@" in operand:
            relocation_mode = True
        else:
            operands[idx] = Address(base_address=operand)

    instruction = target.new_instruction(instr_type.name)

    try:
        if not relocation_mode:
            instruction.set_operands(operands)
        else:
            # Go one by one, and make relocation safe
            for operand, value in zip(instruction.operands(), operands):
                # Only strings can be relocations. Looking for '@' in other
                # values (e.g. integers) would fail with a TypeError.
                is_relocation = isinstance(value, str) and "@" in value
                if is_relocation and isinstance(operand.type,
                                                OperandImmRange):
                    operand.set_value(value, check=False)
                else:
                    operand.set_value(value)
    except MicroprobeValueError:
        LOG.debug("End checking assembly string: Operands not valid")
        return False
    except MicroprobeCodeGenerationError:
        LOG.debug(
            "End checking assembly string: Operands not valid for "
            "callback"
        )
        return False

    nasm = _normalize_asm(instruction.assembly())
    base_asm = _normalize_asm(base_asm)
    # The statement can use the instruction name instead of the mnemonic
    base_asm = base_asm.replace(instr_type.name, instr_type.mnemonic)

    LOG.debug("'%s' == '%s' ?", nasm, base_asm)
    if nasm == base_asm:
        LOG.debug("End checking assembly string: Valid")
        return True

    LOG.debug("End checking assembly string: Not valid")
    return False


def _find_operand_candidates(candidates, instruction):
    """
    Generate the valid combinations of operand values.

    All the combinations that take one value from each operand position are
    evaluated. Combinations with the same string representation are only
    evaluated once.

    :param candidates: Candidate values for each operand position
    :type candidates: :class:`~.list` of :class:`~.list`
    :param instruction: Instruction the operands belong to
    :type instruction: instruction object providing ``operands()``
    :return: Generator of valid operand value combinations. Nothing is
             generated if the number of operand positions does not match the
             number of operands of the instruction
    :rtype: generator of :class:`~.tuple`
    """

    LOG.debug("Start")

    if len(candidates) != len(instruction.operands()):
        LOG.debug("No operand candidates, different number of operands")
        # Stop here. A bare 'yield' would hand out None to the caller and
        # then continue evaluating combinations.
        return

    # String representations of the combinations already evaluated
    validated_combinations = set()

    for candidate_combination in itertools.product(*candidates):

        LOG.debug("Combination: %s", candidate_combination)

        signature = str(candidate_combination)
        if signature in validated_combinations:
            LOG.debug("Already validated")
            continue

        validated_combinations.add(signature)

        if _validate_operands(candidate_combination, instruction):
            LOG.debug("Combination valid: %s", candidate_combination)
            yield candidate_combination
        else:
            LOG.debug("Combination not valid: %s", candidate_combination)


def _validate_operands(operand_values, instruction):
    """Tell whether *operand_values* are acceptable for *instruction*.

    Values are paired positionally with ``instruction.operands()`` and each
    pair is checked in turn:

    - A string containing ``@`` on an immediate-range operand is treated as
      a relocation and makes the function return ``True`` immediately.
    - Any other string is only accepted on an instruction-address-relative
      operand.
    - Non-string values are passed to the operand type's ``check`` method
      when the value kind (int or not) is compatible with the operand type;
      a :class:`MicroprobeValueError` from ``check`` means invalid.

    :param operand_values: Candidate values, one per operand.
    :type operand_values: sequence
    :param instruction: Instruction providing ``operands()``.
    :type instruction: presumably a Microprobe instruction object
    :return: ``True`` if the values are accepted, ``False`` otherwise.
    :rtype: :class:`bool`
    """

    LOG.debug("Start validate operands")

    value_types = (OperandImmRange, OperandConst, OperandValueSet)

    for operand_value, operand in zip(operand_values, instruction.operands()):

        LOG.debug("Validating: '%s' <-> '%s'", operand_value, operand)

        optype = operand.type
        is_relative = isinstance(optype, InstructionAddressRelativeOperand)

        if isinstance(operand_value, str):

            LOG.debug("Value is a string")

            if isinstance(optype, OperandImmRange) and "@" in operand_value:
                LOG.debug("Look like a relocation")
                return True

            if not is_relative:
                LOG.debug("Invalid: A string in a non-relative operand")
                return False

            # A string on a relative operand needs no further checking.
            continue

        LOG.debug("Value is not a string")
        is_int = isinstance(operand_value, int)

        if is_relative and not is_int:
            LOG.debug("Invalid: A not int in a relative operand")
            return False

        takes_value = isinstance(optype, value_types)

        # Pick the log line of the first supported value/type pairing; all
        # supported pairings end in the same ``check`` call.
        if takes_value and is_int:
            message = "Checking int value: %s"
        elif not takes_value and not is_int:
            message = "Checking not int value: %s"
        elif is_relative and is_int:
            message = "Checking relative int value: %s"
        else:
            LOG.debug("Combination of value and types not supported")
            return False

        try:
            LOG.debug(message, operand_value)
            optype.check(operand_value)
        except MicroprobeValueError as exc:
            LOG.debug(exc)
            return False

    return True


def _generate_operand_candidates(operands, target, instruction, labels):
    """Build, for every assembly operand, the list of values it may mean.

    A numeric operand contributes its integer value, the immediate
    variations computed for *instruction* and, when it is made only of
    digits, the registers of *target* matching it. A non-numeric operand
    is kept verbatim when it starts with one of *labels* (case
    insensitive); otherwise it contributes the matching registers and the
    matching values of the operand definition at the same position.

    :param operands: Operand strings of the assembly statement.
    :type operands: :class:`list` of :class:`str`
    :param target: Target providing the register definitions.
    :type target: presumably a Microprobe target object
    :param instruction: Instruction providing ``operands()``.
    :type instruction: presumably a Microprobe instruction object
    :param labels: Known label names.
    :type labels: iterable of :class:`str`
    :return: One list of options per operand, in operand order.
    :rtype: :class:`list` of :class:`list`
    :raise MicroprobeAsmError: if an operand has no option at all.
    """

    LOG.debug("Start: operands=%s", operands)
    candidates = []

    for position, operand in enumerate(operands):

        operdef = instruction.operands()[position].descriptor.type

        LOG.debug("Processing operand: %s", operand)

        numeric = _numeric_format(operand)

        if numeric is not None:

            LOG.debug("Operand looks like a numeric value '%d'", numeric)

            options = [numeric]
            options.extend(
                _generate_immediate_variations(numeric, instruction)
            )
            if operand.isdigit():
                options.extend(_generate_possible_registers(operand, target))

        else:

            LOG.debug(
                "Operand looks like a label, "
                "register value, or a rounding mode"
            )

            upper_operand = operand.upper()
            if any(
                upper_operand.startswith(label.upper()) for label in labels
            ):
                options = [operand]
            else:
                options = []
                options.extend(_generate_possible_registers(operand, target))
                options.extend(_generate_possible_other_reps(operand, operdef))

        if not options:
            raise MicroprobeAsmError(
                "Unable to generate operand candidates "
                "for operand '%s'" % operand
            )

        LOG.debug("New candidate: %s", options)
        candidates.append(options)

    LOG.debug("End")
    return candidates


def _generate_immediate_variations(immediate, instr):
    """Return alternative encodings of *immediate* for *instr*.

    For every operand of *instr* whose type is flagged ``immediate``:

    - If the operand type represents *immediate* as a different number,
      the value mirrored around *immediate* is proposed, provided that its
      own representation reads back as *immediate*.
    - If the operand type has a positive ``shift``, *immediate* shifted
      left by that amount is proposed.

    :param immediate: Value found in the assembly statement.
    :type immediate: :class:`int`
    :param instr: Instruction providing ``operands()``.
    :type instr: presumably a Microprobe instruction object
    :return: Distinct variations, in discovery order.
    :rtype: :class:`list` of :class:`int`
    """

    variations = []

    for operand in instr.operands():
        optype = operand.type
        if not optype.immediate:
            continue

        new_val = _numeric_format(optype.representation(immediate))

        # _numeric_format returns None for text that is not a number; such
        # a representation cannot be inverted arithmetically, so skip the
        # relative variation instead of failing on ``None - int``.
        if new_val is not None and new_val != immediate:
            # Relative representation
            diff = new_val - immediate
            variation = immediate - diff

            reads_back = _numeric_format(
                optype.representation(variation)
            ) == immediate

            if reads_back and variation not in variations:
                variations.append(variation)

        if optype.shift > 0:
            shifted = immediate << optype.shift
            if shifted not in variations:
                variations.append(shifted)

    return variations


def _generate_possible_other_reps(operand, oper_def):
    """Return the values of *oper_def* whose representation is *operand*.

    Only :class:`OperandValueSet` definitions are considered; any other
    definition yields an empty list. The comparison is case insensitive on
    both sides, in line with how register representations are matched.

    :param operand: Operand text taken from the assembly statement.
    :type operand: :class:`str`
    :param oper_def: Operand type definition to look values up in.
    :type oper_def: operand type object
    :return: Matching values of the operand definition.
    :rtype: :class:`list`
    """

    LOG.debug("Looking possible values for operand: '%s'", operand)

    if not isinstance(oper_def, OperandValueSet):
        return []

    wanted = operand.upper()
    return [
        val for val in oper_def.values()
        if oper_def.representation(val).upper() == wanted
    ]


def _generate_possible_registers(operand, target):
    """Return the registers of *target* that *operand* may refer to.

    Registers whose representation equals *operand* (ignoring case) are
    preferred. When there is none and *operand* contains exactly one run
    of digits, is not a digit-led mixed token and is not a negative
    number, the registers whose representation equals just those digits
    are returned instead.

    :param operand: Operand text taken from the assembly statement.
    :type operand: :class:`str`
    :param target: Target providing ``registers``.
    :type target: presumably a Microprobe target object
    :return: Matching register objects, possibly empty.
    :rtype: :class:`list`
    """

    LOG.debug("Looking possible registers for operand: '%s'", operand)

    wanted = operand.upper()
    registers = [
        reg for reg in target.registers.values()
        if reg.representation.upper() == wanted
    ]

    # Fall back to a digits-only match when no full name matched and the
    # operand carries exactly one number.
    if not registers and len(re.findall("[0-9]+", operand)) == 1:

        digit_led_mix = operand[0].isdigit() and not operand.isdigit()
        negative_number = operand[0] == '-' and operand[1:].isdigit()

        if not (digit_led_mix or negative_number):
            digits = re.sub("[^0-9]", "", operand)
            registers = [
                reg for reg in target.registers.values()
                if reg.representation == digits
            ]

    LOG.debug("Registers for operand: '%s' are: '%s'", operand, registers)

    return registers


def _filter_operands_by_type(candidates, instruction):
    """Drop the operand options whose kind *instruction* cannot take.

    The accepted kinds come from :func:`_get_operand_types`. For every
    option of every candidate list:

    - a string is kept when the instruction accepts labels or the string
      contains ``@``; otherwise an error is raised;
    - an int is always kept;
    - any other option is kept when its ``type`` is the ``type`` of the
      first value of one of the accepted operand types, and is dropped
      (with debug logging) otherwise.

    :param candidates: One list of options per assembly operand.
    :type candidates: :class:`list` of :class:`list`
    :param instruction: Instruction being matched.
    :type instruction: presumably a Microprobe instruction object
    :return: The filtered option lists, in the same order.
    :rtype: :class:`list` of :class:`list`
    :raise MicroprobeAsmError: if a string option is not acceptable or an
        operand ends up without options.
    """

    operand_types = _get_operand_types(instruction)

    accepts_label = 'label' in [
        kind for kind in operand_types if isinstance(kind, str)
    ]

    def first_values():
        # First value of every accepted type that is neither a str/int
        # marker nor a type whose values are plain ints.
        return [
            list(kind.values())[0]
            for kind in operand_types
            if not isinstance(kind, (str, int)) and
            not isinstance(list(kind.values())[0], int)
        ]

    def no_candidates_error():
        return MicroprobeAsmError(
            "Unable to find operand candidates when interpreting "
            "instruction '%s'. Check the assembly provided " %
            instruction.name
        )

    new_candidates = []
    for candidate in candidates:

        kept = []

        for operand_option in candidate:

            LOG.debug("Operand option: %s", operand_option)

            if isinstance(operand_option, str):
                if not (accepts_label or "@" in operand_option):
                    raise no_candidates_error()
                kept.append(operand_option)
                continue

            if isinstance(operand_option, int):
                kept.append(operand_option)
                continue

            if operand_option.type in [
                value.type for value in first_values()
            ]:
                kept.append(operand_option)
                continue

            LOG.debug(operand_option.type.name)
            LOG.debug([value.type.name for value in first_values()])
            LOG.debug(list(operand_types))
            LOG.debug("Removing: %s", operand_option)

        if not kept:
            raise no_candidates_error()

        assert len(kept) <= len(candidate)

        new_candidates.append(kept)

    return new_candidates


def _get_operand_types(instruction):
    """Summarize the kinds of operands *instruction* takes.

    Operand types that are neither instruction-address-relative, constant
    nor immediate-range are collected once each. The string ``'label'`` is
    appended when a relative operand was seen, and ``'value'`` when a
    relative, constant or immediate-range operand was seen.

    :param instruction: Instruction providing ``operands()``.
    :type instruction: presumably a Microprobe instruction object
    :return: Operand type objects followed by the string markers.
    :rtype: :class:`list`
    """

    LOG.debug("Start")

    types = []
    needs_label = False
    needs_value = False

    for operand in instruction.operands():

        optype = operand.type

        LOG.debug("Operand: %s", operand)
        LOG.debug("Operand type: %s", optype)

        if optype not in types:
            if isinstance(optype, InstructionAddressRelativeOperand):
                needs_label = needs_value = True
            elif isinstance(optype, (OperandConst, OperandImmRange)):
                needs_value = True
            else:
                types.append(optype)

        LOG.debug("Current types: %s", types)

    # Markers always go after the collected types, 'label' before 'value'.
    markers = (('label', needs_label), ('value', needs_value))
    types.extend([marker for marker, needed in markers if needed])

    LOG.debug("Final types: %s", types)
    LOG.debug("End")

    return types


def _normalize_asm(asm):
    """Return *asm* in a canonical form suitable for string comparison.

    The text is upper-cased and stripped, then rewritten in this order:

    1. spacing around parentheses and commas is made uniform;
    2. any run of non-digit, non-minus characters directly in front of a
       number is removed (presumably register-name prefixes);
    3. ``0X``, ``0O`` and ``0B`` literals are converted to decimal;
    4. leading zeros of (possibly negative) numbers are removed.

    :param asm: Assembly text.
    :type asm: :class:`str`
    :return: Normalized assembly text.
    :rtype: :class:`str`
    """

    LOG.debug("Start asm normalizing: '%s'", asm)
    nasm = asm.upper().strip()

    spacing_and_prefix_rules = (
        (r'\( +', r'('),
        (r' +\)', r')'),
        (r' +\(', r'('),
        (r'\) +', r')'),
        (r',', r', '),
        (r' +', r' '),
        (r" [^0-9-]+([0-9]+),", r" \1,"),
        (r" [^0-9-]+([0-9]+)$", r" \1"),
        (r"\([^0-9-]+([0-9]+),", r"(\1,"),
        (r"\([^0-9-]+([0-9]+)\)", r"(\1)"),
        (r" [^0-9-]+([0-9]+)\)", r" \1)"),
    )
    for pattern, replacement in spacing_and_prefix_rules:
        nasm = re.sub(pattern, replacement, nasm)

    # Each base is searched on the text left by the previous one, and every
    # literal found replaces its first remaining occurrence.
    based_literals = (
        ("0X[0-9ABCDEF]+", 16),
        ("0O[0-7]+", 8),
        ("0B[01]+", 2),
    )
    for pattern, base in based_literals:
        for literal in re.findall(pattern, nasm):
            nasm = nasm.replace(literal, str(int(literal, base)), 1)

    # Replace the leading zeros
    leading_zero_rules = (
        (r" [0]+([0-9]+),", r" \1,"),
        (r" [0]+([0-9]+)$", r" \1"),
        (r" [0]+([0-9]+)\(", r" \1("),
        (r" [0]+([0-9]+)\)", r" \1)"),
        (r" -[0]+([0-9]+),", r" -\1,"),
        (r" -[0]+([0-9]+)$", r" -\1"),
        (r" -[0]+([0-9]+)\(", r" -\1("),
        (r" -[0]+([0-9]+)\)", r" -\1)"),
    )
    for pattern, replacement in leading_zero_rules:
        nasm = re.sub(pattern, replacement, nasm)

    LOG.debug("End asm normalizing: '%s'", nasm)

    return nasm


def _numeric_format(operand):
    """

    :param operand:
    :type operand:
    """

    operand = operand.upper()

    negative = False
    if operand.startswith('-'):
        operand = operand[1:]
        negative = True

    if (
        operand.startswith('0X') and all(
            c in set(string.hexdigits) for c in operand[2:]
        )
    ):
        value = int(operand, 16)
    elif (
        operand.startswith('0O') and all(
            c in set(string.octdigits) for c in operand[2:]
        )
    ):
        value = int(operand, 8)
    elif (
        operand.startswith('0B') and all(c in set("01") for c in operand[2:])
    ):
        value = int(operand, 2)
    elif operand.isdigit():
        value = int(operand, 10)
    else:
        return None

    if negative:
        value = value * (-1)

    return value


def _str_to_asmdef(asm_string):
    """Parse one assembly line into an instruction definition.

    The accepted layout is::

        [address] [<label>]: assembly [@ decorators] [; comments]

    The prefix before the first ``:`` is optional and may hold a
    hexadecimal address and/or a label enclosed in ``<>``, separated by
    whitespace. Anything after a ``#`` in the statement is discarded.

    :param asm_string: Assembly line to interpret.
    :type asm_string: :class:`str`
    :return: The parsed definition; missing parts are ``None`` (the
        comment is an empty string when absent).
    :rtype: :class:`MicroprobeAsmInstructionDefinition`
    :raise MicroprobeAsmError: if the prefix before ``:`` contains
        something that is neither an address nor a label.
    """

    # TODO: Add address SUPPORT

    comment = (asm_string + ";").split(";")[1].strip()
    asm_string = asm_string.split(";")[0].strip()

    # This is a work-around for RISC-V objdump implementation.
    # It generates comments with '#' character
    if "#" in asm_string:
        asm_string = asm_string.split("#")[0].strip()

    label = None
    address = None

    asmfull = asm_string
    if asm_string.find(":") > 0:
        # Has label/Address. Split on the first colon only, so colons that
        # belong to the assembly itself are not lost.
        address_label, _, asmfull = asm_string.partition(":")

        # split() without arguments skips repeated whitespace, which would
        # otherwise produce empty elements.
        for elem in address_label.split():

            looks_hex = (
                elem.startswith("0x") or
                re.search(r"^[0-9a-fA-F]+$", elem) is not None
            )

            if looks_hex and address is None:
                try:
                    address = int(elem, 16)
                except ValueError:
                    raise MicroprobeAsmError(
                        "Unable to interpret '%s'" % asm_string
                    )
            elif (
                label is None and len(elem) > 2 and
                elem[0] == '<' and elem[-1] == '>'
            ):
                label = elem[1:-1]
            else:
                raise MicroprobeAsmError(
                    "Unable to interpret '%s'" % asm_string
                )

    asm = asmfull.split("@")[0].strip()

    # Text between the first and the second '@', if any.
    decorator = (asmfull + '@').split("@")[1].strip()
    if decorator == '':
        decorator = None

    return MicroprobeAsmInstructionDefinition(
        asm, label, address, decorator, comment
    )


# Classes

[docs]
class MicroprobeAsmInstructionDefinition(object):
    """Container for the parts of one assembly statement.

    Instances can be unpacked: iterating yields the assembly, label,
    address, decorators and comments, in that order. Two definitions are
    equal when all their attributes are equal.
    """

    def __init__(self, assembly, label, address, decorators, comments):
        """Store the parts of the statement as given.

        :param assembly: Assembly text of the instruction.
        :param label: Label attached to the statement, if any.
        :param address: Address attached to the statement, if any.
        :param decorators: Decorator text of the statement, if any.
        :param comments: Comment text of the statement.
        """
        self.assembly = assembly
        self.label = label
        self.address = address
        self.decorators = decorators
        self.comments = comments

    def __iter__(self):
        yield self.assembly
        yield self.label
        yield self.address
        yield self.decorators
        yield self.comments

    def __str__(self):
        return str(self.__dict__)

    def __repr__(self):
        return str(self.__dict__)

    def __eq__(self, other):
        # Let Python fall back to its default handling for foreign objects
        # (None, numbers, ...) instead of failing on a missing __dict__.
        if not isinstance(other, MicroprobeAsmInstructionDefinition):
            return NotImplemented
        return self.__dict__ == other.__dict__