diff options
author | 2023-10-10 11:40:56 +0000 | |
---|---|---|
committer | 2023-10-10 11:40:56 +0000 | |
commit | e02cda008591317b1625707ff8e115a4841aa889 (patch) | |
tree | aee302e3cf8b59ec2d32ec481be3d1afddfc8968 /scripts/decodetree.py | |
parent | cc668e6b7e0ffd8c9d130513d12053cf5eda1d3b (diff) |
Introduce Virtio-loopback epsilon release:
The Epsilon release introduces a new compatibility layer which makes the
virtio-loopback design work with QEMU and rust-vmm vhost-user backends without
requiring any changes.
Signed-off-by: Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com>
Change-Id: I52e57563e08a7d0bdc002f8e928ee61ba0c53dd9
Diffstat (limited to 'scripts/decodetree.py')
-rw-r--r-- | scripts/decodetree.py | 1424 |
1 files changed, 1424 insertions, 0 deletions
#!/usr/bin/env python3
# Copyright (c) 2018 Linaro Limited
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see <http://www.gnu.org/licenses/>.
#

#
# Generate a decoding tree from a specification file.
# See the syntax and semantics in docs/devel/decodetree.rst.
#

import io
import os
import re
import sys
import getopt

# Module-wide configuration and parse state.  main() overrides the
# configuration from the command line; the parse_* functions fill in
# the dictionaries and lists.
insnwidth = 32
bitop_width = 32
insnmask = 0xffffffff
variablewidth = False
fields = {}
arguments = {}
formats = {}
allpatterns = []
anyextern = False

translate_prefix = 'trans'
translate_scope = 'static '
input_file = ''
output_file = None
output_fd = None
insntype = 'uint32_t'
decode_function = 'decode'

# An identifier for C.
re_C_ident = '[a-zA-Z][a-zA-Z0-9_]*'

# Identifiers for Arguments, Fields, Formats and Patterns.
re_arg_ident = '&[a-zA-Z0-9_]*'
re_fld_ident = '%[a-zA-Z0-9_]*'
re_fmt_ident = '@[a-zA-Z0-9_]*'
re_pat_ident = '[a-zA-Z0-9_]*'


def error_with_file(file, lineno, *args):
    """Print an error message from file:line and args and exit.

    If an output file has already been opened it is closed and removed,
    so that no partial output is left behind.  Always raises SystemExit
    with status 1.
    """
    global output_file
    global output_fd

    prefix = ''
    if file:
        prefix += f'{file}:'
    if lineno:
        prefix += f'{lineno}:'
    if prefix:
        prefix += ' '
    print(prefix, end='error: ', file=sys.stderr)
    print(*args, file=sys.stderr)

    if output_file and output_fd:
        output_fd.close()
        os.remove(output_file)
    # sys.exit rather than the site-provided exit() builtin, which is
    # not guaranteed to exist.
    sys.exit(1)
# end error_with_file


def error(lineno, *args):
    """Report an error at LINENO of the current input file and exit."""
    error_with_file(input_file, lineno, *args)
# end error


def output(*args):
    """Write each of the string ARGS to the output stream."""
    global output_fd
    for a in args:
        output_fd.write(a)


def output_autogen():
    """Emit the 'autogenerated file' banner comment."""
    output('/* This file is autogenerated by scripts/decodetree.py.  */\n\n')


def str_indent(c):
    """Return a string with C spaces"""
    return ' ' * c


def str_fields(fields):
    """Return a string uniquely identifying FIELDS"""
    return '_'.join(sorted(fields.keys()))


def whex(val):
    """Return a hex string for val padded for insnwidth"""
    global insnwidth
    return f'0x{val:0{insnwidth // 4}x}'


def whexC(val):
    """Return a hex string for val padded for insnwidth,
       and with the proper suffix for a C constant."""
    suffix = ''
    if val >= 0x100000000:
        suffix = 'ull'
    elif val >= 0x80000000:
        suffix = 'u'
    return whex(val) + suffix


def str_match_bits(bits, mask):
    """Return a string pretty-printing BITS/MASK

    Bits outside MASK print as '.', bits inside print as '0' or '1';
    a space is inserted between bytes.
    """
    global insnwidth

    i = 1 << (insnwidth - 1)
    space = 0x01010100
    r = ''
    while i != 0:
        if i & mask:
            if i & bits:
                r += '1'
            else:
                r += '0'
        else:
            r += '.'
        if i & space:
            r += ' '
        i >>= 1
    return r


def is_pow2(x):
    """Return true iff X has at most one bit set (zero or a power of 2)."""
    return (x & (x - 1)) == 0


def ctz(x):
    """Return the number of times 2 factors into X."""
    assert x != 0
    r = 0
    while ((x >> r) & 1) == 0:
        r += 1
    return r


def is_contiguous(bits):
    """Return the shift of BITS if its set bits form one contiguous run,
       otherwise (or if BITS is zero) return -1."""
    if bits == 0:
        return -1
    shift = ctz(bits)
    if is_pow2((bits >> shift) + 1):
        return shift
    else:
        return -1


def eq_fields_for_args(flds_a, arg):
    """Return true if the field names FLDS_A exactly match argument set ARG.

    Only argument sets whose members all have the default 'int' type
    are eligible.
    """
    if len(flds_a) != len(arg.fields):
        return False
    # Only allow inference on default types
    if any(t != 'int' for t in arg.types):
        return False
    return all(k in arg.fields for k in flds_a)


def eq_fields_for_fmts(flds_a, flds_b):
    """Return true if the two field dictionaries have the same names
       mapped to equal fields of the same class."""
    if len(flds_a) != len(flds_b):
        return False
    for k, a in flds_a.items():
        if k not in flds_b:
            return False
        b = flds_b[k]
        if a.__class__ != b.__class__ or a != b:
            return False
    return True


class Field:
    """Class representing a simple instruction field"""
    def __init__(self, sign, pos, len):
        self.sign = sign
        self.pos = pos
        self.len = len
        self.mask = ((1 << len) - 1) << pos

    def __str__(self):
        if self.sign:
            s = 's'
        else:
            s = ''
        return str(self.pos) + ':' + s + str(self.len)

    def str_extract(self):
        """Return the C expression extracting this field from 'insn'."""
        global bitop_width
        s = 's' if self.sign else ''
        return f'{s}extract{bitop_width}(insn, {self.pos}, {self.len})'

    def __eq__(self, other):
        return self.sign == other.sign and self.mask == other.mask

    def __ne__(self, other):
        return not self.__eq__(other)
# end Field


class MultiField:
    """Class representing a compound instruction field"""
    def __init__(self, subs, mask):
        self.subs = subs
        self.sign = subs[0].sign
        self.mask = mask

    def __str__(self):
        return str(self.subs)

    def str_extract(self):
        """Return the C expression concatenating all of the sub-fields."""
        global bitop_width
        ret = '0'
        pos = 0
        # The last sub-field supplies the least significant bits.
        for f in reversed(self.subs):
            ext = f.str_extract()
            if pos == 0:
                ret = ext
            else:
                ret = f'deposit{bitop_width}({ret}, {pos}, {bitop_width - pos}, {ext})'
            pos += f.len
        return ret

    def __ne__(self, other):
        if len(self.subs) != len(other.subs):
            return True
        for a, b in zip(self.subs, other.subs):
            if a.__class__ != b.__class__ or a != b:
                return True
        return False

    def __eq__(self, other):
        return not self.__ne__(other)
# end MultiField


class ConstField:
    """Class representing an argument field with constant value"""
    def __init__(self, value):
        self.value = value
        self.mask = 0
        self.sign = value < 0

    def __str__(self):
        return str(self.value)

    def str_extract(self):
        return str(self.value)

    # Python 3 ignores __cmp__, so equality must be spelled out;
    # otherwise two constants with the same value compare by identity.
    def __eq__(self, other):
        return isinstance(other, ConstField) and self.value == other.value

    def __ne__(self, other):
        return not self.__eq__(other)
# end ConstField


class FunctionField:
    """Class representing a field passed through a function"""
    def __init__(self, func, base):
        self.mask = base.mask
        self.sign = base.sign
        self.base = base
        self.func = func

    def __str__(self):
        return self.func + '(' + str(self.base) + ')'

    def str_extract(self):
        return self.func + '(ctx, ' + self.base.str_extract() + ')'

    def __eq__(self, other):
        return self.func == other.func and self.base == other.base

    def __ne__(self, other):
        return not self.__eq__(other)
# end FunctionField


class ParameterField:
    """Class representing a pseudo-field read from a function"""
    def __init__(self, func):
        self.mask = 0
        self.sign = 0
        self.func = func

    def __str__(self):
        return self.func

    def str_extract(self):
        return self.func + '(ctx)'

    def __eq__(self, other):
        return self.func == other.func

    def __ne__(self, other):
        return not self.__eq__(other)
# end ParameterField


class Arguments:
    """Class representing the extracted fields of a format"""
    def __init__(self, nm, flds, types, extern):
        self.name = nm
        self.extern = extern
        self.fields = flds
        self.types = types

    def __str__(self):
        return self.name + ' ' + str(self.fields)

    def struct_name(self):
        return 'arg_' + self.name

    def output_def(self):
        """Emit the C struct typedef, unless the set is declared extern."""
        if not self.extern:
            output('typedef struct {\n')
            for (n, t) in zip(self.fields, self.types):
                output(f' {t} {n};\n')
            output('} ', self.struct_name(), ';\n\n')
# end Arguments


class General:
    """Common code between instruction formats and instruction patterns"""
    def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
        self.name = name
        self.file = input_file
        self.lineno = lineno
        self.base = base
        self.fixedbits = fixb
        self.fixedmask = fixm
        self.undefmask = udfm
        self.fieldmask = fldm
        self.fields = flds
        self.width = w

    def __str__(self):
        return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask)

    def str1(self, i):
        return str_indent(i) + self.__str__()
# end General


class Format(General):
    """Class representing an instruction format"""

    def extract_name(self):
        global decode_function
        return decode_function + '_extract_' + self.name

    def output_extract(self):
        """Emit the C function filling the argument struct from 'insn'."""
        output('static void ', self.extract_name(), '(DisasContext *ctx, ',
               self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n')
        for n, f in self.fields.items():
            output(' a->', n, ' = ', f.str_extract(), ';\n')
        output('}\n\n')
# end Format


class Pattern(General):
    """Class representing an instruction pattern"""

    def output_decl(self):
        """Emit the argument typedef and translate function prototype."""
        global translate_scope
        global translate_prefix
        output('typedef ', self.base.base.struct_name(),
               ' arg_', self.name, ';\n')
        output(translate_scope, 'bool ', translate_prefix, '_', self.name,
               '(DisasContext *ctx, arg_', self.name, ' *a);\n')

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the extraction (if not yet done), the pattern-specific
           field assignments and the call to the translate function."""
        global translate_prefix
        ind = str_indent(i)
        arg = self.base.base.name
        output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n')
        if not extracted:
            output(ind, self.base.extract_name(),
                   '(ctx, &u.f_', arg, ', insn);\n')
        for n, f in self.fields.items():
            output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n')
        output(ind, 'if (', translate_prefix, '_', self.name,
               '(ctx, &u.f_', arg, ')) return true;\n')

    # Normal patterns do not have children.
    def build_tree(self):
        return

    def prop_masks(self):
        return

    def prop_format(self):
        return

    def prop_width(self):
        return

# end Pattern


class MultiPattern(General):
    """Class representing a set of instruction patterns"""

    def __init__(self, lineno):
        self.file = input_file
        self.lineno = lineno
        self.pats = []
        self.base = None
        self.fixedbits = 0
        self.fixedmask = 0
        self.undefmask = 0
        self.width = None

    def __str__(self):
        r = 'group'
        if self.fixedbits is not None:
            r += ' ' + str_match_bits(self.fixedbits, self.fixedmask)
        return r

    def output_decl(self):
        for p in self.pats:
            p.output_decl()

    def prop_masks(self):
        """Compute the fixed/undef masks common to all of the children."""
        global insnmask

        fixedmask = insnmask
        undefmask = insnmask

        # Collect fixedmask/undefmask for all of the children.
        for p in self.pats:
            p.prop_masks()
            fixedmask &= p.fixedmask
            undefmask &= p.undefmask

        # Widen fixedmask until all fixedbits match
        repeat = True
        fixedbits = 0
        while repeat and fixedmask != 0:
            fixedbits = None
            for p in self.pats:
                thisbits = p.fixedbits & fixedmask
                if fixedbits is None:
                    fixedbits = thisbits
                elif fixedbits != thisbits:
                    # Drop the bits on which the children disagree
                    # and start over with the reduced mask.
                    fixedmask &= ~(fixedbits ^ thisbits)
                    break
            else:
                repeat = False

        self.fixedbits = fixedbits
        self.fixedmask = fixedmask
        self.undefmask = undefmask

    def build_tree(self):
        for p in self.pats:
            p.build_tree()

    def prop_format(self):
        # Recurse with prop_format, not build_tree: calling build_tree
        # here rebuilt every nested tree and never propagated formats
        # into nested groups.
        for p in self.pats:
            p.prop_format()

    def prop_width(self):
        """Compute the common width of the children; differing widths
           within one group are an error."""
        width = None
        for p in self.pats:
            p.prop_width()
            if width is None:
                width = p.width
            elif width != p.width:
                error_with_file(self.file, self.lineno,
                                'width mismatch in patterns within braces')
        self.width = width

# end MultiPattern


class IncMultiPattern(MultiPattern):
    """Class representing an overlapping set of instruction patterns"""

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit each child in order, guarded by a test of whatever fixed
           bits the enclosing context has not already matched."""
        global translate_prefix
        ind = str_indent(i)
        for p in self.pats:
            if outermask != p.fixedmask:
                innermask = p.fixedmask & ~outermask
                innerbits = p.fixedbits & ~outermask
                output(ind, f'if ((insn & {whexC(innermask)}) == {whexC(innerbits)}) {{\n')
                output(ind, f' /* {str_match_bits(p.fixedbits, p.fixedmask)} */\n')
                p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask)
                output(ind, '}\n')
            else:
                p.output_code(i, extracted, p.fixedbits, p.fixedmask)
# end IncMultiPattern


class Tree:
    """Class representing a node in a decode tree"""

    def __init__(self, fm, tm):
        self.fixedmask = fm
        self.thismask = tm
        self.subs = []
        self.base = None

    def str1(self, i):
        ind = str_indent(i)
        r = ind + whex(self.fixedmask)
        # The common format, if any, is stored in 'base'.
        if self.base:
            r += ' ' + self.base.name
        r += ' [\n'
        for (b, s) in self.subs:
            r += ind + f' {whex(b)}:\n'
            r += s.str1(i + 4) + '\n'
        r += ind + ']'
        return r

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the C switch statement selecting among the sub-nodes."""
        ind = str_indent(i)

        # If we identified all nodes below have the same format,
        # extract the fields now.
        if not extracted and self.base:
            output(ind, self.base.extract_name(),
                   '(ctx, &u.f_', self.base.base.name, ', insn);\n')
            extracted = True

        # Attempt to aid the compiler in producing compact switch statements.
        # If the bits in the mask are contiguous, extract them.
        sh = is_contiguous(self.thismask)
        if sh > 0:
            # Propagate SH down into the local functions.
            def str_switch(b, sh=sh):
                return f'(insn >> {sh}) & {b >> sh:#x}'

            def str_case(b, sh=sh):
                return hex(b >> sh)
        else:
            def str_switch(b):
                return f'insn & {whexC(b)}'

            def str_case(b):
                return whexC(b)

        output(ind, 'switch (', str_switch(self.thismask), ') {\n')
        for b, s in sorted(self.subs):
            assert (self.thismask & ~s.fixedmask) == 0
            innermask = outermask | self.thismask
            innerbits = outerbits | b
            output(ind, 'case ', str_case(b), ':\n')
            output(ind, ' /* ',
                   str_match_bits(innerbits, innermask), ' */\n')
            s.output_code(i + 4, extracted, innerbits, innermask)
            output(ind, ' break;\n')
        output(ind, '}\n')
# end Tree


class ExcMultiPattern(MultiPattern):
    """Class representing a non-overlapping set of instruction patterns"""

    def output_code(self, i, extracted, outerbits, outermask):
        # Defer everything to our decomposed Tree node
        self.tree.output_code(i, extracted, outerbits, outermask)

    @staticmethod
    def __build_tree(pats, outerbits, outermask):
        """Return the decode Tree for PATS, given the bits already matched.

        Reports an error and exits if several patterns cannot be told
        apart by any remaining fixed bit.
        """
        # Find the intersection of all remaining fixedmask.
        innermask = ~outermask & insnmask
        for i in pats:
            innermask &= i.fixedmask

        if innermask == 0:
            # Edge condition: One pattern covers the entire insnmask
            if len(pats) == 1:
                t = Tree(outermask, innermask)
                t.subs.append((0, pats[0]))
                return t

            text = 'overlapping patterns:'
            for p in pats:
                text += '\n' + p.file + ':' + str(p.lineno) + ': ' + str(p)
            error_with_file(pats[0].file, pats[0].lineno, text)

        fullmask = outermask | innermask

        # Sort each element of pats into the bin selected by the mask.
        bins = {}
        for i in pats:
            fb = i.fixedbits & innermask
            if fb in bins:
                bins[fb].append(i)
            else:
                bins[fb] = [i]

        # We must recurse if any bin has more than one element or if
        # the single element in the bin has not been fully matched.
        t = Tree(fullmask, innermask)

        for b, l in bins.items():
            s = l[0]
            if len(l) > 1 or s.fixedmask & ~fullmask != 0:
                s = ExcMultiPattern.__build_tree(l, b | outerbits, fullmask)
            t.subs.append((b, s))

        return t

    def build_tree(self):
        # Build the children's trees first, then our own.
        super().build_tree()
        self.tree = self.__build_tree(self.pats, self.fixedbits,
                                      self.fixedmask)

    @staticmethod
    def __prop_format(tree):
        """Propagate Format objects into the decode tree"""

        # Depth first search.
        for (b, s) in tree.subs:
            if isinstance(s, Tree):
                ExcMultiPattern.__prop_format(s)

        # If all entries in SUBS have the same format, then
        # propagate that into the tree.
        f = None
        for (b, s) in tree.subs:
            if f is None:
                f = s.base
                if f is None:
                    return
            if f is not s.base:
                return
        tree.base = f

    def prop_format(self):
        super().prop_format()
        self.__prop_format(self.tree)

# end ExcMultiPattern


def parse_field(lineno, name, toks):
    """Parse one instruction field from TOKS at LINENO"""
    global fields
    global insnwidth

    # A "simple" field will have only one entry;
    # a "multifield" will have several.
    subs = []
    width = 0
    func = None
    for t in toks:
        if t.startswith('!function='):
            if func:
                error(lineno, 'duplicate function')
            func = t.split('=')[1]
            continue

        if re.fullmatch('[0-9]+:s[0-9]+', t):
            # Signed field extract
            subtoks = t.split(':s')
            sign = True
        elif re.fullmatch('[0-9]+:[0-9]+', t):
            # Unsigned field extract
            subtoks = t.split(':')
            sign = False
        else:
            error(lineno, f'invalid field token "{t}"')
        po = int(subtoks[0])
        le = int(subtoks[1])
        if po + le > insnwidth:
            error(lineno, f'field {t} too large')
        f = Field(sign, po, le)
        subs.append(f)
        width += le

    if width > insnwidth:
        error(lineno, 'field too large')
    if len(subs) == 0:
        if func:
            f = ParameterField(func)
        else:
            error(lineno, 'field with no value')
    else:
        if len(subs) == 1:
            f = subs[0]
        else:
            mask = 0
            for s in subs:
                if mask & s.mask:
                    error(lineno, 'field components overlap')
                mask |= s.mask
            f = MultiField(subs, mask)
        if func:
            f = FunctionField(func, f)

    if name in fields:
        error(lineno, 'duplicate field', name)
    fields[name] = f
# end parse_field


def parse_arguments(lineno, name, toks):
    """Parse one argument set from TOKS at LINENO"""
    global arguments
    global re_C_ident
    global anyextern

    flds = []
    types = []
    extern = False
    for n in toks:
        if re.fullmatch('!extern', n):
            extern = True
            anyextern = True
            continue
        if re.fullmatch(re_C_ident + ':' + re_C_ident, n):
            (n, t) = n.split(':')
        elif re.fullmatch(re_C_ident, n):
            t = 'int'
        else:
            error(lineno, f'invalid argument set token "{n}"')
        if n in flds:
            error(lineno, f'duplicate argument "{n}"')
        flds.append(n)
        types.append(t)

    if name in arguments:
        error(lineno, 'duplicate argument set', name)
    arguments[name] = Arguments(name, flds, types, extern)
# end parse_arguments


def lookup_field(lineno, name):
    """Return the previously defined field NAME, or report an error."""
    global fields
    if name in fields:
        return fields[name]
    error(lineno, 'undefined field', name)


def add_field(lineno, flds, new_name, f):
    """Add field F to FLDS as NEW_NAME; a duplicate name is an error."""
    if new_name in flds:
        error(lineno, 'duplicate field', new_name)
    flds[new_name] = f
    return flds


def add_field_byname(lineno, flds, new_name, old_name):
    """Add the named field OLD_NAME to FLDS under the name NEW_NAME."""
    return add_field(lineno, flds, new_name, lookup_field(lineno, old_name))


def infer_argument_set(flds):
    """Return an argument set whose members are exactly FLDS, creating
       and registering a new all-'int' set if none exists."""
    global arguments
    global decode_function

    for arg in arguments.values():
        if eq_fields_for_args(flds, arg):
            return arg

    name = decode_function + str(len(arguments))
    arg = Arguments(name, flds.keys(), ['int'] * len(flds), False)
    arguments[name] = arg
    return arg


def infer_format(arg, fieldmask, flds, width):
    """Find or create a format for a pattern that names none.

    Returns (format, fields); the returned fields are the constant
    fields which the pattern itself must still assign.
    """
    global arguments
    global formats
    global decode_function

    const_flds = {}
    var_flds = {}
    for n, c in flds.items():
        # isinstance, not 'c is ConstField': an instance is never
        # identical to its class, so the identity test never matched.
        if isinstance(c, ConstField):
            const_flds[n] = c
        else:
            var_flds[n] = c

    # Look for an existing format with the same argument set and fields
    for fmt in formats.values():
        if arg and fmt.base != arg:
            continue
        if fieldmask != fmt.fieldmask:
            continue
        if width != fmt.width:
            continue
        if not eq_fields_for_fmts(flds, fmt.fields):
            continue
        # Constants already supplied by the format must not be assigned
        # again by the pattern (parse_generic rejects that).
        extra = {n: c for n, c in const_flds.items() if n not in fmt.fields}
        return (fmt, extra)

    name = decode_function + '_Fmt_' + str(len(formats))
    if not arg:
        arg = infer_argument_set(flds)

    fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds, width)
    formats[name] = fmt

    return (fmt, const_flds)
# end infer_format


def parse_generic(lineno, parent_pat, name, toks):
    """Parse one instruction format or pattern from TOKS at LINENO

    With PARENT_PAT of None the line defines a format; otherwise it
    defines a pattern which is appended to PARENT_PAT.
    """
    global fields
    global arguments
    global formats
    global allpatterns
    global re_arg_ident
    global re_fld_ident
    global re_fmt_ident
    global re_C_ident
    global insnwidth
    global insnmask
    global variablewidth

    is_format = parent_pat is None

    fixedmask = 0
    fixedbits = 0
    undefmask = 0
    width = 0
    flds = {}
    arg = None
    fmt = None
    for t in toks:
        # '&Foo' gives a format an explicit argument set.
        if re.fullmatch(re_arg_ident, t):
            tt = t[1:]
            if arg:
                error(lineno, 'multiple argument sets')
            if tt in arguments:
                arg = arguments[tt]
            else:
                error(lineno, 'undefined argument set', t)
            continue

        # '@Foo' gives a pattern an explicit format.
        if re.fullmatch(re_fmt_ident, t):
            tt = t[1:]
            if fmt:
                error(lineno, 'multiple formats')
            if tt in formats:
                fmt = formats[tt]
            else:
                error(lineno, 'undefined format', t)
            continue

        # '%Foo' imports a field.
        if re.fullmatch(re_fld_ident, t):
            tt = t[1:]
            flds = add_field_byname(lineno, flds, tt, tt)
            continue

        # 'Foo=%Bar' imports a field with a different name.
        if re.fullmatch(re_C_ident + '=' + re_fld_ident, t):
            (fname, iname) = t.split('=%')
            flds = add_field_byname(lineno, flds, fname, iname)
            continue

        # 'Foo=number' sets an argument field to a constant value
        if re.fullmatch(re_C_ident + '=[+-]?[0-9]+', t):
            (fname, value) = t.split('=')
            value = int(value)
            flds = add_field(lineno, flds, fname, ConstField(value))
            continue

        # Pattern of 0s, 1s, dots and dashes indicate required zeros,
        # required ones, or dont-cares.
        if re.fullmatch('[01.-]+', t):
            shift = len(t)
            fms = t.replace('0', '1')
            fms = fms.replace('.', '0')
            fms = fms.replace('-', '0')
            fbs = t.replace('.', '0')
            fbs = fbs.replace('-', '0')
            ubm = t.replace('1', '0')
            ubm = ubm.replace('.', '0')
            ubm = ubm.replace('-', '1')
            fms = int(fms, 2)
            fbs = int(fbs, 2)
            ubm = int(ubm, 2)
            fixedbits = (fixedbits << shift) | fbs
            fixedmask = (fixedmask << shift) | fms
            undefmask = (undefmask << shift) | ubm
        # Otherwise, fieldname:fieldwidth
        elif re.fullmatch(re_C_ident + ':s?[0-9]+', t):
            (fname, flen) = t.split(':')
            sign = False
            if flen[0] == 's':
                sign = True
                flen = flen[1:]
            shift = int(flen, 10)
            if shift + width > insnwidth:
                error(lineno, f'field {fname} exceeds insnwidth')
            f = Field(sign, insnwidth - width - shift, shift)
            flds = add_field(lineno, flds, fname, f)
            fixedbits <<= shift
            fixedmask <<= shift
            undefmask <<= shift
        else:
            error(lineno, f'invalid token "{t}"')
        width += shift

    if variablewidth and width < insnwidth and width % 8 == 0:
        # Left-justify a short definition; the bits beyond its own
        # width are recorded as undefined.
        shift = insnwidth - width
        fixedbits <<= shift
        fixedmask <<= shift
        undefmask <<= shift
        undefmask |= (1 << shift) - 1
    elif not (is_format and width == 0) and width != insnwidth:
        # We should have filled in all of the bits of the instruction.
        error(lineno, f'definition has {width} bits')

    # Do not check for fields overlapping fields; one valid usage
    # is to be able to duplicate fields via import.
    fieldmask = 0
    for f in flds.values():
        fieldmask |= f.mask

    # Fix up what we've parsed to match either a format or a pattern.
    if is_format:
        # Formats cannot reference formats.
        if fmt:
            error(lineno, 'format referencing format')
        # If an argument set is given, then there should be no fields
        # without a place to store it.
        if arg:
            for f in flds.keys():
                if f not in arg.fields:
                    error(lineno, f'field {f} not in argument set {arg.name}')
        else:
            arg = infer_argument_set(flds)
        if name in formats:
            error(lineno, 'duplicate format name', name)
        fmt = Format(name, lineno, arg, fixedbits, fixedmask,
                     undefmask, fieldmask, flds, width)
        formats[name] = fmt
    else:
        # Patterns can reference a format ...
        if fmt:
            # ... but not an argument simultaneously
            if arg:
                error(lineno, 'pattern specifies both format and argument set')
            if fixedmask & fmt.fixedmask:
                error(lineno, 'pattern fixed bits overlap format fixed bits')
            if width != fmt.width:
                error(lineno, 'pattern uses format of different width')
            fieldmask |= fmt.fieldmask
            fixedbits |= fmt.fixedbits
            fixedmask |= fmt.fixedmask
            undefmask |= fmt.undefmask
        else:
            (fmt, flds) = infer_format(arg, fieldmask, flds, width)
        arg = fmt.base
        for f in flds.keys():
            if f not in arg.fields:
                error(lineno, f'field {f} not in argument set {arg.name}')
            if f in fmt.fields.keys():
                error(lineno, f'field {f} set by format and pattern')
        for f in arg.fields:
            if f not in flds.keys() and f not in fmt.fields.keys():
                error(lineno, f'field {f} not initialized')
        pat = Pattern(name, lineno, fmt, fixedbits, fixedmask,
                      undefmask, fieldmask, flds, width)
        parent_pat.pats.append(pat)
        allpatterns.append(pat)

    # Validate the masks that we have assembled.
    if fieldmask & fixedmask:
        error(lineno, 'fieldmask overlaps fixedmask ',
              f'({whex(fieldmask)} & {whex(fixedmask)})')
    if fieldmask & undefmask:
        error(lineno, 'fieldmask overlaps undefmask ',
              f'({whex(fieldmask)} & {whex(undefmask)})')
    if fixedmask & undefmask:
        error(lineno, 'fixedmask overlaps undefmask ',
              f'({whex(fixedmask)} & {whex(undefmask)})')
    if not is_format:
        allbits = fieldmask | fixedmask | undefmask
        if allbits != insnmask:
            error(lineno, 'bits left unspecified ',
                  f'({whex(allbits ^ insnmask)})')
# end parse_generic


def parse_file(f, parent_pat):
    """Parse all of the patterns within a file"""
    global re_arg_ident
    global re_fld_ident
    global re_fmt_ident
    global re_pat_ident

    # Read all of the lines of the file.  Concatenate lines
    # ending in backslash; discard empty lines and comments.
    toks = []
    lineno = 0
    nesting = 0
    nesting_pats = []

    for line in f:
        lineno += 1

        # Expand and strip spaces, to find indent.
        line = line.rstrip()
        line = line.expandtabs()
        len1 = len(line)
        line = line.lstrip()
        len2 = len(line)

        # Discard comments
        end = line.find('#')
        if end >= 0:
            line = line[:end]

        t = line.split()
        if len(toks) != 0:
            # Next line after continuation
            toks.extend(t)
        else:
            # Allow completely blank lines.
            if len1 == 0:
                continue
            indent = len1 - len2
            # Empty line due to comment.
            if len(t) == 0:
                # Indentation must be correct, even for comment lines.
                if indent != nesting:
                    error(lineno, 'indentation ', indent, ' != ', nesting)
                continue
            start_lineno = lineno
            toks = t

        # Continuation?
        if toks[-1] == '\\':
            toks.pop()
            continue

        name = toks[0]
        del toks[0]

        # End nesting?
        if name == '}' or name == ']':
            if len(toks) != 0:
                error(start_lineno, 'extra tokens after close brace')

            # Make sure { } and [ ] nest properly.
            if (name == '}') != isinstance(parent_pat, IncMultiPattern):
                error(lineno, 'mismatched close brace')

            try:
                parent_pat = nesting_pats.pop()
            except IndexError:
                # Nothing left to close.
                error(lineno, 'extra close brace')

            nesting -= 2
            if indent != nesting:
                error(lineno, 'indentation ', indent, ' != ', nesting)

            toks = []
            continue

        # Everything else should have current indentation.
        if indent != nesting:
            error(start_lineno, 'indentation ', indent, ' != ', nesting)

        # Start nesting?
        if name == '{' or name == '[':
            if len(toks) != 0:
                error(start_lineno, 'extra tokens after open brace')

            if name == '{':
                nested_pat = IncMultiPattern(start_lineno)
            else:
                nested_pat = ExcMultiPattern(start_lineno)
            parent_pat.pats.append(nested_pat)
            nesting_pats.append(parent_pat)
            parent_pat = nested_pat

            nesting += 2
            toks = []
            continue

        # Determine the type of object needing to be parsed.
        if re.fullmatch(re_fld_ident, name):
            parse_field(start_lineno, name[1:], toks)
        elif re.fullmatch(re_arg_ident, name):
            parse_arguments(start_lineno, name[1:], toks)
        elif re.fullmatch(re_fmt_ident, name):
            parse_generic(start_lineno, None, name[1:], toks)
        elif re.fullmatch(re_pat_ident, name):
            parse_generic(start_lineno, parent_pat, name, toks)
        else:
            error(lineno, f'invalid token "{name}"')
        toks = []

    if nesting != 0:
        error(lineno, 'missing close brace')
# end parse_file


class SizeTree:
    """Class representing a node in a size decode tree"""

    def __init__(self, m, w):
        self.mask = m
        self.subs = []
        self.base = None
        self.width = w

    def str1(self, i):
        ind = str_indent(i)
        r = ind + whex(self.mask) + ' [\n'
        for (b, s) in self.subs:
            r += ind + f' {whex(b)}:\n'
            r += s.str1(i + 4) + '\n'
        r += ind + ']'
        return r

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the C switch selecting among the sub-nodes; EXTRACTED is
           the number of instruction bits loaded so far."""
        ind = str_indent(i)

        # If we need to load more bytes to test, do so now.
        if extracted < self.width:
            output(ind, f'insn = {decode_function}_load_bytes',
                   f'(ctx, insn, {extracted // 8}, {self.width // 8});\n')
            extracted = self.width

        # Attempt to aid the compiler in producing compact switch statements.
        # If the bits in the mask are contiguous, extract them.
        sh = is_contiguous(self.mask)
        if sh > 0:
            # Propagate SH down into the local functions.
            def str_switch(b, sh=sh):
                return f'(insn >> {sh}) & {b >> sh:#x}'

            def str_case(b, sh=sh):
                return hex(b >> sh)
        else:
            def str_switch(b):
                return f'insn & {whexC(b)}'

            def str_case(b):
                return whexC(b)

        output(ind, 'switch (', str_switch(self.mask), ') {\n')
        for b, s in sorted(self.subs):
            innermask = outermask | self.mask
            innerbits = outerbits | b
            output(ind, 'case ', str_case(b), ':\n')
            output(ind, ' /* ',
                   str_match_bits(innerbits, innermask), ' */\n')
            s.output_code(i + 4, extracted, innerbits, innermask)
        output(ind, '}\n')
        output(ind, 'return insn;\n')
# end SizeTree


class SizeLeaf:
    """Class representing a leaf node in a size decode tree"""

    def __init__(self, m, w):
        self.mask = m
        self.width = w

    def str1(self, i):
        return str_indent(i) + whex(self.mask)

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        global decode_function
        ind = str_indent(i)

        # If we need to load more bytes, do so now.
        if extracted < self.width:
            output(ind, f'insn = {decode_function}_load_bytes',
                   f'(ctx, insn, {extracted // 8}, {self.width // 8});\n')
            extracted = self.width
        output(ind, 'return insn;\n')
# end SizeLeaf


def build_size_tree(pats, width, outerbits, outermask):
    """Return the size decode tree for PATS, examining the byte that
       ends at bit WIDTH of the instruction."""
    global insnwidth

    # Collect the mask of bits that are fixed in this width
    innermask = 0xff << (insnwidth - width)
    innermask &= ~outermask
    # NOTE: despite its name, minwidth ends up holding the largest
    # width seen among PATS.
    minwidth = None
    onewidth = True
    for i in pats:
        innermask &= i.fixedmask
        if minwidth is None:
            minwidth = i.width
        elif minwidth != i.width:
            onewidth = False
            if minwidth < i.width:
                minwidth = i.width

    if onewidth:
        return SizeLeaf(innermask, minwidth)

    if innermask == 0:
        if width < minwidth:
            return build_size_tree(pats, width + 8, outerbits, outermask)

        pnames = []
        for p in pats:
            pnames.append(p.name + ':' + p.file + ':' + str(p.lineno))
        error_with_file(pats[0].file, pats[0].lineno,
                        f'overlapping patterns size {width}:', pnames)

    bins = {}
    for i in pats:
        fb = i.fixedbits & innermask
        if fb in bins:
            bins[fb].append(i)
        else:
            bins[fb] = [i]

    fullmask = outermask | innermask
    lens = sorted(bins.keys())
    if len(lens) == 1:
        b = lens[0]
        return build_size_tree(bins[b], width + 8, b | outerbits, fullmask)

    r = SizeTree(innermask, width)
    for b, l in bins.items():
        s = build_size_tree(l, width, b | outerbits, fullmask)
        r.subs.append((b, s))
    return r
# end build_size_tree


def prop_size(tree):
    """Propagate minimum widths up the decode size tree"""

    if isinstance(tree, SizeTree):
        smallest = None
        for (b, s) in tree.subs:
            width = prop_size(s)
            if smallest is None or smallest > width:
                smallest = width
        assert smallest >= tree.width
        tree.width = smallest
    else:
        smallest = tree.width
    return smallest
# end prop_size


def main():
    """Parse the command line and the input files, then write the decoder."""
    global arguments
    global formats
    global allpatterns
    global translate_scope
    global translate_prefix
    global output_fd
    global output_file
    global input_file
    global insnwidth
    global insntype
    global insnmask
    global decode_function
    global bitop_width
    global variablewidth
    global anyextern

    decode_scope = 'static '

    long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=',
                 'static-decode=', 'varinsnwidth=']
    try:
        (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts)
    except getopt.GetoptError as err:
        error(0, err)
    for o, a in opts:
        if o in ('-o', '--output'):
            output_file = a
        elif o == '--decode':
            decode_function = a
            decode_scope = ''
        elif o == '--static-decode':
            decode_function = a
        elif o == '--translate':
            translate_prefix = a
            translate_scope = ''
        elif o in ('-w', '--insnwidth', '--varinsnwidth'):
            if o == '--varinsnwidth':
                variablewidth = True
            insnwidth = int(a)
            if insnwidth == 16:
                insntype = 'uint16_t'
                insnmask = 0xffff
            elif insnwidth == 64:
                insntype = 'uint64_t'
                insnmask = 0xffffffffffffffff
                bitop_width = 64
            elif insnwidth != 32:
                error(0, 'cannot handle insns of width', insnwidth)
        else:
            assert False, 'unhandled option'

    if len(args) < 1:
        error(0, 'missing input file')

    toppat = ExcMultiPattern(0)

    for filename in args:
        input_file = filename
        with open(filename, 'rt', encoding='utf-8') as f:
            parse_file(f, toppat)

    # We do not want to compute masks for toppat, because those masks
    # are used as a starting point for build_tree.  For toppat, we must
    # insist that decode begins from naught.
    for i in toppat.pats:
        i.prop_masks()

    toppat.build_tree()
    toppat.prop_format()

    if variablewidth:
        for i in toppat.pats:
            i.prop_width()
        stree = build_size_tree(toppat.pats, 8, 0, 0)
        prop_size(stree)

    if output_file:
        output_fd = open(output_file, 'wt', encoding='utf-8')
    else:
        output_fd = io.TextIOWrapper(sys.stdout.buffer,
                                     encoding=sys.stdout.encoding,
                                     errors="ignore")

    output_autogen()
    for n in sorted(arguments.keys()):
        f = arguments[n]
        f.output_def()

    # A single translate function can be invoked for different patterns.
    # Make sure that the argument sets are the same, and declare the
    # function only once.
    #
    # If we're sharing formats, we're likely also sharing trans_* functions,
    # but we can't tell which ones.  Prevent issues from the compiler by
    # suppressing redundant declaration warnings.
    if anyextern:
        output("#pragma GCC diagnostic push\n",
               "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n",
               "#ifdef __clang__\n"
               "# pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n",
               "#endif\n\n")

    out_pats = {}
    for i in allpatterns:
        if i.name in out_pats:
            p = out_pats[i.name]
            if i.base.base != p.base.base:
                error(0, i.name, ' has conflicting argument sets')
        else:
            i.output_decl()
            out_pats[i.name] = i
    output('\n')

    if anyextern:
        output("#pragma GCC diagnostic pop\n\n")

    for n in sorted(formats.keys()):
        f = formats[n]
        f.output_extract()

    output(decode_scope, 'bool ', decode_function,
           '(DisasContext *ctx, ', insntype, ' insn)\n{\n')

    i4 = str_indent(4)

    if len(allpatterns) != 0:
        output(i4, 'union {\n')
        for n in sorted(arguments.keys()):
            f = arguments[n]
            output(i4, i4, f.struct_name(), ' f_', f.name, ';\n')
        output(i4, '} u;\n\n')
        toppat.output_code(4, False, 0, 0)

    output(i4, 'return false;\n')
    output('}\n')

    if variablewidth:
        output('\n', decode_scope, insntype, ' ', decode_function,
               '_load(DisasContext *ctx)\n{\n',
               ' ', insntype, ' insn = 0;\n\n')
        stree.output_code(4, 0, 0, 0)
        output('}\n')

    if output_file:
        output_fd.close()
# end main


if __name__ == '__main__':
    main()