aboutsummaryrefslogtreecommitdiffstats
path: root/capstone/suite/synctools/insn.py
diff options
context:
space:
mode:
authorAngelos Mouzakitis <a.mouzakitis@virtualopensystems.com>2023-10-10 14:33:42 +0000
committerAngelos Mouzakitis <a.mouzakitis@virtualopensystems.com>2023-10-10 14:33:42 +0000
commitaf1a266670d040d2f4083ff309d732d648afba2a (patch)
tree2fc46203448ddcc6f81546d379abfaeb323575e9 /capstone/suite/synctools/insn.py
parente02cda008591317b1625707ff8e115a4841aa889 (diff)
Add submodule dependency filesHEADmaster
Change-Id: Iaf8d18082d3991dec7c0ebbea540f092188eb4ec
Diffstat (limited to 'capstone/suite/synctools/insn.py')
-rwxr-xr-xcapstone/suite/synctools/insn.py324
1 files changed, 324 insertions, 0 deletions
diff --git a/capstone/suite/synctools/insn.py b/capstone/suite/synctools/insn.py
new file mode 100755
index 000000000..9e4612766
--- /dev/null
+++ b/capstone/suite/synctools/insn.py
@@ -0,0 +1,324 @@
+#!/usr/bin/python
+# print list of instructions LLVM inc files, for Capstone disassembler.
+# this will be put into capstone/<arch>.h
+# by Nguyen Anh Quynh, 2019
+
+import sys
+
+if len(sys.argv) == 1:
+ print("Syntax: %s <GenAsmMatcher.inc> <GenInstrInfo.inc> MappingInsn.inc" %sys.argv[0])
+ sys.exit(1)
+
+# MappingInsn.inc
+f = open(sys.argv[3])
+mapping = f.readlines()
+f.close()
+
+print("""/* Capstone Disassembly Engine, http://www.capstone-engine.org */
+/* This is auto-gen data for Capstone disassembly engine (www.capstone-engine.org) */
+/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2019 */
+""")
+
+# lib/Target/X86/X86GenAsmMatcher.inc
+# static const MatchEntry MatchTable1[] = {
+# { 0 /* aaa */, X86::AAA, Convert_NoOperands, Feature_Not64BitMode, { }, },
+
+# extract insn from GenAsmMatcher Table
+# return (arch, mnem, insn_id)
+def extract_insn(line):
+ tmp = line.split(',')
+ insn_raw = tmp[1].strip()
+ insn_mnem = tmp[0].split(' ')[3]
+ # X86 mov.s
+ if '.' in insn_mnem:
+ tmp = insn_mnem.split('.')
+ insn_mnem = tmp[0]
+ tmp = insn_raw.split('::')
+ arch = tmp[0]
+ # AArch64 -> ARM64
+ if arch.upper() == 'AArch64':
+ arch = 'ARM64'
+ return (arch, insn_mnem, tmp[1])
+
+
+
+# extract all insn lines from GenAsmMatcher
+# return arch, insn_id_list, insn_lines
+def extract_matcher(filename):
+ f = open(filename)
+ lines = f.readlines()
+ f.close()
+
+ match_count = 0
+ #insn_lines = []
+ insn_id_list = {}
+ arch = None
+ first_insn = None
+
+ pattern = None
+ # first we try to find Table1, or Table0
+ for line in lines:
+ if 'MatchEntry MatchTable0[] = {' in line.strip():
+ pattern = 'MatchEntry MatchTable0[] = {'
+ elif 'MatchEntry MatchTable1[] = {' in line.strip():
+ pattern = 'MatchEntry MatchTable1[] = {'
+ # last pattern, done
+ break
+
+ # 1st enum is register enum
+ for line in lines:
+ line = line.rstrip()
+
+ if len(line.strip()) == 0:
+ continue
+
+ if pattern in line.strip():
+ match_count += 1
+ #print(line.strip())
+ continue
+
+ line = line.strip()
+ if match_count == 1:
+ if line == '};':
+ # done with first enum
+ break
+ else:
+ _arch, mnem, insn_id = extract_insn(line)
+ if not mnem.startswith('__'):
+ if not first_insn:
+ arch, first_insn = _arch, insn_id
+ if not insn_id in insn_id_list:
+ # print("***", arch, mnem, insn_id)
+ insn_id_list[insn_id] = mnem
+ #insn_lines.append(line)
+
+ #return arch, first_insn, insn_id_list, insn_lines
+ return arch, first_insn, insn_id_list
+
+# GenAsmMatcher.inc
+#arch, first_insn, insn_id_list, match_lines = extract_matcher(sys.argv[1])
+arch, first_insn, insn_id_list = extract_matcher(sys.argv[1])
+arch = arch.upper()
+
+#for line in insn_id_list:
+# print(line)
+
+
+insn_list = []
+#{
+# X86_AAA, X86_INS_AAA,
+##ifndef CAPSTONE_DIET
+# { 0 }, { 0 }, { X86_GRP_NOT64BITMODE, 0 }, 0, 0
+##endif
+#},
+def print_entry(arch, insn_id, mnem, mapping, mnem_can_be_wrong):
+ print(arch, insn_id, mnem, mnem_can_be_wrong)
+ if not mnem_can_be_wrong:
+ insn = "%s_INS_%s" %(arch.upper(), mnem.upper())
+ if insn in insn_list:
+ return
+ print("%s," %insn)
+ insn_list.append(insn)
+ return
+
+ insn = "%s_%s" %(arch.upper(), insn_id)
+ # so mnem can be wrong, we need to verify with MappingInsn.inc
+ # first, try to find this entry in old MappingInsn.inc file
+ for i in range(len(mapping)):
+ tmp = mapping[i].split(',')
+ if tmp[0].strip() == insn:
+ insn = tmp[1].strip()
+ if insn in insn_list:
+ return
+ #print("==== get below from MappingInsn.inc file: %s" %insn)
+ print("%s," %insn)
+ insn_list.append(insn)
+ return
+
+
+# extract from GenInstrInfo.inc, because the insn id is in order
+enum_count = 0
+meet_insn = False
+
+# GenInstrInfo.inc
+f = open(sys.argv[2])
+lines = f.readlines()
+f.close()
+
+count = 0
+last_mnem = None
+
+# 1st enum is register enum
+for line in lines:
+ line = line.rstrip()
+
+ if len(line.strip()) == 0:
+ continue
+
+ if line.strip() == 'enum {':
+ enum_count += 1
+ #print(line.strip())
+ continue
+
+ line = line.strip()
+ if enum_count == 1:
+ if 'INSTRUCTION_LIST_END' in line:
+ break
+ else:
+ insn = None
+ if meet_insn:
+ # enum items
+ insn = line.split('=')[0].strip()
+ if 'CALLSTACK' in insn or 'TAILJUMP' in insn:
+ # pseudo instruction
+ insn = None
+
+ elif line.startswith(first_insn):
+ insn = line.split('=')[0].strip()
+ meet_insn = True
+
+ if insn:
+ count += 1
+ if insn == 'BSWAP16r_BAD':
+ last_mnem = 'BSWAP'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ elif insn == 'CMOVNP_Fp32':
+ last_mnem = 'FCMOVNP'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ elif insn == 'CMOVP_Fp3':
+ last_mnem = 'FCMOVP'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ elif insn == 'CMPSDrm_Int':
+ last_mnem = 'CMPSD'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ elif insn == 'MOVSX16rm16':
+ last_mnem = 'MOVSX'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ elif insn == 'MOVZX16rm16':
+ last_mnem = 'MOVZX'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ elif insn == 'ST_Fp32m':
+ last_mnem = 'FST'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ elif insn == 'CMOVNP_Fp64':
+ last_mnem = 'FCMOVNU'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ elif insn == 'CMPSDrr_Int':
+ last_mnem = 'CMPSD'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ elif insn == 'CMPSSrm_Int':
+ last_mnem = 'CMPSS'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ elif insn == 'VCMPSDrm_Int':
+ last_mnem = 'VCMPSD'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ elif insn == 'VCMPSSrm_Int':
+ last_mnem = 'VCMPSS'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ elif insn == 'VPCMOVYrrr_REV':
+ last_mnem = 'VPCMOV'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ elif insn == 'VRNDSCALESDZm':
+ last_mnem = 'VRNDSCALESD'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ elif insn == 'VRNDSCALESSZm':
+ last_mnem = 'VRNDSCALESS'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ elif insn == 'VMAXCPDZ128rm':
+ last_mnem = 'VMAXPD'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ elif insn == 'VMAXCPSZ128rm':
+ last_mnem = 'VMAXPS'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ elif insn == 'VMAXCSDZrm':
+ last_mnem = 'VMAXSD'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ elif insn == 'VMAXCSSZrm':
+ last_mnem = 'VMAXSS'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ elif insn == 'VMINCPDZ128rm':
+ last_mnem = 'VMINPD'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ elif insn == 'VMINCPSZ128rm':
+ last_mnem = 'VMINPS'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ elif insn == 'VMINCSDZrm':
+ last_mnem = 'VMINSD'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ elif insn == 'VMINCSSZrm':
+ last_mnem = 'VMINSS'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ elif insn == 'VMOV64toPQIZrm':
+ last_mnem = 'VMOVQ'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ elif insn == 'VPERMIL2PDYrr_REV':
+ last_mnem = 'VPERMILPD'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ elif insn == 'VPERMIL2PSYrr_REV':
+ last_mnem = 'VPERMILPS'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ elif insn == 'VCVTSD2SI64Zrm_Int':
+ last_mnem = 'VCVTSD2SI'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ elif insn == 'VCVTSD2SSrm_Int':
+ last_mnem = 'VCVTSD2SS'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ elif insn == 'VCVTSS2SI64Zrm_Int':
+ last_mnem = 'VCVTSS2SI'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ elif insn == 'VCVTTSD2SI64Zrm_Int':
+ last_mnem = 'VCVTTSD2SI'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ elif insn == 'VCVTTSS2SI64Zrm_Int':
+ last_mnem = 'VCVTTSS2SI'
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+
+ elif insn.startswith('VFMSUBADD'):
+ if insn[len('VFMSUBADD')].isdigit():
+ last_mnem = insn[:len('VFMSUBADD123xy')]
+ else:
+ last_mnem = insn[:len('VFMSUBADDSS')]
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+
+ elif insn.startswith('VFMADDSUB'):
+ if insn[len('VFMADDSUB')].isdigit():
+ last_mnem = insn[:len('VFMADDSUB123xy')]
+ else:
+ last_mnem = insn[:len('VFMADDSUBSS')]
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+
+ elif insn.startswith('VFMADD'):
+ if insn[len('VFMADD')].isdigit():
+ last_mnem = insn[:len('VFMADD123PD')]
+ else:
+ last_mnem = insn[:len('VFMADDPD')]
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+
+ elif insn.startswith('VFMSUB'):
+ if insn[len('VFMSUB')].isdigit():
+ last_mnem = insn[:len('VFMSUB123PD')]
+ else:
+ last_mnem = insn[:len('VFMSUBPD')]
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+
+ elif insn.startswith('VFNMADD'):
+ if insn[len('VFNMADD')].isdigit():
+ last_mnem = insn[:len('VFNMADD123xy')]
+ else:
+ last_mnem = insn[:len('VFNMADDSS')]
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+
+ elif insn.startswith('VFNMSUB'):
+ if insn[len('VFNMSUB')].isdigit():
+ last_mnem = insn[:len('VFNMSUB123xy')]
+ else:
+ last_mnem = insn[:len('VFNMSUBSS')]
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+
+ elif insn in insn_id_list:
+ # trust old mapping table
+ last_mnem = insn_id_list[insn].upper()
+ print_entry(arch.upper(), insn, last_mnem, mapping, False)
+ else:
+ # the last option when we cannot find mnem: use the last good mnem
+ print_entry(arch.upper(), insn, last_mnem, mapping, True)