From af1a266670d040d2f4083ff309d732d648afba2a Mon Sep 17 00:00:00 2001 From: Angelos Mouzakitis Date: Tue, 10 Oct 2023 14:33:42 +0000 Subject: Add submodule dependency files Change-Id: Iaf8d18082d3991dec7c0ebbea540f092188eb4ec --- capstone/suite/synctools/.gitignore | 9 + capstone/suite/synctools/Makefile | 14 + capstone/suite/synctools/README | 57 + .../suite/synctools/X86DisassemblerDecoderCommon.h | 483 + capstone/suite/synctools/arm64_gen_vreg.c | 38 + capstone/suite/synctools/asmwriter.py | 748 ++ capstone/suite/synctools/compare_mapping_insn.py | 38 + .../suite/synctools/disassemblertables-arch.py | 258 + capstone/suite/synctools/disassemblertables.py | 45 + capstone/suite/synctools/disassemblertables2.c | 176 + .../suite/synctools/disassemblertables_reduce.py | 50 + capstone/suite/synctools/genall-arch.sh | 70 + capstone/suite/synctools/genall-full.sh | 33 + capstone/suite/synctools/genall-reduce.sh | 28 + capstone/suite/synctools/insn.py | 324 + capstone/suite/synctools/insn3.py | 104 + capstone/suite/synctools/insn_check.py | 25 + capstone/suite/synctools/instrinfo-arch.py | 161 + capstone/suite/synctools/instrinfo.py | 115 + capstone/suite/synctools/mapping_insn-arch.py | 362 + capstone/suite/synctools/mapping_insn.py | 332 + capstone/suite/synctools/mapping_insn_name-arch.py | 104 + capstone/suite/synctools/mapping_insn_name.py | 314 + capstone/suite/synctools/mapping_insn_op-arch.py | 379 + capstone/suite/synctools/mapping_insn_op.py | 318 + capstone/suite/synctools/mapping_reg.py | 53 + capstone/suite/synctools/register.py | 48 + capstone/suite/synctools/registerinfo.py | 286 + capstone/suite/synctools/strinforeduce/Makefile | 10 + capstone/suite/synctools/strinforeduce/README | 15 + .../suite/synctools/strinforeduce/instrinfo2.py | 55 + .../synctools/strinforeduce/strinforeduce.cpp | 183 + capstone/suite/synctools/subtargetinfo.py | 53 + capstone/suite/synctools/systemoperand.py | 987 ++ capstone/suite/synctools/systemregister.py | 213 + .../suite/synctools/tablegen/AArch64/AArch64.td | 579 + .../tablegen/AArch64/AArch64CallingConvention.td | 366 + .../tablegen/AArch64/AArch64InstrAtomics.td | 426 + .../tablegen/AArch64/AArch64InstrFormats.td | 10402 ++++++++++++++++ .../synctools/tablegen/AArch64/AArch64InstrInfo.td | 6494 ++++++++++ .../tablegen/AArch64/AArch64RegisterBanks.td | 20 + .../tablegen/AArch64/AArch64RegisterInfo.td | 1113 ++ .../tablegen/AArch64/AArch64SVEInstrInfo.td | 1024 ++ .../synctools/tablegen/AArch64/AArch64SchedA53.td | 295 + .../synctools/tablegen/AArch64/AArch64SchedA57.td | 668 ++ .../tablegen/AArch64/AArch64SchedA57WriteRes.td | 544 + .../tablegen/AArch64/AArch64SchedCyclone.td | 871 ++ .../tablegen/AArch64/AArch64SchedExynosM1.td | 847 ++ .../tablegen/AArch64/AArch64SchedExynosM3.td | 860 ++ .../tablegen/AArch64/AArch64SchedFalkor.td | 119 + .../tablegen/AArch64/AArch64SchedFalkorDetails.td | 1292 ++ .../synctools/tablegen/AArch64/AArch64SchedKryo.td | 138 + .../tablegen/AArch64/AArch64SchedKryoDetails.td | 2378 ++++ .../tablegen/AArch64/AArch64SchedThunderX.td | 357 + .../tablegen/AArch64/AArch64SchedThunderX2T99.td | 1880 +++ .../synctools/tablegen/AArch64/AArch64Schedule.td | 106 + .../tablegen/AArch64/AArch64SystemOperands.td | 1332 +++ .../synctools/tablegen/AArch64/SVEInstrFormats.td | 4456 +++++++ capstone/suite/synctools/tablegen/ARM/ARM-digit.td | 1098 ++ capstone/suite/synctools/tablegen/ARM/ARM.td | 1098 ++ .../suite/synctools/tablegen/ARM/ARMCallingConv.td | 318 + .../synctools/tablegen/ARM/ARMInstrFormats.td | 2620 ++++ 
.../suite/synctools/tablegen/ARM/ARMInstrInfo.td | 6167 ++++++++++ .../suite/synctools/tablegen/ARM/ARMInstrNEON.td | 8545 +++++++++++++ .../suite/synctools/tablegen/ARM/ARMInstrThumb.td | 1707 +++ .../suite/synctools/tablegen/ARM/ARMInstrThumb2.td | 4867 ++++++++ .../suite/synctools/tablegen/ARM/ARMInstrVFP.td | 2482 ++++ .../synctools/tablegen/ARM/ARMRegisterBanks.td | 14 + .../tablegen/ARM/ARMRegisterInfo-digit.td | 481 + .../synctools/tablegen/ARM/ARMRegisterInfo.td | 481 + .../suite/synctools/tablegen/ARM/ARMSchedule.td | 428 + .../suite/synctools/tablegen/ARM/ARMScheduleA57.td | 1502 +++ .../tablegen/ARM/ARMScheduleA57WriteRes.td | 323 + .../suite/synctools/tablegen/ARM/ARMScheduleA8.td | 1075 ++ .../suite/synctools/tablegen/ARM/ARMScheduleA9.td | 2579 ++++ .../suite/synctools/tablegen/ARM/ARMScheduleM3.td | 21 + .../suite/synctools/tablegen/ARM/ARMScheduleR52.td | 928 ++ .../synctools/tablegen/ARM/ARMScheduleSwift.td | 1093 ++ .../suite/synctools/tablegen/ARM/ARMScheduleV6.td | 300 + .../synctools/tablegen/ARM/ARMSystemRegister.td | 156 + .../synctools/tablegen/PPC/P9InstrResources.td | 1420 +++ capstone/suite/synctools/tablegen/PPC/PPC.td | 480 + .../suite/synctools/tablegen/PPC/PPCCallingConv.td | 378 + .../suite/synctools/tablegen/PPC/PPCInstr64Bit.td | 1453 +++ .../synctools/tablegen/PPC/PPCInstrAltivec.td | 1507 +++ .../synctools/tablegen/PPC/PPCInstrFormats.td | 2167 ++++ .../suite/synctools/tablegen/PPC/PPCInstrHTM.td | 170 + .../suite/synctools/tablegen/PPC/PPCInstrInfo.td | 4948 ++++++++ .../suite/synctools/tablegen/PPC/PPCInstrQPX.td | 1216 ++ .../suite/synctools/tablegen/PPC/PPCInstrSPE.td | 892 ++ .../suite/synctools/tablegen/PPC/PPCInstrVSX.td | 4007 +++++++ .../synctools/tablegen/PPC/PPCRegisterInfo.td | 386 + .../suite/synctools/tablegen/PPC/PPCSchedule.td | 140 + .../suite/synctools/tablegen/PPC/PPCSchedule440.td | 608 + .../suite/synctools/tablegen/PPC/PPCScheduleA2.td | 172 + .../synctools/tablegen/PPC/PPCScheduleE500.td | 274 + .../synctools/tablegen/PPC/PPCScheduleE500mc.td | 329 + .../synctools/tablegen/PPC/PPCScheduleE5500.td | 385 + .../suite/synctools/tablegen/PPC/PPCScheduleG3.td | 80 + .../suite/synctools/tablegen/PPC/PPCScheduleG4.td | 96 + .../synctools/tablegen/PPC/PPCScheduleG4Plus.td | 112 + .../suite/synctools/tablegen/PPC/PPCScheduleG5.td | 130 + .../suite/synctools/tablegen/PPC/PPCScheduleP7.td | 397 + .../suite/synctools/tablegen/PPC/PPCScheduleP8.td | 406 + .../suite/synctools/tablegen/PPC/PPCScheduleP9.td | 400 + capstone/suite/synctools/tablegen/X86/X86.td | 1203 ++ .../suite/synctools/tablegen/X86/X86CallingConv.td | 1150 ++ .../suite/synctools/tablegen/X86/X86Capstone.td | 7 + .../suite/synctools/tablegen/X86/X86Instr3DNow.td | 111 + .../suite/synctools/tablegen/X86/X86InstrAVX512.td | 11968 +++++++++++++++++++ .../synctools/tablegen/X86/X86InstrArithmetic.td | 1350 +++ .../synctools/tablegen/X86/X86InstrCMovSetCC.td | 116 + .../synctools/tablegen/X86/X86InstrCompiler.td | 2103 ++++ .../synctools/tablegen/X86/X86InstrControl.td | 413 + .../synctools/tablegen/X86/X86InstrExtension.td | 204 + .../suite/synctools/tablegen/X86/X86InstrFMA.td | 636 + .../synctools/tablegen/X86/X86InstrFPStack.td | 748 ++ .../synctools/tablegen/X86/X86InstrFormats.td | 993 ++ .../tablegen/X86/X86InstrFragmentsSIMD.td | 1075 ++ .../suite/synctools/tablegen/X86/X86InstrInfo.td | 3582 ++++++ .../synctools/tablegen/X86/X86InstrInfo_reduce.td | 3582 ++++++ .../suite/synctools/tablegen/X86/X86InstrMMX.td | 612 + .../suite/synctools/tablegen/X86/X86InstrMPX.td | 80 + 
.../suite/synctools/tablegen/X86/X86InstrSGX.td | 30 + .../suite/synctools/tablegen/X86/X86InstrSSE.td | 8258 +++++++++++++ .../suite/synctools/tablegen/X86/X86InstrSVM.td | 63 + .../synctools/tablegen/X86/X86InstrShiftRotate.td | 1031 ++ .../suite/synctools/tablegen/X86/X86InstrSystem.td | 743 ++ .../suite/synctools/tablegen/X86/X86InstrTSX.td | 60 + .../suite/synctools/tablegen/X86/X86InstrVMX.td | 88 + .../synctools/tablegen/X86/X86InstrVecCompiler.td | 511 + .../suite/synctools/tablegen/X86/X86InstrXOP.td | 446 + .../suite/synctools/tablegen/X86/X86PfmCounters.td | 77 + .../synctools/tablegen/X86/X86RegisterBanks.td | 17 + .../synctools/tablegen/X86/X86RegisterInfo.td | 591 + .../synctools/tablegen/X86/X86SchedBroadwell.td | 1692 +++ .../synctools/tablegen/X86/X86SchedHaswell.td | 1975 +++ .../synctools/tablegen/X86/X86SchedPredicates.td | 49 + .../synctools/tablegen/X86/X86SchedSandyBridge.td | 1159 ++ .../tablegen/X86/X86SchedSkylakeClient.td | 1850 +++ .../tablegen/X86/X86SchedSkylakeServer.td | 2580 ++++ .../suite/synctools/tablegen/X86/X86Schedule.td | 661 + .../synctools/tablegen/X86/X86ScheduleAtom.td | 917 ++ .../synctools/tablegen/X86/X86ScheduleBtVer2.td | 682 ++ .../suite/synctools/tablegen/X86/X86ScheduleSLM.td | 486 + .../synctools/tablegen/X86/X86ScheduleZnver1.td | 1544 +++ .../suite/synctools/tablegen/X86/X86_reduce.td | 459 + capstone/suite/synctools/tablegen/X86/back/X86.td | 1203 ++ .../synctools/tablegen/X86/back/X86CallingConv.td | 1150 ++ .../synctools/tablegen/X86/back/X86Capstone.td | 7 + .../synctools/tablegen/X86/back/X86CapstoneFull.td | 103 + .../tablegen/X86/back/X86CapstoneReduce.td | 101 + .../synctools/tablegen/X86/back/X86Instr3DNow.td | 111 + .../synctools/tablegen/X86/back/X86InstrAVX512.td | 11968 +++++++++++++++++++ .../tablegen/X86/back/X86InstrArithmetic.td | 1338 +++ .../tablegen/X86/back/X86InstrCMovSetCC.td | 116 + .../tablegen/X86/back/X86InstrCompiler.td | 2103 ++++ .../synctools/tablegen/X86/back/X86InstrControl.td | 413 + .../tablegen/X86/back/X86InstrExtension.td | 204 + .../synctools/tablegen/X86/back/X86InstrFMA.td | 636 + .../synctools/tablegen/X86/back/X86InstrFPStack.td | 748 ++ .../synctools/tablegen/X86/back/X86InstrFormats.td | 993 ++ .../tablegen/X86/back/X86InstrFragmentsSIMD.td | 1075 ++ .../synctools/tablegen/X86/back/X86InstrInfo.td | 3580 ++++++ .../tablegen/X86/back/X86InstrInfo_reduce.td | 3572 ++++++ .../synctools/tablegen/X86/back/X86InstrMMX.td | 612 + .../synctools/tablegen/X86/back/X86InstrMPX.td | 80 + .../synctools/tablegen/X86/back/X86InstrSGX.td | 30 + .../synctools/tablegen/X86/back/X86InstrSSE.td | 8256 +++++++++++++ .../synctools/tablegen/X86/back/X86InstrSVM.td | 63 + .../tablegen/X86/back/X86InstrShiftRotate.td | 1031 ++ .../synctools/tablegen/X86/back/X86InstrSystem.td | 743 ++ .../synctools/tablegen/X86/back/X86InstrTSX.td | 60 + .../synctools/tablegen/X86/back/X86InstrVMX.td | 88 + .../tablegen/X86/back/X86InstrVecCompiler.td | 511 + .../synctools/tablegen/X86/back/X86InstrXOP.td | 446 + .../synctools/tablegen/X86/back/X86PfmCounters.td | 77 + .../tablegen/X86/back/X86RegisterBanks.td | 17 + .../synctools/tablegen/X86/back/X86RegisterInfo.td | 591 + .../tablegen/X86/back/X86SchedBroadwell.td | 1692 +++ .../synctools/tablegen/X86/back/X86SchedHaswell.td | 1975 +++ .../tablegen/X86/back/X86SchedPredicates.td | 49 + .../tablegen/X86/back/X86SchedSandyBridge.td | 1159 ++ .../tablegen/X86/back/X86SchedSkylakeClient.td | 1850 +++ .../tablegen/X86/back/X86SchedSkylakeServer.td | 2580 ++++ 
.../synctools/tablegen/X86/back/X86Schedule.td | 661 + .../synctools/tablegen/X86/back/X86ScheduleAtom.td | 917 ++ .../tablegen/X86/back/X86ScheduleBtVer2.td | 682 ++ .../synctools/tablegen/X86/back/X86ScheduleSLM.td | 486 + .../tablegen/X86/back/X86ScheduleZnver1.td | 1544 +++ .../synctools/tablegen/X86/back/X86_reduce.td | 459 + .../suite/synctools/tablegen/gen-tablegen-arch.sh | 45 + .../suite/synctools/tablegen/gen-tablegen-full.sh | 32 + .../synctools/tablegen/gen-tablegen-reduce.sh | 28 + capstone/suite/synctools/tablegen/gen-tablegen.sh | 47 + .../include/llvm/CodeGen/SDNodeProperties.td | 34 + .../tablegen/include/llvm/CodeGen/ValueTypes.td | 169 + .../tablegen/include/llvm/IR/Attributes.td | 239 + .../tablegen/include/llvm/IR/DebugInfoFlags.def | 64 + .../tablegen/include/llvm/IR/Instruction.def | 231 + .../tablegen/include/llvm/IR/Intrinsics.td | 1010 ++ .../tablegen/include/llvm/IR/IntrinsicsAArch64.td | 669 ++ .../tablegen/include/llvm/IR/IntrinsicsAMDGPU.td | 1340 +++ .../tablegen/include/llvm/IR/IntrinsicsARM.td | 770 ++ .../tablegen/include/llvm/IR/IntrinsicsBPF.td | 24 + .../tablegen/include/llvm/IR/IntrinsicsHexagon.td | 10975 +++++++++++++++++ .../tablegen/include/llvm/IR/IntrinsicsMips.td | 1771 +++ .../tablegen/include/llvm/IR/IntrinsicsNVVM.td | 4047 +++++++ .../tablegen/include/llvm/IR/IntrinsicsPowerPC.td | 1164 ++ .../tablegen/include/llvm/IR/IntrinsicsSystemZ.td | 431 + .../include/llvm/IR/IntrinsicsWebAssembly.td | 67 + .../tablegen/include/llvm/IR/IntrinsicsX86.td | 5215 ++++++++ .../tablegen/include/llvm/IR/IntrinsicsXCore.td | 121 + .../tablegen/include/llvm/IR/Metadata.def | 126 + .../tablegen/include/llvm/IR/RuntimeLibcalls.def | 527 + .../synctools/tablegen/include/llvm/IR/Value.def | 117 + .../include/llvm/TableGen/SearchableTable.td | 136 + .../tablegen/include/llvm/Target/GenericOpcodes.td | 672 ++ .../include/llvm/Target/GlobalISel/RegisterBank.td | 16 + .../llvm/Target/GlobalISel/SelectionDAGCompat.td | 131 + .../include/llvm/Target/GlobalISel/Target.td | 61 + .../tablegen/include/llvm/Target/Target.td | 1556 +++ .../include/llvm/Target/TargetCallingConv.td | 187 + .../include/llvm/Target/TargetInstrPredicate.td | 197 + .../include/llvm/Target/TargetItinerary.td | 152 + .../tablegen/include/llvm/Target/TargetSchedule.td | 553 + .../include/llvm/Target/TargetSelectionDAG.td | 1335 +++ 227 files changed, 250132 insertions(+) create mode 100644 capstone/suite/synctools/.gitignore create mode 100644 capstone/suite/synctools/Makefile create mode 100644 capstone/suite/synctools/README create mode 100644 capstone/suite/synctools/X86DisassemblerDecoderCommon.h create mode 100644 capstone/suite/synctools/arm64_gen_vreg.c create mode 100755 capstone/suite/synctools/asmwriter.py create mode 100755 capstone/suite/synctools/compare_mapping_insn.py create mode 100755 capstone/suite/synctools/disassemblertables-arch.py create mode 100755 capstone/suite/synctools/disassemblertables.py create mode 100644 capstone/suite/synctools/disassemblertables2.c create mode 100755 capstone/suite/synctools/disassemblertables_reduce.py create mode 100755 capstone/suite/synctools/genall-arch.sh create mode 100755 capstone/suite/synctools/genall-full.sh create mode 100755 capstone/suite/synctools/genall-reduce.sh create mode 100755 capstone/suite/synctools/insn.py create mode 100755 capstone/suite/synctools/insn3.py create mode 100755 capstone/suite/synctools/insn_check.py create mode 100755 capstone/suite/synctools/instrinfo-arch.py create mode 100755 
capstone/suite/synctools/instrinfo.py create mode 100755 capstone/suite/synctools/mapping_insn-arch.py create mode 100755 capstone/suite/synctools/mapping_insn.py create mode 100755 capstone/suite/synctools/mapping_insn_name-arch.py create mode 100755 capstone/suite/synctools/mapping_insn_name.py create mode 100755 capstone/suite/synctools/mapping_insn_op-arch.py create mode 100755 capstone/suite/synctools/mapping_insn_op.py create mode 100755 capstone/suite/synctools/mapping_reg.py create mode 100755 capstone/suite/synctools/register.py create mode 100755 capstone/suite/synctools/registerinfo.py create mode 100644 capstone/suite/synctools/strinforeduce/Makefile create mode 100644 capstone/suite/synctools/strinforeduce/README create mode 100755 capstone/suite/synctools/strinforeduce/instrinfo2.py create mode 100644 capstone/suite/synctools/strinforeduce/strinforeduce.cpp create mode 100755 capstone/suite/synctools/subtargetinfo.py create mode 100755 capstone/suite/synctools/systemoperand.py create mode 100755 capstone/suite/synctools/systemregister.py create mode 100644 capstone/suite/synctools/tablegen/AArch64/AArch64.td create mode 100644 capstone/suite/synctools/tablegen/AArch64/AArch64CallingConvention.td create mode 100644 capstone/suite/synctools/tablegen/AArch64/AArch64InstrAtomics.td create mode 100644 capstone/suite/synctools/tablegen/AArch64/AArch64InstrFormats.td create mode 100644 capstone/suite/synctools/tablegen/AArch64/AArch64InstrInfo.td create mode 100644 capstone/suite/synctools/tablegen/AArch64/AArch64RegisterBanks.td create mode 100644 capstone/suite/synctools/tablegen/AArch64/AArch64RegisterInfo.td create mode 100644 capstone/suite/synctools/tablegen/AArch64/AArch64SVEInstrInfo.td create mode 100644 capstone/suite/synctools/tablegen/AArch64/AArch64SchedA53.td create mode 100644 capstone/suite/synctools/tablegen/AArch64/AArch64SchedA57.td create mode 100644 capstone/suite/synctools/tablegen/AArch64/AArch64SchedA57WriteRes.td create mode 100644 capstone/suite/synctools/tablegen/AArch64/AArch64SchedCyclone.td create mode 100644 capstone/suite/synctools/tablegen/AArch64/AArch64SchedExynosM1.td create mode 100644 capstone/suite/synctools/tablegen/AArch64/AArch64SchedExynosM3.td create mode 100644 capstone/suite/synctools/tablegen/AArch64/AArch64SchedFalkor.td create mode 100644 capstone/suite/synctools/tablegen/AArch64/AArch64SchedFalkorDetails.td create mode 100644 capstone/suite/synctools/tablegen/AArch64/AArch64SchedKryo.td create mode 100644 capstone/suite/synctools/tablegen/AArch64/AArch64SchedKryoDetails.td create mode 100644 capstone/suite/synctools/tablegen/AArch64/AArch64SchedThunderX.td create mode 100644 capstone/suite/synctools/tablegen/AArch64/AArch64SchedThunderX2T99.td create mode 100644 capstone/suite/synctools/tablegen/AArch64/AArch64Schedule.td create mode 100644 capstone/suite/synctools/tablegen/AArch64/AArch64SystemOperands.td create mode 100644 capstone/suite/synctools/tablegen/AArch64/SVEInstrFormats.td create mode 100644 capstone/suite/synctools/tablegen/ARM/ARM-digit.td create mode 100644 capstone/suite/synctools/tablegen/ARM/ARM.td create mode 100644 capstone/suite/synctools/tablegen/ARM/ARMCallingConv.td create mode 100644 capstone/suite/synctools/tablegen/ARM/ARMInstrFormats.td create mode 100644 capstone/suite/synctools/tablegen/ARM/ARMInstrInfo.td create mode 100644 capstone/suite/synctools/tablegen/ARM/ARMInstrNEON.td create mode 100644 capstone/suite/synctools/tablegen/ARM/ARMInstrThumb.td create mode 100644 
capstone/suite/synctools/tablegen/ARM/ARMInstrThumb2.td create mode 100644 capstone/suite/synctools/tablegen/ARM/ARMInstrVFP.td create mode 100644 capstone/suite/synctools/tablegen/ARM/ARMRegisterBanks.td create mode 100644 capstone/suite/synctools/tablegen/ARM/ARMRegisterInfo-digit.td create mode 100644 capstone/suite/synctools/tablegen/ARM/ARMRegisterInfo.td create mode 100644 capstone/suite/synctools/tablegen/ARM/ARMSchedule.td create mode 100644 capstone/suite/synctools/tablegen/ARM/ARMScheduleA57.td create mode 100644 capstone/suite/synctools/tablegen/ARM/ARMScheduleA57WriteRes.td create mode 100644 capstone/suite/synctools/tablegen/ARM/ARMScheduleA8.td create mode 100644 capstone/suite/synctools/tablegen/ARM/ARMScheduleA9.td create mode 100644 capstone/suite/synctools/tablegen/ARM/ARMScheduleM3.td create mode 100644 capstone/suite/synctools/tablegen/ARM/ARMScheduleR52.td create mode 100644 capstone/suite/synctools/tablegen/ARM/ARMScheduleSwift.td create mode 100644 capstone/suite/synctools/tablegen/ARM/ARMScheduleV6.td create mode 100644 capstone/suite/synctools/tablegen/ARM/ARMSystemRegister.td create mode 100644 capstone/suite/synctools/tablegen/PPC/P9InstrResources.td create mode 100644 capstone/suite/synctools/tablegen/PPC/PPC.td create mode 100644 capstone/suite/synctools/tablegen/PPC/PPCCallingConv.td create mode 100644 capstone/suite/synctools/tablegen/PPC/PPCInstr64Bit.td create mode 100644 capstone/suite/synctools/tablegen/PPC/PPCInstrAltivec.td create mode 100644 capstone/suite/synctools/tablegen/PPC/PPCInstrFormats.td create mode 100644 capstone/suite/synctools/tablegen/PPC/PPCInstrHTM.td create mode 100644 capstone/suite/synctools/tablegen/PPC/PPCInstrInfo.td create mode 100644 capstone/suite/synctools/tablegen/PPC/PPCInstrQPX.td create mode 100644 capstone/suite/synctools/tablegen/PPC/PPCInstrSPE.td create mode 100644 capstone/suite/synctools/tablegen/PPC/PPCInstrVSX.td create mode 100644 capstone/suite/synctools/tablegen/PPC/PPCRegisterInfo.td create mode 100644 capstone/suite/synctools/tablegen/PPC/PPCSchedule.td create mode 100644 capstone/suite/synctools/tablegen/PPC/PPCSchedule440.td create mode 100644 capstone/suite/synctools/tablegen/PPC/PPCScheduleA2.td create mode 100644 capstone/suite/synctools/tablegen/PPC/PPCScheduleE500.td create mode 100644 capstone/suite/synctools/tablegen/PPC/PPCScheduleE500mc.td create mode 100644 capstone/suite/synctools/tablegen/PPC/PPCScheduleE5500.td create mode 100644 capstone/suite/synctools/tablegen/PPC/PPCScheduleG3.td create mode 100644 capstone/suite/synctools/tablegen/PPC/PPCScheduleG4.td create mode 100644 capstone/suite/synctools/tablegen/PPC/PPCScheduleG4Plus.td create mode 100644 capstone/suite/synctools/tablegen/PPC/PPCScheduleG5.td create mode 100644 capstone/suite/synctools/tablegen/PPC/PPCScheduleP7.td create mode 100644 capstone/suite/synctools/tablegen/PPC/PPCScheduleP8.td create mode 100644 capstone/suite/synctools/tablegen/PPC/PPCScheduleP9.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86CallingConv.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86Capstone.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86Instr3DNow.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86InstrAVX512.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86InstrArithmetic.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86InstrCMovSetCC.td create mode 100644 
capstone/suite/synctools/tablegen/X86/X86InstrCompiler.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86InstrControl.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86InstrExtension.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86InstrFMA.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86InstrFPStack.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86InstrFormats.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86InstrFragmentsSIMD.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86InstrInfo.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86InstrInfo_reduce.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86InstrMMX.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86InstrMPX.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86InstrSGX.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86InstrSSE.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86InstrSVM.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86InstrShiftRotate.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86InstrSystem.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86InstrTSX.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86InstrVMX.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86InstrVecCompiler.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86InstrXOP.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86PfmCounters.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86RegisterBanks.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86RegisterInfo.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86SchedBroadwell.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86SchedHaswell.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86SchedPredicates.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86SchedSandyBridge.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86SchedSkylakeClient.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86SchedSkylakeServer.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86Schedule.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86ScheduleAtom.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86ScheduleBtVer2.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86ScheduleSLM.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86ScheduleZnver1.td create mode 100644 capstone/suite/synctools/tablegen/X86/X86_reduce.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86CallingConv.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86Capstone.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86CapstoneFull.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86CapstoneReduce.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86Instr3DNow.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86InstrAVX512.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86InstrArithmetic.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86InstrCMovSetCC.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86InstrCompiler.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86InstrControl.td create mode 100644 
capstone/suite/synctools/tablegen/X86/back/X86InstrExtension.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86InstrFMA.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86InstrFPStack.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86InstrFormats.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86InstrFragmentsSIMD.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86InstrInfo.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86InstrInfo_reduce.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86InstrMMX.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86InstrMPX.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86InstrSGX.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86InstrSSE.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86InstrSVM.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86InstrShiftRotate.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86InstrSystem.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86InstrTSX.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86InstrVMX.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86InstrVecCompiler.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86InstrXOP.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86PfmCounters.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86RegisterBanks.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86RegisterInfo.td create mode 100755 capstone/suite/synctools/tablegen/X86/back/X86SchedBroadwell.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86SchedHaswell.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86SchedPredicates.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86SchedSandyBridge.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86SchedSkylakeClient.td create mode 100755 capstone/suite/synctools/tablegen/X86/back/X86SchedSkylakeServer.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86Schedule.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86ScheduleAtom.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86ScheduleBtVer2.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86ScheduleSLM.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86ScheduleZnver1.td create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86_reduce.td create mode 100755 capstone/suite/synctools/tablegen/gen-tablegen-arch.sh create mode 100755 capstone/suite/synctools/tablegen/gen-tablegen-full.sh create mode 100755 capstone/suite/synctools/tablegen/gen-tablegen-reduce.sh create mode 100755 capstone/suite/synctools/tablegen/gen-tablegen.sh create mode 100644 capstone/suite/synctools/tablegen/include/llvm/CodeGen/SDNodeProperties.td create mode 100644 capstone/suite/synctools/tablegen/include/llvm/CodeGen/ValueTypes.td create mode 100644 capstone/suite/synctools/tablegen/include/llvm/IR/Attributes.td create mode 100644 capstone/suite/synctools/tablegen/include/llvm/IR/DebugInfoFlags.def create mode 100644 capstone/suite/synctools/tablegen/include/llvm/IR/Instruction.def create mode 100644 capstone/suite/synctools/tablegen/include/llvm/IR/Intrinsics.td create mode 100644 capstone/suite/synctools/tablegen/include/llvm/IR/IntrinsicsAArch64.td 
create mode 100644 capstone/suite/synctools/tablegen/include/llvm/IR/IntrinsicsAMDGPU.td create mode 100644 capstone/suite/synctools/tablegen/include/llvm/IR/IntrinsicsARM.td create mode 100644 capstone/suite/synctools/tablegen/include/llvm/IR/IntrinsicsBPF.td create mode 100644 capstone/suite/synctools/tablegen/include/llvm/IR/IntrinsicsHexagon.td create mode 100644 capstone/suite/synctools/tablegen/include/llvm/IR/IntrinsicsMips.td create mode 100644 capstone/suite/synctools/tablegen/include/llvm/IR/IntrinsicsNVVM.td create mode 100644 capstone/suite/synctools/tablegen/include/llvm/IR/IntrinsicsPowerPC.td create mode 100644 capstone/suite/synctools/tablegen/include/llvm/IR/IntrinsicsSystemZ.td create mode 100644 capstone/suite/synctools/tablegen/include/llvm/IR/IntrinsicsWebAssembly.td create mode 100644 capstone/suite/synctools/tablegen/include/llvm/IR/IntrinsicsX86.td create mode 100644 capstone/suite/synctools/tablegen/include/llvm/IR/IntrinsicsXCore.td create mode 100644 capstone/suite/synctools/tablegen/include/llvm/IR/Metadata.def create mode 100644 capstone/suite/synctools/tablegen/include/llvm/IR/RuntimeLibcalls.def create mode 100644 capstone/suite/synctools/tablegen/include/llvm/IR/Value.def create mode 100644 capstone/suite/synctools/tablegen/include/llvm/TableGen/SearchableTable.td create mode 100644 capstone/suite/synctools/tablegen/include/llvm/Target/GenericOpcodes.td create mode 100644 capstone/suite/synctools/tablegen/include/llvm/Target/GlobalISel/RegisterBank.td create mode 100644 capstone/suite/synctools/tablegen/include/llvm/Target/GlobalISel/SelectionDAGCompat.td create mode 100644 capstone/suite/synctools/tablegen/include/llvm/Target/GlobalISel/Target.td create mode 100644 capstone/suite/synctools/tablegen/include/llvm/Target/Target.td create mode 100644 capstone/suite/synctools/tablegen/include/llvm/Target/TargetCallingConv.td create mode 100644 capstone/suite/synctools/tablegen/include/llvm/Target/TargetInstrPredicate.td create mode 100644 capstone/suite/synctools/tablegen/include/llvm/Target/TargetItinerary.td create mode 100644 capstone/suite/synctools/tablegen/include/llvm/Target/TargetSchedule.td create mode 100644 capstone/suite/synctools/tablegen/include/llvm/Target/TargetSelectionDAG.td (limited to 'capstone/suite/synctools') diff --git a/capstone/suite/synctools/.gitignore b/capstone/suite/synctools/.gitignore new file mode 100644 index 000000000..44f61368a --- /dev/null +++ b/capstone/suite/synctools/.gitignore @@ -0,0 +1,9 @@ +*.inc + +disassemblertables_reduce2 +disassemblertables2 +ppc_gen_reg +arm64_gen_vreg + +strinforeduce/strinforeduce +strinforeduce/strinforeduce_reduce diff --git a/capstone/suite/synctools/Makefile b/capstone/suite/synctools/Makefile new file mode 100644 index 000000000..fd4d605c1 --- /dev/null +++ b/capstone/suite/synctools/Makefile @@ -0,0 +1,14 @@ +all: + +x86: + # compile disassembler2 with X86GenDisassemblerTables2.inc + $(CC) disassemblertables2.c -o disassemblertables2 + + # compile disassembler2 with X86GenDisassemblerTables_reduce2.inc + $(CC) -DCAPSTONE_X86_REDUCE disassemblertables2.c -o disassemblertables_reduce2 + +arm64: + $(CC) arm64_gen_vreg.c -o arm64_gen_vreg + +clean: + $(RM) disassemblertables2 disassemblertables_reduce2 arm64_gen_vreg diff --git a/capstone/suite/synctools/README b/capstone/suite/synctools/README new file mode 100644 index 000000000..61a0ec45d --- /dev/null +++ b/capstone/suite/synctools/README @@ -0,0 +1,57 @@ +Sync tools to port LLVM inc files to Capstone. + +For X86 +======= +0. 
cd tablegen/, then follow its README. + +1. Run genall-{full|reduce}.sh, then copy generated .inc files to arch/X86/ directory + + $ ./genall-full.sh tablegen ~/projects/tmp/capstone777.git/arch/X86 + $ ./genall-reduce.sh tablegen ~/projects/tmp/capstone777.git/arch/X86 + +2. Run disassemblertables2 & disassemblertables_reduce2 to generate optimized (index table) X86GenDisassemblerTables2.inc & X86GenDisassemblerTables_reduce2.inc + + # use 2x name to avoid overwriting X86GenDisassemblerTables2.inc & X86GenDisassemblerTables_reduce2.inc + + $ make + $ ./disassemblertables2 > X86GenDisassemblerTables2x.inc + $ ./disassemblertables_reduce2 > X86GenDisassemblerTables_reduce2x.inc + +3. cd strinforeduce/, and follow its README. + +4. Copy all generated .inc files to arch/X86/ + + $ cp X86GenAsmWriter_reduce.inc ~/projects/capstone.git/arch/X86 + $ cp X86GenAsmWriter1_reduce.inc ~/projects/capstone.git/arch/X86 + $ cp X86MappingInsnName_reduce.inc ~/projects/capstone.git/arch/X86 + $ cp X86MappingInsn_reduce.inc ~/projects/capstone.git/arch/X86 + $ cp X86MappingInsnOp_reduce.inc ~/projects/capstone.git/arch/X86 + $ cp X86GenInstrInfo_reduce.inc ~/projects/capstone.git/arch/X86 + $ cp X86GenDisassemblerTables_reduce.inc ~/projects/capstone.git/arch/X86 + $ cp X86GenDisassemblerTables_reduce2x.inc ~/projects/capstone.git/arch/X86/X86GenDisassemblerTables_reduce2.inc + + $ cp X86GenAsmWriter.inc ~/projects/capstone.git/arch/X86 + $ cp X86GenAsmWriter1.inc ~/projects/capstone.git/arch/X86 + $ cp X86MappingInsnName.inc ~/projects/capstone.git/arch/X86 + $ cp X86MappingInsn.inc ~/projects/capstone.git/arch/X86 + $ cp X86MappingInsnOp.inc ~/projects/capstone.git/arch/X86 + $ cp X86GenInstrInfo.inc ~/projects/capstone.git/arch/X86 + $ cp X86GenDisassemblerTables.inc ~/projects/capstone.git/arch/X86 + $ cp X86GenDisassemblerTables2x.inc ~/projects/capstone.git/arch/X86/X86GenDisassemblerTables2.inc + +5. Copy insn_list.txt to include/capstone/ + +For non-X86 +=========== + +0. cd tablegen/, then follow its README. + +1. Run gen-tablegen-arch.sh + +2. Run genall-arch.sh + + ./genall-arch.sh tablegen ~/projects/capstone.git/arch/ARM ARM + ./genall-arch.sh tablegen ~/projects/capstone.git/arch/AArch64 AArch64 + ./genall-arch.sh tablegen ~/projects/capstone.git/arch/PowerPC PowerPC + +3. Copy generated *.inc files to arch/<ARCH>/ diff --git a/capstone/suite/synctools/X86DisassemblerDecoderCommon.h b/capstone/suite/synctools/X86DisassemblerDecoderCommon.h new file mode 100644 index 000000000..edf68aab8 --- /dev/null +++ b/capstone/suite/synctools/X86DisassemblerDecoderCommon.h @@ -0,0 +1,483 @@ +/*===-- X86DisassemblerDecoderCommon.h - Disassembler decoder -----*- C -*-===* + * + * The LLVM Compiler Infrastructure + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===* + * + * This file is part of the X86 Disassembler. + * It contains common definitions used by both the disassembler and the table + * generator. + * Documentation for the disassembler can be found in X86Disassembler.h. + * + *===----------------------------------------------------------------------===*/ + +/* Capstone Disassembly Engine */ +/* By Nguyen Anh Quynh , 2013-2019 */ + +/* + * This header file provides those definitions that need to be shared between + * the decoder and the table generator in a C-friendly manner.
+ */ + +#ifndef CS_X86_DISASSEMBLERDECODERCOMMON_H +#define CS_X86_DISASSEMBLERDECODERCOMMON_H + +#define INSTRUCTIONS_SYM x86DisassemblerInstrSpecifiers +#define CONTEXTS_SYM x86DisassemblerContexts +#define ONEBYTE_SYM x86DisassemblerOneByteOpcodes +#define TWOBYTE_SYM x86DisassemblerTwoByteOpcodes +#define THREEBYTE38_SYM x86DisassemblerThreeByte38Opcodes +#define THREEBYTE3A_SYM x86DisassemblerThreeByte3AOpcodes +#define XOP8_MAP_SYM x86DisassemblerXOP8Opcodes +#define XOP9_MAP_SYM x86DisassemblerXOP9Opcodes +#define XOPA_MAP_SYM x86DisassemblerXOPAOpcodes +#define THREEDNOW_MAP_SYM x86Disassembler3DNowOpcodes + + +/* + * Attributes of an instruction that must be known before the opcode can be + * processed correctly. Most of these indicate the presence of particular + * prefixes, but ATTR_64BIT is simply an attribute of the decoding context. + */ +#define ATTRIBUTE_BITS \ + ENUM_ENTRY(ATTR_NONE, 0x00) \ + ENUM_ENTRY(ATTR_64BIT, (0x1 << 0)) \ + ENUM_ENTRY(ATTR_XS, (0x1 << 1)) \ + ENUM_ENTRY(ATTR_XD, (0x1 << 2)) \ + ENUM_ENTRY(ATTR_REXW, (0x1 << 3)) \ + ENUM_ENTRY(ATTR_OPSIZE, (0x1 << 4)) \ + ENUM_ENTRY(ATTR_ADSIZE, (0x1 << 5)) \ + ENUM_ENTRY(ATTR_VEX, (0x1 << 6)) \ + ENUM_ENTRY(ATTR_VEXL, (0x1 << 7)) \ + ENUM_ENTRY(ATTR_EVEX, (0x1 << 8)) \ + ENUM_ENTRY(ATTR_EVEXL, (0x1 << 9)) \ + ENUM_ENTRY(ATTR_EVEXL2, (0x1 << 10)) \ + ENUM_ENTRY(ATTR_EVEXK, (0x1 << 11)) \ + ENUM_ENTRY(ATTR_EVEXKZ, (0x1 << 12)) \ + ENUM_ENTRY(ATTR_EVEXB, (0x1 << 13)) + +#define ENUM_ENTRY(n, v) n = v, +enum attributeBits { + ATTRIBUTE_BITS + ATTR_max +}; +#undef ENUM_ENTRY + +/* + * Combinations of the above attributes that are relevant to instruction + * decode. Although other combinations are possible, they can be reduced to + * these without affecting the ultimately decoded instruction. 
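 + * For example, an instruction seen in 64-bit mode with both a REX.W prefix and an 0x66 operand-size prefix falls into the single context IC_64BIT_REXW_OPSIZE below, because the REX.W-based contexts override plain IC_OPSIZE.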
+ */ + +// Class name Rank Rationale for rank assignment +#define INSTRUCTION_CONTEXTS \ + ENUM_ENTRY(IC, 0, "says nothing about the instruction") \ + ENUM_ENTRY(IC_64BIT, 1, "says the instruction applies in " \ + "64-bit mode but no more") \ + ENUM_ENTRY(IC_OPSIZE, 3, "requires an OPSIZE prefix, so " \ + "operands change width") \ + ENUM_ENTRY(IC_ADSIZE, 3, "requires an ADSIZE prefix, so " \ + "operands change width") \ + ENUM_ENTRY(IC_OPSIZE_ADSIZE, 4, "requires ADSIZE and OPSIZE prefixes") \ + ENUM_ENTRY(IC_XD, 2, "may say something about the opcode " \ + "but not the operands") \ + ENUM_ENTRY(IC_XS, 2, "may say something about the opcode " \ + "but not the operands") \ + ENUM_ENTRY(IC_XD_OPSIZE, 3, "requires an OPSIZE prefix, so " \ + "operands change width") \ + ENUM_ENTRY(IC_XS_OPSIZE, 3, "requires an OPSIZE prefix, so " \ + "operands change width") \ + ENUM_ENTRY(IC_XD_ADSIZE, 3, "requires an ADSIZE prefix, so " \ + "operands change width") \ + ENUM_ENTRY(IC_XS_ADSIZE, 3, "requires an ADSIZE prefix, so " \ + "operands change width") \ + ENUM_ENTRY(IC_64BIT_REXW, 5, "requires a REX.W prefix, so operands "\ + "change width; overrides IC_OPSIZE") \ + ENUM_ENTRY(IC_64BIT_REXW_ADSIZE, 6, "requires a REX.W prefix and 0x67 " \ + "prefix") \ + ENUM_ENTRY(IC_64BIT_OPSIZE, 3, "Just as meaningful as IC_OPSIZE") \ + ENUM_ENTRY(IC_64BIT_ADSIZE, 3, "Just as meaningful as IC_ADSIZE") \ + ENUM_ENTRY(IC_64BIT_OPSIZE_ADSIZE, 4, "Just as meaningful as IC_OPSIZE/" \ + "IC_ADSIZE") \ + ENUM_ENTRY(IC_64BIT_XD, 6, "XD instructions are SSE; REX.W is " \ + "secondary") \ + ENUM_ENTRY(IC_64BIT_XS, 6, "Just as meaningful as IC_64BIT_XD") \ + ENUM_ENTRY(IC_64BIT_XD_OPSIZE, 3, "Just as meaningful as IC_XD_OPSIZE") \ + ENUM_ENTRY(IC_64BIT_XS_OPSIZE, 3, "Just as meaningful as IC_XS_OPSIZE") \ + ENUM_ENTRY(IC_64BIT_XD_ADSIZE, 3, "Just as meaningful as IC_XD_ADSIZE") \ + ENUM_ENTRY(IC_64BIT_XS_ADSIZE, 3, "Just as meaningful as IC_XS_ADSIZE") \ + ENUM_ENTRY(IC_64BIT_REXW_XS, 7, "OPSIZE could mean a different " \ + "opcode") \ + ENUM_ENTRY(IC_64BIT_REXW_XD, 7, "Just as meaningful as " \ + "IC_64BIT_REXW_XS") \ + ENUM_ENTRY(IC_64BIT_REXW_OPSIZE, 8, "The Dynamic Duo! 
Prefer over all " \ + "else because this changes most " \ + "operands' meaning") \ + ENUM_ENTRY(IC_VEX, 1, "requires a VEX prefix") \ + ENUM_ENTRY(IC_VEX_XS, 2, "requires VEX and the XS prefix") \ + ENUM_ENTRY(IC_VEX_XD, 2, "requires VEX and the XD prefix") \ + ENUM_ENTRY(IC_VEX_OPSIZE, 2, "requires VEX and the OpSize prefix") \ + ENUM_ENTRY(IC_VEX_W, 3, "requires VEX and the W prefix") \ + ENUM_ENTRY(IC_VEX_W_XS, 4, "requires VEX, W, and XS prefix") \ + ENUM_ENTRY(IC_VEX_W_XD, 4, "requires VEX, W, and XD prefix") \ + ENUM_ENTRY(IC_VEX_W_OPSIZE, 4, "requires VEX, W, and OpSize") \ + ENUM_ENTRY(IC_VEX_L, 3, "requires VEX and the L prefix") \ + ENUM_ENTRY(IC_VEX_L_XS, 4, "requires VEX and the L and XS prefix")\ + ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XD prefix")\ + ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize") \ + ENUM_ENTRY(IC_VEX_L_W, 4, "requires VEX, L and W") \ + ENUM_ENTRY(IC_VEX_L_W_XS, 5, "requires VEX, L, W and XS prefix") \ + ENUM_ENTRY(IC_VEX_L_W_XD, 5, "requires VEX, L, W and XD prefix") \ + ENUM_ENTRY(IC_VEX_L_W_OPSIZE, 5, "requires VEX, L, W and OpSize") \ + ENUM_ENTRY(IC_EVEX, 1, "requires an EVEX prefix") \ + ENUM_ENTRY(IC_EVEX_XS, 2, "requires EVEX and the XS prefix") \ + ENUM_ENTRY(IC_EVEX_XD, 2, "requires EVEX and the XD prefix") \ + ENUM_ENTRY(IC_EVEX_OPSIZE, 2, "requires EVEX and the OpSize prefix") \ + ENUM_ENTRY(IC_EVEX_W, 3, "requires EVEX and the W prefix") \ + ENUM_ENTRY(IC_EVEX_W_XS, 4, "requires EVEX, W, and XS prefix") \ + ENUM_ENTRY(IC_EVEX_W_XD, 4, "requires EVEX, W, and XD prefix") \ + ENUM_ENTRY(IC_EVEX_W_OPSIZE, 4, "requires EVEX, W, and OpSize") \ + ENUM_ENTRY(IC_EVEX_L, 3, "requires EVEX and the L prefix") \ + ENUM_ENTRY(IC_EVEX_L_XS, 4, "requires EVEX and the L and XS prefix")\ + ENUM_ENTRY(IC_EVEX_L_XD, 4, "requires EVEX and the L and XD prefix")\ + ENUM_ENTRY(IC_EVEX_L_OPSIZE, 4, "requires EVEX, L, and OpSize") \ + ENUM_ENTRY(IC_EVEX_L_W, 3, "requires EVEX, L and W") \ + ENUM_ENTRY(IC_EVEX_L_W_XS, 4, "requires EVEX, L, W and XS prefix") \ + ENUM_ENTRY(IC_EVEX_L_W_XD, 4, "requires EVEX, L, W and XD prefix") \ + ENUM_ENTRY(IC_EVEX_L_W_OPSIZE, 4, "requires EVEX, L, W and OpSize") \ + ENUM_ENTRY(IC_EVEX_L2, 3, "requires EVEX and the L2 prefix") \ + ENUM_ENTRY(IC_EVEX_L2_XS, 4, "requires EVEX and the L2 and XS prefix")\ + ENUM_ENTRY(IC_EVEX_L2_XD, 4, "requires EVEX and the L2 and XD prefix")\ + ENUM_ENTRY(IC_EVEX_L2_OPSIZE, 4, "requires EVEX, L2, and OpSize") \ + ENUM_ENTRY(IC_EVEX_L2_W, 3, "requires EVEX, L2 and W") \ + ENUM_ENTRY(IC_EVEX_L2_W_XS, 4, "requires EVEX, L2, W and XS prefix") \ + ENUM_ENTRY(IC_EVEX_L2_W_XD, 4, "requires EVEX, L2, W and XD prefix") \ + ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE, 4, "requires EVEX, L2, W and OpSize") \ + ENUM_ENTRY(IC_EVEX_K, 1, "requires an EVEX_K prefix") \ + ENUM_ENTRY(IC_EVEX_XS_K, 2, "requires EVEX_K and the XS prefix") \ + ENUM_ENTRY(IC_EVEX_XD_K, 2, "requires EVEX_K and the XD prefix") \ + ENUM_ENTRY(IC_EVEX_OPSIZE_K, 2, "requires EVEX_K and the OpSize prefix") \ + ENUM_ENTRY(IC_EVEX_W_K, 3, "requires EVEX_K and the W prefix") \ + ENUM_ENTRY(IC_EVEX_W_XS_K, 4, "requires EVEX_K, W, and XS prefix") \ + ENUM_ENTRY(IC_EVEX_W_XD_K, 4, "requires EVEX_K, W, and XD prefix") \ + ENUM_ENTRY(IC_EVEX_W_OPSIZE_K, 4, "requires EVEX_K, W, and OpSize") \ + ENUM_ENTRY(IC_EVEX_L_K, 3, "requires EVEX_K and the L prefix") \ + ENUM_ENTRY(IC_EVEX_L_XS_K, 4, "requires EVEX_K and the L and XS prefix")\ + ENUM_ENTRY(IC_EVEX_L_XD_K, 4, "requires EVEX_K and the L and XD prefix")\ + ENUM_ENTRY(IC_EVEX_L_OPSIZE_K, 
4, "requires EVEX_K, L, and OpSize") \ + ENUM_ENTRY(IC_EVEX_L_W_K, 3, "requires EVEX_K, L and W") \ + ENUM_ENTRY(IC_EVEX_L_W_XS_K, 4, "requires EVEX_K, L, W and XS prefix") \ + ENUM_ENTRY(IC_EVEX_L_W_XD_K, 4, "requires EVEX_K, L, W and XD prefix") \ + ENUM_ENTRY(IC_EVEX_L_W_OPSIZE_K, 4, "requires EVEX_K, L, W and OpSize") \ + ENUM_ENTRY(IC_EVEX_L2_K, 3, "requires EVEX_K and the L2 prefix") \ + ENUM_ENTRY(IC_EVEX_L2_XS_K, 4, "requires EVEX_K and the L2 and XS prefix")\ + ENUM_ENTRY(IC_EVEX_L2_XD_K, 4, "requires EVEX_K and the L2 and XD prefix")\ + ENUM_ENTRY(IC_EVEX_L2_OPSIZE_K, 4, "requires EVEX_K, L2, and OpSize") \ + ENUM_ENTRY(IC_EVEX_L2_W_K, 3, "requires EVEX_K, L2 and W") \ + ENUM_ENTRY(IC_EVEX_L2_W_XS_K, 4, "requires EVEX_K, L2, W and XS prefix") \ + ENUM_ENTRY(IC_EVEX_L2_W_XD_K, 4, "requires EVEX_K, L2, W and XD prefix") \ + ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_K, 4, "requires EVEX_K, L2, W and OpSize") \ + ENUM_ENTRY(IC_EVEX_B, 1, "requires an EVEX_B prefix") \ + ENUM_ENTRY(IC_EVEX_XS_B, 2, "requires EVEX_B and the XS prefix") \ + ENUM_ENTRY(IC_EVEX_XD_B, 2, "requires EVEX_B and the XD prefix") \ + ENUM_ENTRY(IC_EVEX_OPSIZE_B, 2, "requires EVEX_B and the OpSize prefix") \ + ENUM_ENTRY(IC_EVEX_W_B, 3, "requires EVEX_B and the W prefix") \ + ENUM_ENTRY(IC_EVEX_W_XS_B, 4, "requires EVEX_B, W, and XS prefix") \ + ENUM_ENTRY(IC_EVEX_W_XD_B, 4, "requires EVEX_B, W, and XD prefix") \ + ENUM_ENTRY(IC_EVEX_W_OPSIZE_B, 4, "requires EVEX_B, W, and OpSize") \ + ENUM_ENTRY(IC_EVEX_L_B, 3, "requires EVEX_B and the L prefix") \ + ENUM_ENTRY(IC_EVEX_L_XS_B, 4, "requires EVEX_B and the L and XS prefix")\ + ENUM_ENTRY(IC_EVEX_L_XD_B, 4, "requires EVEX_B and the L and XD prefix")\ + ENUM_ENTRY(IC_EVEX_L_OPSIZE_B, 4, "requires EVEX_B, L, and OpSize") \ + ENUM_ENTRY(IC_EVEX_L_W_B, 3, "requires EVEX_B, L and W") \ + ENUM_ENTRY(IC_EVEX_L_W_XS_B, 4, "requires EVEX_B, L, W and XS prefix") \ + ENUM_ENTRY(IC_EVEX_L_W_XD_B, 4, "requires EVEX_B, L, W and XD prefix") \ + ENUM_ENTRY(IC_EVEX_L_W_OPSIZE_B, 4, "requires EVEX_B, L, W and OpSize") \ + ENUM_ENTRY(IC_EVEX_L2_B, 3, "requires EVEX_B and the L2 prefix") \ + ENUM_ENTRY(IC_EVEX_L2_XS_B, 4, "requires EVEX_B and the L2 and XS prefix")\ + ENUM_ENTRY(IC_EVEX_L2_XD_B, 4, "requires EVEX_B and the L2 and XD prefix")\ + ENUM_ENTRY(IC_EVEX_L2_OPSIZE_B, 4, "requires EVEX_B, L2, and OpSize") \ + ENUM_ENTRY(IC_EVEX_L2_W_B, 3, "requires EVEX_B, L2 and W") \ + ENUM_ENTRY(IC_EVEX_L2_W_XS_B, 4, "requires EVEX_B, L2, W and XS prefix") \ + ENUM_ENTRY(IC_EVEX_L2_W_XD_B, 4, "requires EVEX_B, L2, W and XD prefix") \ + ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_B, 4, "requires EVEX_B, L2, W and OpSize") \ + ENUM_ENTRY(IC_EVEX_K_B, 1, "requires EVEX_B and EVEX_K prefix") \ + ENUM_ENTRY(IC_EVEX_XS_K_B, 2, "requires EVEX_B, EVEX_K and the XS prefix") \ + ENUM_ENTRY(IC_EVEX_XD_K_B, 2, "requires EVEX_B, EVEX_K and the XD prefix") \ + ENUM_ENTRY(IC_EVEX_OPSIZE_K_B, 2, "requires EVEX_B, EVEX_K and the OpSize prefix") \ + ENUM_ENTRY(IC_EVEX_W_K_B, 3, "requires EVEX_B, EVEX_K and the W prefix") \ + ENUM_ENTRY(IC_EVEX_W_XS_K_B, 4, "requires EVEX_B, EVEX_K, W, and XS prefix") \ + ENUM_ENTRY(IC_EVEX_W_XD_K_B, 4, "requires EVEX_B, EVEX_K, W, and XD prefix") \ + ENUM_ENTRY(IC_EVEX_W_OPSIZE_K_B, 4, "requires EVEX_B, EVEX_K, W, and OpSize") \ + ENUM_ENTRY(IC_EVEX_L_K_B, 3, "requires EVEX_B, EVEX_K and the L prefix") \ + ENUM_ENTRY(IC_EVEX_L_XS_K_B, 4, "requires EVEX_B, EVEX_K and the L and XS prefix")\ + ENUM_ENTRY(IC_EVEX_L_XD_K_B, 4, "requires EVEX_B, EVEX_K and the L and XD prefix")\ + 
ENUM_ENTRY(IC_EVEX_L_OPSIZE_K_B, 4, "requires EVEX_B, EVEX_K, L, and OpSize") \ + ENUM_ENTRY(IC_EVEX_L_W_K_B, 3, "requires EVEX_B, EVEX_K, L and W") \ + ENUM_ENTRY(IC_EVEX_L_W_XS_K_B, 4, "requires EVEX_B, EVEX_K, L, W and XS prefix") \ + ENUM_ENTRY(IC_EVEX_L_W_XD_K_B, 4, "requires EVEX_B, EVEX_K, L, W and XD prefix") \ + ENUM_ENTRY(IC_EVEX_L_W_OPSIZE_K_B,4, "requires EVEX_B, EVEX_K, L, W and OpSize") \ + ENUM_ENTRY(IC_EVEX_L2_K_B, 3, "requires EVEX_B, EVEX_K and the L2 prefix") \ + ENUM_ENTRY(IC_EVEX_L2_XS_K_B, 4, "requires EVEX_B, EVEX_K and the L2 and XS prefix")\ + ENUM_ENTRY(IC_EVEX_L2_XD_K_B, 4, "requires EVEX_B, EVEX_K and the L2 and XD prefix")\ + ENUM_ENTRY(IC_EVEX_L2_OPSIZE_K_B, 4, "requires EVEX_B, EVEX_K, L2, and OpSize") \ + ENUM_ENTRY(IC_EVEX_L2_W_K_B, 3, "requires EVEX_B, EVEX_K, L2 and W") \ + ENUM_ENTRY(IC_EVEX_L2_W_XS_K_B, 4, "requires EVEX_B, EVEX_K, L2, W and XS prefix") \ + ENUM_ENTRY(IC_EVEX_L2_W_XD_K_B, 4, "requires EVEX_B, EVEX_K, L2, W and XD prefix") \ + ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_K_B,4, "requires EVEX_B, EVEX_K, L2, W and OpSize") \ + ENUM_ENTRY(IC_EVEX_KZ_B, 1, "requires EVEX_B and EVEX_KZ prefix") \ + ENUM_ENTRY(IC_EVEX_XS_KZ_B, 2, "requires EVEX_B, EVEX_KZ and the XS prefix") \ + ENUM_ENTRY(IC_EVEX_XD_KZ_B, 2, "requires EVEX_B, EVEX_KZ and the XD prefix") \ + ENUM_ENTRY(IC_EVEX_OPSIZE_KZ_B, 2, "requires EVEX_B, EVEX_KZ and the OpSize prefix") \ + ENUM_ENTRY(IC_EVEX_W_KZ_B, 3, "requires EVEX_B, EVEX_KZ and the W prefix") \ + ENUM_ENTRY(IC_EVEX_W_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ, W, and XS prefix") \ + ENUM_ENTRY(IC_EVEX_W_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ, W, and XD prefix") \ + ENUM_ENTRY(IC_EVEX_W_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, W, and OpSize") \ + ENUM_ENTRY(IC_EVEX_L_KZ_B, 3, "requires EVEX_B, EVEX_KZ and the L prefix") \ + ENUM_ENTRY(IC_EVEX_L_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ and the L and XS prefix")\ + ENUM_ENTRY(IC_EVEX_L_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ and the L and XD prefix")\ + ENUM_ENTRY(IC_EVEX_L_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L, and OpSize") \ + ENUM_ENTRY(IC_EVEX_L_W_KZ_B, 3, "requires EVEX_B, EVEX_KZ, L and W") \ + ENUM_ENTRY(IC_EVEX_L_W_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L, W and XS prefix") \ + ENUM_ENTRY(IC_EVEX_L_W_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L, W and XD prefix") \ + ENUM_ENTRY(IC_EVEX_L_W_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L, W and OpSize") \ + ENUM_ENTRY(IC_EVEX_L2_KZ_B, 3, "requires EVEX_B, EVEX_KZ and the L2 prefix") \ + ENUM_ENTRY(IC_EVEX_L2_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ and the L2 and XS prefix")\ + ENUM_ENTRY(IC_EVEX_L2_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ and the L2 and XD prefix")\ + ENUM_ENTRY(IC_EVEX_L2_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L2, and OpSize") \ + ENUM_ENTRY(IC_EVEX_L2_W_KZ_B, 3, "requires EVEX_B, EVEX_KZ, L2 and W") \ + ENUM_ENTRY(IC_EVEX_L2_W_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L2, W and XS prefix") \ + ENUM_ENTRY(IC_EVEX_L2_W_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L2, W and XD prefix") \ + ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L2, W and OpSize") \ + ENUM_ENTRY(IC_EVEX_KZ, 1, "requires an EVEX_KZ prefix") \ + ENUM_ENTRY(IC_EVEX_XS_KZ, 2, "requires EVEX_KZ and the XS prefix") \ + ENUM_ENTRY(IC_EVEX_XD_KZ, 2, "requires EVEX_KZ and the XD prefix") \ + ENUM_ENTRY(IC_EVEX_OPSIZE_KZ, 2, "requires EVEX_KZ and the OpSize prefix") \ + ENUM_ENTRY(IC_EVEX_W_KZ, 3, "requires EVEX_KZ and the W prefix") \ + ENUM_ENTRY(IC_EVEX_W_XS_KZ, 4, "requires EVEX_KZ, W, and XS prefix") \ + ENUM_ENTRY(IC_EVEX_W_XD_KZ, 4, 
"requires EVEX_KZ, W, and XD prefix") \ + ENUM_ENTRY(IC_EVEX_W_OPSIZE_KZ, 4, "requires EVEX_KZ, W, and OpSize") \ + ENUM_ENTRY(IC_EVEX_L_KZ, 3, "requires EVEX_KZ and the L prefix") \ + ENUM_ENTRY(IC_EVEX_L_XS_KZ, 4, "requires EVEX_KZ and the L and XS prefix")\ + ENUM_ENTRY(IC_EVEX_L_XD_KZ, 4, "requires EVEX_KZ and the L and XD prefix")\ + ENUM_ENTRY(IC_EVEX_L_OPSIZE_KZ, 4, "requires EVEX_KZ, L, and OpSize") \ + ENUM_ENTRY(IC_EVEX_L_W_KZ, 3, "requires EVEX_KZ, L and W") \ + ENUM_ENTRY(IC_EVEX_L_W_XS_KZ, 4, "requires EVEX_KZ, L, W and XS prefix") \ + ENUM_ENTRY(IC_EVEX_L_W_XD_KZ, 4, "requires EVEX_KZ, L, W and XD prefix") \ + ENUM_ENTRY(IC_EVEX_L_W_OPSIZE_KZ, 4, "requires EVEX_KZ, L, W and OpSize") \ + ENUM_ENTRY(IC_EVEX_L2_KZ, 3, "requires EVEX_KZ and the L2 prefix") \ + ENUM_ENTRY(IC_EVEX_L2_XS_KZ, 4, "requires EVEX_KZ and the L2 and XS prefix")\ + ENUM_ENTRY(IC_EVEX_L2_XD_KZ, 4, "requires EVEX_KZ and the L2 and XD prefix")\ + ENUM_ENTRY(IC_EVEX_L2_OPSIZE_KZ, 4, "requires EVEX_KZ, L2, and OpSize") \ + ENUM_ENTRY(IC_EVEX_L2_W_KZ, 3, "requires EVEX_KZ, L2 and W") \ + ENUM_ENTRY(IC_EVEX_L2_W_XS_KZ, 4, "requires EVEX_KZ, L2, W and XS prefix") \ + ENUM_ENTRY(IC_EVEX_L2_W_XD_KZ, 4, "requires EVEX_KZ, L2, W and XD prefix") \ + ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ, 4, "requires EVEX_KZ, L2, W and OpSize") + +#define ENUM_ENTRY(n, r, d) n, +typedef enum { + INSTRUCTION_CONTEXTS + IC_max +} InstructionContext; +#undef ENUM_ENTRY + +/* + * Opcode types, which determine which decode table to use, both in the Intel + * manual and also for the decoder. + */ +typedef enum { + ONEBYTE = 0, + TWOBYTE = 1, + THREEBYTE_38 = 2, + THREEBYTE_3A = 3, + XOP8_MAP = 4, + XOP9_MAP = 5, + XOPA_MAP = 6, + THREEDNOW_MAP = 7 +} OpcodeType; + +/* + * The following structs are used for the hierarchical decode table. After + * determining the instruction's class (i.e., which IC_* constant applies to + * it), the decoder reads the opcode. Some instructions require specific + * values of the ModR/M byte, so the ModR/M byte indexes into the final table. + * + * If a ModR/M byte is not required, "required" is left unset, and the values + * for each instructionID are identical. + */ + +typedef uint16_t InstrUID; + +/* + * ModRMDecisionType - describes the type of ModR/M decision, allowing the + * consumer to determine the number of entries in it. + * + * MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded + * instruction is the same. + * MODRM_SPLITRM - If the ModR/M byte is between 0x00 and 0xbf, the opcode + * corresponds to one instruction; otherwise, it corresponds to + * a different instruction. + * MODRM_SPLITMISC- If the ModR/M byte is between 0x00 and 0xbf, ModR/M byte + * divided by 8 is used to select instruction; otherwise, each + * value of the ModR/M byte could correspond to a different + * instruction. + * MODRM_SPLITREG - ModR/M byte divided by 8 is used to select instruction. This + corresponds to instructions that use reg field as opcode + * MODRM_FULL - Potentially, each value of the ModR/M byte could correspond + * to a different instruction. 
+ */ + +#define MODRMTYPES \ + ENUM_ENTRY(MODRM_ONEENTRY) \ +ENUM_ENTRY(MODRM_SPLITRM) \ +ENUM_ENTRY(MODRM_SPLITMISC) \ +ENUM_ENTRY(MODRM_SPLITREG) \ +ENUM_ENTRY(MODRM_FULL) + +#define ENUM_ENTRY(n) n, +typedef enum { + MODRMTYPES + MODRM_max +} ModRMDecisionType; +#undef ENUM_ENTRY + +#define CASE_ENCODING_RM \ + case ENCODING_RM: \ + case ENCODING_RM_CD2: \ + case ENCODING_RM_CD4: \ + case ENCODING_RM_CD8: \ + case ENCODING_RM_CD16: \ + case ENCODING_RM_CD32: \ + case ENCODING_RM_CD64 + +#define CASE_ENCODING_VSIB \ + case ENCODING_VSIB: \ + case ENCODING_VSIB_CD2: \ + case ENCODING_VSIB_CD4: \ + case ENCODING_VSIB_CD8: \ + case ENCODING_VSIB_CD16: \ + case ENCODING_VSIB_CD32: \ + case ENCODING_VSIB_CD64 + +// Physical encodings of instruction operands. + +#define ENCODINGS \ +ENUM_ENTRY(ENCODING_NONE, "") \ +ENUM_ENTRY(ENCODING_REG, "Register operand in ModR/M byte.") \ +ENUM_ENTRY(ENCODING_RM, "R/M operand in ModR/M byte.") \ +ENUM_ENTRY(ENCODING_RM_CD2, "R/M operand with CDisp scaling of 2") \ +ENUM_ENTRY(ENCODING_RM_CD4, "R/M operand with CDisp scaling of 4") \ +ENUM_ENTRY(ENCODING_RM_CD8, "R/M operand with CDisp scaling of 8") \ +ENUM_ENTRY(ENCODING_RM_CD16,"R/M operand with CDisp scaling of 16") \ +ENUM_ENTRY(ENCODING_RM_CD32,"R/M operand with CDisp scaling of 32") \ +ENUM_ENTRY(ENCODING_RM_CD64,"R/M operand with CDisp scaling of 64") \ +ENUM_ENTRY(ENCODING_VSIB, "VSIB operand in ModR/M byte.") \ +ENUM_ENTRY(ENCODING_VSIB_CD2, "VSIB operand with CDisp scaling of 2") \ +ENUM_ENTRY(ENCODING_VSIB_CD4, "VSIB operand with CDisp scaling of 4") \ +ENUM_ENTRY(ENCODING_VSIB_CD8, "VSIB operand with CDisp scaling of 8") \ +ENUM_ENTRY(ENCODING_VSIB_CD16,"VSIB operand with CDisp scaling of 16") \ +ENUM_ENTRY(ENCODING_VSIB_CD32,"VSIB operand with CDisp scaling of 32") \ +ENUM_ENTRY(ENCODING_VSIB_CD64,"VSIB operand with CDisp scaling of 64") \ +ENUM_ENTRY(ENCODING_VVVV, "Register operand in VEX.vvvv byte.") \ +ENUM_ENTRY(ENCODING_WRITEMASK, "Register operand in EVEX.aaa byte.") \ +ENUM_ENTRY(ENCODING_IB, "1-byte immediate") \ +ENUM_ENTRY(ENCODING_IW, "2-byte") \ +ENUM_ENTRY(ENCODING_ID, "4-byte") \ +ENUM_ENTRY(ENCODING_IO, "8-byte") \ +ENUM_ENTRY(ENCODING_RB, "(AL..DIL, R8L..R15L) Register code added to " \ + "the opcode byte") \ +ENUM_ENTRY(ENCODING_RW, "(AX..DI, R8W..R15W)") \ +ENUM_ENTRY(ENCODING_RD, "(EAX..EDI, R8D..R15D)") \ +ENUM_ENTRY(ENCODING_RO, "(RAX..RDI, R8..R15)") \ +ENUM_ENTRY(ENCODING_FP, "Position on floating-point stack in ModR/M " \ + "byte.") \ +ENUM_ENTRY(ENCODING_Iv, "Immediate of operand size") \ +ENUM_ENTRY(ENCODING_Ia, "Immediate of address size") \ +ENUM_ENTRY(ENCODING_IRC, "Immediate for static rounding control") \ +ENUM_ENTRY(ENCODING_Rv, "Register code of operand size added to the " \ + "opcode byte") \ +ENUM_ENTRY(ENCODING_DUP, "Duplicate of another operand; ID is encoded " \ + "in type") \ +ENUM_ENTRY(ENCODING_SI, "Source index; encoded in OpSize/Adsize prefix") \ +ENUM_ENTRY(ENCODING_DI, "Destination index; encoded in prefixes") + +#define ENUM_ENTRY(n, d) n, +typedef enum { + ENCODINGS + ENCODING_max +} OperandEncoding; +#undef ENUM_ENTRY + +/* + * Semantic interpretations of instruction operands. 
+ */ +#define TYPES \ + ENUM_ENTRY(TYPE_NONE, "") \ + ENUM_ENTRY(TYPE_REL, "immediate address") \ + ENUM_ENTRY(TYPE_R8, "1-byte register operand") \ + ENUM_ENTRY(TYPE_R16, "2-byte") \ + ENUM_ENTRY(TYPE_R32, "4-byte") \ + ENUM_ENTRY(TYPE_R64, "8-byte") \ + ENUM_ENTRY(TYPE_IMM, "immediate operand") \ + ENUM_ENTRY(TYPE_IMM3, "1-byte immediate operand between 0 and 7") \ + ENUM_ENTRY(TYPE_IMM5, "1-byte immediate operand between 0 and 31") \ + ENUM_ENTRY(TYPE_AVX512ICC, "1-byte immediate operand for AVX512 icmp") \ + ENUM_ENTRY(TYPE_UIMM8, "1-byte unsigned immediate operand") \ + ENUM_ENTRY(TYPE_M, "Memory operand") \ + ENUM_ENTRY(TYPE_MVSIBX, "Memory operand using XMM index") \ + ENUM_ENTRY(TYPE_MVSIBY, "Memory operand using YMM index") \ + ENUM_ENTRY(TYPE_MVSIBZ, "Memory operand using ZMM index") \ + ENUM_ENTRY(TYPE_SRCIDX, "memory at source index") \ + ENUM_ENTRY(TYPE_DSTIDX, "memory at destination index") \ + ENUM_ENTRY(TYPE_MOFFS, "memory offset (relative to segment base)") \ + ENUM_ENTRY(TYPE_ST, "Position on the floating-point stack") \ + ENUM_ENTRY(TYPE_MM64, "8-byte MMX register") \ + ENUM_ENTRY(TYPE_XMM, "16-byte") \ + ENUM_ENTRY(TYPE_YMM, "32-byte") \ + ENUM_ENTRY(TYPE_ZMM, "64-byte") \ + ENUM_ENTRY(TYPE_VK, "mask register") \ + ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \ + ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \ + ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand") \ + ENUM_ENTRY(TYPE_BNDR, "MPX bounds register") \ + \ + ENUM_ENTRY(TYPE_Rv, "Register operand of operand size") \ + ENUM_ENTRY(TYPE_RELv, "Immediate address of operand size") \ + ENUM_ENTRY(TYPE_DUP0, "Duplicate of operand 0") \ + ENUM_ENTRY(TYPE_DUP1, "operand 1") \ + ENUM_ENTRY(TYPE_DUP2, "operand 2") \ + ENUM_ENTRY(TYPE_DUP3, "operand 3") \ + ENUM_ENTRY(TYPE_DUP4, "operand 4") \ + +#define ENUM_ENTRY(n, d) n, +typedef enum { + TYPES + TYPE_max +} OperandType; +#undef ENUM_ENTRY + +/* + * The specification for how to extract and interpret one operand. + */ +typedef struct OperandSpecifier { + uint8_t encoding; + uint8_t type; +} OperandSpecifier; + +#define X86_MAX_OPERANDS 6 + +/* + * Decoding mode for the Intel disassembler. 16-bit, 32-bit, and 64-bit mode + * are supported, and represent real mode, IA-32e, and IA-32e in 64-bit mode, + * respectively. 
+ */ +typedef enum { + MODE_16BIT, + MODE_32BIT, + MODE_64BIT +} DisassemblerMode; + +#endif diff --git a/capstone/suite/synctools/arm64_gen_vreg.c b/capstone/suite/synctools/arm64_gen_vreg.c new file mode 100644 index 000000000..9b7a93835 --- /dev/null +++ b/capstone/suite/synctools/arm64_gen_vreg.c @@ -0,0 +1,38 @@ +// $ make arm64_gen_vreg +// $ ./arm64_gen_vreg > AArch64GenRegisterV.inc + +#include +#include +#include +#include + +#undef CAPSTONE_DIET +#define GET_REGINFO_ENUM + +#include "AArch64GenRegisterInfo.inc" +#include "AArch64GenRegisterName.inc" + +int main() +{ + unsigned int i; + size_t size = (size_t)getRegisterName(i, 100); + + printf("// size = %zu\n", size); + + for(i = 1; i < size; i++) { + unsigned int j; + const char *name = getRegisterName(i, AArch64_vreg); + //printf("%u: ARM64_REG_%s, ", i, getRegisterName(i, AArch64_vreg)); + if (strlen(name) == 0) { + printf("0,\n"); + } else { + printf("ARM64_REG_"); + for(j = 0; j < strlen(name); j++) { + printf("%c", toupper(name[j])); + } + printf(",\n"); + } + } + + return 0; +} diff --git a/capstone/suite/synctools/asmwriter.py b/capstone/suite/synctools/asmwriter.py new file mode 100755 index 000000000..b511759c2 --- /dev/null +++ b/capstone/suite/synctools/asmwriter.py @@ -0,0 +1,748 @@ +#!/usr/bin/python +# convert LLVM GenAsmWriter.inc for Capstone disassembler. +# by Nguyen Anh Quynh, 2019 + +import sys + +if len(sys.argv) == 1: + print("Syntax: %s " %sys.argv[0]) + sys.exit(1) + +arch = sys.argv[4] +f = open(sys.argv[1]) +lines = f.readlines() +f.close() + +f1 = open(sys.argv[2], 'w+') + +f2 = open(sys.argv[3], 'w+') + +f1.write("/* Capstone Disassembly Engine, http://www.capstone-engine.org */\n") +f1.write("/* By Nguyen Anh Quynh , 2013-2019 */\n") +f1.write("\n") + +f2.write("/* Capstone Disassembly Engine, http://www.capstone-engine.org */\n") +f2.write("/* By Nguyen Anh Quynh , 2013-2019 */\n") +f2.write("\n") + +need_endif = False +in_getRegisterName = False +in_printAliasInstr = False +fragment_no = None +skip_printing = False + +skip_line = 0 +skip_count = 0 + +def replace_getOp(line): + line2 = line + if 'MI->getOperand(0)' in line: + line2 = line.replace('MI->getOperand(0)', 'MCInst_getOperand(MI, 0)') + elif 'MI->getOperand(1)' in line: + line2 = line.replace('MI->getOperand(1)', 'MCInst_getOperand(MI, 1)') + elif 'MI->getOperand(2)' in line: + line2 = line.replace('MI->getOperand(2)', 'MCInst_getOperand(MI, 2)') + elif 'MI->getOperand(3)' in line: + line2 = line.replace('MI->getOperand(3)', 'MCInst_getOperand(MI, 3)') + elif 'MI->getOperand(4)' in line: + line2 = line.replace('MI->getOperand(4)', 'MCInst_getOperand(MI, 4)') + elif 'MI->getOperand(5)' in line: + line2 = line.replace('MI->getOperand(5)', 'MCInst_getOperand(MI, 5)') + elif 'MI->getOperand(6)' in line: + line2 = line.replace('MI->getOperand(6)', 'MCInst_getOperand(MI, 6)') + elif 'MI->getOperand(7)' in line: + line2 = line.replace('MI->getOperand(7)', 'MCInst_getOperand(MI, 7)') + elif 'MI->getOperand(8)' in line: + line2 = line.replace('MI->getOperand(8)', 'MCInst_getOperand(MI, 8)') + return line2 + +def replace_getReg(line): + line2 = line + if 'MI->getOperand(0).getReg()' in line: + line2 = line.replace('MI->getOperand(0).getReg()', 'MCOperand_getReg(MCInst_getOperand(MI, 0))') + elif 'MI->getOperand(1).getReg()' in line: + line2 = line.replace('MI->getOperand(1).getReg()', 'MCOperand_getReg(MCInst_getOperand(MI, 1))') + elif 'MI->getOperand(2).getReg()' in line: + line2 = line.replace('MI->getOperand(2).getReg()', 
'MCOperand_getReg(MCInst_getOperand(MI, 2))') + elif 'MI->getOperand(3).getReg()' in line: + line2 = line.replace('MI->getOperand(3).getReg()', 'MCOperand_getReg(MCInst_getOperand(MI, 3))') + elif 'MI->getOperand(4).getReg()' in line: + line2 = line.replace('MI->getOperand(4).getReg()', 'MCOperand_getReg(MCInst_getOperand(MI, 4))') + elif 'MI->getOperand(5).getReg()' in line: + line2 = line.replace('MI->getOperand(5).getReg()', 'MCOperand_getReg(MCInst_getOperand(MI, 5))') + elif 'MI->getOperand(6).getReg()' in line: + line2 = line.replace('MI->getOperand(6).getReg()', 'MCOperand_getReg(MCInst_getOperand(MI, 6))') + elif 'MI->getOperand(7).getReg()' in line: + line2 = line.replace('MI->getOperand(7).getReg()', 'MCOperand_getReg(MCInst_getOperand(MI, 7))') + elif 'MI->getOperand(8).getReg()' in line: + line2 = line.replace('MI->getOperand(8).getReg()', 'MCOperand_getReg(MCInst_getOperand(MI, 8))') + return line2 + +# extract param between text() +# MRI.getRegClass(AArch64::GPR32spRegClassID).contains(MI->getOperand(1).getReg())) +def extract_paren(line, text): + i = line.index(text) + return line[line.index('(', i)+1 : line.index(')', i)] + + +# extract text between <> +# printSVERegOp<'q'> +def extract_brackets(line): + if '<' in line: + return line[line.index('<')+1 : line.index('>')] + else: + return '' + +# delete text between <>, including <> +# printSVERegOp<'q'> +def del_brackets(line): + if '<' in line: + return line[:line.index('<')] + line[line.index('>') + 1:] + else: + return line + + +def print_line(line): + line = line.replace('::', '_') + line = line.replace('nullptr', 'NULL') + if not skip_printing: + if in_getRegisterName: + f2.write(line + "\n") + else: + f1.write(line + "\n") + + +for line in lines: + line = line.rstrip() + #print("@", line) + + # skip Alias + if arch.upper() == 'X86': + if 'PRINT_ALIAS_INSTR' in line: + # done + break + + if skip_line: + skip_count += 1 + if skip_count <= skip_line: + # skip this line + continue + else: + # skip enough number of lines, reset counters + skip_line = 0 + skip_count = 0 + + if "::printInstruction" in line: + if arch.upper() in ('AARCH64', 'ARM64'): + #print_line("static void printInstruction(MCInst *MI, SStream *O, MCRegisterInfo *MRI)\n{") + print_line("static void printInstruction(MCInst *MI, SStream *O)\n{") + else: + print_line("static void printInstruction(MCInst *MI, SStream *O)\n{") + elif 'const char *AArch64InstPrinter::' in line: + continue + elif 'getRegisterName(' in line: + if 'unsigned AltIdx' in line: + print_line("static const char *getRegisterName(unsigned RegNo, unsigned AltIdx)\n{") + else: + print_line("static const char *getRegisterName(unsigned RegNo)\n{") + elif 'getRegisterName' in line: + in_getRegisterName = True + print_line(line) + elif '::printAliasInstr' in line: + if arch.upper() in ('AARCH64', 'PPC'): + print_line("static char *printAliasInstr(MCInst *MI, SStream *OS, MCRegisterInfo *MRI)\n{") + print_line(' #define GETREGCLASS_CONTAIN(_class, _reg) MCRegisterClass_contains(MCRegisterInfo_getRegClass(MRI, _class), MCOperand_getReg(MCInst_getOperand(MI, _reg)))') + else: + print_line("static bool printAliasInstr(MCInst *MI, SStream *OS)\n{") + print_line(" unsigned int I = 0, OpIdx, PrintMethodIdx;") + print_line(" char *tmpString;") + in_printAliasInstr = True + elif 'STI.getFeatureBits()[' in line: + if arch.upper() == 'ARM': + line2 = line.replace('STI.getFeatureBits()[', 'ARM_getFeatureBits(MI->csh->mode, ') + elif arch.upper() == 'AARCH64': + line2 = line.replace('STI.getFeatureBits()[', 
'AArch64_getFeatureBits(') + line2 = line2.replace(']', ')') + print_line(line2) + elif ', STI, ' in line: + line2 = line.replace(', STI, ', ', ') + + if 'printSVELogicalImm<' in line: + if 'int16' in line: + line2 = line2.replace('printSVELogicalImm', 'printSVELogicalImm16') + line2 = line2.replace('', '') + elif 'int32' in line: + line2 = line2.replace('printSVELogicalImm', 'printSVELogicalImm32') + line2 = line2.replace('', '') + else: + line2 = line2.replace('printSVELogicalImm', 'printSVELogicalImm64') + line2 = line2.replace('', '') + + if 'MI->getOperand(' in line: + line2 = replace_getOp(line2) + + # C++ template + if 'printPrefetchOp' in line2: + param = extract_brackets(line2) + if param == '': + param = 'false' + line2 = del_brackets(line2) + line2 = line2.replace(', O);', ', O, %s);' %param) + line2 = line2.replace(', OS);', ', OS, %s);' %param) + elif '' in line2: + line2 = line2.replace('', '') + line2 = line2.replace(', O);', ', O, false);') + line2 = line2.replace('STI, ', '') + elif '' in line: + line2 = line2.replace('', '') + line2 = line2.replace(', O);', ', O, true);') + line2 = line2.replace('STI, ', '') + elif 'printAdrLabelOperand' in line: + # C++ template + if '<0>' in line: + line2 = line2.replace('<0>', '') + line2 = line2.replace(', O);', ', O, 0);') + elif '<1>' in line: + line2 = line2.replace('<1>', '') + line2 = line2.replace(', O);', ', O, 1);') + elif '<2>' in line: + line2 = line2.replace('<2>', '') + line2 = line2.replace(', O);', ', O, 2);') + elif 'printImm8OptLsl' in line2: + param = extract_brackets(line2) + line2 = del_brackets(line2) + if '8' in param or '16' in param or '32' in param: + line2 = line2.replace('printImm8OptLsl', 'printImm8OptLsl32') + elif '64' in param: + line2 = line2.replace('printImm8OptLsl', 'printImm8OptLsl64') + elif 'printLogicalImm' in line2: + param = extract_brackets(line2) + line2 = del_brackets(line2) + if '8' in param or '16' in param or '32' in param: + line2 = line2.replace('printLogicalImm', 'printLogicalImm32') + elif '64' in param: + line2 = line2.replace('printLogicalImm', 'printLogicalImm64') + elif 'printSVERegOp' in line2 or 'printGPRSeqPairsClassOperand' in line2 or 'printTypedVectorList' in line2 or 'printPostIncOperand' in line2 or 'printImmScale' in line2 or 'printRegWithShiftExtend' in line2 or 'printUImm12Offset' in line2 or 'printExactFPImm' in line2 or 'printMemExtend' in line2 or 'printZPRasFPR' in line2: + param = extract_brackets(line2) + if param == '': + param = '0' + line2 = del_brackets(line2) + line2 = line2.replace(', O);', ', O, %s);' %param) + line2 = line2.replace(', OS);', ', OS, %s);' %param) + elif 'printComplexRotationOp' in line: + # printComplexRotationOp<90, 0>(MI, 5, STI, O); + bracket_content = line2[line2.index('<') + 1 : line2.index('>')] + line2 = line2.replace('<' + bracket_content + '>', '') + line2 = line2.replace(' O);', ' O, %s);' %bracket_content) + + print_line(line2) + elif "static const char AsmStrs[]" in line: + print_line("#ifndef CAPSTONE_DIET") + print_line(" static const char AsmStrs[] = {") + need_endif = True + elif "static const char AsmStrsNoRegAltName[]" in line: + print_line("#ifndef CAPSTONE_DIET") + print_line(" static const char AsmStrsNoRegAltName[] = {") + need_endif = True + elif line == ' O << "\\t";': + print_line(" unsigned int opcode = MCInst_getOpcode(MI);") + print_line(' // printf("opcode = %u\\n", opcode);'); + elif 'MI->getOpcode()' in line: + if 'switch' in line: + line2 = line.replace('MI->getOpcode()', 'MCInst_getOpcode(MI)') + else: + 
line2 = line.replace('MI->getOpcode()', 'opcode') + print_line(line2) + + elif 'O << ' in line: + if '"' in line: + line2 = line.lower() + line2 = line2.replace('o << ', 'SStream_concat0(O, '); + else: + line2 = line.replace('O << ', 'SStream_concat0(O, '); + line2 = line2.replace("'", '"') + line2 = line2.replace(';', ');') + if '" : "' in line2: # "segment : offset" in X86 + line2 = line2.replace('" : "', '":"') + + # ARM + print_line(line2) + + if '", #0"' in line2: + print_line(' op_addImm(MI, 0);') + + if '", #1"' in line2: + print_line(' op_addImm(MI, 1);') + + # PowerPC + if '", 268"' in line2: + print_line(' op_addImm(MI, 268);') + + elif '", 256"' in line2: + print_line(' op_addImm(MI, 256);') + + elif '", 0, "' in line2 or '", 0"' in line2: + print_line(' op_addImm(MI, 0);') + + elif '", -1"' in line2: + print_line(' op_addImm(MI, -1);') + + if '[' in line2: + if not '[]' in line2: + print_line(' set_mem_access(MI, true);') + + if ']' in line2: + if not '[]' in line2: + print_line(' set_mem_access(MI, false);') + + if '".f64\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_F64);') + elif '".f32\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_F32);') + elif '".f16\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_F16);') + elif '".s64\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_S64);') + elif '".s32\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_S32);') + elif '".s16\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_S16);') + elif '".s8\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_S8);') + elif '".u64\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_U64);') + elif '".u32\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_U32);') + elif '".u16\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_U16);') + elif '".u8\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_U8);') + elif '".i64\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_I64);') + elif '".i32\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_I32);') + elif '".i16\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_I16);') + elif '".i8\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_I8);') + elif '".f16.f64\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_F16F64);') + elif '".f64.f16\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_F64F16);') + elif '".f16.f32\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_F16F32);') + elif '".f32.f16\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_F32F16);') + elif '".f64.f32\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_F64F32);') + elif '".f32.f64\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_F32F64);') + elif '".s32.f32\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_S32F32);') + elif '".f32.s32\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_F32S32);') + elif '".u32.f32\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_U32F32);') + elif '".f32.u32\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_F32U32);') + elif '".p8\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_P8);') + elif '".f64.s16\\t"' in line2: + 
print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_F64S16);') + elif '".s16.f64\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_S16F64);') + elif '".f32.s16\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_F32S16);') + elif '".s16.f32\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_S16F32);') + elif '".f64.s32\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_F64S32);') + elif '".s32.f64\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_S32F64);') + elif '".f64.u16\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_F64U16);') + elif '".u16.f64\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_U16F64);') + elif '".f32.u16\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_F32U16);') + elif '".u16.f32\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_U16F32);') + elif '".f64.u32\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_F64U32);') + elif '".u32.f64\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_U32F64);') + elif '".f16.u32\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_F16U32);') + elif '".u32.f16\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_U32F16);') + elif '".f16.u16\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_F16U16);') + elif '".u16.f16\\t"' in line2: + print_line(' ARM_addVectorDataType(MI, ARM_VECTORDATA_U16F16);') + elif '"\\tlr"' in line2: + print_line(' ARM_addReg(MI, ARM_REG_LR);') + elif '"\\tapsr_nzcv, fpscr"' in line2: + print_line(' ARM_addReg(MI, ARM_REG_APSR_NZCV);') + print_line(' ARM_addReg(MI, ARM_REG_FPSCR);') + elif '"\\tpc, lr"' in line2: + print_line(' ARM_addReg(MI, ARM_REG_PC);') + print_line(' ARM_addReg(MI, ARM_REG_LR);') + elif '"\\tfpscr, "' in line2: + print_line(' ARM_addReg(MI, ARM_REG_FPSCR);') + elif '"\\tfpexc, "' in line2: + print_line(' ARM_addReg(MI, ARM_REG_FPEXC);') + elif '"\\tfpinst, "' in line2: + print_line(' ARM_addReg(MI, ARM_REG_FPINST);') + elif '"\\tfpinst2, "' in line2: + print_line(' ARM_addReg(MI, ARM_REG_FPINST2);') + elif '"\\tfpsid, "' in line2: + print_line(' ARM_addReg(MI, ARM_REG_FPSID);') + elif '"\\tsp, "' in line2: + print_line(' ARM_addReg(MI, ARM_REG_SP);') + elif '"\\tsp!, "' in line2: + print_line(' ARM_addReg(MI, ARM_REG_SP);') + elif '", apsr"' in line2: + print_line(' ARM_addReg(MI, ARM_REG_APSR);') + elif '", spsr"' in line2: + print_line(' ARM_addReg(MI, ARM_REG_SPSR);') + elif '", fpscr"' in line2: + print_line(' ARM_addReg(MI, ARM_REG_FPSCR);') + elif '", fpscr"' in line2: + print_line(' ARM_addReg(MI, ARM_REG_FPSCR);') + elif '", fpexc"' in line2: + print_line(' ARM_addReg(MI, ARM_REG_FPEXC);') + elif '", fpinst"' in line2: + print_line(' ARM_addReg(MI, ARM_REG_FPINST);') + elif '", fpinst2"' in line2: + print_line(' ARM_addReg(MI, ARM_REG_FPINST2);') + elif '", fpsid"' in line2: + print_line(' ARM_addReg(MI, ARM_REG_FPSID);') + elif '", mvfr0"' in line2: + print_line(' ARM_addReg(MI, ARM_REG_MVFR0);') + elif '", mvfr1"' in line2: + print_line(' ARM_addReg(MI, ARM_REG_MVFR1);') + elif '", mvfr2"' in line2: + print_line(' ARM_addReg(MI, ARM_REG_MVFR2);') + elif '.8\\t' in line2: + print_line(' ARM_addVectorDataSize(MI, 8);') + elif '.16\\t' in line2: + print_line(' ARM_addVectorDataSize(MI, 16);') + elif '.32\\t' in line2: + print_line(' ARM_addVectorDataSize(MI, 32);') + elif '.64\\t' in line2: + 
print_line(' ARM_addVectorDataSize(MI, 64);') + elif '" ^"' in line2: + print_line(' ARM_addUserMode(MI);') + + if '.16b' in line2: + print_line(' arm64_op_addVectorArrSpecifier(MI, ARM64_VAS_16B);') + elif '.8b' in line2: + print_line(' arm64_op_addVectorArrSpecifier(MI, ARM64_VAS_8B);') + elif '.4b' in line2: + print_line(' arm64_op_addVectorArrSpecifier(MI, ARM64_VAS_4B);') + elif '.b' in line2: + print_line(' arm64_op_addVectorArrSpecifier(MI, ARM64_VAS_1B);') + elif '.8h' in line2: + print_line(' arm64_op_addVectorArrSpecifier(MI, ARM64_VAS_8H);') + elif '.4h' in line2: + print_line(' arm64_op_addVectorArrSpecifier(MI, ARM64_VAS_4H);') + elif '.2h' in line2: + print_line(' arm64_op_addVectorArrSpecifier(MI, ARM64_VAS_2H);') + elif '.h' in line2: + print_line(' arm64_op_addVectorArrSpecifier(MI, ARM64_VAS_1H);') + elif '.4s' in line2: + print_line(' arm64_op_addVectorArrSpecifier(MI, ARM64_VAS_4S);') + elif '.2s' in line2: + print_line(' arm64_op_addVectorArrSpecifier(MI, ARM64_VAS_2S);') + elif '.s' in line2: + print_line(' arm64_op_addVectorArrSpecifier(MI, ARM64_VAS_1S);') + elif '.2d' in line2: + print_line(' arm64_op_addVectorArrSpecifier(MI, ARM64_VAS_2D);') + elif '.1d' in line2: + print_line(' arm64_op_addVectorArrSpecifier(MI, ARM64_VAS_1D);') + elif '.1q' in line2: + print_line(' arm64_op_addVectorArrSpecifier(MI, ARM64_VAS_1Q);') + + if '#0.0' in line2: + print_line(' arm64_op_addFP(MI, 0);') + elif '#0' in line2: + print_line(' arm64_op_addImm(MI, 0);') + elif '#8' in line2: + print_line(' arm64_op_addImm(MI, 8);') + elif '#16' in line2: + print_line(' arm64_op_addImm(MI, 16);') + elif '#32' in line2: + print_line(' arm64_op_addImm(MI, 32);') + + # X86 + if '", %rax"' in line2 or '", rax"' in line2: + print_line(' op_addReg(MI, X86_REG_RAX);') + elif '", %eax"' in line2 or '", eax"' in line2: + print_line(' op_addReg(MI, X86_REG_EAX);') + elif '", %ax"' in line2 or '", ax"' in line2: + print_line(' op_addReg(MI, X86_REG_AX);') + elif '", %al"' in line2 or '", al"' in line2: + print_line(' op_addReg(MI, X86_REG_AL);') + elif '", %dx"' in line2 or '", dx"' in line2: + print_line(' op_addReg(MI, X86_REG_DX);') + elif '", %st(0)"' in line2 or '", st(0)"' in line2: + print_line(' op_addReg(MI, X86_REG_ST0);') + elif '", 1"' in line2: + print_line(' op_addImm(MI, 1);') + elif '", cl"' in line2: + print_line(' op_addReg(MI, X86_REG_CL);') + elif '"{1to2}, "' in line2: + print_line(' op_addAvxBroadcast(MI, X86_AVX_BCAST_2);') + elif '"{1to4}, "' in line2: + print_line(' op_addAvxBroadcast(MI, X86_AVX_BCAST_4);') + elif '"{1to8}, "' in line2: + print_line(' op_addAvxBroadcast(MI, X86_AVX_BCAST_8);') + elif '"{1to16}, "' in line2: + print_line(' op_addAvxBroadcast(MI, X86_AVX_BCAST_16);') + elif '{z}{sae}' in line2: + print_line(' op_addAvxSae(MI);') + print_line(' op_addAvxZeroOpmask(MI);') + elif ('{z}' in line2): + print_line(' op_addAvxZeroOpmask(MI);') + elif '{sae}' in line2: + print_line(' op_addAvxSae(MI);') + elif 'llvm_unreachable("Invalid command number.");' in line: + line2 = line.replace('llvm_unreachable("Invalid command number.");', '// unreachable') + print_line(line2) + elif ('assert(' in line) or ('assert (' in line): + pass + elif 'Invalid alt name index' in line: + pass + elif '::' in line and 'case ' in line: + #print_line(line2) + print_line(line) + elif 'MI->getNumOperands()' in line: + line2 = line.replace('MI->getNumOperands()', 'MCInst_getNumOperands(MI)') + print_line(line2) + elif 'const MCOperand &MCOp' in line: + line2 = line.replace('const MCOperand 
&MCOp', 'MCOperand *MCOp') + print_line(line2) + elif 'MI->getOperand(0).isImm()' in line: + line2 = line.replace('MI->getOperand(0).isImm()', 'MCOperand_isImm(MCInst_getOperand(MI, 0))') + print_line(line2) + elif 'MI->getOperand(1).isImm()' in line: + line2 = line.replace('MI->getOperand(1).isImm()', 'MCOperand_isImm(MCInst_getOperand(MI, 1))') + print_line(line2) + elif 'MI->getOperand(2).isImm()' in line: + line2 = line.replace('MI->getOperand(2).isImm()', 'MCOperand_isImm(MCInst_getOperand(MI, 2))') + print_line(line2) + elif 'MI->getOperand(3).isImm()' in line: + line2 = line.replace('MI->getOperand(3).isImm()', 'MCOperand_isImm(MCInst_getOperand(MI, 3))') + print_line(line2) + elif 'MI->getOperand(4).isImm()' in line: + line2 = line.replace('MI->getOperand(4).isImm()', 'MCOperand_isImm(MCInst_getOperand(MI, 4))') + print_line(line2) + elif 'MI->getOperand(5).isImm()' in line: + line2 = line.replace('MI->getOperand(5).isImm()', 'MCOperand_isImm(MCInst_getOperand(MI, 5))') + print_line(line2) + elif 'MI->getOperand(6).isImm()' in line: + line2 = line.replace('MI->getOperand(6).isImm()', 'MCOperand_isImm(MCInst_getOperand(MI, 6))') + print_line(line2) + elif 'MI->getOperand(7).isImm()' in line: + line2 = line.replace('MI->getOperand(7).isImm()', 'MCOperand_isImm(MCInst_getOperand(MI, 7))') + print_line(line2) + elif 'MI->getOperand(8).isImm()' in line: + line2 = line.replace('MI->getOperand(8).isImm()', 'MCOperand_isImm(MCInst_getOperand(MI, 8))') + print_line(line2) + elif 'MI->getOperand(0).getImm()' in line: + line2 = line.replace('MI->getOperand(0).getImm()', 'MCOperand_getImm(MCInst_getOperand(MI, 0))') + print_line(line2) + elif 'MI->getOperand(1).getImm()' in line: + line2 = line.replace('MI->getOperand(1).getImm()', 'MCOperand_getImm(MCInst_getOperand(MI, 1))') + print_line(line2) + elif 'MI->getOperand(2).getImm()' in line: + line2 = line.replace('MI->getOperand(2).getImm()', 'MCOperand_getImm(MCInst_getOperand(MI, 2))') + print_line(line2) + elif 'MI->getOperand(3).getImm()' in line: + line2 = line.replace('MI->getOperand(3).getImm()', 'MCOperand_getImm(MCInst_getOperand(MI, 3))') + print_line(line2) + elif 'MI->getOperand(4).getImm()' in line: + line2 = line.replace('MI->getOperand(4).getImm()', 'MCOperand_getImm(MCInst_getOperand(MI, 4))') + print_line(line2) + elif 'MI->getOperand(5).getImm()' in line: + line2 = line.replace('MI->getOperand(5).getImm()', 'MCOperand_getImm(MCInst_getOperand(MI, 5))') + print_line(line2) + elif 'MI->getOperand(6).getImm()' in line: + line2 = line.replace('MI->getOperand(6).getImm()', 'MCOperand_getImm(MCInst_getOperand(MI, 6))') + print_line(line2) + elif 'MI->getOperand(7).getImm()' in line: + line2 = line.replace('MI->getOperand(7).getImm()', 'MCOperand_getImm(MCInst_getOperand(MI, 7))') + print_line(line2) + elif 'MI->getOperand(8).getImm()' in line: + line2 = line.replace('MI->getOperand(8).getImm()', 'MCOperand_getImm(MCInst_getOperand(MI, 8))') + print_line(line2) + elif 'MRI.getRegClass(' in line: + classid = extract_paren(line, 'getRegClass(') + operand = extract_paren(line, 'getOperand') + line2 = line.replace('MI->getNumOperands()', 'MCInst_getNumOperands(MI)') + line2 = ' GETREGCLASS_CONTAIN(%s, %s)' %(classid, operand) + if line.endswith('())) {'): + line2 += ') {' + elif line.endswith(' {'): + line2 += ' {' + elif line.endswith(' &&'): + line2 += ' &&' + print_line(line2) + elif 'MI->getOperand(' in line and 'isReg' in line: + operand = extract_paren(line, 'getOperand') + line2 = ' MCOperand_isReg(MCInst_getOperand(MI, %s))' 
%(operand) + # MI->getOperand(1).isReg() && + if line.endswith(' {'): + line2 += ' {' + elif line.endswith(' &&'): + line2 += ' &&' + print_line(line2) + elif 'MI->getOperand(' in line and 'getReg' in line: + line2 = replace_getReg(line) + # one more time + line2 = replace_getReg(line2) + print_line(line2) + elif ' return false;' in line and in_printAliasInstr: + print_line(' return NULL;') + elif 'MCOp.isImm()' in line: + line2 = line.replace('MCOp.isImm()', 'MCOperand_isImm(MCOp)') + print_line(line2) + elif 'MCOp.getImm()' in line: + line2 = line.replace('MCOp.getImm()', 'MCOperand_getImm(MCOp)') + if 'int64_t Val =' in line: + line2 = line2.replace('int64_t Val =', 'Val =') + print_line(line2) + elif 'isSVEMaskOfIdenticalElements<' in line: + if 'int8' in line: + line2 = line.replace('isSVEMaskOfIdenticalElements', 'isSVEMaskOfIdenticalElements8') + line2 = line2.replace('', '') + elif 'int16' in line: + line2 = line.replace('isSVEMaskOfIdenticalElements', 'isSVEMaskOfIdenticalElements16') + line2 = line2.replace('', '') + elif 'int32' in line: + line2 = line.replace('isSVEMaskOfIdenticalElements', 'isSVEMaskOfIdenticalElements32') + line2 = line2.replace('', '') + else: + line2 = line.replace('isSVEMaskOfIdenticalElements', 'isSVEMaskOfIdenticalElements64') + line2 = line2.replace('', '') + print_line(line2) + elif 'switch (PredicateIndex) {' in line: + print_line(' int64_t Val;') + print_line(line) + elif 'unsigned I = 0;' in line and in_printAliasInstr: + print_line(""" + tmpString = cs_strdup(AsmString); + + while (AsmString[I] != ' ' && AsmString[I] != '\\t' && + AsmString[I] != '$' && AsmString[I] != '\\0') + ++I; + + tmpString[I] = 0; + SStream_concat0(OS, tmpString); + + if (AsmString[I] != '\\0') { + if (AsmString[I] == ' ' || AsmString[I] == '\\t') { + SStream_concat0(OS, " "); + ++I; + } + + do { + if (AsmString[I] == '$') { + ++I; + if (AsmString[I] == (char)0xff) { + ++I; + OpIdx = AsmString[I++] - 1; + PrintMethodIdx = AsmString[I++] - 1; + printCustomAliasOperand(MI, OpIdx, PrintMethodIdx, OS); + } else + printOperand(MI, (unsigned)(AsmString[I++]) - 1, OS); + } else { + SStream_concat1(OS, AsmString[I++]); + } + } while (AsmString[I] != '\\0'); + } + + return tmpString; +} + """) + in_printAliasInstr = False + # skip next few lines + skip_printing = True + elif '::printCustomAliasOperand' in line: + # print again + skip_printing = False + print_line('static void printCustomAliasOperand(') + elif 'const MCSubtargetInfo &STI' in line: + pass + elif 'const MCInst *MI' in line: + line2 = line.replace('const MCInst *MI', 'MCInst *MI') + print_line(line2) + elif 'llvm_unreachable("' in line: + if 'default: ' in line: + print_line(' default:') + elif 'llvm_unreachable("Unknown MCOperandPredicate kind")' in line: + print_line(' return false; // never reach') + else: + pass + elif 'raw_ostream &' in line: + line2 = line.replace('raw_ostream &', 'SStream *') + if line2.endswith(' {'): + line2 = line2.replace(' {', '\n{') + print_line(line2) + elif 'printPredicateOperand(' in line and 'STI, ' in line: + line2 = line.replace('STI, ', '') + print_line(line2) + elif '// Fragment ' in line: + # // Fragment 0 encoded into 6 bits for 51 unique commands. 
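+            # e.g. "// Fragment 0 encoded into 6 bits for 51 unique commands." -> fragment_no = "0"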
+ tmp = line.strip().split(' ') + fragment_no = tmp[2] + print_line(line) + elif ('switch ((' in line or 'if ((' in line) and 'Bits' in line: + # switch ((Bits >> 14) & 63) { + bits = line.strip() + bits = bits.replace('switch ', '') + bits = bits.replace('if ', '') + bits = bits.replace('{', '') + bits = bits.strip() + print_line(' // printf("Fragment %s: %%"PRIu64"\\n", %s);' %(fragment_no, bits)) + print_line(line) + elif not skip_printing: + print_line(line) + + if line == ' };': + if need_endif and not in_getRegisterName: + # endif only for AsmStrs when we are not inside getRegisterName() + print_line("#endif") + need_endif = False + elif 'return AsmStrs+RegAsmOffset[RegNo-1];' in line: + if in_getRegisterName: + # return NULL for register name on Diet mode + print_line("#else") + print_line(" return NULL;") + print_line("#endif") + print_line("}") + need_endif = False + in_getRegisterName = False + # skip 1 line + skip_line = 1 + elif line == ' }': + # ARM64 + if in_getRegisterName: + # return NULL for register name on Diet mode + print_line("#else") + print_line(" return NULL;") + print_line("#endif") + print_line("}") + need_endif = False + in_getRegisterName = False + # skip 1 line + skip_line = 1 + elif 'default:' in line: + # ARM64 + if in_getRegisterName: + # get the size of RegAsmOffsetvreg[] + print_line(" return (const char *)(sizeof(RegAsmOffsetvreg)/sizeof(RegAsmOffsetvreg[0]));") + + +f1.close() +f2.close() diff --git a/capstone/suite/synctools/compare_mapping_insn.py b/capstone/suite/synctools/compare_mapping_insn.py new file mode 100755 index 000000000..7477be811 --- /dev/null +++ b/capstone/suite/synctools/compare_mapping_insn.py @@ -0,0 +1,38 @@ +#!/usr/bin/python +# compare instructions in 2 files of MappingInsn.inc +# find instructions in MappingInsn1, that does not exist in MappingInsn2 +# by Nguyen Anh Quynh, 2019 + +import sys + +if len(sys.argv) == 1: + print("Syntax: %s " %sys.argv[0]) + sys.exit(1) + +f = open(sys.argv[1]) +mapping1 = f.readlines() +f.close() + +f = open(sys.argv[2]) +mapping2 = f.readlines() +f.close() + +insn1 = [] +for line in mapping1: + if 'X86_INS_' in line: + tmp = line.split(',') + insn_id = tmp[0].strip() + insn1.append(insn_id) + +insn2 = [] +for line in mapping2: + if 'X86_INS_' in line: + tmp = line.split(',') + insn_id = tmp[0].strip() + insn2.append(insn_id) + +for insn_id in insn1: + if not insn_id in insn2: + print("instruction %s is not in list 2" %insn_id) + + diff --git a/capstone/suite/synctools/disassemblertables-arch.py b/capstone/suite/synctools/disassemblertables-arch.py new file mode 100755 index 000000000..55db77685 --- /dev/null +++ b/capstone/suite/synctools/disassemblertables-arch.py @@ -0,0 +1,258 @@ +#!/usr/bin/python +# convert LLVM GenDisassemblerTables.inc for Capstone disassembler. +# this just adds a header +# by Nguyen Anh Quynh, 2019 + +import sys + +if len(sys.argv) == 1: + print("Syntax: %s " %sys.argv[0]) + sys.exit(1) + +f = open(sys.argv[1]) +lines = f.readlines() +f.close() + +print("/* Capstone Disassembly Engine, http://www.capstone-engine.org */") +print("/* By Nguyen Anh Quynh , 2013-2019 */") +print("/* Automatically generated file, do not edit! */\n") +print('#include "../../MCInst.h"') +print('#include "../../LEB128.h"') +print("") + +print(""" +// Helper function for extracting fields from encoded instructions. 
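+// e.g. with insn = 0xE1A01002, startBit = 12, numBits = 4 the field mask is 0xF000
+// and the extracted value is 1.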
+ +//#if defined(_MSC_VER) && !defined(__clang__) +//__declspec(noinline) +//#endif + +#define FieldFromInstruction(fname, InsnType) \\ +static InsnType fname(InsnType insn, unsigned startBit, unsigned numBits) \\ +{ \\ + InsnType fieldMask; \\ + if (numBits == sizeof(InsnType) * 8) \\ + fieldMask = (InsnType)(-1LL); \\ + else \\ + fieldMask = (((InsnType)1 << numBits) - 1) << startBit; \\ + return (insn & fieldMask) >> startBit; \\ +} +""") + + +# extract text between <> +# printSVERegOp<'q'> +def extract_brackets(line): + return line[line.index('<')+1 : line.index('>')] + +# delete text between <>, including <> +# printSVERegOp<'q'> +def del_brackets(line): + return line[:line.index('<')] + line[line.index('>') + 1:] + + +# skip printing some lines? +skip_print = True +# adding slash at the end of the line for C macro? +adding_slash = False +# skip LLVM_DEBUG +llvm_debug = False + +def print_line(line): + if skip_print is True: + return + if adding_slash: + # skip blank line + if (len(line.strip()) == 0): + return + # // must be handled + if '//' in line: + line = line.replace('//', '/*') + line += ' */' + print(line + ' \\') + else: + print(line) + + +for line in lines: + line2 = line.rstrip() + + if '#include ' in line2: + continue + + # skip until the first decoder table + elif skip_print and 'static const uint8_t DecoderTable' in line2: + skip_print = False + + elif 'End llvm namespace' in line2: + # done + break + + elif 'llvm_unreachable' in line2: + line2 = line2.replace('llvm_unreachable', '/* llvm_unreachable') + line2 += '*/ ' + if '"Invalid index!"' in line2: + pass + #line2 += '\n return true;' + + elif 'Bits[' in line2: + if sys.argv[2] == 'ARM': + line2 = line2.replace('Bits[', 'ARM_getFeatureBits(MI->csh->mode, ') + line2 = line2.replace(']', ')') + elif sys.argv[2] == 'AArch64': + line2 = line2.replace('Bits[', 'AArch64_getFeatureBits(') + line2 = line2.replace(']', ')') + + elif 'static bool checkDecoderPredicate(unsigned Idx, const FeatureBitset& Bits) {' in line2: + line2 = 'static bool checkDecoderPredicate(unsigned Idx, MCInst *MI)\n{' + + elif 'checkDecoderPredicate(PIdx, ' in line2: + line2 = line2.replace(', Bits)', ', MI)') + + elif 'template' in line2: + continue + + elif 'static DecodeStatus decodeToMCInst' in line2: + line2 = '#define DecodeToMCInst(fname, fieldname, InsnType) \\\n' + \ + 'static DecodeStatus fname(DecodeStatus S, unsigned Idx, InsnType insn, MCInst *MI, \\\n' + \ + '\t\tuint64_t Address, bool *Decoder) \\\n{' + adding_slash = True + + elif 'fieldFromInstruction' in line2: + line2 = line2.replace('fieldFromInstruction', 'fieldname') + if 'InsnType FieldValue' in line2: + line2 = line2.replace('InsnType ', '') + + elif 'DecodeComplete = true;' in line2: + # dead code + continue + + elif 'bool &DecodeComplete) {' in line2: + continue + + elif line2 == '}': + if adding_slash: + adding_slash = False + + elif 'static DecodeStatus decodeInstruction' in line2: + line2 = '#define DecodeInstruction(fname, fieldname, decoder, InsnType) \\\n' + \ + 'static DecodeStatus fname(const uint8_t DecodeTable[], MCInst *MI, \\\n' + \ + '\t\tInsnType insn, uint64_t Address) \\\n{ \\\n' + \ + ' unsigned Start, Len, NumToSkip, PIdx, Opc, DecodeIdx; \\\n' + \ + ' InsnType Val, FieldValue, PositiveMask, NegativeMask; \\\n' + \ + ' bool Pred, Fail, DecodeComplete = true; \\\n' + \ + ' uint32_t ExpectedValue;' + + adding_slash = True + print_line(line2) + # skip printing few lines + skip_print = True + elif 'const MCSubtargetInfo &STI' in line2: + skip_print = False + # 
skip this line + continue + elif 'Bits = STI.getFeatureBits()' in line2: + # skip this line + continue + elif 'errs() << ' in line2: + continue + elif 'unsigned Start =' in line2: + line2 = line2.replace('unsigned ', '') + elif 'unsigned Len =' in line2: + line2 = line2.replace('unsigned ', '') + elif 'unsigned Len;' in line2: + continue + elif 'MCInst TmpMI;' in line2: + continue + elif 'bool Pred;' in line2: + continue + elif 'bool DecodeComplete;' in line2: + continue + elif 'unsigned NumToSkip =' in line2: + line2 = line2.replace('unsigned ', '') + elif 'unsigned PIdx =' in line2: + line2 = line2.replace('unsigned ', '') + elif 'unsigned Opc =' in line2: + line2 = line2.replace('unsigned ', '') + elif 'unsigned DecodeIdx =' in line2: + line2 = line2.replace('unsigned ', '') + elif 'InsnType Val =' in line2: + line2 = line2.replace('InsnType ', '') + elif 'bool Fail' in line2: + line2 = line2.replace('bool ', '') + elif 'InsnType PositiveMask =' in line2: + line2 = line2.replace('InsnType ', '') + elif 'InsnType NegativeMask =' in line2: + line2 = line2.replace('InsnType ', '') + elif 'uint32_t ExpectedValue' in line2: + line2 = line2.replace('uint32_t ', '') + elif 'ptrdiff_t Loc = ' in line2: + continue + elif 'LLVM_DEBUG(' in line2: + # just this line? + if ');' in line2: + continue + skip_print = True + llvm_debug = True + continue + elif skip_print and llvm_debug and ');' in line2: + llvm_debug = False + skip_print = False + continue + elif 'decodeToMCInst(' in line2: + line2 = line2.replace('decodeToMCInst', 'decoder') + line2 = line2.replace('DecodeComplete);', '&DecodeComplete);') + line2 = line2.replace(', DisAsm', '') + line2 = line2.replace(', TmpMI', ', MI') + elif 'TmpMI.setOpcode(Opc);' in line2: + line2 = ' MCInst_setOpcode(MI, Opc);' + elif 'MI.setOpcode(Opc);' in line2: + line2 = ' MCInst_setOpcode(MI, Opc);' + elif 'MI.clear();' in line2: + line2 = ' MCInst_clear(MI);' + elif 'assert(' in line2: + line2 = line2.replace('assert(', '/* assert(') + line2 += ' */' + elif 'Check(S, ' in line2: + line2 = line2.replace('Check(S, ', 'Check(&S, ') + if 'DecodeImm8OptLsl<' in line2: + param = extract_brackets(line2) + line2 = del_brackets(line2) + line2 = line2.replace(', Decoder)', ', Decoder, %s)' %param) + elif 'DecodeSImm<' in line2: + param = extract_brackets(line2) + line2 = del_brackets(line2) + line2 = line2.replace(', Decoder)', ', Decoder, %s)' %param) + if 'DecodeComplete = false; ' in line2: + line2 = line2.replace('DecodeComplete = false; ', '') + elif 'decodeUImmOperand<' in line2 or 'decodeSImmOperand<' in line2 : + # decodeUImmOperand<5>(MI, tmp, Address, Decoder) + param = extract_brackets(line2) + line2 = del_brackets(line2) + line2 = line2.replace(', Decoder)', ', Decoder, %s)' %param) + elif 'MI.addOperand(MCOperand::createImm(tmp));' in line2: + line2 = ' MCOperand_CreateImm0(MI, tmp);' + elif 'MI = TmpMI;' in line2: + line2 = '' + #line2 = line2.replace('TmpMI', '&TmpMI') + + line2 = line2.replace('::', '_') + print_line(line2) + +if sys.argv[2] == 'ARM': + print(""" +FieldFromInstruction(fieldFromInstruction_2, uint16_t) +DecodeToMCInst(decodeToMCInst_2, fieldFromInstruction_2, uint16_t) +DecodeInstruction(decodeInstruction_2, fieldFromInstruction_2, decodeToMCInst_2, uint16_t) + +FieldFromInstruction(fieldFromInstruction_4, uint32_t) +DecodeToMCInst(decodeToMCInst_4, fieldFromInstruction_4, uint32_t) +DecodeInstruction(decodeInstruction_4, fieldFromInstruction_4, decodeToMCInst_4, uint32_t) +""") + +if sys.argv[2] in ('AArch64', 'PPC'): + print(""" 
+FieldFromInstruction(fieldFromInstruction_4, uint32_t) +DecodeToMCInst(decodeToMCInst_4, fieldFromInstruction_4, uint32_t) +DecodeInstruction(decodeInstruction_4, fieldFromInstruction_4, decodeToMCInst_4, uint32_t) +""") diff --git a/capstone/suite/synctools/disassemblertables.py b/capstone/suite/synctools/disassemblertables.py new file mode 100755 index 000000000..1816121b4 --- /dev/null +++ b/capstone/suite/synctools/disassemblertables.py @@ -0,0 +1,45 @@ +#!/usr/bin/python +# convert LLVM GenDisassemblerTables.inc for Capstone disassembler. +# for X86, this separate ContextDecision tables into another file +# by Nguyen Anh Quynh, 2019 + +import sys + +if len(sys.argv) == 1: + print("Syntax: %s " %sys.argv[0]) + sys.exit(1) + +f = open(sys.argv[1]) +lines = f.readlines() +f.close() + +f1 = open(sys.argv[2], 'w+') + +f2 = open(sys.argv[3], 'w+') + +f1.write("/* Capstone Disassembly Engine, http://www.capstone-engine.org */\n") +f1.write("/* By Nguyen Anh Quynh , 2013-2019 */\n") +f1.write("\n") + +f2.write("/* Capstone Disassembly Engine, http://www.capstone-engine.org */\n") +f2.write("/* By Nguyen Anh Quynh , 2013-2019 */\n") +f2.write("\n") + +# static const struct ContextDecision x86DisassemblerOneByteOpcodes = { + +# static const struct ContextDecision x86DisassemblerXOP8Opcodes = { + +write_to_f2 = False + +for line in lines: + if 'ContextDecision x86DisassemblerOneByteOpcodes = {' in line: + # done with f1, start writing to f2 + write_to_f2 = True + + if write_to_f2: + f2.write(line) + else: + f1.write(line) + +f1.close() +f2.close() diff --git a/capstone/suite/synctools/disassemblertables2.c b/capstone/suite/synctools/disassemblertables2.c new file mode 100644 index 000000000..f336da103 --- /dev/null +++ b/capstone/suite/synctools/disassemblertables2.c @@ -0,0 +1,176 @@ +/* Capstone Disassembly Engine */ +/* By Nguyen Anh Quynh , 2013-2019 */ + +// this tool is to generate arch/X86/X86GenDisassemblerTables2.inc +// NOTE: this requires updated X86GenDisassemblerTables2 & X86GenDisassemblerTables2 +// generatedy by ./disassemblertables.py & disassemblertables_reduce.py + +#include +#include +#include + +// X86DisassemblerDecoderCommon.h is copied from Capstone src +#include "../../arch/X86/X86DisassemblerDecoderCommon.h" + +#define ARR_SIZE(a) (sizeof(a) / sizeof(a[0])) + +/// Specifies whether a ModR/M byte is needed and (if so) which +/// instruction each possible value of the ModR/M byte corresponds to. Once +/// this information is known, we have narrowed down to a single instruction. +struct ModRMDecision { + uint8_t modrm_type; + uint16_t instructionIDs; +}; + +/// Specifies which set of ModR/M->instruction tables to look at +/// given a particular opcode. +struct OpcodeDecision { + struct ModRMDecision modRMDecisions[256]; +}; + +/// Specifies which opcode->instruction tables to look at given +/// a particular context (set of attributes). Since there are many possible +/// contexts, the decoder first uses CONTEXTS_SYM to determine which context +/// applies given a specific set of attributes. Hence there are only IC_max +/// entries in this table, rather than 2^(ATTR_max). 
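+/// In this generator the per-context OpcodeDecision arrays are compressed:
+/// index_OpcodeDecision() below emits an index table and only the non-empty decisions.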
+struct ContextDecision { + struct OpcodeDecision opcodeDecisions[IC_max]; +}; + +#ifdef CAPSTONE_X86_REDUCE +#include "X86GenDisassemblerTables_reduce2.inc" +#else +#include "X86GenDisassemblerTables2.inc" +#endif + +static void index_OpcodeDecision(const struct OpcodeDecision *decisions, size_t size, + const struct OpcodeDecision *emptyDecision, unsigned int *index_table, + const char *opcodeTable, + const char *index_opcodeTable) +{ + unsigned int i, count = 0; + + for (i = 0; i < size; i++) { + if (memcmp((const void *)&decisions[i], + emptyDecision, sizeof(*emptyDecision)) != 0) { + // this is a non-zero entry + // index_table entry must be != 0 + index_table[i] = count + 1; + count++; + } else + // empty entry + index_table[i] = 0; + } + + printf("static const unsigned char %s[] = {\n", index_opcodeTable); + + for (i = 0; i < size; i++) { + printf(" %u,\n", index_table[i]); + } + + printf("};\n\n"); + + printf("static const struct OpcodeDecision %s[] = {\n", opcodeTable); + for (i = 0; i < size; i++) { + if (index_table[i]) { + unsigned int j; + const struct OpcodeDecision *decision; + + // print out this non-zero entry + printf(" { {\n"); + decision = &decisions[i]; + + for(j = 0; j < ARR_SIZE(emptyDecision->modRMDecisions); j++) { + const char *modrm; + + switch(decision->modRMDecisions[j].modrm_type) { + default: + modrm = "MODRM_ONEENTRY"; + break; + case 1: + modrm = "MODRM_SPLITRM"; + break; + case 2: + modrm = "MODRM_SPLITMISC"; + break; + case 3: + modrm = "MODRM_SPLITREG"; + break; + case 4: + modrm = "MODRM_FULL"; + break; + } + printf(" { %s, %u },\n", + modrm, decision->modRMDecisions[j].instructionIDs); + } + printf(" } },\n"); + } + } + + printf("};\n\n"); +} + + +int main(int argc, char **argv) +{ + unsigned int index_table[ARR_SIZE(x86DisassemblerOneByteOpcodes.opcodeDecisions)]; + const struct OpcodeDecision emptyDecision; + + memset((void *)&emptyDecision, 0, sizeof(emptyDecision)); + + printf("/* Capstone Disassembly Engine, http://www.capstone-engine.org */\n"); + printf("/* By Nguyen Anh Quynh , 2013-2019 */\n"); + printf("\n"); + + index_OpcodeDecision(x86DisassemblerOneByteOpcodes.opcodeDecisions, + ARR_SIZE(x86DisassemblerOneByteOpcodes.opcodeDecisions), + &emptyDecision, index_table, + "x86DisassemblerOneByteOpcodes", + "index_x86DisassemblerOneByteOpcodes"); + + index_OpcodeDecision(x86DisassemblerTwoByteOpcodes.opcodeDecisions, + ARR_SIZE(x86DisassemblerTwoByteOpcodes.opcodeDecisions), + &emptyDecision, index_table, + "x86DisassemblerTwoByteOpcodes", + "index_x86DisassemblerTwoByteOpcodes"); + + index_OpcodeDecision(x86DisassemblerThreeByte38Opcodes.opcodeDecisions, + ARR_SIZE(x86DisassemblerThreeByte38Opcodes.opcodeDecisions), + &emptyDecision, index_table, + "x86DisassemblerThreeByte38Opcodes", + "index_x86DisassemblerThreeByte38Opcodes"); + + index_OpcodeDecision(x86DisassemblerThreeByte3AOpcodes.opcodeDecisions, + ARR_SIZE(x86DisassemblerThreeByte3AOpcodes.opcodeDecisions), + &emptyDecision, index_table, + "x86DisassemblerThreeByte3AOpcodes", + "index_x86DisassemblerThreeByte3AOpcodes"); + +#ifndef CAPSTONE_X86_REDUCE + index_OpcodeDecision(x86DisassemblerXOP8Opcodes.opcodeDecisions, + ARR_SIZE(x86DisassemblerXOP8Opcodes.opcodeDecisions), + &emptyDecision, index_table, + "x86DisassemblerXOP8Opcodes", + "index_x86DisassemblerXOP8Opcodes"); + + index_OpcodeDecision(x86DisassemblerXOP9Opcodes.opcodeDecisions, + ARR_SIZE(x86DisassemblerXOP9Opcodes.opcodeDecisions), + &emptyDecision, index_table, + "x86DisassemblerXOP9Opcodes", + 
"index_x86DisassemblerXOP9Opcodes"); + + index_OpcodeDecision(x86DisassemblerXOPAOpcodes.opcodeDecisions, + ARR_SIZE(x86DisassemblerXOPAOpcodes.opcodeDecisions), + &emptyDecision, index_table, + "x86DisassemblerXOPAOpcodes", + "index_x86DisassemblerXOPAOpcodes"); + + index_OpcodeDecision(x86Disassembler3DNowOpcodes.opcodeDecisions, + ARR_SIZE(x86Disassembler3DNowOpcodes.opcodeDecisions), + &emptyDecision, index_table, + "x86Disassembler3DNowOpcodes", + "index_x86Disassembler3DNowOpcodes"); +#endif + + return 0; +} diff --git a/capstone/suite/synctools/disassemblertables_reduce.py b/capstone/suite/synctools/disassemblertables_reduce.py new file mode 100755 index 000000000..674608c0c --- /dev/null +++ b/capstone/suite/synctools/disassemblertables_reduce.py @@ -0,0 +1,50 @@ +#!/usr/bin/python +# convert LLVM GenDisassemblerTables.inc for Capstone disassembler. +# this just adds a header +# by Nguyen Anh Quynh, 2019 + +import sys + +if len(sys.argv) == 1: + print("Syntax: %s " %sys.argv[0]) + sys.exit(1) + +f = open(sys.argv[1]) +lines = f.readlines() +f.close() + +f1 = open(sys.argv[2], 'w+') + +f2 = open(sys.argv[3], 'w+') + +f1.write("/* Capstone Disassembly Engine, http://www.capstone-engine.org */\n") +f1.write("/* By Nguyen Anh Quynh , 2013-2019 */\n") +f1.write("\n") + +f2.write("/* Capstone Disassembly Engine, http://www.capstone-engine.org */\n") +f2.write("/* By Nguyen Anh Quynh , 2013-2019 */\n") +f2.write("\n") + +# static const struct ContextDecision x86DisassemblerOneByteOpcodes = { + +# static const struct ContextDecision x86DisassemblerXOP8Opcodes = { + +write_to_f2 = False + +for line in lines: + # ignore all tables from XOP onwards + if 'ContextDecision x86DisassemblerXOP8Opcodes = {' in line: + # done + break + + if 'ContextDecision x86DisassemblerOneByteOpcodes = {' in line: + # done with f1, start writing to f2 + write_to_f2 = True + + if write_to_f2: + f2.write(line) + else: + f1.write(line) + +f1.close() +f2.close() diff --git a/capstone/suite/synctools/genall-arch.sh b/capstone/suite/synctools/genall-arch.sh new file mode 100755 index 000000000..f7a633ebe --- /dev/null +++ b/capstone/suite/synctools/genall-arch.sh @@ -0,0 +1,70 @@ +#!/bin/sh +# generate all ARCH*.inc files for Capstone, by Nguyen Anh Quynh + +# Syntax: genall-arch.sh + +# ./genall-arch.sh tablegen ~/projects/capstone.git/arch/ARM ARM +# ./genall-arch.sh tablegen ~/projects/capstone.git/arch/ARM AArch64 +# ./genall-arch.sh tablegen ~/projects/capstone.git/arch/ARM PowerPC + +ARCH=$3 + +echo "Generating ${ARCH}GenAsmWriter.inc" +./asmwriter.py $1/${ARCH}GenAsmWriter.inc ${ARCH}GenAsmWriter.inc ${ARCH}GenRegisterName.inc ${ARCH} + +echo "Generating ${ARCH}MappingInsnName.inc" +./mapping_insn_name-arch.py $1/${ARCH}GenAsmMatcher.inc > ${ARCH}MappingInsnName.inc +#./mapping_insn_name-arch.py tablegen/ARMGenAsmMatcher.inc + +echo "Generating ${ARCH}MappingInsn.inc" +./mapping_insn-arch.py $1/${ARCH}GenAsmMatcher.inc $1/${ARCH}GenInstrInfo.inc $2/${ARCH}MappingInsn.inc > ${ARCH}MappingInsn.inc + +echo "Generating ${ARCH}GenInstrInfo.inc" +./instrinfo-arch.py $1/${ARCH}GenInstrInfo.inc ${ARCH} > ${ARCH}GenInstrInfo.inc + +echo "Generating ${ARCH}GenDisassemblerTables.inc" +./disassemblertables-arch.py $1/${ARCH}GenDisassemblerTables.inc ${ARCH} > ${ARCH}GenDisassemblerTables.inc + +echo "Generating ${ARCH}GenRegisterInfo.inc" +./registerinfo.py $1/${ARCH}GenRegisterInfo.inc ${ARCH} > ${ARCH}GenRegisterInfo.inc + +echo "Generating ${ARCH}GenSubtargetInfo.inc" +./subtargetinfo.py $1/${ARCH}GenSubtargetInfo.inc 
${ARCH} > ${ARCH}GenSubtargetInfo.inc + +case $3 in + ARM) + # for ARM only + echo "Generating ${ARCH}GenAsmWriter-digit.inc" + ./asmwriter.py $1/${ARCH}GenAsmWriter-digit.inc ${ARCH}GenAsmWriter.inc ${ARCH}GenRegisterName_digit.inc ${ARCH} + echo "Generating ${ARCH}GenSystemRegister.inc" + ./systemregister.py $1/${ARCH}GenSystemRegister.inc > ${ARCH}GenSystemRegister.inc + echo "Generating instruction enum in insn_list.txt (for include/capstone/.h)" + ./insn.py $1/${ARCH}GenAsmMatcher.inc $1/${ARCH}GenInstrInfo.inc $2/${ARCH}MappingInsn.inc > insn_list.txt + # then copy these instructions to include/capstone/.h + echo "Generating ${ARCH}MappingInsnOp.inc" + ./mapping_insn_op-arch.py $1/${ARCH}GenAsmMatcher.inc $1/${ARCH}GenInstrInfo.inc $2/${ARCH}MappingInsnOp.inc > ${ARCH}MappingInsnOp.inc + echo "Generating ${ARCH}GenSystemRegister.inc" + ./systemregister.py $1/${ARCH}GenSystemRegister.inc > ${ARCH}GenSystemRegister.inc + ;; + AArch64) + echo "Generating ${ARCH}GenSystemOperands.inc" + ./systemoperand.py tablegen/AArch64GenSystemOperands.inc AArch64GenSystemOperands.inc AArch64GenSystemOperands_enum.inc + echo "Generating instruction enum in insn_list.txt (for include/capstone/.h)" + ./insn.py $1/${ARCH}GenAsmMatcher.inc $1/${ARCH}GenInstrInfo.inc $2/${ARCH}MappingInsn.inc > insn_list.txt + # then copy these instructions to include/capstone/.h + ./arm64_gen_vreg > AArch64GenRegisterV.inc + echo "Generating ${ARCH}MappingInsnOp.inc" + ./mapping_insn_op-arch.py $1/${ARCH}GenAsmMatcher.inc $1/${ARCH}GenInstrInfo.inc $2/${ARCH}MappingInsnOp.inc > ${ARCH}MappingInsnOp.inc + make arm64 + ;; + PowerPC) + # PowerPC + ./insn3.py $1/${ARCH}GenAsmMatcher.inc > insn_list.txt + # then copy these instructions to include/capstone/arch.h + ;; + *) + echo "Generating instruction enum in insn_list.txt (for include/capstone/.h)" + ./insn.py $1/${ARCH}GenAsmMatcher.inc $1/${ARCH}GenInstrInfo.inc $2/${ARCH}MappingInsn.inc > insn_list.txt + ;; +esac + diff --git a/capstone/suite/synctools/genall-full.sh b/capstone/suite/synctools/genall-full.sh new file mode 100755 index 000000000..7cb03c43e --- /dev/null +++ b/capstone/suite/synctools/genall-full.sh @@ -0,0 +1,33 @@ +#!/bin/sh +# generate all X86*.inc files for Capstone, by Nguyen Anh Quynh + +# Syntax: genall.sh + +# ./genall-full.sh tablegen ~/projects/capstone.git/arch/X86 + +echo "Generating GenAsmWriter.inc" +./asmwriter.py $1/X86GenAsmWriter.inc X86GenAsmWriter.inc X86GenRegisterName.inc X86 + +echo "Generating GenAsmWriter1.inc" +./asmwriter.py $1/X86GenAsmWriter1.inc X86GenAsmWriter1.inc X86GenRegisterName1.inc X86 + +echo "Generating instruction enum in insn_list.txt (for include/capstone/.h)" +./insn.py $1/X86GenAsmMatcher.inc $1/X86GenInstrInfo.inc $2/X86MappingInsn.inc > insn_list.txt +# then copy these instructions to include/capstone/x86.h + +echo "Generating MappingInsnName.inc" +./mapping_insn_name.py $1/X86GenAsmMatcher.inc $1/X86GenInstrInfo.inc $2/X86MappingInsn.inc > X86MappingInsnName.inc + +echo "Generating MappingInsn.inc" +./mapping_insn.py $1/X86GenAsmMatcher.inc $1/X86GenInstrInfo.inc $2/X86MappingInsn.inc > X86MappingInsn.inc + +echo "Generating MappingInsnOp.inc" +./mapping_insn_op.py $1/X86GenAsmMatcher.inc $1/X86GenInstrInfo.inc $2/X86MappingInsnOp.inc > X86MappingInsnOp.inc + +echo "Generating GenInstrInfo.inc" +./instrinfo.py $1/X86GenInstrInfo.inc $1/X86GenAsmMatcher.inc > X86GenInstrInfo.inc + +echo "Generating GenDisassemblerTables.inc & X86GenDisassemblerTables2.inc" +./disassemblertables.py 
$1/X86GenDisassemblerTables.inc X86GenDisassemblerTables.inc X86GenDisassemblerTables2.inc + +make x86 diff --git a/capstone/suite/synctools/genall-reduce.sh b/capstone/suite/synctools/genall-reduce.sh new file mode 100755 index 000000000..6f8aafc4c --- /dev/null +++ b/capstone/suite/synctools/genall-reduce.sh @@ -0,0 +1,28 @@ +#!/bin/sh +# generate all X86*reduce.inc files for Capstone, by Nguyen Anh Quynh + +# Syntax: genall.sh + +# ./genall-reduce.sh tablegen ~/projects/capstone.git/arch/X86 + +echo "Generating GenAsmWriter_reduce.inc" +./asmwriter.py $1/X86GenAsmWriter_reduce.inc X86GenAsmWriter_reduce.inc X86GenRegisterName.inc X86 + +echo "Generating GenAsmWriter1_reduce.inc" +./asmwriter.py $1/X86GenAsmWriter1_reduce.inc X86GenAsmWriter1_reduce.inc X86GenRegisterName1.inc X86 + +echo "Generating MappingInsnName_reduce.inc" +./mapping_insn_name.py $1/X86GenAsmMatcher_reduce.inc $1/X86GenInstrInfo_reduce.inc $2/X86MappingInsn_reduce.inc > X86MappingInsnName_reduce.inc + +echo "Generating MappingInsn_reduce.inc" +./mapping_insn.py $1/X86GenAsmMatcher_reduce.inc $1/X86GenInstrInfo_reduce.inc $2/X86MappingInsn_reduce.inc > X86MappingInsn_reduce.inc + +echo "Generating MappingInsnOp_reduce.inc" +./mapping_insn_op.py $1/X86GenAsmMatcher.inc $1/X86GenInstrInfo_reduce.inc $2/X86MappingInsnOp_reduce.inc > X86MappingInsnOp_reduce.inc + +echo "Generating GenInstrInfo_reduce.inc" +./instrinfo.py $1/X86GenInstrInfo_reduce.inc $1/X86GenAsmMatcher_reduce.inc > X86GenInstrInfo_reduce.inc + +echo "Generating GenDisassemblerTables_reduce.inc & GenDisassemblerTables_reduce2.inc" +./disassemblertables_reduce.py $1/X86GenDisassemblerTables_reduce.inc X86GenDisassemblerTables_reduce.inc X86GenDisassemblerTables_reduce2.inc + diff --git a/capstone/suite/synctools/insn.py b/capstone/suite/synctools/insn.py new file mode 100755 index 000000000..9e4612766 --- /dev/null +++ b/capstone/suite/synctools/insn.py @@ -0,0 +1,324 @@ +#!/usr/bin/python +# print list of instructions LLVM inc files, for Capstone disassembler. +# this will be put into capstone/.h +# by Nguyen Anh Quynh, 2019 + +import sys + +if len(sys.argv) == 1: + print("Syntax: %s MappingInsn.inc" %sys.argv[0]) + sys.exit(1) + +# MappingInsn.inc +f = open(sys.argv[3]) +mapping = f.readlines() +f.close() + +print("""/* Capstone Disassembly Engine, http://www.capstone-engine.org */ +/* This is auto-gen data for Capstone disassembly engine (www.capstone-engine.org) */ +/* By Nguyen Anh Quynh , 2013-2019 */ +""") + +# lib/Target/X86/X86GenAsmMatcher.inc +# static const MatchEntry MatchTable1[] = { +# { 0 /* aaa */, X86::AAA, Convert_NoOperands, Feature_Not64BitMode, { }, }, + +# extract insn from GenAsmMatcher Table +# return (arch, mnem, insn_id) +def extract_insn(line): + tmp = line.split(',') + insn_raw = tmp[1].strip() + insn_mnem = tmp[0].split(' ')[3] + # X86 mov.s + if '.' 
in insn_mnem: + tmp = insn_mnem.split('.') + insn_mnem = tmp[0] + tmp = insn_raw.split('::') + arch = tmp[0] + # AArch64 -> ARM64 + if arch.upper() == 'AArch64': + arch = 'ARM64' + return (arch, insn_mnem, tmp[1]) + + + +# extract all insn lines from GenAsmMatcher +# return arch, insn_id_list, insn_lines +def extract_matcher(filename): + f = open(filename) + lines = f.readlines() + f.close() + + match_count = 0 + #insn_lines = [] + insn_id_list = {} + arch = None + first_insn = None + + pattern = None + # first we try to find Table1, or Table0 + for line in lines: + if 'MatchEntry MatchTable0[] = {' in line.strip(): + pattern = 'MatchEntry MatchTable0[] = {' + elif 'MatchEntry MatchTable1[] = {' in line.strip(): + pattern = 'MatchEntry MatchTable1[] = {' + # last pattern, done + break + + # 1st enum is register enum + for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if pattern in line.strip(): + match_count += 1 + #print(line.strip()) + continue + + line = line.strip() + if match_count == 1: + if line == '};': + # done with first enum + break + else: + _arch, mnem, insn_id = extract_insn(line) + if not mnem.startswith('__'): + if not first_insn: + arch, first_insn = _arch, insn_id + if not insn_id in insn_id_list: + # print("***", arch, mnem, insn_id) + insn_id_list[insn_id] = mnem + #insn_lines.append(line) + + #return arch, first_insn, insn_id_list, insn_lines + return arch, first_insn, insn_id_list + +# GenAsmMatcher.inc +#arch, first_insn, insn_id_list, match_lines = extract_matcher(sys.argv[1]) +arch, first_insn, insn_id_list = extract_matcher(sys.argv[1]) +arch = arch.upper() + +#for line in insn_id_list: +# print(line) + + +insn_list = [] +#{ +# X86_AAA, X86_INS_AAA, +##ifndef CAPSTONE_DIET +# { 0 }, { 0 }, { X86_GRP_NOT64BITMODE, 0 }, 0, 0 +##endif +#}, +def print_entry(arch, insn_id, mnem, mapping, mnem_can_be_wrong): + print(arch, insn_id, mnem, mnem_can_be_wrong) + if not mnem_can_be_wrong: + insn = "%s_INS_%s" %(arch.upper(), mnem.upper()) + if insn in insn_list: + return + print("%s," %insn) + insn_list.append(insn) + return + + insn = "%s_%s" %(arch.upper(), insn_id) + # so mnem can be wrong, we need to verify with MappingInsn.inc + # first, try to find this entry in old MappingInsn.inc file + for i in range(len(mapping)): + tmp = mapping[i].split(',') + if tmp[0].strip() == insn: + insn = tmp[1].strip() + if insn in insn_list: + return + #print("==== get below from MappingInsn.inc file: %s" %insn) + print("%s," %insn) + insn_list.append(insn) + return + + +# extract from GenInstrInfo.inc, because the insn id is in order +enum_count = 0 +meet_insn = False + +# GenInstrInfo.inc +f = open(sys.argv[2]) +lines = f.readlines() +f.close() + +count = 0 +last_mnem = None + +# 1st enum is register enum +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if line.strip() == 'enum {': + enum_count += 1 + #print(line.strip()) + continue + + line = line.strip() + if enum_count == 1: + if 'INSTRUCTION_LIST_END' in line: + break + else: + insn = None + if meet_insn: + # enum items + insn = line.split('=')[0].strip() + if 'CALLSTACK' in insn or 'TAILJUMP' in insn: + # pseudo instruction + insn = None + + elif line.startswith(first_insn): + insn = line.split('=')[0].strip() + meet_insn = True + + if insn: + count += 1 + if insn == 'BSWAP16r_BAD': + last_mnem = 'BSWAP' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMOVNP_Fp32': + last_mnem = 'FCMOVNP' + print_entry(arch.upper(), insn, 
last_mnem, mapping, False) + elif insn == 'CMOVP_Fp3': + last_mnem = 'FCMOVP' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMPSDrm_Int': + last_mnem = 'CMPSD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'MOVSX16rm16': + last_mnem = 'MOVSX' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'MOVZX16rm16': + last_mnem = 'MOVZX' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'ST_Fp32m': + last_mnem = 'FST' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMOVNP_Fp64': + last_mnem = 'FCMOVNU' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMPSDrr_Int': + last_mnem = 'CMPSD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMPSSrm_Int': + last_mnem = 'CMPSS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCMPSDrm_Int': + last_mnem = 'VCMPSD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCMPSSrm_Int': + last_mnem = 'VCMPSS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VPCMOVYrrr_REV': + last_mnem = 'VPCMOV' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VRNDSCALESDZm': + last_mnem = 'VRNDSCALESD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VRNDSCALESSZm': + last_mnem = 'VRNDSCALESS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMAXCPDZ128rm': + last_mnem = 'VMAXPD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMAXCPSZ128rm': + last_mnem = 'VMAXPS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMAXCSDZrm': + last_mnem = 'VMAXSD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMAXCSSZrm': + last_mnem = 'VMAXSS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMINCPDZ128rm': + last_mnem = 'VMINPD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMINCPSZ128rm': + last_mnem = 'VMINPS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMINCSDZrm': + last_mnem = 'VMINSD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMINCSSZrm': + last_mnem = 'VMINSS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMOV64toPQIZrm': + last_mnem = 'VMOVQ' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VPERMIL2PDYrr_REV': + last_mnem = 'VPERMILPD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VPERMIL2PSYrr_REV': + last_mnem = 'VPERMILPS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCVTSD2SI64Zrm_Int': + last_mnem = 'VCVTSD2SI' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCVTSD2SSrm_Int': + last_mnem = 'VCVTSD2SS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCVTSS2SI64Zrm_Int': + last_mnem = 'VCVTSS2SI' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCVTTSD2SI64Zrm_Int': + last_mnem = 'VCVTTSD2SI' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCVTTSS2SI64Zrm_Int': + last_mnem = 'VCVTTSS2SI' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFMSUBADD'): + if insn[len('VFMSUBADD')].isdigit(): + last_mnem = insn[:len('VFMSUBADD123xy')] + else: + 
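+                # (the 'VFMSUBADD123xy' / 'VFMSUBADDSS' strings above and below are only length
+                # templates: the mnemonic is recovered by truncating the LLVM instruction ID --
+                # prefix + 3 digits + 2-char type for FMA3-style names that carry an
+                # operand-order code, or just prefix + 2-char type in this else branch)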
last_mnem = insn[:len('VFMSUBADDSS')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFMADDSUB'): + if insn[len('VFMADDSUB')].isdigit(): + last_mnem = insn[:len('VFMADDSUB123xy')] + else: + last_mnem = insn[:len('VFMADDSUBSS')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFMADD'): + if insn[len('VFMADD')].isdigit(): + last_mnem = insn[:len('VFMADD123PD')] + else: + last_mnem = insn[:len('VFMADDPD')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFMSUB'): + if insn[len('VFMSUB')].isdigit(): + last_mnem = insn[:len('VFMSUB123PD')] + else: + last_mnem = insn[:len('VFMSUBPD')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFNMADD'): + if insn[len('VFNMADD')].isdigit(): + last_mnem = insn[:len('VFNMADD123xy')] + else: + last_mnem = insn[:len('VFNMADDSS')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFNMSUB'): + if insn[len('VFNMSUB')].isdigit(): + last_mnem = insn[:len('VFNMSUB123xy')] + else: + last_mnem = insn[:len('VFNMSUBSS')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn in insn_id_list: + # trust old mapping table + last_mnem = insn_id_list[insn].upper() + print_entry(arch.upper(), insn, last_mnem, mapping, False) + else: + # the last option when we cannot find mnem: use the last good mnem + print_entry(arch.upper(), insn, last_mnem, mapping, True) diff --git a/capstone/suite/synctools/insn3.py b/capstone/suite/synctools/insn3.py new file mode 100755 index 000000000..fb7b576c5 --- /dev/null +++ b/capstone/suite/synctools/insn3.py @@ -0,0 +1,104 @@ +#!/usr/bin/python +# print list of instructions LLVM inc files, for Capstone disassembler. +# this will be put into capstone/.h +# by Nguyen Anh Quynh, 2019 + +import sys + +if len(sys.argv) == 1: + print("Syntax: %s " %sys.argv[0]) + sys.exit(1) + +print("""/* Capstone Disassembly Engine, http://www.capstone-engine.org */ +/* This is auto-gen data for Capstone disassembly engine (www.capstone-engine.org) */ +/* By Nguyen Anh Quynh , 2013-2019 */ +""") + +# lib/Target/X86/X86GenAsmMatcher.inc +# static const MatchEntry MatchTable1[] = { +# { 0 /* aaa */, X86::AAA, Convert_NoOperands, Feature_Not64BitMode, { }, }, + +# extract insn from GenAsmMatcher Table +# return (arch, mnem, insn_id) +def extract_insn(line): + tmp = line.split(',') + insn_raw = tmp[1].strip() + insn_mnem = tmp[0].split(' ')[3] + # X86 mov.s + if '.' 
in insn_mnem: + tmp = insn_mnem.split('.') + insn_mnem = tmp[0] + tmp = insn_raw.split('::') + arch = tmp[0] + # AArch64 -> ARM64 + if arch.upper() == 'AARCH64': + arch = 'ARM64' + return (arch, insn_mnem, tmp[1]) + + + +# extract all insn lines from GenAsmMatcher +# return arch, first_insn, insn_id_list +def extract_matcher(filename): + f = open(filename) + lines = f.readlines() + f.close() + + match_count = 0 + mnem_list = [] + insn_id_list = {} + arch = None + first_insn = None + + pattern = None + # first we try to find Table1, or Table0 + for line in lines: + if 'MatchEntry MatchTable0[] = {' in line.strip(): + pattern = 'MatchEntry MatchTable0[] = {' + elif 'MatchEntry MatchTable1[] = {' in line.strip(): + pattern = 'MatchEntry MatchTable1[] = {' + # last pattern, done + break + + # 1st enum is register enum + for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if pattern in line.strip(): + match_count += 1 + #print(line.strip()) + continue + + line = line.strip() + if match_count == 1: + if line == '};': + # done with first enum + break + else: + _arch, mnem, insn_id = extract_insn(line) + # skip pseudo instructions + if not mnem.startswith('__'): + # PPC + if mnem.endswith('-') or mnem.endswith('+'): + mnem = mnem[:-1] + + if not first_insn: + arch, first_insn = _arch, insn_id + + if not insn_id in insn_id_list: + # save this + insn_id_list[insn_id] = mnem + + if not mnem in mnem_list: + print("%s_INS_%s," %(arch, mnem.upper())) + mnem_list.append(mnem) + + #return arch, first_insn, insn_id_list + return arch, first_insn, insn_id_list + +# GenAsmMatcher.inc +#arch, first_insn, insn_id_list, match_lines = extract_matcher(sys.argv[1]) +arch, first_insn, insn_id_list = extract_matcher(sys.argv[1]) diff --git a/capstone/suite/synctools/insn_check.py b/capstone/suite/synctools/insn_check.py new file mode 100755 index 000000000..6fe5e57b5 --- /dev/null +++ b/capstone/suite/synctools/insn_check.py @@ -0,0 +1,25 @@ +#!/usr/bin/python +# check MappingInsn.inc to find potential incorrect mapping - for Capstone disassembler. +# by Nguyen Anh Quynh, 2019 + +import sys + +if len(sys.argv) == 1: + print("Syntax: %s " %sys.argv[0]) + sys.exit(1) + +# ARM_CMPri, ARM_INS_CMN, +f = open(sys.argv[1]) +lines = f.readlines() +f.close() + +for line in lines: + if '_INS_' in line: + tmp = line.strip().split(',') + if len(tmp) == 3 and tmp[2] == '': + id_private = tmp[0].strip() + id_public = tmp[1].strip() + pos = id_public.find('_INS_') + mnem = id_public[pos + len('_INS_'):] + if not mnem in id_private: + print("%s -> %s" %(id_private, id_public)) diff --git a/capstone/suite/synctools/instrinfo-arch.py b/capstone/suite/synctools/instrinfo-arch.py new file mode 100755 index 000000000..fed572eaa --- /dev/null +++ b/capstone/suite/synctools/instrinfo-arch.py @@ -0,0 +1,161 @@ +#!/usr/bin/python +# convert LLVM GenInstrInfo.inc for Capstone disassembler. +# by Nguyen Anh Quynh, 2019 + +import sys + +if len(sys.argv) == 1: + print("Syntax: %s " %sys.argv[0]) + sys.exit(1) + + +# lib/Target/X86/X86GenAsmMatcher.inc +# static const MatchEntry MatchTable1[] = { +# { 0 /* aaa */, X86::AAA, Convert_NoOperands, Feature_Not64BitMode, { }, }, + +# return (arch, mnem) +def extract_insn(line): + tmp = line.split(',') + insn_raw = tmp[1].strip() + insn_mnem = tmp[0].split(' ')[3] + # X86 mov.s + if '.' 
in insn_mnem: + tmp = insn_mnem.split('.') + insn_mnem = tmp[0] + tmp = insn_raw.split('::') + arch = tmp[0] + # AArch64 -> ARM64 + if arch.upper() == 'AArch64': + arch = 'ARM64' + return (arch, insn_mnem) + +# get (arch, first insn) from MatchTable +def get_first_insn(filename): + f = open(filename) + lines = f.readlines() + f.close() + count = 0 + for line in lines: + line = line.strip() + + if len(line) == 0: + continue + + # Intel syntax in Table1 + if 'MatchEntry MatchTable1[] = {' in line: + count += 1 + #print(line.strip()) + continue + + if count == 1: + arch, mnem = extract_insn(line) + return (arch, mnem) + + return (None, None) + + +#arch, first_insn = get_first_insn(sys.argv[2]) +#first_insn = first_insn.upper() +#print(arch, first_insn) + +arch = sys.argv[2].upper() + +if arch.upper() == 'AARCH64': + arch = 'AArch64' +elif arch.upper() == 'ARM64': + arch = 'AArch64' + +print(""" +/* Capstone Disassembly Engine, http://www.capstone-engine.org */ +/* By Nguyen Anh Quynh , 2013-2019 */ + +/*===- TableGen'erated file -------------------------------------*- C++ -*-===*\ +|* *| +|* Target Instruction Enum Values and Descriptors *| +|* *| +|* Automatically generated file, do not edit! *| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifdef GET_INSTRINFO_ENUM +#undef GET_INSTRINFO_ENUM +""") + +enum_count = 0 + +f = open(sys.argv[1]) +lines = f.readlines() +f.close() + +# 1st enum is register enum +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if line.strip() == 'enum {': + enum_count += 1 + print(line.strip()) + continue + + line = line.strip() + if enum_count == 1: + if line == '};': + # done with first enum + break + else: + # skip pseudo instructions + if '__' in line or 'setjmp' in line or 'longjmp' in line or 'Pseudo' in line: + pass + else: + print("\t%s_%s" %(arch, line)) + +print('};\n') + +print("#endif // GET_INSTRINFO_ENUM") + +if arch == 'ARM64': + sys.exit(0) + +print("") +print("#ifdef GET_INSTRINFO_MC_DESC") +print("#undef GET_INSTRINFO_MC_DESC") +print("") +print("#define nullptr 0") +print("") + +in_insts = False + +for line in lines: + if line.strip() == '': + continue + + line = line.rstrip() + + if 'static const MCOperandInfo ' in line: + line2 = line.replace('::', '_') + print(line2) + + elif 'Insts[] = {' in line: + # extern const MCInstrDesc ARMInsts[] = { + line2 = line.replace('extern const ', 'static const ') + print("") + print(line2) + in_insts = True + + elif in_insts: + if line == '};': + print(line) + break + # { 0, 1, 1, 0, 0, 0|(1ULL< " %sys.argv[0]) + sys.exit(1) + + +# lib/Target/X86/X86GenAsmMatcher.inc +# static const MatchEntry MatchTable1[] = { +# { 0 /* aaa */, X86::AAA, Convert_NoOperands, Feature_Not64BitMode, { }, }, + +# return (arch, mnem) +def extract_insn(line): + tmp = line.split(',') + insn_raw = tmp[1].strip() + insn_mnem = tmp[0].split(' ')[3] + # X86 mov.s + if '.' 
in insn_mnem: + tmp = insn_mnem.split('.') + insn_mnem = tmp[0] + tmp = insn_raw.split('::') + arch = tmp[0] + # AArch64 -> ARM64 + if arch.upper() == 'AArch64': + arch = 'ARM64' + return (arch, insn_mnem) + +# get (arch, first insn) from MatchTable +def get_first_insn(filename): + f = open(filename) + lines = f.readlines() + f.close() + count = 0 + for line in lines: + line = line.strip() + + if len(line) == 0: + continue + + # Intel syntax in Table1 + if 'MatchEntry MatchTable1[] = {' in line: + count += 1 + #print(line.strip()) + continue + + if count == 1: + arch, mnem = extract_insn(line) + return (arch, mnem) + + return (None, None) + + +arch, first_insn = get_first_insn(sys.argv[2]) +first_insn = first_insn.upper() +arch = arch.upper() +#print(arch, first_insn) + +print(""" +/* Capstone Disassembly Engine, http://www.capstone-engine.org */ +/* By Nguyen Anh Quynh , 2013-2019 */ + +/*===- TableGen'erated file -------------------------------------*- C++ -*-===*\ +|* *| +|* Target Instruction Enum Values and Descriptors *| +|* *| +|* Automatically generated file, do not edit! *| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifdef GET_INSTRINFO_ENUM +#undef GET_INSTRINFO_ENUM +""") + +enum_count = 0 +meet_insn = False + +f = open(sys.argv[1]) +lines = f.readlines() +f.close() + +# 1st enum is register enum +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if line.strip() == 'enum {': + enum_count += 1 + print(line.strip()) + continue + + line = line.strip() + if enum_count == 1: + if line == '};': + # done with first enum + break + else: + insn = None + if meet_insn: + # enum items + insn = line + elif line.startswith(first_insn): + insn = line + meet_insn = True + if insn: + print("\t%s_%s" %(arch, line)) + +print('};\n') + +print("#endif // GET_INSTRINFO_ENUM") diff --git a/capstone/suite/synctools/mapping_insn-arch.py b/capstone/suite/synctools/mapping_insn-arch.py new file mode 100755 index 000000000..0bf5a30ec --- /dev/null +++ b/capstone/suite/synctools/mapping_insn-arch.py @@ -0,0 +1,362 @@ +#!/usr/bin/python +# print MappingInsn.inc file from LLVM GenAsmMatcher.inc, for Capstone disassembler. +# by Nguyen Anh Quynh, 2019 + +import sys + +if len(sys.argv) == 1: + print("Syntax: %s MappingInsn.inc" %sys.argv[0]) + sys.exit(1) + +f = open(sys.argv[3]) +mapping = f.readlines() +f.close() + +print("""/* Capstone Disassembly Engine, http://www.capstone-engine.org */ +/* This is auto-gen data for Capstone disassembly engine (www.capstone-engine.org) */ +/* By Nguyen Anh Quynh , 2013-2019 */ +""") + +# lib/Target/X86/X86GenAsmMatcher.inc +# static const MatchEntry MatchTable1[] = { +# { 0 /* aaa */, X86::AAA, Convert_NoOperands, Feature_Not64BitMode, { }, }, + +# extract insn from GenAsmMatcher Table +# return (arch, mnem, insn_id) +def extract_insn(line): + tmp = line.split(',') + insn_raw = tmp[1].strip() + insn_mnem = tmp[0].split(' ')[3] + # X86 mov.s + if '.' 
in insn_mnem: + tmp = insn_mnem.split('.') + insn_mnem = tmp[0] + tmp = insn_raw.split('::') + arch = tmp[0] + # AArch64 -> ARM64 + #if arch.upper() == 'AARCH64': + # arch = 'ARM64' + return (arch, insn_mnem, tmp[1]) + + +# extract all insn lines from GenAsmMatcher +# return arch, first_insn, insn_id_list +def extract_matcher(filename): + f = open(filename) + lines = f.readlines() + f.close() + + match_count = 0 + insn_id_list = {} + arch = None + first_insn = None + + pattern = None + # first we try to find Table1, or Table0 + for line in lines: + if 'MatchEntry MatchTable0[] = {' in line.strip(): + pattern = 'MatchEntry MatchTable0[] = {' + elif 'AArch64::' in line and pattern: + # We do not care about Apple Assembly + break + elif 'MatchEntry MatchTable1[] = {' in line.strip(): + pattern = 'MatchEntry MatchTable1[] = {' + # last pattern, done + break + + for line in lines: + line = line.rstrip() + + # skip empty line + if len(line.strip()) == 0: + continue + + if pattern in line.strip(): + match_count += 1 + #print(line.strip()) + continue + + line = line.strip() + if match_count == 1: + if line == '};': + # done with first enum + break + else: + _arch, mnem, insn_id = extract_insn(line) + # skip pseudo instructions + if not mnem.startswith('__'): + # PPC + if mnem.endswith('-') or mnem.endswith('+'): + mnem = mnem[:-1] + + if not first_insn: + arch, first_insn = _arch, insn_id + + if not insn_id in insn_id_list: + # save this + insn_id_list[insn_id] = mnem + + #return arch, first_insn, insn_id_list + return arch, first_insn, insn_id_list + + +#arch, first_insn, insn_id_list, match_lines = extract_matcher(sys.argv[1]) +arch, first_insn, insn_id_list = extract_matcher(sys.argv[1]) +#arch = arch.upper() +#print("first insn = %s" %first_insn) + +#for line in insn_id_list: +# print(line) + + +#{ +# X86_AAA, X86_INS_AAA, +##ifndef CAPSTONE_DIET +# { 0 }, { 0 }, { X86_GRP_NOT64BITMODE, 0 }, 0, 0 +##endif +#}, +def print_entry(arch, insn_id, mnem, mapping, mnem_can_be_wrong): + #insn = "%s_%s" %(arch.upper(), insn_id) + insn = "%s_%s" %(arch, insn_id) + arch1 = arch + if arch.upper() == 'AARCH64': + arch1 = 'ARM64' + #if '64' in insn_id: + # is64bit = '1' + #else: + # is64bit = '0' + # first, try to find this entry in old MappingInsn.inc file + for i in range(len(mapping)): + tmp = mapping[i].split(',') + if tmp[0].strip() == insn: + if not mnem_can_be_wrong: + print(''' +{ +\t%s, %s_INS_%s, +#ifndef CAPSTONE_DIET +\t%s +#endif +},'''% (insn, arch1, mnem, mapping[i + 2].strip())) + else: # ATTENTION: mnem can be wrong + if not tmp[1].endswith(mnem): + #print("======== cannot find %s, mapping to %s (instead of %s)" %(insn, tmp[1].strip(), mnem)) + pass + print(''' +{ +\t%s, %s, +#ifndef CAPSTONE_DIET +\t%s +#endif +},'''% (insn, tmp[1].strip(), mapping[i + 2].strip())) + + return + + if mnem_can_be_wrong: + #print("======== CANNOT FIND %s, mapping to %s" %(insn, mnem)) + return + pass + + # this insn does not exist in mapping table + print(''' +{ +\t%s, %s_INS_%s, +#ifndef CAPSTONE_DIET +\t{ 0 }, { 0 }, { 0 }, 0, 0 +#endif +},'''% (insn, arch1, mnem)) + + +# extract from GenInstrInfo.inc, because the insn id is in order +enum_count = 0 +meet_insn = False + +f = open(sys.argv[2]) +lines = f.readlines() +f.close() + +count = 0 +last_mnem = None + +# 1st enum is register enum +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + # skip pseudo instructions + if len(line.strip()) == 0: + continue + + if line.strip() == 'enum {': + enum_count += 1 + 
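+        # only the first 'enum {' block of GenInstrInfo.inc is walked below: it lists the
+        # instruction IDs in opcode order and ends at INSTRUCTION_LIST_END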
#print(line.strip()) + continue + + line = line.strip() + if enum_count == 1: + # skip pseudo instructions + if '__' in line or 'setjmp' in line or 'longjmp' in line or 'Pseudo' in line: + continue + elif 'INSTRUCTION_LIST_END' in line: + break + else: + insn = line.split('=')[0].strip() + ''' + insn = None + if meet_insn: + # enum items + insn = line.split('=')[0].strip() + if 'CALLSTACK' in insn or 'TAILJUMP' in insn: + # pseudo instruction + insn = None + elif line.startswith(first_insn): + insn = line.split('=')[0].strip() + meet_insn = True + + if insn: + count += 1 + if insn == 'BSWAP16r_BAD': + last_mnem = 'BSWAP' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMOVNP_Fp32': + last_mnem = 'FCMOVNP' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMOVP_Fp3': + last_mnem = 'FCMOVP' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMPSDrm_Int': + last_mnem = 'CMPSD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'MOVSX16rm16': + last_mnem = 'MOVSX' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'MOVZX16rm16': + last_mnem = 'MOVZX' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'ST_Fp32m': + last_mnem = 'FST' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMOVNP_Fp64': + last_mnem = 'FCMOVNU' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMPSDrr_Int': + last_mnem = 'CMPSD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMPSSrm_Int': + last_mnem = 'CMPSS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCMPSDrm_Int': + last_mnem = 'VCMPSD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCMPSSrm_Int': + last_mnem = 'VCMPSS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VPCMOVYrrr_REV': + last_mnem = 'VPCMOV' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VRNDSCALESDZm': + last_mnem = 'VRNDSCALESD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VRNDSCALESSZm': + last_mnem = 'VRNDSCALESS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMAXCPDZ128rm': + last_mnem = 'VMAXPD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMAXCPSZ128rm': + last_mnem = 'VMAXPS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMAXCSDZrm': + last_mnem = 'VMAXSD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMAXCSSZrm': + last_mnem = 'VMAXSS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMINCPDZ128rm': + last_mnem = 'VMINPD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMINCPSZ128rm': + last_mnem = 'VMINPS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMINCSDZrm': + last_mnem = 'VMINSD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMINCSSZrm': + last_mnem = 'VMINSS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMOV64toPQIZrm': + last_mnem = 'VMOVQ' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VPERMIL2PDYrr_REV': + last_mnem = 'VPERMILPD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VPERMIL2PSYrr_REV': + last_mnem = 'VPERMILPS' + print_entry(arch.upper(), insn, 
last_mnem, mapping, False) + elif insn == 'VCVTSD2SI64Zrm_Int': + last_mnem = 'VCVTSD2SI' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCVTSD2SSrm_Int': + last_mnem = 'VCVTSD2SS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCVTSS2SI64Zrm_Int': + last_mnem = 'VCVTSS2SI' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCVTTSD2SI64Zrm_Int': + last_mnem = 'VCVTTSD2SI' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCVTTSS2SI64Zrm_Int': + last_mnem = 'VCVTTSS2SI' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFMSUBADD'): + if insn[len('VFMSUBADD')].isdigit(): + last_mnem = insn[:len('VFMSUBADD123xy')] + else: + last_mnem = insn[:len('VFMSUBADDSS')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFMADDSUB'): + if insn[len('VFMADDSUB')].isdigit(): + last_mnem = insn[:len('VFMADDSUB123xy')] + else: + last_mnem = insn[:len('VFMADDSUBSS')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFMADD'): + if insn[len('VFMADD')].isdigit(): + last_mnem = insn[:len('VFMADD123PD')] + else: + last_mnem = insn[:len('VFMADDPD')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFMSUB'): + if insn[len('VFMSUB')].isdigit(): + last_mnem = insn[:len('VFMSUB123PD')] + else: + last_mnem = insn[:len('VFMSUBPD')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFNMADD'): + if insn[len('VFNMADD')].isdigit(): + last_mnem = insn[:len('VFNMADD123xy')] + else: + last_mnem = insn[:len('VFNMADDSS')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFNMSUB'): + if insn[len('VFNMSUB')].isdigit(): + last_mnem = insn[:len('VFNMSUB123xy')] + else: + last_mnem = insn[:len('VFNMSUBSS')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + ''' + + if insn in insn_id_list: + # trust old mapping table + last_mnem = insn_id_list[insn].upper() + print_entry(arch, insn, insn_id_list[insn].upper(), mapping, False) + else: + # the last option when we cannot find mnem: use the last good mnem + print_entry(arch, insn, last_mnem, mapping, True) diff --git a/capstone/suite/synctools/mapping_insn.py b/capstone/suite/synctools/mapping_insn.py new file mode 100755 index 000000000..c32ddced0 --- /dev/null +++ b/capstone/suite/synctools/mapping_insn.py @@ -0,0 +1,332 @@ +#!/usr/bin/python +# print MappingInsn.inc file from LLVM GenAsmMatcher.inc, for Capstone disassembler. +# by Nguyen Anh Quynh, 2019 + +import sys + +if len(sys.argv) == 1: + print("Syntax: %s MappingInsn.inc" %sys.argv[0]) + sys.exit(1) + +f = open(sys.argv[3]) +mapping = f.readlines() +f.close() + +print("""/* Capstone Disassembly Engine, http://www.capstone-engine.org */ +/* This is auto-gen data for Capstone disassembly engine (www.capstone-engine.org) */ +/* By Nguyen Anh Quynh , 2013-2019 */ +""") + +# lib/Target/X86/X86GenAsmMatcher.inc +# static const MatchEntry MatchTable1[] = { +# { 0 /* aaa */, X86::AAA, Convert_NoOperands, Feature_Not64BitMode, { }, }, + +# extract insn from GenAsmMatcher Table +# return (arch, mnem, insn_id) +def extract_insn(line): + tmp = line.split(',') + insn_raw = tmp[1].strip() + insn_mnem = tmp[0].split(' ')[3] + # X86 mov.s + if '.' 
in insn_mnem: + tmp = insn_mnem.split('.') + insn_mnem = tmp[0] + tmp = insn_raw.split('::') + arch = tmp[0] + # AArch64 -> ARM64 + if arch.upper() == 'AARCH64': + arch = 'ARM64' + return (arch, insn_mnem, tmp[1]) + + + +# extract all insn lines from GenAsmMatcher +# return arch, insn_id_list, insn_lines +def extract_matcher(filename): + f = open(filename) + lines = f.readlines() + f.close() + + match_count = 0 + count = 0 + #insn_lines = [] + insn_id_list = {} + arch = None + first_insn = None + + # 1st enum is register enum + for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if 'MatchEntry MatchTable1[] = {' in line.strip(): + match_count += 1 + #print(line.strip()) + continue + + line = line.strip() + if match_count == 1: + count += 1 + if line == '};': + # done with first enum + break + else: + _arch, mnem, insn_id = extract_insn(line) + if count == 1: + arch, first_insn = _arch, insn_id + + if not insn_id in insn_id_list: + # print("***", arch, mnem, insn_id) + insn_id_list[insn_id] = mnem + #insn_lines.append(line) + + #return arch, first_insn, insn_id_list, insn_lines + return arch, first_insn, insn_id_list + + +#arch, first_insn, insn_id_list, match_lines = extract_matcher(sys.argv[1]) +arch, first_insn, insn_id_list = extract_matcher(sys.argv[1]) +arch = arch.upper() + +#for line in insn_id_list: +# print(line) + + +#{ +# X86_AAA, X86_INS_AAA, +##ifndef CAPSTONE_DIET +# { 0 }, { 0 }, { X86_GRP_NOT64BITMODE, 0 }, 0, 0 +##endif +#}, +def print_entry(arch, insn_id, mnem, mapping, mnem_can_be_wrong): + insn = "%s_%s" %(arch.upper(), insn_id) + if '64' in insn_id: + is64bit = '1' + else: + is64bit = '0' + # first, try to find this entry in old MappingInsn.inc file + for i in range(len(mapping)): + tmp = mapping[i].split(',') + if tmp[0].strip() == insn: + if not mnem_can_be_wrong: + print(''' +{ +\t%s_%s, %s_INS_%s, %s, +#ifndef CAPSTONE_DIET +\t%s +#endif +},'''% (arch, insn_id, arch, mnem, is64bit, mapping[i + 2].strip())) + else: + if not tmp[1].endswith(mnem): + #print("======== cannot find %s, mapping to %s (instead of %s)" %(insn, tmp[1].strip(), mnem)) + pass + print(''' +{ +\t%s_%s, %s, %s, +#ifndef CAPSTONE_DIET +\t%s +#endif +},'''% (arch, insn_id, tmp[1].strip(), is64bit, mapping[i + 2].strip())) + + return + + if mnem_can_be_wrong: + #print("======== CANNOT FIND %s, mapping to %s" %(insn, mnem)) + pass + + print(''' +{ +\t%s_%s, %s_INS_%s, %s, +#ifndef CAPSTONE_DIET +\t{ 0 }, { 0 }, { 0 }, 0, 0 +#endif +},'''% (arch, insn_id, arch, mnem, is64bit)) + + +# extract from GenInstrInfo.inc, because the insn id is in order +enum_count = 0 +meet_insn = False + +f = open(sys.argv[2]) +lines = f.readlines() +f.close() + + +count = 0 +last_mnem = None + +# 1st enum is register enum +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if line.strip() == 'enum {': + enum_count += 1 + #print(line.strip()) + continue + + line = line.strip() + if enum_count == 1: + if 'INSTRUCTION_LIST_END' in line: + break + else: + insn = None + if meet_insn: + # enum items + insn = line.split('=')[0].strip() + if 'CALLSTACK' in insn or 'TAILJUMP' in insn: + # pseudo instruction + insn = None + elif line.startswith(first_insn): + insn = line.split('=')[0].strip() + meet_insn = True + + if insn: + count += 1 + if insn == 'BSWAP16r_BAD': + last_mnem = 'BSWAP' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMOVNP_Fp32': + last_mnem = 'FCMOVNP' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + 
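+                # the hard-coded cases in this chain pin the mnemonic for IDs (mostly _BAD/_Int/_REV
+                # and similar codegen-only variants) that cannot be looked up reliably in the matcher
+                # table; last_mnem also doubles as the fallback used at the end of the chain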
elif insn == 'CMOVP_Fp3': + last_mnem = 'FCMOVP' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMPSDrm_Int': + last_mnem = 'CMPSD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'MOVSX16rm16': + last_mnem = 'MOVSX' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'MOVZX16rm16': + last_mnem = 'MOVZX' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'ST_Fp32m': + last_mnem = 'FST' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMOVNP_Fp64': + last_mnem = 'FCMOVNU' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMPSDrr_Int': + last_mnem = 'CMPSD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMPSSrm_Int': + last_mnem = 'CMPSS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCMPSDrm_Int': + last_mnem = 'VCMPSD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCMPSSrm_Int': + last_mnem = 'VCMPSS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VPCMOVYrrr_REV': + last_mnem = 'VPCMOV' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VRNDSCALESDZm': + last_mnem = 'VRNDSCALESD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VRNDSCALESSZm': + last_mnem = 'VRNDSCALESS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMAXCPDZ128rm': + last_mnem = 'VMAXPD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMAXCPSZ128rm': + last_mnem = 'VMAXPS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMAXCSDZrm': + last_mnem = 'VMAXSD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMAXCSSZrm': + last_mnem = 'VMAXSS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMINCPDZ128rm': + last_mnem = 'VMINPD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMINCPSZ128rm': + last_mnem = 'VMINPS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMINCSDZrm': + last_mnem = 'VMINSD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMINCSSZrm': + last_mnem = 'VMINSS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMOV64toPQIZrm': + last_mnem = 'VMOVQ' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VPERMIL2PDYrr_REV': + last_mnem = 'VPERMILPD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VPERMIL2PSYrr_REV': + last_mnem = 'VPERMILPS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCVTSD2SI64Zrm_Int': + last_mnem = 'VCVTSD2SI' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCVTSD2SSrm_Int': + last_mnem = 'VCVTSD2SS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCVTSS2SI64Zrm_Int': + last_mnem = 'VCVTSS2SI' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCVTTSD2SI64Zrm_Int': + last_mnem = 'VCVTTSD2SI' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCVTTSS2SI64Zrm_Int': + last_mnem = 'VCVTTSS2SI' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFMSUBADD'): + if insn[len('VFMSUBADD')].isdigit(): + last_mnem = insn[:len('VFMSUBADD123xy')] + else: + last_mnem = insn[:len('VFMSUBADDSS')] 
+ print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFMADDSUB'): + if insn[len('VFMADDSUB')].isdigit(): + last_mnem = insn[:len('VFMADDSUB123xy')] + else: + last_mnem = insn[:len('VFMADDSUBSS')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFMADD'): + if insn[len('VFMADD')].isdigit(): + last_mnem = insn[:len('VFMADD123PD')] + else: + last_mnem = insn[:len('VFMADDPD')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFMSUB'): + if insn[len('VFMSUB')].isdigit(): + last_mnem = insn[:len('VFMSUB123PD')] + else: + last_mnem = insn[:len('VFMSUBPD')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFNMADD'): + if insn[len('VFNMADD')].isdigit(): + last_mnem = insn[:len('VFNMADD123xy')] + else: + last_mnem = insn[:len('VFNMADDSS')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFNMSUB'): + if insn[len('VFNMSUB')].isdigit(): + last_mnem = insn[:len('VFNMSUB123xy')] + else: + last_mnem = insn[:len('VFNMSUBSS')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn in insn_id_list: + # trust old mapping table + last_mnem = insn_id_list[insn].upper() + print_entry(arch.upper(), insn, insn_id_list[insn].upper(), mapping, False) + else: + # the last option when we cannot find mnem: use the last good mnem + print_entry(arch.upper(), insn, last_mnem, mapping, True) diff --git a/capstone/suite/synctools/mapping_insn_name-arch.py b/capstone/suite/synctools/mapping_insn_name-arch.py new file mode 100755 index 000000000..417ea0374 --- /dev/null +++ b/capstone/suite/synctools/mapping_insn_name-arch.py @@ -0,0 +1,104 @@ +#!/usr/bin/python +# print list of instructions LLVM inc files, for Capstone disassembler. +# this will be put into capstone/.h +# by Nguyen Anh Quynh, 2019 + +import sys + +if len(sys.argv) == 1: + print("Syntax: %s " %sys.argv[0]) + sys.exit(1) + +print("""/* Capstone Disassembly Engine, http://www.capstone-engine.org */ +/* This is auto-gen data for Capstone disassembly engine (www.capstone-engine.org) */ +/* By Nguyen Anh Quynh , 2013-2019 */ +""") + +# lib/Target/X86/X86GenAsmMatcher.inc +# static const MatchEntry MatchTable1[] = { +# { 0 /* aaa */, X86::AAA, Convert_NoOperands, Feature_Not64BitMode, { }, }, + +# extract insn from GenAsmMatcher Table +# return (arch, mnem, insn_id) +def extract_insn(line): + tmp = line.split(',') + insn_raw = tmp[1].strip() + insn_mnem = tmp[0].split(' ')[3] + # X86 mov.s + if '.' 
in insn_mnem: + tmp = insn_mnem.split('.') + insn_mnem = tmp[0] + tmp = insn_raw.split('::') + arch = tmp[0] + # AArch64 -> ARM64 + if arch.upper() == 'AARCH64': + arch = 'ARM64' + return (arch, insn_mnem, tmp[1]) + + + +# extract all insn lines from GenAsmMatcher +# return arch, first_insn, insn_id_list +def extract_matcher(filename): + f = open(filename) + lines = f.readlines() + f.close() + + match_count = 0 + mnem_list = [] + insn_id_list = {} + arch = None + first_insn = None + + pattern = None + # first we try to find Table1, or Table0 + for line in lines: + if 'MatchEntry MatchTable0[] = {' in line.strip(): + pattern = 'MatchEntry MatchTable0[] = {' + elif 'MatchEntry MatchTable1[] = {' in line.strip(): + pattern = 'MatchEntry MatchTable1[] = {' + # last pattern, done + break + + # 1st enum is register enum + for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if pattern in line.strip(): + match_count += 1 + #print(line.strip()) + continue + + line = line.strip() + if match_count == 1: + if line == '};': + # done with first enum + break + else: + _arch, mnem, insn_id = extract_insn(line) + # skip pseudo instructions + if not mnem.startswith('__'): + # PPC + if mnem.endswith('-') or mnem.endswith('+'): + mnem = mnem[:-1] + + if not first_insn: + arch, first_insn = _arch, insn_id + + if not insn_id in insn_id_list: + # save this + insn_id_list[insn_id] = mnem + + if not mnem in mnem_list: + print('\t"%s", // %s_INS_%s,' %(mnem.lower(), arch, mnem.upper())) + mnem_list.append(mnem) + + #return arch, first_insn, insn_id_list + return arch, first_insn, insn_id_list + +# GenAsmMatcher.inc +#arch, first_insn, insn_id_list, match_lines = extract_matcher(sys.argv[1]) +arch, first_insn, insn_id_list = extract_matcher(sys.argv[1]) diff --git a/capstone/suite/synctools/mapping_insn_name.py b/capstone/suite/synctools/mapping_insn_name.py new file mode 100755 index 000000000..5f51c3b12 --- /dev/null +++ b/capstone/suite/synctools/mapping_insn_name.py @@ -0,0 +1,314 @@ +#!/usr/bin/python +# print list of instructions LLVM inc files, for Capstone disassembler. +# this will be put into capstone/.h +# by Nguyen Anh Quynh, 2019 + +import sys + +if len(sys.argv) == 1: + print("Syntax: %s MappingInsn.inc" %sys.argv[0]) + sys.exit(1) + +f = open(sys.argv[3]) +mapping = f.readlines() +f.close() + +print("""/* Capstone Disassembly Engine, http://www.capstone-engine.org */ +/* This is auto-gen data for Capstone disassembly engine (www.capstone-engine.org) */ +/* By Nguyen Anh Quynh , 2013-2019 */ +""") + +# lib/Target/X86/X86GenAsmMatcher.inc +# static const MatchEntry MatchTable1[] = { +# { 0 /* aaa */, X86::AAA, Convert_NoOperands, Feature_Not64BitMode, { }, }, + +# extract insn from GenAsmMatcher Table +# return (arch, mnem, insn_id) +def extract_insn(line): + tmp = line.split(',') + insn_raw = tmp[1].strip() + insn_mnem = tmp[0].split(' ')[3] + # X86 mov.s + if '.' 
in insn_mnem: + tmp = insn_mnem.split('.') + insn_mnem = tmp[0] + tmp = insn_raw.split('::') + arch = tmp[0] + # AArch64 -> ARM64 + if arch.upper() == 'AARCH64': + arch = 'ARM64' + return (arch, insn_mnem, tmp[1]) + + + +# extract all insn lines from GenAsmMatcher +# return arch, insn_id_list, insn_lines +def extract_matcher(filename): + f = open(filename) + lines = f.readlines() + f.close() + + match_count = 0 + count = 0 + #insn_lines = [] + insn_id_list = {} + arch = None + first_insn = None + + # 1st enum is register enum + for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if 'MatchEntry MatchTable1[] = {' in line.strip(): + match_count += 1 + #print(line.strip()) + continue + + line = line.strip() + if match_count == 1: + count += 1 + if line == '};': + # done with first enum + break + else: + _arch, mnem, insn_id = extract_insn(line) + if count == 1: + arch, first_insn = _arch, insn_id + + if not insn_id in insn_id_list: + # print("***", arch, mnem, insn_id) + insn_id_list[insn_id] = mnem + #insn_lines.append(line) + + #return arch, first_insn, insn_id_list, insn_lines + return arch, first_insn, insn_id_list + + +#arch, first_insn, insn_id_list, match_lines = extract_matcher(sys.argv[1]) +arch, first_insn, insn_id_list = extract_matcher(sys.argv[1]) +arch = arch.upper() + +#for line in insn_id_list: +# print(line) + + +insn_list = [] +#{ +# X86_AAA, X86_INS_AAA, +##ifndef CAPSTONE_DIET +# { 0 }, { 0 }, { X86_GRP_NOT64BITMODE, 0 }, 0, 0 +##endif +#}, +def print_entry(arch, insn_id, mnem, mapping, mnem_can_be_wrong): + if not mnem_can_be_wrong: + insn = "%s_INS_%s" %(arch.upper(), mnem.upper()) + if insn in insn_list: + return + print('\t"%s", // %s' %(mnem.lower(), insn)) + insn_list.append(insn) + return + + insn = "%s_%s" %(arch.upper(), insn_id) + # so mnem can be wrong, we need to verify with MappingInsn.inc + # first, try to find this entry in old MappingInsn.inc file + for i in range(len(mapping)): + tmp = mapping[i].split(',') + if tmp[0].strip() == insn: + insn = tmp[1].strip() + if insn in insn_list: + return + mnem = insn[len("%s_INS_" %(arch)):] + #print("==== get below from MappingInsn.inc file: %s" %insn) + print('\t"%s", // %s' %(mnem.lower(), insn)) + insn_list.append(insn) + return + + +# extract from GenInstrInfo.inc, because the insn id is in order +enum_count = 0 +meet_insn = False + +f = open(sys.argv[2]) +lines = f.readlines() +f.close() + + +count = 0 +last_mnem = None + +# 1st enum is register enum +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if line.strip() == 'enum {': + enum_count += 1 + #print(line.strip()) + continue + + line = line.strip() + if enum_count == 1: + if 'INSTRUCTION_LIST_END' in line: + break + else: + insn = None + if meet_insn: + # enum items + insn = line.split('=')[0].strip() + if 'CALLSTACK' in insn or 'TAILJUMP' in insn: + # pseudo instruction + insn = None + elif line.startswith(first_insn): + insn = line.split('=')[0].strip() + meet_insn = True + + if insn: + count += 1 + if insn == 'BSWAP16r_BAD': + last_mnem = 'BSWAP' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMOVNP_Fp32': + last_mnem = 'FCMOVNP' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMOVP_Fp3': + last_mnem = 'FCMOVP' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMPSDrm_Int': + last_mnem = 'CMPSD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'MOVSX16rm16': + last_mnem 
= 'MOVSX' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'MOVZX16rm16': + last_mnem = 'MOVZX' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'ST_Fp32m': + last_mnem = 'FST' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMOVNP_Fp64': + last_mnem = 'FCMOVNU' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMPSDrr_Int': + last_mnem = 'CMPSD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMPSSrm_Int': + last_mnem = 'CMPSS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCMPSDrm_Int': + last_mnem = 'VCMPSD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCMPSSrm_Int': + last_mnem = 'VCMPSS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VPCMOVYrrr_REV': + last_mnem = 'VPCMOV' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VRNDSCALESDZm': + last_mnem = 'VRNDSCALESD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VRNDSCALESSZm': + last_mnem = 'VRNDSCALESS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMAXCPDZ128rm': + last_mnem = 'VMAXPD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMAXCPSZ128rm': + last_mnem = 'VMAXPS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMAXCSDZrm': + last_mnem = 'VMAXSD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMAXCSSZrm': + last_mnem = 'VMAXSS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMINCPDZ128rm': + last_mnem = 'VMINPD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMINCPSZ128rm': + last_mnem = 'VMINPS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMINCSDZrm': + last_mnem = 'VMINSD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMINCSSZrm': + last_mnem = 'VMINSS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMOV64toPQIZrm': + last_mnem = 'VMOVQ' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VPERMIL2PDYrr_REV': + last_mnem = 'VPERMILPD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VPERMIL2PSYrr_REV': + last_mnem = 'VPERMILPS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCVTSD2SI64Zrm_Int': + last_mnem = 'VCVTSD2SI' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCVTSD2SSrm_Int': + last_mnem = 'VCVTSD2SS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCVTSS2SI64Zrm_Int': + last_mnem = 'VCVTSS2SI' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCVTTSD2SI64Zrm_Int': + last_mnem = 'VCVTTSD2SI' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCVTTSS2SI64Zrm_Int': + last_mnem = 'VCVTTSS2SI' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFMSUBADD'): + if insn[len('VFMSUBADD')].isdigit(): + last_mnem = insn[:len('VFMSUBADD123xy')] + else: + last_mnem = insn[:len('VFMSUBADDSS')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFMADDSUB'): + if insn[len('VFMADDSUB')].isdigit(): + last_mnem = insn[:len('VFMADDSUB123xy')] + else: + last_mnem = insn[:len('VFMADDSUBSS')] + print_entry(arch.upper(), insn, 
last_mnem, mapping, False) + + elif insn.startswith('VFMADD'): + if insn[len('VFMADD')].isdigit(): + last_mnem = insn[:len('VFMADD123PD')] + else: + last_mnem = insn[:len('VFMADDPD')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFMSUB'): + if insn[len('VFMSUB')].isdigit(): + last_mnem = insn[:len('VFMSUB123PD')] + else: + last_mnem = insn[:len('VFMSUBPD')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFNMADD'): + if insn[len('VFNMADD')].isdigit(): + last_mnem = insn[:len('VFNMADD123xy')] + else: + last_mnem = insn[:len('VFNMADDSS')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFNMSUB'): + if insn[len('VFNMSUB')].isdigit(): + last_mnem = insn[:len('VFNMSUB123xy')] + else: + last_mnem = insn[:len('VFNMSUBSS')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn in insn_id_list: + # trust old mapping table + last_mnem = insn_id_list[insn].upper() + print_entry(arch.upper(), insn, last_mnem, mapping, False) + else: + # the last option when we cannot find mnem: use the last good mnem + print_entry(arch.upper(), insn, last_mnem, mapping, True) diff --git a/capstone/suite/synctools/mapping_insn_op-arch.py b/capstone/suite/synctools/mapping_insn_op-arch.py new file mode 100755 index 000000000..060dbc84e --- /dev/null +++ b/capstone/suite/synctools/mapping_insn_op-arch.py @@ -0,0 +1,379 @@ +#!/usr/bin/python +# print MappingInsn.inc file from LLVM GenAsmMatcher.inc, for Capstone disassembler. +# by Nguyen Anh Quynh, 2019 + +import sys + +if len(sys.argv) == 1: + print("Syntax: %s " %sys.argv[0]) + sys.exit(1) + +f = open(sys.argv[3]) +mapping = f.readlines() +f.close() + +print("""/* Capstone Disassembly Engine, http://www.capstone-engine.org */ +/* This is auto-gen data for Capstone disassembly engine (www.capstone-engine.org) */ +/* By Nguyen Anh Quynh , 2013-2019 */ +""") + +# lib/Target/X86/X86GenAsmMatcher.inc +# static const MatchEntry MatchTable1[] = { +# { 0 /* aaa */, X86::AAA, Convert_NoOperands, Feature_Not64BitMode, { }, }, + +# extract insn from GenAsmMatcher Table +# return (arch, mnem, insn_id) +def extract_insn(line): + tmp = line.split(',') + insn_raw = tmp[1].strip() + insn_mnem = tmp[0].split(' ')[3] + # X86 mov.s + if '.' 
in insn_mnem: + tmp = insn_mnem.split('.') + insn_mnem = tmp[0] + tmp = insn_raw.split('::') + arch = tmp[0] + # AArch64 -> ARM64 + #if arch.upper() == 'AARCH64': + # arch = 'ARM64' + return (arch, insn_mnem, tmp[1]) + + +# extract all insn lines from GenAsmMatcher +# return arch, first_insn, insn_id_list +def extract_matcher(filename): + f = open(filename) + lines = f.readlines() + f.close() + + match_count = 0 + insn_id_list = {} + arch = None + first_insn = None + + pattern = None + # first we try to find Table1, or Table0 + for line in lines: + if 'MatchEntry MatchTable0[] = {' in line.strip(): + pattern = 'MatchEntry MatchTable0[] = {' + elif 'AArch64::' in line and pattern: + # We do not care about Apple Assembly + break + elif 'MatchEntry MatchTable1[] = {' in line.strip(): + pattern = 'MatchEntry MatchTable1[] = {' + # last pattern, done + break + + for line in lines: + line = line.rstrip() + + # skip empty line + if len(line.strip()) == 0: + continue + + if pattern in line.strip(): + match_count += 1 + #print(line.strip()) + continue + + line = line.strip() + if match_count == 1: + if line == '};': + # done with first enum + break + else: + _arch, mnem, insn_id = extract_insn(line) + # skip pseudo instructions + if not mnem.startswith('__'): + if not first_insn: + arch, first_insn = _arch, insn_id + if not insn_id in insn_id_list: + # save this + insn_id_list[insn_id] = mnem + + #return arch, first_insn, insn_id_list + return arch, first_insn, insn_id_list + + +#arch, first_insn, insn_id_list, match_lines = extract_matcher(sys.argv[1]) +arch, first_insn, insn_id_list = extract_matcher(sys.argv[1]) +#arch = arch.upper() + +#for line in insn_id_list: +# print(line) + +#{ /* X86_AAA, X86_INS_AAA: aaa */ +# X86_EFLAGS_UNDEFINED_OF | X86_EFLAGS_UNDEFINED_SF | X86_EFLAGS_UNDEFINED_ZF | X86_EFLAGS_MODIFY_AF | X86_EFLAGS_UNDEFINED_PF | X86_EFLAGS_MODIFY_CF, +# { 0 } +#}, + +#{ /* ARM_ADCri, ARM_INS_ADC: adc${s}${p} $rd, $rn, $imm */ +# { CS_AC_WRITE, CS_AC_READ, 0 } +#}, + +def print_entry(arch, insn_id, mnem, mapping, mnem_can_be_wrong): + insn = "%s_%s" %(arch, insn_id) + arch1 = arch + if arch.upper() == 'AARCH64': + arch1 = 'ARM64' + # first, try to find this entry in old MappingInsn.inc file + for i in range(len(mapping)): + if mapping[i].startswith('{') and '/*' in mapping[i]: + #print(mapping[i]) + tmp = mapping[i].split('/*') + tmp = tmp[1].strip() + tmp = tmp.split(',') + #print("insn2 = |%s|" %tmp.strip()) + if tmp[0].strip() == insn: + if not mnem_can_be_wrong: + if arch.upper() == 'ARM': + print(''' +{\t/* %s, %s_INS_%s: %s */ +\t%s +},'''% (insn, arch1, mnem, mnem.lower(), mapping[i + 1].strip())) + else: # ARM64 + print(''' +{\t/* %s, %s_INS_%s: %s */ +\t%s +\t%s +},'''% (insn, arch, mnem, mnem.lower(), mapping[i + 1].strip(), mapping[i + 2].strip())) + else: + if arch.upper() == 'ARM': + print(''' +{\t/* %s, %s +\t%s +},'''% (insn, ''.join(tmp[1:]), mapping[i + 1].strip())) + else: # ARM64 + print(''' +{\t/* %s, %s +\t%s +\t%s +},'''% (insn, ''.join(tmp[1:]), mapping[i + 1].strip(), mapping[i + 2].strip())) + + return + + if mnem_can_be_wrong: + #print("======== CANNOT FIND %s, mapping to %s" %(insn, mnem)) + return + pass + + # this insn does not exist in mapping table + if arch.upper() == 'ARM': + print(''' +{\t/* %s, %s_INS_%s: %s */ +\t{ 0 } +},'''% (insn, arch1, mnem, mnem.lower())) + else: + print(''' +{\t/* %s, %s_INS_%s: %s */ +\t0, +\t{ 0 } +},'''% (insn, arch, mnem, mnem.lower())) + + +# extract from GenInstrInfo.inc, because the insn id is in order +enum_count = 
0 +meet_insn = False + +f = open(sys.argv[2]) +lines = f.readlines() +f.close() + + +count = 0 +last_mnem = None + + +def is_pseudo_insn(insn, lines): + return False + for line in lines: + tmp = '= %s' %insn + if tmp in line and 'MCID::Pseudo' in line: + return True + return False + + +# 1st enum is register enum +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if line.strip() == 'enum {': + enum_count += 1 + #print(line.strip()) + continue + + line = line.strip() + if enum_count == 1: + # skip pseudo instructions + if '__' in line or 'setjmp' in line or 'longjmp' in line or 'Pseudo' in line: + continue + elif 'INSTRUCTION_LIST_END' in line: + break + else: + insn = line.split('=')[0].strip() + + # skip more pseudo instruction + if is_pseudo_insn(insn, lines): + continue + ''' + insn = None + if meet_insn: + # enum items + insn = line.split('=')[0].strip() + if 'CALLSTACK' in insn or 'TAILJUMP' in insn: + # pseudo instruction + insn = None + elif line.startswith(first_insn): + insn = line.split('=')[0].strip() + meet_insn = True + + if insn: + count += 1 + if insn == 'BSWAP16r_BAD': + last_mnem = 'BSWAP' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMOVNP_Fp32': + last_mnem = 'FCMOVNP' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMOVP_Fp3': + last_mnem = 'FCMOVP' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMPSDrm_Int': + last_mnem = 'CMPSD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'MOVSX16rm16': + last_mnem = 'MOVSX' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'MOVZX16rm16': + last_mnem = 'MOVZX' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'ST_Fp32m': + last_mnem = 'FST' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMOVNP_Fp64': + last_mnem = 'FCMOVNU' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMPSDrr_Int': + last_mnem = 'CMPSD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMPSSrm_Int': + last_mnem = 'CMPSS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCMPSDrm_Int': + last_mnem = 'VCMPSD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCMPSSrm_Int': + last_mnem = 'VCMPSS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VPCMOVYrrr_REV': + last_mnem = 'VPCMOV' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VRNDSCALESDZm': + last_mnem = 'VRNDSCALESD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VRNDSCALESSZm': + last_mnem = 'VRNDSCALESS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMAXCPDZ128rm': + last_mnem = 'VMAXPD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMAXCPSZ128rm': + last_mnem = 'VMAXPS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMAXCSDZrm': + last_mnem = 'VMAXSD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMAXCSSZrm': + last_mnem = 'VMAXSS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMINCPDZ128rm': + last_mnem = 'VMINPD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMINCPSZ128rm': + last_mnem = 'VMINPS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMINCSDZrm': + 
last_mnem = 'VMINSD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMINCSSZrm': + last_mnem = 'VMINSS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMOV64toPQIZrm': + last_mnem = 'VMOVQ' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VPERMIL2PDYrr_REV': + last_mnem = 'VPERMILPD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VPERMIL2PSYrr_REV': + last_mnem = 'VPERMILPS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCVTSD2SI64Zrm_Int': + last_mnem = 'VCVTSD2SI' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCVTSD2SSrm_Int': + last_mnem = 'VCVTSD2SS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCVTSS2SI64Zrm_Int': + last_mnem = 'VCVTSS2SI' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCVTTSD2SI64Zrm_Int': + last_mnem = 'VCVTTSD2SI' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCVTTSS2SI64Zrm_Int': + last_mnem = 'VCVTTSS2SI' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFMSUBADD'): + if insn[len('VFMSUBADD')].isdigit(): + last_mnem = insn[:len('VFMSUBADD123xy')] + else: + last_mnem = insn[:len('VFMSUBADDSS')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFMADDSUB'): + if insn[len('VFMADDSUB')].isdigit(): + last_mnem = insn[:len('VFMADDSUB123xy')] + else: + last_mnem = insn[:len('VFMADDSUBSS')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFMADD'): + if insn[len('VFMADD')].isdigit(): + last_mnem = insn[:len('VFMADD123PD')] + else: + last_mnem = insn[:len('VFMADDPD')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFMSUB'): + if insn[len('VFMSUB')].isdigit(): + last_mnem = insn[:len('VFMSUB123PD')] + else: + last_mnem = insn[:len('VFMSUBPD')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFNMADD'): + if insn[len('VFNMADD')].isdigit(): + last_mnem = insn[:len('VFNMADD123xy')] + else: + last_mnem = insn[:len('VFNMADDSS')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFNMSUB'): + if insn[len('VFNMSUB')].isdigit(): + last_mnem = insn[:len('VFNMSUB123xy')] + else: + last_mnem = insn[:len('VFNMSUBSS')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + ''' + + if insn in insn_id_list: + # trust old mapping table + last_mnem = insn_id_list[insn].upper() + print_entry(arch, insn, insn_id_list[insn].upper(), mapping, False) + else: + #pass + # the last option when we cannot find mnem: use the last good mnem + print_entry(arch, insn, last_mnem, mapping, True) diff --git a/capstone/suite/synctools/mapping_insn_op.py b/capstone/suite/synctools/mapping_insn_op.py new file mode 100755 index 000000000..95c46f1be --- /dev/null +++ b/capstone/suite/synctools/mapping_insn_op.py @@ -0,0 +1,318 @@ +#!/usr/bin/python +# print MappingInsn.inc file from LLVM GenAsmMatcher.inc, for Capstone disassembler. 
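+# Typical invocation, mirroring genall-reduce.sh ($1 = LLVM tablegen output dir, $2 = Capstone arch dir):
+#   ./mapping_insn_op.py $1/X86GenAsmMatcher.inc $1/X86GenInstrInfo_reduce.inc $2/X86MappingInsnOp_reduce.inc > X86MappingInsnOp_reduce.inc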
+# by Nguyen Anh Quynh, 2019 + +import sys + +if len(sys.argv) == 1: + print("Syntax: %s " %sys.argv[0]) + sys.exit(1) + +f = open(sys.argv[3]) +mapping = f.readlines() +f.close() + +print("""/* Capstone Disassembly Engine, http://www.capstone-engine.org */ +/* This is auto-gen data for Capstone disassembly engine (www.capstone-engine.org) */ +/* By Nguyen Anh Quynh , 2013-2019 */ +""") + +# lib/Target/X86/X86GenAsmMatcher.inc +# static const MatchEntry MatchTable1[] = { +# { 0 /* aaa */, X86::AAA, Convert_NoOperands, Feature_Not64BitMode, { }, }, + +# extract insn from GenAsmMatcher Table +# return (arch, mnem, insn_id) +def extract_insn(line): + tmp = line.split(',') + insn_raw = tmp[1].strip() + insn_mnem = tmp[0].split(' ')[3] + # X86 mov.s + if '.' in insn_mnem: + tmp = insn_mnem.split('.') + insn_mnem = tmp[0] + tmp = insn_raw.split('::') + arch = tmp[0] + # AArch64 -> ARM64 + if arch.upper() == 'AARCH64': + arch = 'ARM64' + return (arch, insn_mnem, tmp[1]) + + + +# extract all insn lines from GenAsmMatcher +# return arch, insn_id_list, insn_lines +def extract_matcher(filename): + f = open(filename) + lines = f.readlines() + f.close() + + match_count = 0 + count = 0 + #insn_lines = [] + insn_id_list = {} + arch = None + first_insn = None + + # 1st enum is register enum + for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if 'MatchEntry MatchTable1[] = {' in line.strip(): + match_count += 1 + #print(line.strip()) + continue + + line = line.strip() + if match_count == 1: + count += 1 + if line == '};': + # done with first enum + break + else: + _arch, mnem, insn_id = extract_insn(line) + if count == 1: + arch, first_insn = _arch, insn_id + + if not insn_id in insn_id_list: + # print("***", arch, mnem, insn_id) + insn_id_list[insn_id] = mnem + #insn_lines.append(line) + + #return arch, first_insn, insn_id_list, insn_lines + return arch, first_insn, insn_id_list + + +#arch, first_insn, insn_id_list, match_lines = extract_matcher(sys.argv[1]) +arch, first_insn, insn_id_list = extract_matcher(sys.argv[1]) +arch = arch.upper() + +#for line in insn_id_list: +# print(line) + +#{ /* X86_AAA, X86_INS_AAA: aaa */ +# X86_EFLAGS_UNDEFINED_OF | X86_EFLAGS_UNDEFINED_SF | X86_EFLAGS_UNDEFINED_ZF | X86_EFLAGS_MODIFY_AF | X86_EFLAGS_UNDEFINED_PF | X86_EFLAGS_MODIFY_CF, +# { 0 } +#}, +def print_entry(arch, insn_id, mnem, mapping, mnem_can_be_wrong): + insn = "%s_%s" %(arch, insn_id) + # first, try to find this entry in old MappingInsn.inc file + for i in range(len(mapping)): + if mapping[i].startswith('{') and '/*' in mapping[i]: + #print(mapping[i]) + tmp = mapping[i].split('/*') + tmp = tmp[1].strip() + tmp = tmp.split(',') + #print("insn2 = |%s|" %tmp.strip()) + if tmp[0].strip() == insn: + if not mnem_can_be_wrong: + print(''' +{\t/* %s, %s_INS_%s: %s */ +\t%s +\t%s +},'''% (insn, arch, mnem, mnem.lower(), mapping[i + 1].strip(), mapping[i + 2].strip())) + else: + print(''' +{\t/* %s, %s +\t%s +\t%s +},'''% (insn, ''.join(tmp[1:]).strip(), mapping[i + 1].strip(), mapping[i + 2].strip())) + + return + + print(''' +{\t/* %s, %s_INS_%s: %s */ +\t0, +\t{ 0 } +},'''% (insn, arch, mnem, mnem.lower())) + + + +# extract from GenInstrInfo.inc, because the insn id is in order +enum_count = 0 +meet_insn = False + +f = open(sys.argv[2]) +lines = f.readlines() +f.close() + + +count = 0 +last_mnem = None + +# 1st enum is register enum +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if line.strip() == 'enum {': + enum_count += 1 + 
#print(line.strip()) + continue + + line = line.strip() + if enum_count == 1: + if 'INSTRUCTION_LIST_END' in line: + break + else: + insn = None + if meet_insn: + # enum items + insn = line.split('=')[0].strip() + if 'CALLSTACK' in insn or 'TAILJUMP' in insn: + # pseudo instruction + insn = None + elif line.startswith(first_insn): + insn = line.split('=')[0].strip() + meet_insn = True + + if insn: + count += 1 + if insn == 'BSWAP16r_BAD': + last_mnem = 'BSWAP' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMOVNP_Fp32': + last_mnem = 'FCMOVNP' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMOVP_Fp3': + last_mnem = 'FCMOVP' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMPSDrm_Int': + last_mnem = 'CMPSD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'MOVSX16rm16': + last_mnem = 'MOVSX' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'MOVZX16rm16': + last_mnem = 'MOVZX' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'ST_Fp32m': + last_mnem = 'FST' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMOVNP_Fp64': + last_mnem = 'FCMOVNU' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMPSDrr_Int': + last_mnem = 'CMPSD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'CMPSSrm_Int': + last_mnem = 'CMPSS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCMPSDrm_Int': + last_mnem = 'VCMPSD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCMPSSrm_Int': + last_mnem = 'VCMPSS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VPCMOVYrrr_REV': + last_mnem = 'VPCMOV' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VRNDSCALESDZm': + last_mnem = 'VRNDSCALESD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VRNDSCALESSZm': + last_mnem = 'VRNDSCALESS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMAXCPDZ128rm': + last_mnem = 'VMAXPD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMAXCPSZ128rm': + last_mnem = 'VMAXPS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMAXCSDZrm': + last_mnem = 'VMAXSD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMAXCSSZrm': + last_mnem = 'VMAXSS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMINCPDZ128rm': + last_mnem = 'VMINPD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMINCPSZ128rm': + last_mnem = 'VMINPS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMINCSDZrm': + last_mnem = 'VMINSD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMINCSSZrm': + last_mnem = 'VMINSS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VMOV64toPQIZrm': + last_mnem = 'VMOVQ' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VPERMIL2PDYrr_REV': + last_mnem = 'VPERMILPD' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VPERMIL2PSYrr_REV': + last_mnem = 'VPERMILPS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCVTSD2SI64Zrm_Int': + last_mnem = 'VCVTSD2SI' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 
'VCVTSD2SSrm_Int': + last_mnem = 'VCVTSD2SS' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCVTSS2SI64Zrm_Int': + last_mnem = 'VCVTSS2SI' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCVTTSD2SI64Zrm_Int': + last_mnem = 'VCVTTSD2SI' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + elif insn == 'VCVTTSS2SI64Zrm_Int': + last_mnem = 'VCVTTSS2SI' + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFMSUBADD'): + if insn[len('VFMSUBADD')].isdigit(): + last_mnem = insn[:len('VFMSUBADD123xy')] + else: + last_mnem = insn[:len('VFMSUBADDSS')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFMADDSUB'): + if insn[len('VFMADDSUB')].isdigit(): + last_mnem = insn[:len('VFMADDSUB123xy')] + else: + last_mnem = insn[:len('VFMADDSUBSS')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFMADD'): + if insn[len('VFMADD')].isdigit(): + last_mnem = insn[:len('VFMADD123PD')] + else: + last_mnem = insn[:len('VFMADDPD')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFMSUB'): + if insn[len('VFMSUB')].isdigit(): + last_mnem = insn[:len('VFMSUB123PD')] + else: + last_mnem = insn[:len('VFMSUBPD')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFNMADD'): + if insn[len('VFNMADD')].isdigit(): + last_mnem = insn[:len('VFNMADD123xy')] + else: + last_mnem = insn[:len('VFNMADDSS')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn.startswith('VFNMSUB'): + if insn[len('VFNMSUB')].isdigit(): + last_mnem = insn[:len('VFNMSUB123xy')] + else: + last_mnem = insn[:len('VFNMSUBSS')] + print_entry(arch.upper(), insn, last_mnem, mapping, False) + + elif insn in insn_id_list: + # trust old mapping table + last_mnem = insn_id_list[insn].upper() + print_entry(arch.upper(), insn, insn_id_list[insn].upper(), mapping, False) + else: + # the last option when we cannot find mnem: use the last good mnem + print_entry(arch.upper(), insn, last_mnem, mapping, True) diff --git a/capstone/suite/synctools/mapping_reg.py b/capstone/suite/synctools/mapping_reg.py new file mode 100755 index 000000000..0d061c551 --- /dev/null +++ b/capstone/suite/synctools/mapping_reg.py @@ -0,0 +1,53 @@ +#!/usr/bin/python +# print out all registers from LLVM GenRegisterInfo.inc for Capstone disassembler. +# NOTE: the list then must be filtered, manually. 
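+#
+# A small sketch of the assumed invocation and output (the file and register
+# names are only examples, inferred from the argument handling below):
+#
+#   ./mapping_reg.py X86GenRegisterInfo.inc X86 > X86MappingReg.inc
+#
+# Every item of the first register enum becomes an entry such as
+#   { X86_AX, X86_REG_AX },
+# while registers Capstone does not expose (DF, SSP, K/BND, the high-half
+# registers, ...) are mapped to 0.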
+# by Nguyen Anh Quynh, 2019
+
+import sys
+
+if len(sys.argv) == 1:
+    print("Syntax: %s " %sys.argv[0])
+    sys.exit(1)
+
+f = open(sys.argv[1])
+lines = f.readlines()
+f.close()
+
+arch = sys.argv[2].upper()
+
+enum_count = 0
+
+print("""/* Capstone Disassembly Engine, http://www.capstone-engine.org */
+/* This is auto-gen data for Capstone disassembly engine (www.capstone-engine.org) */
+/* By Nguyen Anh Quynh , 2013-2019 */
+""")
+
+# 1st enum is register enum
+for line in lines:
+    line = line.rstrip()
+
+    if len(line.strip()) == 0:
+        continue
+
+    if line.strip() == 'enum {':
+        enum_count += 1
+        continue
+
+    if enum_count == 1:
+        if line == '};':
+            # done with first enum
+            break
+        else:
+            # enum items
+            if 'NoRegister' in line or 'TARGET_REGS' in line:
+                continue
+            reg = line.strip().split('=')[0].strip()
+            if reg.startswith('H') or reg.endswith('PH') or reg.endswith('IH') or reg.endswith('WH'):
+                print("{ %s_%s, 0 }," %(arch, reg))
+            elif 'K' in reg or 'BND' in reg:
+                print("{ %s_%s, 0 }," %(arch, reg))
+            elif reg in ('DF', 'SSP', 'R8BH', 'R9BH', 'R10BH', 'R11BH', 'R12BH', 'R13BH', 'R14BH', 'R15BH'):
+                print("{ %s_%s, 0 }," %(arch, reg))
+            else:
+                print("{ %s_%s, %s_REG_%s }," %(arch, reg, arch, reg))
+
diff --git a/capstone/suite/synctools/register.py b/capstone/suite/synctools/register.py
new file mode 100755
index 000000000..3176f695f
--- /dev/null
+++ b/capstone/suite/synctools/register.py
@@ -0,0 +1,48 @@
+#!/usr/bin/python
+# print out all registers from LLVM GenRegisterInfo.inc for Capstone disassembler.
+# NOTE: the list then must be filtered, manually.
+# by Nguyen Anh Quynh, 2019
+
+import sys
+
+if len(sys.argv) == 1:
+    print("Syntax: %s " %sys.argv[0])
+    sys.exit(1)
+
+f = open(sys.argv[1])
+lines = f.readlines()
+f.close()
+
+arch = sys.argv[2].upper()
+
+enum_count = 0
+
+# 1st enum is register enum
+for line in lines:
+    line = line.rstrip()
+
+    if len(line.strip()) == 0:
+        continue
+
+    if line.strip() == 'enum {':
+        enum_count += 1
+        continue
+
+    if enum_count == 1:
+        if line == '};':
+            # done with first enum
+            break
+        else:
+            # enum items
+            if 'NoRegister' in line or 'TARGET_REGS' in line:
+                continue
+            reg = line.strip().split('=')[0].strip()
+            if reg.startswith('H') or reg.endswith('PH') or reg.endswith('IH') or reg.endswith('WH'):
+                print(" %s_REG_%s = REMOVE," %(arch, reg))
+            elif 'K' in reg or 'BND' in reg:
+                print(" %s_REG_%s = REMOVE," %(arch, reg))
+            elif reg in ('DF', 'SSP', 'R8BH', 'R9BH', 'R10BH', 'R11BH', 'R12BH', 'R13BH', 'R14BH', 'R15BH'):
+                print(" %s_REG_%s = REMOVE," %(arch, reg))
+            else:
+                print(" %s_REG_%s," %(arch, reg))
+
diff --git a/capstone/suite/synctools/registerinfo.py b/capstone/suite/synctools/registerinfo.py
new file mode 100755
index 000000000..27e05d287
--- /dev/null
+++ b/capstone/suite/synctools/registerinfo.py
@@ -0,0 +1,286 @@
+#!/usr/bin/python
+# convert LLVM GenRegisterInfo.inc for Capstone disassembler.
+# by Nguyen Anh Quynh, 2019
+
+import sys
+
+if len(sys.argv) == 1:
+    print("Syntax: %s " %sys.argv[0])
+    sys.exit(1)
+
+f = open(sys.argv[1])
+lines = f.readlines()
+f.close()
+
+arch = sys.argv[2]
+
+print("""
+/* Capstone Disassembly Engine, http://www.capstone-engine.org */
+/* By Nguyen Anh Quynh , 2013-2019 */
+
+/*===- TableGen'erated file -------------------------------------*- C++ -*-===*\\
+|*                                                                            *|
+|* Target Register Enum Values                                                *|
+|*                                                                            *|
+|* Automatically generated file, do not edit!
*| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifdef GET_REGINFO_ENUM +#undef GET_REGINFO_ENUM +""") + +enum_count = 0 + +# 1st enum is register enum +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if line.strip() == 'enum {': + enum_count += 1 + print(line) + continue + + if enum_count == 1: + if line.strip() == '};': + print(line) + # done with first enum + break + else: + # enum items + print(" %s_%s" %(arch, line.strip())) + +# 2nd enum is register class +enum_count = 0 +print("\n// Register classes") +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if line.strip() == 'enum {': + enum_count += 1 + if enum_count == 2: + print(line) + continue + + if enum_count == 2: + if line.strip() == '};': + # done with 2nd enum + print(line.strip()) + break + else: + # enum items + print(" %s_%s" %(arch, line.strip())) + +if arch.upper() == 'ARM': + # 3rd enum is Subregister indices + enum_count = 0 + print("\n// Subregister indices") + for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if line.strip() == 'enum {': + enum_count += 1 + if enum_count == 3: + print(line) + continue + + if enum_count == 3: + if line.strip() == '};': + # done with 2nd enum + print(line.strip()) + break + else: + # enum items + print(" %s_%s" %(arch, line.strip())) + +if arch.upper() == 'AARCH64': + # 3rd enum is Register alternate name indices + enum_count = 0 + print("\n// Register alternate name indices") + for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if line.strip() == 'enum {': + enum_count += 1 + if enum_count == 3: + print(line) + continue + + if enum_count == 3: + if line.strip() == '};': + # done with 2nd enum + print(line.strip()) + break + else: + # enum items + print(" %s_%s" %(arch, line.strip())) + + # 4th enum is Subregister indices + enum_count = 0 + print("\n// Subregister indices") + for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if line.strip() == 'enum {': + enum_count += 1 + if enum_count == 4: + print(line) + continue + + if enum_count == 4: + if line.strip() == '};': + # done with 2nd enum + print(line.strip()) + break + else: + # enum items + print(" %s_%s" %(arch, line.strip())) + +# end of enum +print("") +print("#endif // GET_REGINFO_ENUM") + +print(""" +#ifdef GET_REGINFO_MC_DESC +#undef GET_REGINFO_MC_DESC + +""") + +# extract RegDiffLists +finding_struct = True +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if arch + 'RegDiffLists' in line: + finding_struct = False + print("static const MCPhysReg " + arch + "RegDiffLists[] = {") + continue + + if finding_struct: + continue + else: + print(line) + if line == '};': + # done with this struct + print("") + break + +# extract SubRegIdxLists +finding_struct = True +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if arch + 'SubRegIdxLists' in line: + finding_struct = False + print("static const uint16_t " + arch + "SubRegIdxLists[] = {") + continue + + if finding_struct: + continue + else: + print(line) + if line == '};': + # done with this struct + print("") + break + +# extract RegDesc +finding_struct = True +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if arch + 'RegDesc' in line: + finding_struct = False + print("static const MCRegisterDesc " + arch + "RegDesc[] = {") + 
continue + + if finding_struct: + continue + else: + print(line) + if line == '};': + # done with this struct + print("") + break + +# extract register classes +finding_struct = True +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if 'Register classes' in line and 'namespace' in line: + finding_struct = False + continue + + if finding_struct: + continue + else: + if 'const' in line: + line2 = line.replace('const', 'static const') + print(line2) + elif '::' in line: + line2 = line.replace('::', '_') + print(line2) + elif 'end anonymous namespace' in line: + # done with this struct + break + else: + print(line) + +print("\n") + +# extract MCRegisterClasses +finding_struct = True +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if 'MCRegisterClass ' + arch + 'MCRegisterClasses[] = {' in line: + finding_struct = False + print("static const MCRegisterClass " + arch + "MCRegisterClasses[] = {") + continue + + if finding_struct: + continue + else: + if line == '};': + # done with this struct + print('};\n') + break + elif '::' in line: + line = line.replace('::', '_') + + # { GR8, GR8Bits, 130, 20, sizeof(GR8Bits), X86_GR8RegClassID, 1, 1, 1, 1 }, + tmp = line.split(',') + print(" %s, %s, %s }," %(tmp[0].strip(), tmp[1].strip(), tmp[4].strip())) + +print("#endif // GET_REGINFO_MC_DESC") diff --git a/capstone/suite/synctools/strinforeduce/Makefile b/capstone/suite/synctools/strinforeduce/Makefile new file mode 100644 index 000000000..d533f9a11 --- /dev/null +++ b/capstone/suite/synctools/strinforeduce/Makefile @@ -0,0 +1,10 @@ +all: full reduce + +full: + g++ strinforeduce.cpp -o strinforeduce + +reduce: + g++ -DCAPSTONE_X86_REDUCE strinforeduce.cpp -o strinforeduce_reduce + +clean: + rm -rf strinforeduce strinforeduce_reduce diff --git a/capstone/suite/synctools/strinforeduce/README b/capstone/suite/synctools/strinforeduce/README new file mode 100644 index 000000000..e4ba5cf9d --- /dev/null +++ b/capstone/suite/synctools/strinforeduce/README @@ -0,0 +1,15 @@ +- Run instroinfo2.py on X86GenInstrInfo.inc & X86GenInstrInfo_reduce.inc + + $ ./instrinfo2.py ../tablegen/X86GenInstrInfo.inc > X86GenInstrInfo.inc + $ ./instrinfo2.py ../tablegen/X86GenInstrInfo_reduce.inc > X86GenInstrInfo_reduce.inc + +- Compile + + $ make + +- Run + + $ ./strinforeduce > X86Lookup16.inc + $ ./strinforeduce_reduce > X86Lookup16_reduce.inc + +- Then copy X86Lookup16*.inc to Capstone dir arch/X86/ diff --git a/capstone/suite/synctools/strinforeduce/instrinfo2.py b/capstone/suite/synctools/strinforeduce/instrinfo2.py new file mode 100755 index 000000000..6c52bdcd7 --- /dev/null +++ b/capstone/suite/synctools/strinforeduce/instrinfo2.py @@ -0,0 +1,55 @@ +#!/usr/bin/python +# convert LLVM GenInstrInfo.inc for Capstone disassembler. 
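+#
+# As the strinforeduce README above shows, the intended use is e.g.:
+#
+#   ./instrinfo2.py ../tablegen/X86GenInstrInfo.inc > X86GenInstrInfo.inc
+#
+# Mechanically, the loop below copies whatever sits between the line containing
+# 'extern const MCInstrDesc' and its closing '};', while skipping the MCPhysReg
+# and MCOperandInfo tables and the InitX86MCInstrInfo() boilerplate.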
+# by Nguyen Anh Quynh, 2019 + +import sys + +if len(sys.argv) == 1: + print("Syntax: %s " %sys.argv[0]) + sys.exit(1) + + +count = 0 +last_line = None + +f = open(sys.argv[1]) +lines = f.readlines() +f.close() + +# 1st enum is register enum +for line in lines: + line = line.rstrip() + + # skip all MCPhysReg line + if 'static const MCPhysReg ' in line: + continue + + # skip all MCOperandInfo line + if 'static const MCOperandInfo ' in line: + continue + + # skip InitX86MCInstrInfo() + if 'static inline void InitX86MCInstrInfo' in line: + continue + + if 'II->InitMCInstrInfo' in line: + last_line = line + continue + + # skip the next line after II->InitMCInstrInfo + if last_line: + last_line = None + continue + + + if 'extern const MCInstrDesc ' in line: + count += 1 + continue + + if count == 1: + if line == '};': + # done with first enum + count += 1 + continue + else: + print(line) diff --git a/capstone/suite/synctools/strinforeduce/strinforeduce.cpp b/capstone/suite/synctools/strinforeduce/strinforeduce.cpp new file mode 100644 index 000000000..e44110520 --- /dev/null +++ b/capstone/suite/synctools/strinforeduce/strinforeduce.cpp @@ -0,0 +1,183 @@ +// By Martin Tofall, Obsidium Software +#define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_MC_DESC + +#ifdef CAPSTONE_X86_REDUCE +#include "X86GenInstrInfo_reduce.inc" +#else +#include "X86GenInstrInfo.inc" +#endif + +#include +#include +#include +#include + +static const char *x86DisassemblerGetInstrName(unsigned Opcode) +{ + return &llvm::X86InstrNameData[llvm::X86InstrNameIndices[Opcode]]; +} + +static bool is16BitEquivalent(const char* orig, const char* equiv) +{ + size_t i; + + for (i = 0;; i++) { + if (orig[i] == '\0' && equiv[i] == '\0') + return true; + + if (orig[i] == '\0' || equiv[i] == '\0') + return false; + + if (orig[i] != equiv[i]) { + if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W') + continue; + + if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1') + continue; + + if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6') + continue; + + return false; + } + } +} + +// static const char *header = "#ifdef GET_INSTRINFO_MC_DESC\n#undef GET_INSTRINFO_MC_DESC\n\n" +static const char *header = + "typedef struct x86_op_id_pair {\n"\ + "\tuint16_t first;\n" \ + "\tuint16_t second;\n" \ + "} x86_op_id_pair;\n\n" \ + "static const x86_op_id_pair x86_16_bit_eq_tbl[] = {\n"; +static const char *footer = "};\n\n"; + +static const char *header_lookup = "static const uint16_t x86_16_bit_eq_lookup[] = {\n"; +//static const char *footer_lookup = "};\n\n#endif\n"; +static const char *footer_lookup = "};\n"; + +static bool is16BitEquivalent_old(unsigned id1, unsigned id2) +{ + return (is16BitEquivalent(x86DisassemblerGetInstrName(id1), x86DisassemblerGetInstrName(id2))) != false; +} + +//#include "reduced.h" + +#if 0 +static bool is16BitEquivalent_new(unsigned orig, unsigned equiv) +{ + size_t i; + uint16_t idx; + + if (orig == equiv) + return true; // emulate old behaviour + + if ((idx = x86_16_bit_eq_lookup[orig]) != 0) { + for (i = idx - 1; x86_16_bit_eq_tbl[i].first == orig; ++i) { + if (x86_16_bit_eq_tbl[i].second == equiv) + return true; + } + } + + return false; +} +#endif + +int main() +{ + size_t size_names = sizeof(llvm::X86InstrNameData); + size_t size_indices = sizeof(llvm::X86InstrNameIndices); + size_t size_total = size_names + size_indices; + +#if 1 + printf("%s", header); + + size_t eq_count = 0; + std::string str_lookup; + bool got_i = false; + + for (size_t i = 0; i < llvm::X86::INSTRUCTION_LIST_END; ++i) { + 
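+		// For every instruction i, the inner loop below emits one { i, j } entry of
+		// x86_16_bit_eq_tbl per 16-bit-equivalent instruction j, while str_lookup
+		// collects a 1-based start offset per i for x86_16_bit_eq_lookup (0 = no match).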
const char *name1 = x86DisassemblerGetInstrName(i); + for (size_t j = 0; j < llvm::X86::INSTRUCTION_LIST_END; ++j) { + const char *name2 = x86DisassemblerGetInstrName(j); + if (i != j && is16BitEquivalent(name1, name2) != false) { + //printf("Found equivalent %d and %d\n", i, j); + printf("\t{ %zu, %zu },\n", i, j); + if (!got_i) { + char buf[16]; + sprintf(buf, "\t%zu,\n", eq_count + 1); + str_lookup += buf; + + got_i = true; + } + ++eq_count; + } + } + + if (!got_i) { + //char buf[32]; + //sprintf(buf, "\t0, //%d\n", i); + //str_lookup += buf; + str_lookup += "\t0,\n"; + } + + // reset got_i + got_i = false; + } + + printf("%s", footer); + printf("%s", header_lookup); + printf("%s", str_lookup.c_str()); + printf("%s", footer_lookup); + + // printf("%zu equivalents total\n", eq_count); + // size_t size_new = eq_count * 4 + llvm::X86::INSTRUCTION_LIST_END * 2; + // printf("before: %zu, after: %zu, %zu bytes saved\n", size_total, size_new, size_total - size_new); +#endif + +#if 0 + for (size_t i = 0; i < llvm::X86::INSTRUCTION_LIST_END; ++i) { + for (size_t j = 0; j < llvm::X86::INSTRUCTION_LIST_END; ++j) { + if (is16BitEquivalent_new(i, j) != is16BitEquivalent_old(i, j)) { + bool old_result = is16BitEquivalent_old(i, j); + bool new_result = is16BitEquivalent_new(i, j); + printf("ERROR!\n"); + } + } + } +#endif + +#if 0 + static const size_t BENCH_LOOPS = 50; + + size_t eq_count = 0; + DWORD time = GetTickCount(); + for (size_t l = 0; l < BENCH_LOOPS; ++l) { + for (size_t i = 0; i < llvm::X86::INSTRUCTION_LIST_END; ++i) { + for (size_t j = 0; j < llvm::X86::INSTRUCTION_LIST_END; ++j) + if (is16BitEquivalent_new(i, j)) + ++eq_count; + } + } + + time = GetTickCount() - time; + printf("new: %f msecs\n", static_cast(time) / static_cast(BENCH_LOOPS)); + + eq_count = 0; + time = GetTickCount(); + for (size_t l = 0; l < BENCH_LOOPS; ++l) { + for (size_t i = 0; i < llvm::X86::INSTRUCTION_LIST_END; ++i) { + for (size_t j = 0; j < llvm::X86::INSTRUCTION_LIST_END; ++j) + if (is16BitEquivalent_old(i, j)) + ++eq_count; + } + } + + time = GetTickCount() - time; + printf("old: %f msecs\n", static_cast(time) / static_cast(BENCH_LOOPS)); +#endif + + return 0; +} + diff --git a/capstone/suite/synctools/subtargetinfo.py b/capstone/suite/synctools/subtargetinfo.py new file mode 100755 index 000000000..e9e87ca84 --- /dev/null +++ b/capstone/suite/synctools/subtargetinfo.py @@ -0,0 +1,53 @@ +#!/usr/bin/python +# convert LLVM GenSubtargetInfo.inc for Capstone disassembler. +# by Nguyen Anh Quynh, 2019 + +import sys + +if len(sys.argv) == 1: + print("Syntax: %s " %sys.argv[0]) + sys.exit(1) + +f = open(sys.argv[1]) +lines = f.readlines() +f.close() + +arch = sys.argv[2] + +print(""" +/* Capstone Disassembly Engine, http://www.capstone-engine.org */ +/* By Nguyen Anh Quynh , 2013-2019 */ + +/*===- TableGen'erated file -------------------------------------*- C++ -*-===*\ +|* *| +|* Subtarget Enumeration Source Fragment *| +|* *| +|* Automatically generated file, do not edit! 
*| +|* *| +\*===----------------------------------------------------------------------===*/ + +""") + +count = 0 + +# 1st enum is subtarget enum +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if line.strip() == 'enum {': + count += 1 + print(line) + continue + + if count == 1: + if line.strip() == '};': + # done with first enum + break + else: + # enum items + print(" %s_%s" %(arch, line.strip())) + +print('};\n') diff --git a/capstone/suite/synctools/systemoperand.py b/capstone/suite/synctools/systemoperand.py new file mode 100755 index 000000000..cd462af7f --- /dev/null +++ b/capstone/suite/synctools/systemoperand.py @@ -0,0 +1,987 @@ +#!/usr/bin/python +# convert LLVM GenSystemOperands.inc of AArch64 for Capstone disassembler. +# by Nguyen Anh Quynh, 2019 + +import sys + +if len(sys.argv) == 1: + print("Syntax: %s " %sys.argv[0]) + sys.exit(1) + +f = open(sys.argv[1]) +lines = f.readlines() +f.close() + +f1 = open(sys.argv[2], 'w+') + +f2 = open(sys.argv[3], 'w+') + +f1.write("/* Capstone Disassembly Engine, http://www.capstone-engine.org */\n") +f1.write("/* By Nguyen Anh Quynh , 2013-2019 */\n") +f1.write("\n") + +f2.write("/* Capstone Disassembly Engine, http://www.capstone-engine.org */\n") +f2.write("/* By Nguyen Anh Quynh , 2013-2019 */\n") +f2.write("\n") + +# extract PStateValues enum +count = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if line.strip() == 'enum PStateValues {': + count += 1 + f2.write(line.strip() + "\n") + continue + + line = line.strip() + if count == 1: + if line == '};': + # done with first enum + f2.write(line + "\n") + f2.write("\n") + break + else: + # skip pseudo instructions + f2.write(" AArch64PState_%s\n" %(line)) + +def print_line(line): + f1.write(line + "\n") + +# extract ExactFPImmValues enum +count = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if line.strip() == 'enum ExactFPImmValues {': + count += 1 + f2.write(line.strip() + "\n") + continue + + line = line.strip() + if count == 1: + if line == '};': + # done with first enum + f2.write(line + "\n") + f2.write("\n") + break + else: + # skip pseudo instructions + f2.write(" AArch64ExactFPImm_%s\n" %(line)) + +# extract ATsList[] +count = 0 +c = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if line.strip() == 'const AT ATsList[] = {': + count += 1 + print_line('static const AT ATsList[] = {') + continue + + line = line.strip() + if count == 1: + if line == '};': + # done with first enum + print_line('};\n') + break + else: + # skip pseudo instructions + line = line.replace('::', '_') + #line = line.replace('{}', '{ 0 }') + line = line.replace('{}', '') + tmp = line.split(',') + print_line(" %s, %s }, // %u" %(tmp[0].lower(), tmp[1], c)) + c += 1 + +# lookupATByEncoding +count = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if 'lookupATByEncoding' in line and '{' in line: + count += 1 + print_line('const AT *lookupATByEncoding(uint16_t Encoding)\n{') + print_line(' unsigned int i;') + continue + + if count == 1 and 'IndexType Index[] = {' in line: + count += 1 + + if count == 2: + if line.strip() == '};': + # done with array, or this function? 
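+            # the '};' closes the IndexType Index[] array copied from the input;
+            # print it, stop copying, and let the print_line() block that follows
+            # append the binary-search body of the generated lookup function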
+ print_line(line) + break + else: + # enum items + print_line(line) + +print_line(""" + i = binsearch_IndexTypeEncoding(Index, ARR_SIZE(Index), Encoding); + if (i == -1) + return NULL; + else + return &ATsList[Index[i].index]; +} +""") + + +# extract DBsList[] +count = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if line.strip() == 'const DB DBsList[] = {': + count += 1 + print_line('static const DB DBsList[] = {') + continue + + line = line.strip() + if count == 1: + if line == '};': + # done with first enum + print_line('};\n') + break + else: + # skip pseudo instructions + line = line.replace('::', '_') + #line = line.replace('{}', '{ 0 }') + line = line.replace('{}', '') + print_line(" %s" %(line)) + +# lookupDBByEncoding +count = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if 'lookupDBByEncoding' in line and '{' in line: + count += 1 + print_line('const DB *lookupDBByEncoding(uint16_t Encoding)\n{') + print_line(' unsigned int i;') + continue + + if count == 1 and 'IndexType Index[] = {' in line: + count += 1 + + if count == 2: + if line.strip() == '};': + # done with array, or this function? + print_line(line) + break + else: + # enum items + print_line(line) + +print_line(""" + i = binsearch_IndexTypeEncoding(Index, ARR_SIZE(Index), Encoding); + if (i == -1) + return NULL; + else + return &DBsList[Index[i].index]; +} +""") + + +# extract DCsList[] +count = 0 +c = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if line.strip() == 'const DC DCsList[] = {': + count += 1 + print_line('static const DC DCsList[] = {') + continue + + line = line.strip() + if count == 1: + if line == '};': + # done with first enum + print_line('};\n') + break + else: + # skip pseudo instructions + line = line.replace('::', '_') + #line = line.replace('{}', '{ 0 }') + line = line.replace('{}', '') + tmp = line.split(',') + print_line(" %s, %s }, // %u" %(tmp[0].lower(), tmp[1], c)) + c += 1 + +# lookupDCByEncoding +count = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if 'lookupDCByEncoding' in line and '{' in line: + count += 1 + print_line('const DC *lookupDCByEncoding(uint16_t Encoding)\n{') + print_line(' unsigned int i;') + continue + + if count == 1 and 'IndexType Index[] = {' in line: + count += 1 + + if count == 2: + if line.strip() == '};': + # done with array, or this function? 
+ print_line(line) + break + else: + # enum items + print_line(line) + +print_line(""" + i = binsearch_IndexTypeEncoding(Index, ARR_SIZE(Index), Encoding); + if (i == -1) + return NULL; + else + return &DCsList[Index[i].index]; +} +""") + + +# extract ICsList +count = 0 +c = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if line.strip() == 'const IC ICsList[] = {': + count += 1 + print_line('static const IC ICsList[] = {') + continue + + line = line.strip() + if count == 1: + if line == '};': + # done with first enum + print_line('};\n') + break + else: + # skip pseudo instructions + line = line.replace('::', '_') + #line = line.replace('{}', '{ 0 }') + line = line.replace('{}', '') + #tmp = line.split(',') + #print_line(" %s, %s }, // %u" %(tmp[0].lower(), tmp[1], c)) + print_line(" %s" %line.lower()) + c += 1 + +# lookupICByEncoding +count = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if 'lookupICByEncoding' in line and '{' in line: + count += 1 + print_line('const IC *lookupICByEncoding(uint16_t Encoding)\n{') + print_line(' unsigned int i;') + continue + + if count == 1 and 'IndexType Index[] = {' in line: + count += 1 + + if count == 2: + if line.strip() == '};': + # done with array, or this function? + print_line(line) + break + else: + # enum items + print_line(line) + +print_line(""" + i = binsearch_IndexTypeEncoding(Index, ARR_SIZE(Index), Encoding); + if (i == -1) + return NULL; + else + return &ICsList[Index[i].index]; +} +""") + + +# extract TLBIsList +count = 0 +c = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if line.strip() == 'const TLBI TLBIsList[] = {': + count += 1 + print_line('static const TLBI TLBIsList[] = {') + continue + + line = line.strip() + if count == 1: + if line == '};': + # done with first enum + print_line('};\n') + break + else: + # skip pseudo instructions + line = line.replace('::', '_') + #line = line.replace('{}', '{ 0 }') + line = line.replace('{}', '') + tmp = line.split(',') + print_line(" %s, %s, %s }, // %u" %(tmp[0].lower(), tmp[1], tmp[2], c)) + #print_line(" %s" %line.lower()) + c += 1 + +# lookupTLBIByEncoding +count = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if 'lookupTLBIByEncoding' in line and '{' in line: + count += 1 + print_line('const TLBI *lookupTLBIByEncoding(uint16_t Encoding)\n{') + print_line(' unsigned int i;') + continue + + if count == 1 and 'IndexType Index[] = {' in line: + count += 1 + + if count == 2: + if line.strip() == '};': + # done with array, or this function? 
+ print_line(line) + break + else: + # enum items + print_line(line) + +print_line(""" + i = binsearch_IndexTypeEncoding(Index, ARR_SIZE(Index), Encoding); + if (i == -1) + return NULL; + else + return &TLBIsList[Index[i].index]; +} +""") + +# extract SVEPRFMsList +count = 0 +c = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if line.strip() == 'const SVEPRFM SVEPRFMsList[] = {': + count += 1 + print_line('static const SVEPRFM SVEPRFMsList[] = {') + continue + + line = line.strip() + if count == 1: + if line == '};': + # done with first enum + print_line('};\n') + break + else: + # skip pseudo instructions + line = line.replace('::', '_') + #line = line.replace('{}', '{ 0 }') + line = line.replace('{}', '') + tmp = line.split(',') + print_line(" %s, %s }, // %u" %(tmp[0].lower(), tmp[1], c)) + #print_line(" %s" %line.lower()) + c += 1 + +# lookupSVEPRFMByEncoding +count = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if 'lookupSVEPRFMByEncoding' in line and '{' in line: + count += 1 + print_line('const SVEPRFM *lookupSVEPRFMByEncoding(uint16_t Encoding)\n{') + print_line(' unsigned int i;') + continue + + if count == 1 and 'IndexType Index[] = {' in line: + count += 1 + + if count == 2: + if line.strip() == '};': + # done with array, or this function? + print_line(line) + break + else: + # enum items + print_line(line) + +print_line(""" + i = binsearch_IndexTypeEncoding(Index, ARR_SIZE(Index), Encoding); + if (i == -1) + return NULL; + else + return &SVEPRFMsList[Index[i].index]; +} +""") + + +# extract PRFMsList +count = 0 +c = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if line.strip() == 'const PRFM PRFMsList[] = {': + count += 1 + print_line('static const PRFM PRFMsList[] = {') + continue + + line = line.strip() + if count == 1: + if line == '};': + # done with first enum + print_line('};\n') + break + else: + # skip pseudo instructions + line = line.replace('::', '_') + #line = line.replace('{}', '{ 0 }') + line = line.replace('{}', '') + #tmp = line.split(',') + #print_line(" %s, %s }, // %u" %(tmp[0].lower(), tmp[1], c)) + print_line(" %s" %line.lower()) + c += 1 + +# lookupPRFMByEncoding +count = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if 'lookupPRFMByEncoding' in line and '{' in line: + count += 1 + print_line('const PRFM *lookupPRFMByEncoding(uint16_t Encoding)\n{') + print_line(' unsigned int i;') + continue + + if count == 1 and 'IndexType Index[] = {' in line: + count += 1 + + if count == 2: + if line.strip() == '};': + # done with array, or this function? 
+ print_line(line) + break + else: + # enum items + print_line(line) + +print_line(""" + i = binsearch_IndexTypeEncoding(Index, ARR_SIZE(Index), Encoding); + if (i == -1) + return NULL; + else + return &PRFMsList[Index[i].index]; +} +""") + + +# extract PSBsList +count = 0 +c = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if line.strip() == 'const PSB PSBsList[] = {': + count += 1 + print_line('static const PSB PSBsList[] = {') + continue + + line = line.strip() + if count == 1: + if line == '};': + # done with first enum + print_line('};\n') + break + else: + # skip pseudo instructions + line = line.replace('::', '_') + #line = line.replace('{}', '{ 0 }') + line = line.replace('{}', '') + #tmp = line.split(',') + #print_line(" %s, %s }, // %u" %(tmp[0].lower(), tmp[1], c)) + print_line(" %s" %line.lower()) + c += 1 + +# lookupPSBByEncoding +count = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if 'lookupPSBByEncoding' in line and '{' in line: + count += 1 + print_line('const PSB *AArch64PSBHint_lookupPSBByEncoding(uint16_t Encoding)\n{') + print_line(' unsigned int i;') + continue + + if count == 1 and 'IndexType Index[] = {' in line: + count += 1 + + if count == 2: + if line.strip() == '};': + # done with array, or this function? + print_line(line) + break + else: + # enum items + print_line(line) + +print_line(""" + i = binsearch_IndexTypeEncoding(Index, ARR_SIZE(Index), Encoding); + if (i == -1) + return NULL; + else + return &PSBsList[Index[i].index]; +} +""") + + +# extract ISBsList +count = 0 +c = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if line.strip() == 'const ISB ISBsList[] = {': + count += 1 + print_line('static const ISB ISBsList[] = {') + continue + + line = line.strip() + if count == 1: + if line == '};': + # done with first enum + print_line('};\n') + break + else: + # skip pseudo instructions + line = line.replace('::', '_') + #line = line.replace('{}', '{ 0 }') + line = line.replace('{}', '') + #tmp = line.split(',') + #print_line(" %s, %s }, // %u" %(tmp[0].lower(), tmp[1], c)) + print_line(" %s" %line.lower()) + c += 1 + +# lookupISBByName +count = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if 'lookupISBByEncoding' in line and '{' in line: + count += 1 + print_line('const ISB *lookupISBByEncoding(uint16_t Encoding)\n{') + print_line(' unsigned int i;') + continue + + if count == 1 and 'IndexType Index[] = {' in line: + count += 1 + + if count == 2: + if line.strip() == '};': + # done with array, or this function? 
+ print_line(line) + break + else: + # enum items + print_line(line) + +print_line(""" + i = binsearch_IndexTypeEncoding(Index, ARR_SIZE(Index), Encoding); + if (i == -1) + return NULL; + else + return &ISBsList[Index[i].index]; +} +""") + + +# extract TSBsList +count = 0 +c = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if line.strip() == 'const TSB TSBsList[] = {': + count += 1 + print_line('static const TSB TSBsList[] = {') + continue + + line = line.strip() + if count == 1: + if line == '};': + # done with first enum + print_line('};\n') + break + else: + # skip pseudo instructions + line = line.replace('::', '_') + #line = line.replace('{}', '{ 0 }') + line = line.replace('{}', '') + tmp = line.split(',') + print_line(" %s, %s }, // %u" %(tmp[0].lower(), tmp[1], c)) + #print_line(" %s" %line.lower()) + c += 1 + +# lookupTSBByEncoding +count = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if 'lookupTSBByEncoding' in line and '{' in line: + count += 1 + print_line('const TSB *lookupTSBByEncoding(uint16_t Encoding)\n{') + continue + + if count == 1 and 'IndexType Index[] = {' in line: + count += 1 + + if count == 2: + if line.strip() == '};': + # done with array, or this function? + print_line(line) + break + else: + # enum items + print_line(line) + +print_line(""" + if (Encoding >= ARR_SIZE(TSBsList)) + return NULL; + else + return &TSBsList[Index[Encoding].index]; +} +""") + + +# extract SysRegsList +count = 0 +c = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if line.strip() == 'const SysReg SysRegsList[] = {': + count += 1 + print_line('static const SysReg SysRegsList[] = {') + continue + + line = line.strip() + if count == 1: + if line == '};': + # done with first enum + print_line('};\n') + break + else: + # skip pseudo instructions + line = line.replace('::', '_') + #line = line.replace('{}', '{ 0 }') + line = line.replace('{}', '') + tmp = line.split(',') + print_line(" %s, %s, %s, %s }, // %u" %(tmp[0].lower(), tmp[1], tmp[2], tmp[3], c)) + #print_line(" %s" %line.lower()) + c += 1 + +# lookupSysRegByEncoding +count = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if 'lookupSysRegByEncoding' in line and '{' in line: + count += 1 + print_line('const SysReg *lookupSysRegByEncoding(uint16_t Encoding)\n{') + print_line(' unsigned int i;') + continue + + if count == 1 and 'IndexType Index[] = {' in line: + count += 1 + + if count == 2: + if line.strip() == '};': + # done with array, or this function? 
+ print_line(line) + break + else: + # enum items + print_line(line) + +print_line(""" + i = binsearch_IndexTypeEncoding(Index, ARR_SIZE(Index), Encoding); + if (i == -1) + return NULL; + else + return &SysRegsList[Index[i].index]; +} +""") + +# extract PStatesList +count = 0 +c = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if line.strip() == 'const PState PStatesList[] = {': + count += 1 + print_line('static const PState PStatesList[] = {') + continue + + line = line.strip() + if count == 1: + if line == '};': + # done with first enum + print_line('};\n') + break + else: + # skip pseudo instructions + line = line.replace('::', '_') + #line = line.replace('{}', '{ 0 }') + line = line.replace('{}', '') + tmp = line.split(',') + print_line(" %s, %s }, // %u" %(tmp[0].lower(), tmp[1], c)) + #print_line(" %s" %line.lower()) + c += 1 + +# lookupPStateByEncoding +count = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if 'lookupPStateByEncoding' in line and '{' in line: + count += 1 + print_line('const PState *lookupPStateByEncoding(uint16_t Encoding)\n{') + print_line(' unsigned int i;') + continue + + if count == 1 and 'IndexType Index[] = {' in line: + count += 1 + + if count == 2: + if line.strip() == '};': + # done with array, or this function? + print_line(line) + break + else: + # enum items + print_line(line) + +print_line(""" + i = binsearch_IndexTypeEncoding(Index, ARR_SIZE(Index), Encoding); + if (i == -1) + return NULL; + else + return &PStatesList[Index[i].index]; +} +""") + +# extract SVEPREDPATsList +count = 0 +c = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if line.strip() == 'const SVEPREDPAT SVEPREDPATsList[] = {': + count += 1 + print_line('static const SVEPREDPAT SVEPREDPATsList[] = {') + continue + + line = line.strip() + if count == 1: + if line == '};': + # done with first enum + print_line('};\n') + break + else: + # skip pseudo instructions + line = line.replace('::', '_') + #line = line.replace('{}', '{ 0 }') + line = line.replace('{}', '') + tmp = line.split(',') + #print_line(" %s, %s }, // %u" %(tmp[0].lower(), tmp[1], c)) + print_line(" %s" %line.lower()) + c += 1 + +# lookupSVEPREDPATByEncoding +count = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if 'lookupSVEPREDPATByEncoding' in line and '{' in line: + count += 1 + print_line('const SVEPREDPAT *lookupSVEPREDPATByEncoding(uint16_t Encoding)\n{') + print_line(' unsigned int i;') + continue + + if count == 1 and 'IndexType Index[] = {' in line: + count += 1 + + if count == 2: + if line.strip() == '};': + # done with array, or this function? 
+ print_line(line) + break + else: + # enum items + print_line(line) + +print_line(""" + i = binsearch_IndexTypeEncoding(Index, ARR_SIZE(Index), Encoding); + if (i == -1) + return NULL; + else + return &SVEPREDPATsList[Index[i].index]; +} +""") + + +# extract ExactFPImmsList +count = 0 +c = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if line.strip() == 'const ExactFPImm ExactFPImmsList[] = {': + count += 1 + print_line('static const ExactFPImm ExactFPImmsList[] = {') + continue + + line = line.strip() + if count == 1: + if line == '};': + # done with first enum + print_line('};\n') + break + else: + # skip pseudo instructions + line = line.replace('::', '_') + #line = line.replace('{}', '{ 0 }') + line = line.replace('{}', '') + tmp = line.split(',') + #print_line(" %s, %s }, // %u" %(tmp[0].lower(), tmp[1], c)) + print_line(" %s" %line.lower()) + c += 1 + +# lookupExactFPImmByEnum +count = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if 'lookupExactFPImmByEnum' in line and '{' in line: + count += 1 + print_line('const ExactFPImm *lookupExactFPImmByEnum(uint16_t Encoding)\n{') + continue + + if count == 1 and 'IndexType Index[] = {' in line: + count += 1 + + if count == 2: + if line.strip() == '};': + # done with array, or this function? + print_line(line) + break + else: + # enum items + print_line(line) + +print_line(""" + if (Encoding >= ARR_SIZE(ExactFPImmsList)) + return NULL; + else + return &ExactFPImmsList[Index[Encoding].index]; +} +""") + diff --git a/capstone/suite/synctools/systemregister.py b/capstone/suite/synctools/systemregister.py new file mode 100755 index 000000000..ed3e94352 --- /dev/null +++ b/capstone/suite/synctools/systemregister.py @@ -0,0 +1,213 @@ +#!/usr/bin/python +# convert LLVM GenSystemRegister.inc for Capstone disassembler. +# by Nguyen Anh Quynh, 2019 + +import sys + +if len(sys.argv) == 1: + print("Syntax: %s " %sys.argv[0]) + sys.exit(1) + +f = open(sys.argv[1]) +lines = f.readlines() +f.close() + +#arch = sys.argv[2].upper() + +print(""" +/* Capstone Disassembly Engine, http://www.capstone-engine.org */ +/* By Nguyen Anh Quynh , 2013-2019 */ + +/*===- TableGen'erated file -------------------------------------*- C++ -*-===*\ +|* *| +|* GenSystemRegister Source Fragment *| +|* *| +|* Automatically generated file, do not edit! 
*| +|* *| +\*===----------------------------------------------------------------------===*/ + +""") + +# extract BankedRegValues enum +count = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if line.strip() == 'enum BankedRegValues {': + count += 1 + print(line.strip()) + continue + + line = line.strip() + if count == 1: + if line == '};': + # done with first enum + break + else: + # skip pseudo instructions + print("\t%s" %(line)) + +print('};\n') + +# extract MClassSysRegsList +count = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if 'MClassSysRegsList[]' in line: + count += 1 + print('static const MClassSysReg MClassSysRegsList[] = {') + continue + + if count == 1: + if line.strip() == '};': + # done with first enum + break + else: + # enum items + # { "apsr_g", 0x400, 0x0, 0x400, {ARM::FeatureDSP} }, // 0 + line2 = line.replace('::', '_') + sysreg = line2[line2.index('"') + 1 : line2.index('",')] + tmp = line2.split(',') + print("%s, ARM_SYSREG_%s%s" %(line2[:line2.index('",') + 1], sysreg.upper(), line2[line2.index('",') + 1 :])) + +print('};\n') + +# extract BankedRegsList +count = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if 'BankedRegsList[]' in line: + count += 1 + print('static const BankedReg BankedRegsList[] = {') + continue + + if count == 1: + if line.strip() == '};': + # done with first enum + break + else: + # enum items + line2 = line.replace('::', '_') + sysreg = line2[line2.index('"') + 1 : line2.index('",')] + tmp = line2.split(',') + print("%s, ARM_SYSREG_%s%s" %(line2[:line2.index('",') + 1], sysreg.upper(), line2[line2.index('",') + 1 :])) + +print('};\n') + +# lookupMClassSysRegByM2M3Encoding8 +count = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if 'lookupMClassSysRegByM2M3Encoding8' in line and '{' in line: + count += 1 + print('const MClassSysReg *lookupMClassSysRegByM2M3Encoding8(uint16_t encoding)\n{') + print(' unsigned int i;') + continue + + if count == 1 and 'IndexType Index[] = {' in line: + count += 1 + + if count == 2: + if line.strip() == '};': + # done with array, or this function? + print(line) + break + else: + # enum items + print(line) + +print(""" + i = binsearch_IndexTypeEncoding(Index, ARR_SIZE(Index), encoding); + if (i == -1) + return NULL; + else + return &MClassSysRegsList[Index[i].index]; +} +""") + + +# lookupMClassSysRegByM1Encoding12 +count = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if 'lookupMClassSysRegByM1Encoding12' in line and '{' in line: + count += 1 + print('const MClassSysReg *lookupMClassSysRegByM1Encoding12(uint16_t encoding)\n{') + print(' unsigned int i;') + continue + + if count == 1 and 'IndexType Index[] = {' in line: + count += 1 + + if count == 2: + if line.strip() == '};': + # done with array, or this function? 
+ print(line) + break + else: + # enum items + print(line) + +print(""" + i = binsearch_IndexTypeEncoding(Index, ARR_SIZE(Index), encoding); + if (i == -1) + return NULL; + else + return &MClassSysRegsList[Index[i].index]; +} +""") + +# lookupBankedRegByEncoding +count = 0 +for line in lines: + line = line.rstrip() + + if len(line.strip()) == 0: + continue + + if 'lookupBankedRegByEncoding' in line and '{' in line: + count += 1 + print('const BankedReg *lookupBankedRegByEncoding(uint8_t encoding)\n{') + print(' unsigned int i;') + continue + + if count == 1 and 'IndexType Index[] = {' in line: + count += 1 + + if count == 2: + if line.strip() == '};': + # done with array, or this function? + print(line) + break + else: + # enum items + print(line) + +print(""" + i = binsearch_IndexTypeEncoding(Index, ARR_SIZE(Index), encoding); + if (i == -1) + return NULL; + else + return &BankedRegsList[Index[i].index]; +} +""") + diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64.td b/capstone/suite/synctools/tablegen/AArch64/AArch64.td new file mode 100644 index 000000000..a69d38144 --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64.td @@ -0,0 +1,579 @@ +//=- AArch64.td - Describe the AArch64 Target Machine --------*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces which we are implementing. +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// AArch64 Subtarget features. +// + +def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true", + "Enable ARMv8 FP">; + +def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", + "Enable Advanced SIMD instructions", [FeatureFPARMv8]>; + +def FeatureSM4 : SubtargetFeature< + "sm4", "HasSM4", "true", + "Enable SM3 and SM4 support", [FeatureNEON]>; + +def FeatureSHA2 : SubtargetFeature< + "sha2", "HasSHA2", "true", + "Enable SHA1 and SHA256 support", [FeatureNEON]>; + +def FeatureSHA3 : SubtargetFeature< + "sha3", "HasSHA3", "true", + "Enable SHA512 and SHA3 support", [FeatureNEON, FeatureSHA2]>; + +def FeatureAES : SubtargetFeature< + "aes", "HasAES", "true", + "Enable AES support", [FeatureNEON]>; + +// Crypto has been split up and any combination is now valid (see the +// crypto defintions above). Also, crypto is now context sensitive: +// it has a different meaning for e.g. Armv8.4 than it has for Armv8.2. +// Therefore, we rely on Clang, the user interacing tool, to pass on the +// appropriate crypto options. But here in the backend, crypto has very little +// meaning anymore. We kept the Crypto defintion here for backward +// compatibility, and now imply features SHA2 and AES, which was the +// "traditional" meaning of Crypto. 
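+// For example, a CPU that previously requested "crypto" can now be modelled
+// (approximately) as FeatureSHA2 plus FeatureAES, which is what FeatureCrypto
+// below implies on top of NEON.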
+def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", + "Enable cryptographic instructions", [FeatureNEON, FeatureSHA2, FeatureAES]>; + +def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", + "Enable ARMv8 CRC-32 checksum instructions">; + +def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", + "Enable ARMv8 Reliability, Availability and Serviceability Extensions">; + +def FeatureLSE : SubtargetFeature<"lse", "HasLSE", "true", + "Enable ARMv8.1 Large System Extension (LSE) atomic instructions">; + +def FeatureRDM : SubtargetFeature<"rdm", "HasRDM", "true", + "Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions">; + +def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", + "Enable ARMv8 PMUv3 Performance Monitors extension">; + +def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", + "Full FP16", [FeatureFPARMv8]>; + +def FeatureSPE : SubtargetFeature<"spe", "HasSPE", "true", + "Enable Statistical Profiling extension">; + +def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true", + "Enable Scalable Vector Extension (SVE) instructions">; + +/// Cyclone has register move instructions which are "free". +def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", + "Has zero-cycle register moves">; + +/// Cyclone has instructions which zero registers for "free". +def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", + "Has zero-cycle zeroing instructions">; + +/// ... but the floating-point version doesn't quite work in rare cases on older +/// CPUs. +def FeatureZCZeroingFPWorkaround : SubtargetFeature<"zcz-fp-workaround", + "HasZeroCycleZeroingFPWorkaround", "true", + "The zero-cycle floating-point zeroing instruction has a bug">; + +def FeatureStrictAlign : SubtargetFeature<"strict-align", + "StrictAlign", "true", + "Disallow all unaligned memory " + "access">; + +def FeatureReserveX18 : SubtargetFeature<"reserve-x18", "ReserveX18", "true", + "Reserve X18, making it unavailable " + "as a GPR">; + +def FeatureReserveX20 : SubtargetFeature<"reserve-x20", "ReserveX20", "true", + "Reserve X20, making it unavailable " + "as a GPR">; + +def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true", + "Use alias analysis during codegen">; + +def FeatureBalanceFPOps : SubtargetFeature<"balance-fp-ops", "BalanceFPOps", + "true", + "balance mix of odd and even D-registers for fp multiply(-accumulate) ops">; + +def FeaturePredictableSelectIsExpensive : SubtargetFeature< + "predictable-select-expensive", "PredictableSelectIsExpensive", "true", + "Prefer likely predicted branches over selects">; + +def FeatureCustomCheapAsMoveHandling : SubtargetFeature<"custom-cheap-as-move", + "CustomAsCheapAsMove", "true", + "Use custom code for TargetInstrInfo::isAsCheapAsAMove()">; + +def FeatureExynosCheapAsMoveHandling : SubtargetFeature<"exynos-cheap-as-move", + "ExynosAsCheapAsMove", "true", + "Use Exynos specific code in TargetInstrInfo::isAsCheapAsAMove()", + [FeatureCustomCheapAsMoveHandling]>; + +def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler", + "UsePostRAScheduler", "true", "Schedule again after register allocation">; + +def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store", + "Misaligned128StoreIsSlow", "true", "Misaligned 128 bit stores are slow">; + +def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128", + "Paired128IsSlow", "true", "Paired 128 bit loads and stores are slow">; + +def FeatureSlowSTRQro : 
SubtargetFeature<"slow-strqro-store", "STRQroIsSlow", + "true", "STR of Q register with register offset is slow">; + +def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature< + "alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern", + "true", "Use alternative pattern for sextload convert to f32">; + +def FeatureArithmeticBccFusion : SubtargetFeature< + "arith-bcc-fusion", "HasArithmeticBccFusion", "true", + "CPU fuses arithmetic+bcc operations">; + +def FeatureArithmeticCbzFusion : SubtargetFeature< + "arith-cbz-fusion", "HasArithmeticCbzFusion", "true", + "CPU fuses arithmetic + cbz/cbnz operations">; + +def FeatureFuseAddress : SubtargetFeature< + "fuse-address", "HasFuseAddress", "true", + "CPU fuses address generation and memory operations">; + +def FeatureFuseAES : SubtargetFeature< + "fuse-aes", "HasFuseAES", "true", + "CPU fuses AES crypto operations">; + +def FeatureFuseCCSelect : SubtargetFeature< + "fuse-csel", "HasFuseCCSelect", "true", + "CPU fuses conditional select operations">; + +def FeatureFuseLiterals : SubtargetFeature< + "fuse-literals", "HasFuseLiterals", "true", + "CPU fuses literal generation operations">; + +def FeatureDisableLatencySchedHeuristic : SubtargetFeature< + "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true", + "Disable latency scheduling heuristic">; + +def FeatureRCPC : SubtargetFeature<"rcpc", "HasRCPC", "true", + "Enable support for RCPC extension">; + +def FeatureUseRSqrt : SubtargetFeature< + "use-reciprocal-square-root", "UseRSqrt", "true", + "Use the reciprocal square root approximation">; + +def FeatureDotProd : SubtargetFeature< + "dotprod", "HasDotProd", "true", + "Enable dot product support">; + +def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates", + "NegativeImmediates", "false", + "Convert immediates and instructions " + "to their negated or complemented " + "equivalent when the immediate does " + "not fit in the encoding.">; + +def FeatureLSLFast : SubtargetFeature< + "lsl-fast", "HasLSLFast", "true", + "CPU has a fastpath logical shift of up to 3 places">; + +def FeatureAggressiveFMA : + SubtargetFeature<"aggressive-fma", + "HasAggressiveFMA", + "true", + "Enable Aggressive FMA for floating-point.">; + +//===----------------------------------------------------------------------===// +// Architectures. 
+// + +def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", + "Support ARM v8.1a instructions", [FeatureCRC, FeatureLSE, FeatureRDM]>; + +def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true", + "Support ARM v8.2a instructions", [HasV8_1aOps, FeatureRAS]>; + +def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true", + "Support ARM v8.3a instructions", [HasV8_2aOps, FeatureRCPC]>; + +def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true", + "Support ARM v8.4a instructions", [HasV8_3aOps, FeatureDotProd]>; + +//===----------------------------------------------------------------------===// +// Register File Description +//===----------------------------------------------------------------------===// + +include "AArch64RegisterInfo.td" +include "AArch64RegisterBanks.td" +include "AArch64CallingConvention.td" + +//===----------------------------------------------------------------------===// +// Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "AArch64Schedule.td" +include "AArch64InstrInfo.td" + +def AArch64InstrInfo : InstrInfo; + +//===----------------------------------------------------------------------===// +// Named operands for MRS/MSR/TLBI/... +//===----------------------------------------------------------------------===// + +include "AArch64SystemOperands.td" + +//===----------------------------------------------------------------------===// +// AArch64 Processors supported. +// +include "AArch64SchedA53.td" +include "AArch64SchedA57.td" +include "AArch64SchedCyclone.td" +include "AArch64SchedFalkor.td" +include "AArch64SchedKryo.td" +include "AArch64SchedExynosM1.td" +include "AArch64SchedExynosM3.td" +include "AArch64SchedThunderX.td" +include "AArch64SchedThunderX2T99.td" + +def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35", + "Cortex-A35 ARM processors", [ + FeatureCRC, + FeatureCrypto, + FeatureFPARMv8, + FeatureNEON, + FeaturePerfMon + ]>; + +def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", + "Cortex-A53 ARM processors", [ + FeatureBalanceFPOps, + FeatureCRC, + FeatureCrypto, + FeatureCustomCheapAsMoveHandling, + FeatureFPARMv8, + FeatureFuseAES, + FeatureNEON, + FeaturePerfMon, + FeaturePostRAScheduler, + FeatureUseAA + ]>; + +def ProcA55 : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55", + "Cortex-A55 ARM processors", [ + HasV8_2aOps, + FeatureCrypto, + FeatureFPARMv8, + FeatureFuseAES, + FeatureNEON, + FeatureFullFP16, + FeatureDotProd, + FeatureRCPC, + FeaturePerfMon + ]>; + +def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", + "Cortex-A57 ARM processors", [ + FeatureBalanceFPOps, + FeatureCRC, + FeatureCrypto, + FeatureCustomCheapAsMoveHandling, + FeatureFPARMv8, + FeatureFuseAES, + FeatureFuseLiterals, + FeatureNEON, + FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive + ]>; + +def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72", + "Cortex-A72 ARM processors", [ + FeatureCRC, + FeatureCrypto, + FeatureFPARMv8, + FeatureFuseAES, + FeatureNEON, + FeaturePerfMon + ]>; + +def ProcA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73", + "Cortex-A73 ARM processors", [ + FeatureCRC, + FeatureCrypto, + FeatureFPARMv8, + FeatureFuseAES, + FeatureNEON, + FeaturePerfMon + ]>; + +def ProcA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75", + "Cortex-A75 ARM processors", [ + HasV8_2aOps, + FeatureCrypto, + FeatureFPARMv8, + FeatureFuseAES, + FeatureNEON, + 
FeatureFullFP16, + FeatureDotProd, + FeatureRCPC, + FeaturePerfMon + ]>; + +// Note that cyclone does not fuse AES instructions, but newer apple chips do +// perform the fusion and cyclone is used by default when targetting apple OSes. +def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone", + "Cyclone", [ + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureCrypto, + FeatureDisableLatencySchedHeuristic, + FeatureFPARMv8, + FeatureFuseAES, + FeatureNEON, + FeaturePerfMon, + FeatureZCRegMove, + FeatureZCZeroing, + FeatureZCZeroingFPWorkaround + ]>; + +def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1", + "Samsung Exynos-M1 processors", + [FeatureSlowPaired128, + FeatureCRC, + FeatureCrypto, + FeatureExynosCheapAsMoveHandling, + FeatureFPARMv8, + FeatureFuseAES, + FeatureNEON, + FeaturePerfMon, + FeaturePostRAScheduler, + FeatureSlowMisaligned128Store, + FeatureUseRSqrt, + FeatureZCZeroing]>; + +def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1", + "Samsung Exynos-M2 processors", + [FeatureSlowPaired128, + FeatureCRC, + FeatureCrypto, + FeatureExynosCheapAsMoveHandling, + FeatureFPARMv8, + FeatureFuseAES, + FeatureNEON, + FeaturePerfMon, + FeaturePostRAScheduler, + FeatureSlowMisaligned128Store, + FeatureZCZeroing]>; + +def ProcExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3", + "Samsung Exynos-M3 processors", + [FeatureCRC, + FeatureCrypto, + FeatureExynosCheapAsMoveHandling, + FeatureFPARMv8, + FeatureFuseAddress, + FeatureFuseAES, + FeatureFuseCCSelect, + FeatureFuseLiterals, + FeatureLSLFast, + FeatureNEON, + FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureZCZeroing]>; + +def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo", + "Qualcomm Kryo processors", [ + FeatureCRC, + FeatureCrypto, + FeatureCustomCheapAsMoveHandling, + FeatureFPARMv8, + FeatureNEON, + FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureZCZeroing, + FeatureLSLFast + ]>; + +def ProcFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor", + "Qualcomm Falkor processors", [ + FeatureCRC, + FeatureCrypto, + FeatureCustomCheapAsMoveHandling, + FeatureFPARMv8, + FeatureNEON, + FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureRDM, + FeatureZCZeroing, + FeatureLSLFast, + FeatureSlowSTRQro + ]>; + +def ProcSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira", + "Qualcomm Saphira processors", [ + FeatureCrypto, + FeatureCustomCheapAsMoveHandling, + FeatureFPARMv8, + FeatureNEON, + FeatureSPE, + FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureZCZeroing, + FeatureLSLFast, + HasV8_3aOps]>; + +def ProcThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily", + "ThunderX2T99", + "Cavium ThunderX2 processors", [ + FeatureAggressiveFMA, + FeatureCRC, + FeatureCrypto, + FeatureFPARMv8, + FeatureArithmeticBccFusion, + FeatureNEON, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureLSE, + HasV8_1aOps]>; + +def ProcThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX", + "Cavium ThunderX processors", [ + FeatureCRC, + FeatureCrypto, + FeatureFPARMv8, + FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureNEON]>; + +def ProcThunderXT88 : SubtargetFeature<"thunderxt88", "ARMProcFamily", + "ThunderXT88", + "Cavium ThunderX 
processors", [ + FeatureCRC, + FeatureCrypto, + FeatureFPARMv8, + FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureNEON]>; + +def ProcThunderXT81 : SubtargetFeature<"thunderxt81", "ARMProcFamily", + "ThunderXT81", + "Cavium ThunderX processors", [ + FeatureCRC, + FeatureCrypto, + FeatureFPARMv8, + FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureNEON]>; + +def ProcThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily", + "ThunderXT83", + "Cavium ThunderX processors", [ + FeatureCRC, + FeatureCrypto, + FeatureFPARMv8, + FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureNEON]>; + +def : ProcessorModel<"generic", NoSchedModel, [ + FeatureFPARMv8, + FeatureFuseAES, + FeatureNEON, + FeaturePerfMon, + FeaturePostRAScheduler + ]>; + +// FIXME: Cortex-A35 and Cortex-A55 are currently modeled as a Cortex-A53. +def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>; +def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>; +def : ProcessorModel<"cortex-a55", CortexA53Model, [ProcA55]>; +def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>; +// FIXME: Cortex-A72, Cortex-A73 and Cortex-A75 are currently modeled as a Cortex-A57. +def : ProcessorModel<"cortex-a72", CortexA57Model, [ProcA72]>; +def : ProcessorModel<"cortex-a73", CortexA57Model, [ProcA73]>; +def : ProcessorModel<"cortex-a75", CortexA57Model, [ProcA75]>; +def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>; +def : ProcessorModel<"exynos-m1", ExynosM1Model, [ProcExynosM1]>; +def : ProcessorModel<"exynos-m2", ExynosM1Model, [ProcExynosM2]>; +def : ProcessorModel<"exynos-m3", ExynosM3Model, [ProcExynosM3]>; +def : ProcessorModel<"exynos-m4", ExynosM3Model, [ProcExynosM3]>; +def : ProcessorModel<"falkor", FalkorModel, [ProcFalkor]>; +def : ProcessorModel<"saphira", FalkorModel, [ProcSaphira]>; +def : ProcessorModel<"kryo", KryoModel, [ProcKryo]>; +// Cavium ThunderX/ThunderX T8X Processors +def : ProcessorModel<"thunderx", ThunderXT8XModel, [ProcThunderX]>; +def : ProcessorModel<"thunderxt88", ThunderXT8XModel, [ProcThunderXT88]>; +def : ProcessorModel<"thunderxt81", ThunderXT8XModel, [ProcThunderXT81]>; +def : ProcessorModel<"thunderxt83", ThunderXT8XModel, [ProcThunderXT83]>; +// Cavium ThunderX2T9X Processors. Formerly Broadcom Vulcan. +def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, [ProcThunderX2T99]>; + +//===----------------------------------------------------------------------===// +// Assembly parser +//===----------------------------------------------------------------------===// + +def GenericAsmParserVariant : AsmParserVariant { + int Variant = 0; + string Name = "generic"; + string BreakCharacters = "."; + string TokenizingCharacters = "[]*!/"; +} + +def AppleAsmParserVariant : AsmParserVariant { + int Variant = 1; + string Name = "apple-neon"; + string BreakCharacters = "."; + string TokenizingCharacters = "[]*!/"; +} + +//===----------------------------------------------------------------------===// +// Assembly printer +//===----------------------------------------------------------------------===// +// AArch64 Uses the MC printer for asm output, so make sure the TableGen +// AsmWriter bits get associated with the correct class. 
+def GenericAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + int PassSubtarget = 1; + int Variant = 0; + bit isMCAsmWriter = 1; +} + +def AppleAsmWriter : AsmWriter { + let AsmWriterClassName = "AppleInstPrinter"; + int PassSubtarget = 1; + int Variant = 1; + int isMCAsmWriter = 1; +} + +//===----------------------------------------------------------------------===// +// Target Declaration +//===----------------------------------------------------------------------===// + +def AArch64 : Target { + let InstructionSet = AArch64InstrInfo; + let AssemblyParserVariants = [GenericAsmParserVariant, AppleAsmParserVariant]; + let AssemblyWriters = [GenericAsmWriter, AppleAsmWriter]; + let AllowRegisterRenaming = 1; +} diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64CallingConvention.td b/capstone/suite/synctools/tablegen/AArch64/AArch64CallingConvention.td new file mode 100644 index 000000000..30492003d --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64CallingConvention.td @@ -0,0 +1,366 @@ +//=- AArch64CallingConv.td - Calling Conventions for AArch64 -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This describes the calling conventions for AArch64 architecture. +// +//===----------------------------------------------------------------------===// + +/// CCIfAlign - Match of the original alignment of the arg +class CCIfAlign : + CCIf; +/// CCIfBigEndian - Match only if we're in big endian mode. +class CCIfBigEndian : + CCIf<"State.getMachineFunction().getDataLayout().isBigEndian()", A>; + +//===----------------------------------------------------------------------===// +// ARM AAPCS64 Calling Convention +//===----------------------------------------------------------------------===// + +def CC_AArch64_AAPCS : CallingConv<[ + CCIfType<[iPTR], CCBitConvertToType>, + CCIfType<[v2f32], CCBitConvertToType>, + CCIfType<[v2f64, v4f32], CCBitConvertToType>, + + // Big endian vectors must be passed as if they were 1-element vectors so that + // their lanes are in a consistent order. + CCIfBigEndian>>, + CCIfBigEndian>>, + + // An SRet is passed in X8, not X0 like a normal pointer parameter. + CCIfSRet>>, + + // Put ByVal arguments directly on the stack. Minimum size and alignment of a + // slot is 64-bit. + CCIfByVal>, + + // The 'nest' parameter, if any, is passed in X18. + // Darwin uses X18 as the platform register and hence 'nest' isn't currently + // supported there. + CCIfNest>, + + // Pass SwiftSelf in a callee saved register. + CCIfSwiftSelf>>, + + // A SwiftError is passed in X21. + CCIfSwiftError>>, + + CCIfConsecutiveRegs>, + + // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, + // up to eight each of GPR and FPR. + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], + [X0, X1, X2, X3, X4, X5, X6, X7]>>, + // i128 is split to two i64s, we can't fit half to register X7. + CCIfType<[i64], CCIfSplit>>, + + // i128 is split to two i64s, and its stack alignment is 16 bytes. 
+ CCIfType<[i64], CCIfSplit>>, + + CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], + [W0, W1, W2, W3, W4, W5, W6, W7]>>, + CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16], + CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16], + CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + + // If more than will fit in registers, pass them on the stack instead. + CCIfType<[i1, i8, i16, f16], CCAssignToStack<8, 8>>, + CCIfType<[i32, f32], CCAssignToStack<8, 8>>, + CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16], + CCAssignToStack<8, 8>>, + CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16], + CCAssignToStack<16, 16>> +]>; + +def RetCC_AArch64_AAPCS : CallingConv<[ + CCIfType<[iPTR], CCBitConvertToType>, + CCIfType<[v2f32], CCBitConvertToType>, + CCIfType<[v2f64, v4f32], CCBitConvertToType>, + + CCIfSwiftError>>, + + // Big endian vectors must be passed as if they were 1-element vectors so that + // their lanes are in a consistent order. + CCIfBigEndian>>, + CCIfBigEndian>>, + + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], + [X0, X1, X2, X3, X4, X5, X6, X7]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], + [W0, W1, W2, W3, W4, W5, W6, W7]>>, + CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16], + CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16], + CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>> +]>; + +// Vararg functions on windows pass floats in integer registers +def CC_AArch64_Win64_VarArg : CallingConv<[ + CCIfType<[f16, f32], CCPromoteToType>, + CCIfType<[f64], CCBitConvertToType>, + CCDelegateTo +]>; + + +// Darwin uses a calling convention which differs in only two ways +// from the standard one at this level: +// + i128s (i.e. split i64s) don't need even registers. +// + Stack slots are sized as needed rather than being at least 64-bit. +def CC_AArch64_DarwinPCS : CallingConv<[ + CCIfType<[iPTR], CCBitConvertToType>, + CCIfType<[v2f32], CCBitConvertToType>, + CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, + + // An SRet is passed in X8, not X0 like a normal pointer parameter. + CCIfSRet>>, + + // Put ByVal arguments directly on the stack. Minimum size and alignment of a + // slot is 64-bit. + CCIfByVal>, + + // Pass SwiftSelf in a callee saved register. + CCIfSwiftSelf>>, + + // A SwiftError is passed in X21. + CCIfSwiftError>>, + + CCIfConsecutiveRegs>, + + // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, + // up to eight each of GPR and FPR. 
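+  // As a minimal illustration of the rules below: for a call such as
+  // f(int a, double b, long c, float d), 'a' is assigned to W0, 'b' to D0,
+  // 'c' to X1 and 'd' to S1; integer and floating-point arguments are counted
+  // independently, each capped at eight registers.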
+ CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], + [X0, X1, X2, X3, X4, X5, X6, X7]>>, + // i128 is split to two i64s, we can't fit half to register X7. + CCIfType<[i64], + CCIfSplit>>, + // i128 is split to two i64s, and its stack alignment is 16 bytes. + CCIfType<[i64], CCIfSplit>>, + + CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], + [W0, W1, W2, W3, W4, W5, W6, W7]>>, + CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16], + CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16], + CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + + // If more than will fit in registers, pass them on the stack instead. + CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>, + CCIf<"ValVT == MVT::i16 || ValVT == MVT::f16", CCAssignToStack<2, 2>>, + CCIfType<[i32, f32], CCAssignToStack<4, 4>>, + CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16], + CCAssignToStack<8, 8>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16], + CCAssignToStack<16, 16>> +]>; + +def CC_AArch64_DarwinPCS_VarArg : CallingConv<[ + CCIfType<[iPTR], CCBitConvertToType>, + CCIfType<[v2f32], CCBitConvertToType>, + CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, + + CCIfConsecutiveRegs>, + + // Handle all scalar types as either i64 or f64. + CCIfType<[i8, i16, i32], CCPromoteToType>, + CCIfType<[f16, f32], CCPromoteToType>, + + // Everything is on the stack. + // i128 is split to two i64s, and its stack alignment is 16 bytes. + CCIfType<[i64], CCIfSplit>>, + CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16], + CCAssignToStack<8, 8>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16], + CCAssignToStack<16, 16>> +]>; + +// The WebKit_JS calling convention only passes the first argument (the callee) +// in register and the remaining arguments on stack. We allow 32bit stack slots, +// so that WebKit can write partial values in the stack and define the other +// 32bit quantity as undef. +def CC_AArch64_WebKit_JS : CallingConv<[ + // Handle i1, i8, i16, i32, and i64 passing in register X0 (W0). + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToRegWithShadow<[W0], [X0]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[X0], [W0]>>, + + // Pass the remaining arguments on the stack instead. 
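+  // For instance, the first (callee) argument of a WebKit JS call is passed in
+  // X0 (or W0 for a 32-bit value), and every later argument gets its own slot:
+  // 4 bytes for i32/f32, 8 bytes for i64/f64.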
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>, + CCIfType<[i64, f64], CCAssignToStack<8, 8>> +]>; + +def RetCC_AArch64_WebKit_JS : CallingConv<[ + CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], + [X0, X1, X2, X3, X4, X5, X6, X7]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], + [W0, W1, W2, W3, W4, W5, W6, W7]>>, + CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>> +]>; + +//===----------------------------------------------------------------------===// +// ARM64 Calling Convention for GHC +//===----------------------------------------------------------------------===// + +// This calling convention is specific to the Glasgow Haskell Compiler. +// The only documentation is the GHC source code, specifically the C header +// file: +// +// https://github.com/ghc/ghc/blob/master/includes/stg/MachRegs.h +// +// which defines the registers for the Spineless Tagless G-Machine (STG) that +// GHC uses to implement lazy evaluation. The generic STG machine has a set of +// registers which are mapped to appropriate set of architecture specific +// registers for each CPU architecture. +// +// The STG Machine is documented here: +// +// https://ghc.haskell.org/trac/ghc/wiki/Commentary/Compiler/GeneratedCode +// +// The AArch64 register mapping is under the heading "The ARMv8/AArch64 ABI +// register mapping". + +def CC_AArch64_GHC : CallingConv<[ + CCIfType<[iPTR], CCBitConvertToType>, + + // Handle all vector types as either f64 or v2f64. + CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, f128], CCBitConvertToType>, + + CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>, + CCIfType<[f32], CCAssignToReg<[S8, S9, S10, S11]>>, + CCIfType<[f64], CCAssignToReg<[D12, D13, D14, D15]>>, + + // Promote i8/i16/i32 arguments to i64. + CCIfType<[i8, i16, i32], CCPromoteToType>, + + // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, SpLim + CCIfType<[i64], CCAssignToReg<[X19, X20, X21, X22, X23, X24, X25, X26, X27, X28]>> +]>; + +// FIXME: LR is only callee-saved in the sense that *we* preserve it and are +// presumably a callee to someone. External functions may not do so, but this +// is currently safe since BL has LR as an implicit-def and what happens after a +// tail call doesn't matter. +// +// It would be better to model its preservation semantics properly (create a +// vreg on entry, use it in RET & tail call generation; make that vreg def if we +// end up saving LR as part of a call frame). Watch this space... 
+def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, + X23, X24, X25, X26, X27, X28, + D8, D9, D10, D11, + D12, D13, D14, D15)>; + +// Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since +// 'this' and the pointer return value are both passed in X0 in these cases, +// this can be partially modelled by treating X0 as a callee-saved register; +// only the resulting RegMask is used; the SaveList is ignored +// +// (For generic ARM 64-bit ABI code, clang will not generate constructors or +// destructors with 'this' returns, so this RegMask will not be used in that +// case) +def CSR_AArch64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>; + +def CSR_AArch64_AAPCS_SwiftError + : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X21)>; + +// The function used by Darwin to obtain the address of a thread-local variable +// guarantees more than a normal AAPCS function. x16 and x17 are used on the +// fast path for calculation, but other registers except X0 (argument/return) +// and LR (it is a call, after all) are preserved. +def CSR_AArch64_TLS_Darwin + : CalleeSavedRegs<(add (sub (sequence "X%u", 1, 28), X16, X17), + FP, + (sequence "Q%u", 0, 31))>; + +// We can only handle a register pair with adjacent registers, the register pair +// should belong to the same class as well. Since the access function on the +// fast path calls a function that follows CSR_AArch64_TLS_Darwin, +// CSR_AArch64_CXX_TLS_Darwin should be a subset of CSR_AArch64_TLS_Darwin. +def CSR_AArch64_CXX_TLS_Darwin + : CalleeSavedRegs<(add CSR_AArch64_AAPCS, + (sub (sequence "X%u", 1, 28), X15, X16, X17, X18), + (sequence "D%u", 0, 31))>; + +// CSRs that are handled by prologue, epilogue. +def CSR_AArch64_CXX_TLS_Darwin_PE + : CalleeSavedRegs<(add LR, FP)>; + +// CSRs that are handled explicitly via copies. +def CSR_AArch64_CXX_TLS_Darwin_ViaCopy + : CalleeSavedRegs<(sub CSR_AArch64_CXX_TLS_Darwin, LR, FP)>; + +// The ELF stub used for TLS-descriptor access saves every feasible +// register. Only X0 and LR are clobbered. +def CSR_AArch64_TLS_ELF + : CalleeSavedRegs<(add (sequence "X%u", 1, 28), FP, + (sequence "Q%u", 0, 31))>; + +def CSR_AArch64_AllRegs + : CalleeSavedRegs<(add (sequence "W%u", 0, 30), WSP, + (sequence "X%u", 0, 28), FP, LR, SP, + (sequence "B%u", 0, 31), (sequence "H%u", 0, 31), + (sequence "S%u", 0, 31), (sequence "D%u", 0, 31), + (sequence "Q%u", 0, 31))>; + +def CSR_AArch64_NoRegs : CalleeSavedRegs<(add)>; + +def CSR_AArch64_RT_MostRegs : CalleeSavedRegs<(add CSR_AArch64_AAPCS, + (sequence "X%u", 9, 15))>; + +def CSR_AArch64_StackProbe_Windows + : CalleeSavedRegs<(add (sequence "X%u", 0, 15), + (sequence "X%u", 18, 28), FP, SP, + (sequence "Q%u", 0, 31))>; + +// Variants of the standard calling conventions for shadow call stack. +// These all preserve x18 in addition to any other registers. 
+def CSR_AArch64_NoRegs_SCS + : CalleeSavedRegs<(add CSR_AArch64_NoRegs, X18)>; +def CSR_AArch64_AllRegs_SCS + : CalleeSavedRegs<(add CSR_AArch64_AllRegs, X18)>; +def CSR_AArch64_CXX_TLS_Darwin_SCS + : CalleeSavedRegs<(add CSR_AArch64_CXX_TLS_Darwin, X18)>; +def CSR_AArch64_AAPCS_SwiftError_SCS + : CalleeSavedRegs<(add CSR_AArch64_AAPCS_SwiftError, X18)>; +def CSR_AArch64_RT_MostRegs_SCS + : CalleeSavedRegs<(add CSR_AArch64_RT_MostRegs, X18)>; +def CSR_AArch64_AAPCS_SCS + : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X18)>; diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64InstrAtomics.td b/capstone/suite/synctools/tablegen/AArch64/AArch64InstrAtomics.td new file mode 100644 index 000000000..35cd7735c --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64InstrAtomics.td @@ -0,0 +1,426 @@ +//=- AArch64InstrAtomics.td - AArch64 Atomic codegen support -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// AArch64 Atomic operand code-gen constructs. +// +//===----------------------------------------------------------------------===// + +//===---------------------------------- +// Atomic fences +//===---------------------------------- +let AddedComplexity = 15, Size = 0 in +def CompilerBarrier : Pseudo<(outs), (ins i32imm:$ordering), + [(atomic_fence imm:$ordering, 0)]>, Sched<[]>; +def : Pat<(atomic_fence (i64 4), (imm)), (DMB (i32 0x9))>; +def : Pat<(atomic_fence (imm), (imm)), (DMB (i32 0xb))>; + +//===---------------------------------- +// Atomic loads +//===---------------------------------- + +// When they're actually atomic, only one addressing mode (GPR64sp) is +// supported, but when they're relaxed and anything can be used, all the +// standard modes would be valid and may give efficiency gains. + +// A atomic load operation that actually needs acquire semantics. +class acquiring_load + : PatFrag<(ops node:$ptr), (base node:$ptr)> { + let IsAtomic = 1; + let IsAtomicOrderingAcquireOrStronger = 1; +} + +// An atomic load operation that does not need either acquire or release +// semantics. 
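+// This covers monotonic (C11 memory_order_relaxed) and unordered loads: the
+// patterns below give them the full set of addressing modes (register offset,
+// scaled and unscaled immediates), while acquire and stronger loads match
+// acquiring_load and are pinned to the base-register-only LDAR* instructions.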
+class relaxed_load + : PatFrag<(ops node:$ptr), (base node:$ptr)> { + let IsAtomic = 1; + let IsAtomicOrderingAcquireOrStronger = 0; +} + +// 8-bit loads +def : Pat<(acquiring_load GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>; +def : Pat<(relaxed_load (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend8:$offset)), + (LDRBBroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$offset)>; +def : Pat<(relaxed_load (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend8:$offset)), + (LDRBBroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$offset)>; +def : Pat<(relaxed_load (am_indexed8 GPR64sp:$Rn, + uimm12s1:$offset)), + (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; +def : Pat<(relaxed_load + (am_unscaled8 GPR64sp:$Rn, simm9:$offset)), + (LDURBBi GPR64sp:$Rn, simm9:$offset)>; + +// 16-bit loads +def : Pat<(acquiring_load GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>; +def : Pat<(relaxed_load (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend16:$extend)), + (LDRHHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend)>; +def : Pat<(relaxed_load (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend)), + (LDRHHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend)>; +def : Pat<(relaxed_load (am_indexed16 GPR64sp:$Rn, + uimm12s2:$offset)), + (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>; +def : Pat<(relaxed_load + (am_unscaled16 GPR64sp:$Rn, simm9:$offset)), + (LDURHHi GPR64sp:$Rn, simm9:$offset)>; + +// 32-bit loads +def : Pat<(acquiring_load GPR64sp:$ptr), (LDARW GPR64sp:$ptr)>; +def : Pat<(relaxed_load (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend32:$extend)), + (LDRWroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>; +def : Pat<(relaxed_load (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend32:$extend)), + (LDRWroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>; +def : Pat<(relaxed_load (am_indexed32 GPR64sp:$Rn, + uimm12s4:$offset)), + (LDRWui GPR64sp:$Rn, uimm12s4:$offset)>; +def : Pat<(relaxed_load + (am_unscaled32 GPR64sp:$Rn, simm9:$offset)), + (LDURWi GPR64sp:$Rn, simm9:$offset)>; + +// 64-bit loads +def : Pat<(acquiring_load GPR64sp:$ptr), (LDARX GPR64sp:$ptr)>; +def : Pat<(relaxed_load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend64:$extend)), + (LDRXroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>; +def : Pat<(relaxed_load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend64:$extend)), + (LDRXroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>; +def : Pat<(relaxed_load (am_indexed64 GPR64sp:$Rn, + uimm12s8:$offset)), + (LDRXui GPR64sp:$Rn, uimm12s8:$offset)>; +def : Pat<(relaxed_load + (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), + (LDURXi GPR64sp:$Rn, simm9:$offset)>; + +//===---------------------------------- +// Atomic stores +//===---------------------------------- + +// When they're actually atomic, only one addressing mode (GPR64sp) is +// supported, but when they're relaxed and anything can be used, all the +// standard modes would be valid and may give efficiency gains. + +// A store operation that actually needs release semantics. +class releasing_store + : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val)> { + let IsAtomic = 1; + let IsAtomicOrderingReleaseOrStronger = 1; +} + +// An atomic store operation that doesn't actually need to be atomic on AArch64. 
+class relaxed_store + : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val)> { + let IsAtomic = 1; + let IsAtomicOrderingReleaseOrStronger = 0; +} + +// 8-bit stores +def : Pat<(releasing_store GPR64sp:$ptr, GPR32:$val), + (STLRB GPR32:$val, GPR64sp:$ptr)>; +def : Pat<(relaxed_store + (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$extend), + GPR32:$val), + (STRBBroW GPR32:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$extend)>; +def : Pat<(relaxed_store + (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$extend), + GPR32:$val), + (STRBBroX GPR32:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$extend)>; +def : Pat<(relaxed_store + (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset), GPR32:$val), + (STRBBui GPR32:$val, GPR64sp:$Rn, uimm12s1:$offset)>; +def : Pat<(relaxed_store + (am_unscaled8 GPR64sp:$Rn, simm9:$offset), GPR32:$val), + (STURBBi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>; + +// 16-bit stores +def : Pat<(releasing_store GPR64sp:$ptr, GPR32:$val), + (STLRH GPR32:$val, GPR64sp:$ptr)>; +def : Pat<(relaxed_store (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend16:$extend), + GPR32:$val), + (STRHHroW GPR32:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend)>; +def : Pat<(relaxed_store (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend), + GPR32:$val), + (STRHHroX GPR32:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend)>; +def : Pat<(relaxed_store + (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset), GPR32:$val), + (STRHHui GPR32:$val, GPR64sp:$Rn, uimm12s2:$offset)>; +def : Pat<(relaxed_store + (am_unscaled16 GPR64sp:$Rn, simm9:$offset), GPR32:$val), + (STURHHi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>; + +// 32-bit stores +def : Pat<(releasing_store GPR64sp:$ptr, GPR32:$val), + (STLRW GPR32:$val, GPR64sp:$ptr)>; +def : Pat<(relaxed_store (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend32:$extend), + GPR32:$val), + (STRWroW GPR32:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>; +def : Pat<(relaxed_store (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend32:$extend), + GPR32:$val), + (STRWroX GPR32:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>; +def : Pat<(relaxed_store + (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset), GPR32:$val), + (STRWui GPR32:$val, GPR64sp:$Rn, uimm12s4:$offset)>; +def : Pat<(relaxed_store + (am_unscaled32 GPR64sp:$Rn, simm9:$offset), GPR32:$val), + (STURWi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>; + +// 64-bit stores +def : Pat<(releasing_store GPR64sp:$ptr, GPR64:$val), + (STLRX GPR64:$val, GPR64sp:$ptr)>; +def : Pat<(relaxed_store (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend16:$extend), + GPR64:$val), + (STRXroW GPR64:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>; +def : Pat<(relaxed_store (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend), + GPR64:$val), + (STRXroX GPR64:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>; +def : Pat<(relaxed_store + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset), GPR64:$val), + (STRXui GPR64:$val, GPR64sp:$Rn, uimm12s8:$offset)>; +def : Pat<(relaxed_store + (am_unscaled64 GPR64sp:$Rn, simm9:$offset), GPR64:$val), + (STURXi GPR64:$val, GPR64sp:$Rn, simm9:$offset)>; + +//===---------------------------------- +// Low-level exclusive operations +//===---------------------------------- + +// Load-exclusives. 
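+//
+// The ldxr_N fragments below distinguish the llvm.aarch64.ldxr intrinsic by the
+// width of the access it performs (i8/i16/i32/i64). The 8-, 16- and 32-bit
+// forms zero-extend their result into a 64-bit register via SUBREG_TO_REG,
+// which is why the explicit masking 'and' patterns further down select the
+// same instructions.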
+ +def ldxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i8; +}]>; + +def ldxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i16; +}]>; + +def ldxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i32; +}]>; + +def ldxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i64; +}]>; + +def : Pat<(ldxr_1 GPR64sp:$addr), + (SUBREG_TO_REG (i64 0), (LDXRB GPR64sp:$addr), sub_32)>; +def : Pat<(ldxr_2 GPR64sp:$addr), + (SUBREG_TO_REG (i64 0), (LDXRH GPR64sp:$addr), sub_32)>; +def : Pat<(ldxr_4 GPR64sp:$addr), + (SUBREG_TO_REG (i64 0), (LDXRW GPR64sp:$addr), sub_32)>; +def : Pat<(ldxr_8 GPR64sp:$addr), (LDXRX GPR64sp:$addr)>; + +def : Pat<(and (ldxr_1 GPR64sp:$addr), 0xff), + (SUBREG_TO_REG (i64 0), (LDXRB GPR64sp:$addr), sub_32)>; +def : Pat<(and (ldxr_2 GPR64sp:$addr), 0xffff), + (SUBREG_TO_REG (i64 0), (LDXRH GPR64sp:$addr), sub_32)>; +def : Pat<(and (ldxr_4 GPR64sp:$addr), 0xffffffff), + (SUBREG_TO_REG (i64 0), (LDXRW GPR64sp:$addr), sub_32)>; + +// Load-exclusives. + +def ldaxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i8; +}]>; + +def ldaxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i16; +}]>; + +def ldaxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i32; +}]>; + +def ldaxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i64; +}]>; + +def : Pat<(ldaxr_1 GPR64sp:$addr), + (SUBREG_TO_REG (i64 0), (LDAXRB GPR64sp:$addr), sub_32)>; +def : Pat<(ldaxr_2 GPR64sp:$addr), + (SUBREG_TO_REG (i64 0), (LDAXRH GPR64sp:$addr), sub_32)>; +def : Pat<(ldaxr_4 GPR64sp:$addr), + (SUBREG_TO_REG (i64 0), (LDAXRW GPR64sp:$addr), sub_32)>; +def : Pat<(ldaxr_8 GPR64sp:$addr), (LDAXRX GPR64sp:$addr)>; + +def : Pat<(and (ldaxr_1 GPR64sp:$addr), 0xff), + (SUBREG_TO_REG (i64 0), (LDAXRB GPR64sp:$addr), sub_32)>; +def : Pat<(and (ldaxr_2 GPR64sp:$addr), 0xffff), + (SUBREG_TO_REG (i64 0), (LDAXRH GPR64sp:$addr), sub_32)>; +def : Pat<(and (ldaxr_4 GPR64sp:$addr), 0xffffffff), + (SUBREG_TO_REG (i64 0), (LDAXRW GPR64sp:$addr), sub_32)>; + +// Store-exclusives. 
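+//
+// A store-exclusive succeeds, and returns 0, only while the exclusive monitor
+// set up by the matching load-exclusive is still held. A minimal C sketch of
+// the usual retry loop, assuming Clang's __builtin_arm_ldrex and
+// __builtin_arm_strex builtins and an 'unsigned long *p' to increment:
+//
+//   unsigned long old;
+//   do {
+//     old = __builtin_arm_ldrex(p);            /* LDXR              */
+//   } while (__builtin_arm_strex(old + 1, p)); /* STXR, 0 = success */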
+ +def stxr_1 : PatFrag<(ops node:$val, node:$ptr), + (int_aarch64_stxr node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i8; +}]>; + +def stxr_2 : PatFrag<(ops node:$val, node:$ptr), + (int_aarch64_stxr node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i16; +}]>; + +def stxr_4 : PatFrag<(ops node:$val, node:$ptr), + (int_aarch64_stxr node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i32; +}]>; + +def stxr_8 : PatFrag<(ops node:$val, node:$ptr), + (int_aarch64_stxr node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i64; +}]>; + + +def : Pat<(stxr_1 GPR64:$val, GPR64sp:$addr), + (STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; +def : Pat<(stxr_2 GPR64:$val, GPR64sp:$addr), + (STXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; +def : Pat<(stxr_4 GPR64:$val, GPR64sp:$addr), + (STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; +def : Pat<(stxr_8 GPR64:$val, GPR64sp:$addr), + (STXRX GPR64:$val, GPR64sp:$addr)>; + +def : Pat<(stxr_1 (zext (and GPR32:$val, 0xff)), GPR64sp:$addr), + (STXRB GPR32:$val, GPR64sp:$addr)>; +def : Pat<(stxr_2 (zext (and GPR32:$val, 0xffff)), GPR64sp:$addr), + (STXRH GPR32:$val, GPR64sp:$addr)>; +def : Pat<(stxr_4 (zext GPR32:$val), GPR64sp:$addr), + (STXRW GPR32:$val, GPR64sp:$addr)>; + +def : Pat<(stxr_1 (and GPR64:$val, 0xff), GPR64sp:$addr), + (STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; +def : Pat<(stxr_2 (and GPR64:$val, 0xffff), GPR64sp:$addr), + (STXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; +def : Pat<(stxr_4 (and GPR64:$val, 0xffffffff), GPR64sp:$addr), + (STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; + +// Store-release-exclusives. + +def stlxr_1 : PatFrag<(ops node:$val, node:$ptr), + (int_aarch64_stlxr node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i8; +}]>; + +def stlxr_2 : PatFrag<(ops node:$val, node:$ptr), + (int_aarch64_stlxr node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i16; +}]>; + +def stlxr_4 : PatFrag<(ops node:$val, node:$ptr), + (int_aarch64_stlxr node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i32; +}]>; + +def stlxr_8 : PatFrag<(ops node:$val, node:$ptr), + (int_aarch64_stlxr node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i64; +}]>; + + +def : Pat<(stlxr_1 GPR64:$val, GPR64sp:$addr), + (STLXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; +def : Pat<(stlxr_2 GPR64:$val, GPR64sp:$addr), + (STLXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; +def : Pat<(stlxr_4 GPR64:$val, GPR64sp:$addr), + (STLXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; +def : Pat<(stlxr_8 GPR64:$val, GPR64sp:$addr), + (STLXRX GPR64:$val, GPR64sp:$addr)>; + +def : Pat<(stlxr_1 (zext (and GPR32:$val, 0xff)), GPR64sp:$addr), + (STLXRB GPR32:$val, GPR64sp:$addr)>; +def : Pat<(stlxr_2 (zext (and GPR32:$val, 0xffff)), GPR64sp:$addr), + (STLXRH GPR32:$val, GPR64sp:$addr)>; +def : Pat<(stlxr_4 (zext GPR32:$val), GPR64sp:$addr), + (STLXRW GPR32:$val, GPR64sp:$addr)>; + +def : Pat<(stlxr_1 (and GPR64:$val, 0xff), GPR64sp:$addr), + (STLXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; +def : Pat<(stlxr_2 (and GPR64:$val, 0xffff), GPR64sp:$addr), + (STLXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; +def : Pat<(stlxr_4 (and GPR64:$val, 0xffffffff), GPR64sp:$addr), + (STLXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; + + +// And clear exclusive. 
+ +def : Pat<(int_aarch64_clrex), (CLREX 0xf)>; + +//===---------------------------------- +// Atomic cmpxchg for -O0 +//===---------------------------------- + +// The fast register allocator used during -O0 inserts spills to cover any VRegs +// live across basic block boundaries. When this happens between an LDXR and an +// STXR it can clear the exclusive monitor, causing all cmpxchg attempts to +// fail. + +// Unfortunately, this means we have to have an alternative (expanded +// post-regalloc) path for -O0 compilations. Fortunately this path can be +// significantly more naive than the standard expansion: we conservatively +// assume seq_cst, strong cmpxchg and omit clrex on failure. + +let Constraints = "@earlyclobber $Rd,@earlyclobber $scratch", + mayLoad = 1, mayStore = 1 in { +def CMP_SWAP_8 : Pseudo<(outs GPR32:$Rd, GPR32:$scratch), + (ins GPR64:$addr, GPR32:$desired, GPR32:$new), []>, + Sched<[WriteAtomic]>; + +def CMP_SWAP_16 : Pseudo<(outs GPR32:$Rd, GPR32:$scratch), + (ins GPR64:$addr, GPR32:$desired, GPR32:$new), []>, + Sched<[WriteAtomic]>; + +def CMP_SWAP_32 : Pseudo<(outs GPR32:$Rd, GPR32:$scratch), + (ins GPR64:$addr, GPR32:$desired, GPR32:$new), []>, + Sched<[WriteAtomic]>; + +def CMP_SWAP_64 : Pseudo<(outs GPR64:$Rd, GPR32:$scratch), + (ins GPR64:$addr, GPR64:$desired, GPR64:$new), []>, + Sched<[WriteAtomic]>; +} + +let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi,@earlyclobber $scratch", + mayLoad = 1, mayStore = 1 in +def CMP_SWAP_128 : Pseudo<(outs GPR64:$RdLo, GPR64:$RdHi, GPR32:$scratch), + (ins GPR64:$addr, GPR64:$desiredLo, GPR64:$desiredHi, + GPR64:$newLo, GPR64:$newHi), []>, + Sched<[WriteAtomic]>; + +// v8.1 Atomic instructions: +let Predicates = [HasLSE] in { + defm : LDOPregister_patterns<"LDADD", "atomic_load_add">; + defm : LDOPregister_patterns<"LDSET", "atomic_load_or">; + defm : LDOPregister_patterns<"LDEOR", "atomic_load_xor">; + defm : LDOPregister_patterns<"LDCLR", "atomic_load_clr">; + defm : LDOPregister_patterns<"LDSMAX", "atomic_load_max">; + defm : LDOPregister_patterns<"LDSMIN", "atomic_load_min">; + defm : LDOPregister_patterns<"LDUMAX", "atomic_load_umax">; + defm : LDOPregister_patterns<"LDUMIN", "atomic_load_umin">; + defm : LDOPregister_patterns<"SWP", "atomic_swap">; + defm : CASregister_patterns<"CAS", "atomic_cmp_swap">; + + // These two patterns are only needed for global isel, selection dag isel + // converts atomic load-sub into a sub and atomic load-add, and likewise for + // and -> clr. + defm : LDOPregister_patterns_mod<"LDADD", "atomic_load_sub", "SUB">; + defm : LDOPregister_patterns_mod<"LDCLR", "atomic_load_and", "ORN">; +} + diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64InstrFormats.td b/capstone/suite/synctools/tablegen/AArch64/AArch64InstrFormats.td new file mode 100644 index 000000000..7caf32dbd --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64InstrFormats.td @@ -0,0 +1,10402 @@ +//===- AArch64InstrFormats.td - AArch64 Instruction Formats --*- tblgen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Describe AArch64 instructions format here +// + +// Format specifies the encoding used by the instruction. 
This is part of the +// ad-hoc solution used to emit machine instruction encodings by our machine +// code emitter. +class Format val> { + bits<2> Value = val; +} + +def PseudoFrm : Format<0>; +def NormalFrm : Format<1>; // Do we need any others? + +// AArch64 Instruction Format +class AArch64Inst : Instruction { + field bits<32> Inst; // Instruction encoding. + // Mask of bits that cause an encoding to be UNPREDICTABLE. + // If a bit is set, then if the corresponding bit in the + // target encoding differs from its value in the "Inst" field, + // the instruction is UNPREDICTABLE (SoftFail in abstract parlance). + field bits<32> Unpredictable = 0; + // SoftFail is the generic name for this field, but we alias it so + // as to make it more obvious what it means in ARM-land. + field bits<32> SoftFail = Unpredictable; + let Namespace = "AArch64"; + Format F = f; + bits<2> Form = F.Value; + let Pattern = []; + let Constraints = cstr; +} + +class InstSubst + : InstAlias, Requires<[UseNegativeImmediates]>; + +// Pseudo instructions (don't have encoding information) +class Pseudo pattern, string cstr = ""> + : AArch64Inst { + dag OutOperandList = oops; + dag InOperandList = iops; + let Pattern = pattern; + let isCodeGenOnly = 1; +} + +// Real instructions (have encoding information) +class EncodedI pattern> : AArch64Inst { + let Pattern = pattern; + let Size = 4; +} + +// Enum describing whether an instruction is +// destructive in its first source operand. +class DestructiveInstTypeEnum val> { + bits<1> Value = val; +} +def NotDestructive : DestructiveInstTypeEnum<0>; +def Destructive : DestructiveInstTypeEnum<1>; + +// Normal instructions +class I pattern> + : EncodedI { + dag OutOperandList = oops; + dag InOperandList = iops; + let AsmString = !strconcat(asm, operands); + + // Destructive operations (SVE) + DestructiveInstTypeEnum DestructiveInstType = NotDestructive; + ElementSizeEnum ElementSize = ElementSizeB; + + let TSFlags{3} = DestructiveInstType.Value; + let TSFlags{2-0} = ElementSize.Value; +} + +class TriOpFrag : PatFrag<(ops node:$LHS, node:$MHS, node:$RHS), res>; +class BinOpFrag : PatFrag<(ops node:$LHS, node:$RHS), res>; +class UnOpFrag : PatFrag<(ops node:$LHS), res>; + +// Helper fragment for an extract of the high portion of a 128-bit vector. +def extract_high_v16i8 : + UnOpFrag<(extract_subvector (v16i8 node:$LHS), (i64 8))>; +def extract_high_v8i16 : + UnOpFrag<(extract_subvector (v8i16 node:$LHS), (i64 4))>; +def extract_high_v4i32 : + UnOpFrag<(extract_subvector (v4i32 node:$LHS), (i64 2))>; +def extract_high_v2i64 : + UnOpFrag<(extract_subvector (v2i64 node:$LHS), (i64 1))>; + +//===----------------------------------------------------------------------===// +// Asm Operand Classes. +// + +// Shifter operand for arithmetic shifted encodings. +def ShifterOperand : AsmOperandClass { + let Name = "Shifter"; +} + +// Shifter operand for mov immediate encodings. +def MovImm32ShifterOperand : AsmOperandClass { + let SuperClasses = [ShifterOperand]; + let Name = "MovImm32Shifter"; + let RenderMethod = "addShifterOperands"; + let DiagnosticType = "InvalidMovImm32Shift"; +} +def MovImm64ShifterOperand : AsmOperandClass { + let SuperClasses = [ShifterOperand]; + let Name = "MovImm64Shifter"; + let RenderMethod = "addShifterOperands"; + let DiagnosticType = "InvalidMovImm64Shift"; +} + +// Shifter operand for arithmetic register shifted encodings. 
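+// (The parameterised classes below expand to isArithmeticShifter<32> and
+// isArithmeticShifter<64> checks in the assembly parser: the 32-bit form only
+// accepts shift amounts 0-31, the 64-bit form 0-63.)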
+class ArithmeticShifterOperand : AsmOperandClass { + let SuperClasses = [ShifterOperand]; + let Name = "ArithmeticShifter" # width; + let PredicateMethod = "isArithmeticShifter<" # width # ">"; + let RenderMethod = "addShifterOperands"; + let DiagnosticType = "AddSubRegShift" # width; +} + +def ArithmeticShifterOperand32 : ArithmeticShifterOperand<32>; +def ArithmeticShifterOperand64 : ArithmeticShifterOperand<64>; + +// Shifter operand for logical register shifted encodings. +class LogicalShifterOperand : AsmOperandClass { + let SuperClasses = [ShifterOperand]; + let Name = "LogicalShifter" # width; + let PredicateMethod = "isLogicalShifter<" # width # ">"; + let RenderMethod = "addShifterOperands"; + let DiagnosticType = "AddSubRegShift" # width; +} + +def LogicalShifterOperand32 : LogicalShifterOperand<32>; +def LogicalShifterOperand64 : LogicalShifterOperand<64>; + +// Shifter operand for logical vector 128/64-bit shifted encodings. +def LogicalVecShifterOperand : AsmOperandClass { + let SuperClasses = [ShifterOperand]; + let Name = "LogicalVecShifter"; + let RenderMethod = "addShifterOperands"; +} +def LogicalVecHalfWordShifterOperand : AsmOperandClass { + let SuperClasses = [LogicalVecShifterOperand]; + let Name = "LogicalVecHalfWordShifter"; + let RenderMethod = "addShifterOperands"; +} + +// The "MSL" shifter on the vector MOVI instruction. +def MoveVecShifterOperand : AsmOperandClass { + let SuperClasses = [ShifterOperand]; + let Name = "MoveVecShifter"; + let RenderMethod = "addShifterOperands"; +} + +// Extend operand for arithmetic encodings. +def ExtendOperand : AsmOperandClass { + let Name = "Extend"; + let DiagnosticType = "AddSubRegExtendLarge"; +} +def ExtendOperand64 : AsmOperandClass { + let SuperClasses = [ExtendOperand]; + let Name = "Extend64"; + let DiagnosticType = "AddSubRegExtendSmall"; +} +// 'extend' that's a lsl of a 64-bit register. +def ExtendOperandLSL64 : AsmOperandClass { + let SuperClasses = [ExtendOperand]; + let Name = "ExtendLSL64"; + let RenderMethod = "addExtend64Operands"; + let DiagnosticType = "AddSubRegExtendLarge"; +} + +// 8-bit floating-point immediate encodings. +def FPImmOperand : AsmOperandClass { + let Name = "FPImm"; + let ParserMethod = "tryParseFPImm"; + let DiagnosticType = "InvalidFPImm"; +} + +def CondCode : AsmOperandClass { + let Name = "CondCode"; + let DiagnosticType = "InvalidCondCode"; +} + +// A 32-bit register pasrsed as 64-bit +def GPR32as64Operand : AsmOperandClass { + let Name = "GPR32as64"; + let ParserMethod = + "tryParseGPROperand"; +} +def GPR32as64 : RegisterOperand { + let ParserMatchClass = GPR32as64Operand; +} + +// A 64-bit register pasrsed as 32-bit +def GPR64as32Operand : AsmOperandClass { + let Name = "GPR64as32"; + let ParserMethod = + "tryParseGPROperand"; +} +def GPR64as32 : RegisterOperand { + let ParserMatchClass = GPR64as32Operand; +} + +// 8-bit immediate for AdvSIMD where 64-bit values of the form: +// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh +// are encoded as the eight bit value 'abcdefgh'. 
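+// For example, abcdefgh = 0b10100101 stands for the 64-bit constant
+// 0xFF00FF0000FF00FF: each bit is replicated into one byte of the result, with
+// 'a' filling the most significant byte and 'h' the least significant one.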
+def SIMDImmType10Operand : AsmOperandClass { let Name = "SIMDImmType10"; } + +class UImmScaledMemoryIndexed : AsmOperandClass { + let Name = "UImm" # Width # "s" # Scale; + let DiagnosticType = "InvalidMemoryIndexed" # Scale # "UImm" # Width; + let RenderMethod = "addImmScaledOperands<" # Scale # ">"; + let PredicateMethod = "isUImmScaled<" # Width # ", " # Scale # ">"; +} + +class SImmScaledMemoryIndexed : AsmOperandClass { + let Name = "SImm" # Width # "s" # Scale; + let DiagnosticType = "InvalidMemoryIndexed" # Scale # "SImm" # Width; + let RenderMethod = "addImmScaledOperands<" # Scale # ">"; + let PredicateMethod = "isSImmScaled<" # Width # ", " # Scale # ">"; +} + +//===----------------------------------------------------------------------===// +// Operand Definitions. +// + +// ADR[P] instruction labels. +def AdrpOperand : AsmOperandClass { + let Name = "AdrpLabel"; + let ParserMethod = "tryParseAdrpLabel"; + let DiagnosticType = "InvalidLabel"; +} +def adrplabel : Operand { + let EncoderMethod = "getAdrLabelOpValue"; + let PrintMethod = "printAdrpLabel"; + let ParserMatchClass = AdrpOperand; +} + +def AdrOperand : AsmOperandClass { + let Name = "AdrLabel"; + let ParserMethod = "tryParseAdrLabel"; + let DiagnosticType = "InvalidLabel"; +} +def adrlabel : Operand { + let EncoderMethod = "getAdrLabelOpValue"; + let ParserMatchClass = AdrOperand; +} + +class SImmOperand : AsmOperandClass { + let Name = "SImm" # width; + let DiagnosticType = "InvalidMemoryIndexedSImm" # width; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isSImm<" # width # ">"; +} + +// Authenticated loads for v8.3 can have scaled 10-bit immediate offsets. +def SImm10s8Operand : SImmScaledMemoryIndexed<10, 8>; +def simm10Scaled : Operand { + let ParserMatchClass = SImm10s8Operand; + let DecoderMethod = "DecodeSImm<10>"; + let PrintMethod = "printImmScale<8>"; +} + +// uimm6 predicate - True if the immediate is in the range [0, 63]. +def UImm6Operand : AsmOperandClass { + let Name = "UImm6"; + let DiagnosticType = "InvalidImm0_63"; +} + +def uimm6 : Operand, ImmLeaf= 0 && Imm < 64; }]> { + let ParserMatchClass = UImm6Operand; +} + +def SImm9Operand : SImmOperand<9>; +def simm9 : Operand, ImmLeaf= -256 && Imm < 256; }]> { + let ParserMatchClass = SImm9Operand; + let DecoderMethod = "DecodeSImm<9>"; +} + +def SImm8Operand : SImmOperand<8>; +def simm8 : Operand, ImmLeaf= -128 && Imm < 127; }]> { + let ParserMatchClass = SImm8Operand; + let DecoderMethod = "DecodeSImm<8>"; +} + +def SImm6Operand : SImmOperand<6>; +def simm6_32b : Operand, ImmLeaf= -32 && Imm < 32; }]> { + let ParserMatchClass = SImm6Operand; + let DecoderMethod = "DecodeSImm<6>"; +} + +def SImm5Operand : SImmOperand<5>; +def simm5_64b : Operand, ImmLeaf= -16 && Imm < 16; }]> { + let ParserMatchClass = SImm5Operand; + let DecoderMethod = "DecodeSImm<5>"; +} + +def simm5_32b : Operand, ImmLeaf= -16 && Imm < 16; }]> { + let ParserMatchClass = SImm5Operand; + let DecoderMethod = "DecodeSImm<5>"; +} + +// simm7sN predicate - True if the immediate is a multiple of N in the range +// [-64 * N, 63 * N]. 
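+// Concretely, simm7s4 accepts multiples of 4 in [-256, 252], simm7s8 multiples
+// of 8 in [-512, 504] and simm7s16 multiples of 16 in [-1024, 1008]; these are
+// the scaled signed 7-bit offsets used by the load/store pair instructions.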
+ +def SImm7s4Operand : SImmScaledMemoryIndexed<7, 4>; +def SImm7s8Operand : SImmScaledMemoryIndexed<7, 8>; +def SImm7s16Operand : SImmScaledMemoryIndexed<7, 16>; + +def simm7s4 : Operand { + let ParserMatchClass = SImm7s4Operand; + let PrintMethod = "printImmScale<4>"; +} + +def simm7s8 : Operand { + let ParserMatchClass = SImm7s8Operand; + let PrintMethod = "printImmScale<8>"; +} + +def simm7s16 : Operand { + let ParserMatchClass = SImm7s16Operand; + let PrintMethod = "printImmScale<16>"; +} + +def am_indexed7s8 : ComplexPattern; +def am_indexed7s16 : ComplexPattern; +def am_indexed7s32 : ComplexPattern; +def am_indexed7s64 : ComplexPattern; +def am_indexed7s128 : ComplexPattern; + +// uimm5sN predicate - True if the immediate is a multiple of N in the range +// [0 * N, 32 * N]. +def UImm5s2Operand : UImmScaledMemoryIndexed<5, 2>; +def UImm5s4Operand : UImmScaledMemoryIndexed<5, 4>; +def UImm5s8Operand : UImmScaledMemoryIndexed<5, 8>; + +def uimm5s2 : Operand, ImmLeaf= 0 && Imm < (32*2) && ((Imm % 2) == 0); }]> { + let ParserMatchClass = UImm5s2Operand; + let PrintMethod = "printImmScale<2>"; +} +def uimm5s4 : Operand, ImmLeaf= 0 && Imm < (32*4) && ((Imm % 4) == 0); }]> { + let ParserMatchClass = UImm5s4Operand; + let PrintMethod = "printImmScale<4>"; +} +def uimm5s8 : Operand, ImmLeaf= 0 && Imm < (32*8) && ((Imm % 8) == 0); }]> { + let ParserMatchClass = UImm5s8Operand; + let PrintMethod = "printImmScale<8>"; +} + +// uimm6sN predicate - True if the immediate is a multiple of N in the range +// [0 * N, 64 * N]. +def UImm6s1Operand : UImmScaledMemoryIndexed<6, 1>; +def UImm6s2Operand : UImmScaledMemoryIndexed<6, 2>; +def UImm6s4Operand : UImmScaledMemoryIndexed<6, 4>; +def UImm6s8Operand : UImmScaledMemoryIndexed<6, 8>; + +def uimm6s1 : Operand, ImmLeaf= 0 && Imm < 64; }]> { + let ParserMatchClass = UImm6s1Operand; +} +def uimm6s2 : Operand, ImmLeaf= 0 && Imm < (64*2) && ((Imm % 2) == 0); }]> { + let PrintMethod = "printImmScale<2>"; + let ParserMatchClass = UImm6s2Operand; +} +def uimm6s4 : Operand, ImmLeaf= 0 && Imm < (64*4) && ((Imm % 4) == 0); }]> { + let PrintMethod = "printImmScale<4>"; + let ParserMatchClass = UImm6s4Operand; +} +def uimm6s8 : Operand, ImmLeaf= 0 && Imm < (64*8) && ((Imm % 8) == 0); }]> { + let PrintMethod = "printImmScale<8>"; + let ParserMatchClass = UImm6s8Operand; +} + +// simm6sN predicate - True if the immediate is a multiple of N in the range +// [-32 * N, 31 * N]. +def SImm6s1Operand : SImmScaledMemoryIndexed<6, 1>; +def simm6s1 : Operand, ImmLeaf= -32 && Imm < 32; }]> { + let ParserMatchClass = SImm6s1Operand; + let DecoderMethod = "DecodeSImm<6>"; +} + +// simm4sN predicate - True if the immediate is a multiple of N in the range +// [ -8* N, 7 * N]. 
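+// For instance, simm4s2 below accepts multiples of 2 in [-16, 14], simm4s3
+// multiples of 3 in [-24, 21] and simm4s16 multiples of 16 in [-128, 112],
+// matching the ImmLeaf predicates on each operand.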
+def SImm4s1Operand : SImmScaledMemoryIndexed<4, 1>; +def SImm4s2Operand : SImmScaledMemoryIndexed<4, 2>; +def SImm4s3Operand : SImmScaledMemoryIndexed<4, 3>; +def SImm4s4Operand : SImmScaledMemoryIndexed<4, 4>; +def SImm4s16Operand : SImmScaledMemoryIndexed<4, 16>; + +def simm4s1 : Operand, ImmLeaf=-8 && Imm <= 7; }]> { + let ParserMatchClass = SImm4s1Operand; + let DecoderMethod = "DecodeSImm<4>"; +} + +def simm4s2 : Operand, ImmLeaf=-16 && Imm <= 14 && (Imm % 2) == 0x0; }]> { + let PrintMethod = "printImmScale<2>"; + let ParserMatchClass = SImm4s2Operand; + let DecoderMethod = "DecodeSImm<4>"; +} + +def simm4s3 : Operand, ImmLeaf=-24 && Imm <= 21 && (Imm % 3) == 0x0; }]> { + let PrintMethod = "printImmScale<3>"; + let ParserMatchClass = SImm4s3Operand; + let DecoderMethod = "DecodeSImm<4>"; +} + +def simm4s4 : Operand, ImmLeaf=-32 && Imm <= 28 && (Imm % 4) == 0x0; }]> { + let PrintMethod = "printImmScale<4>"; + let ParserMatchClass = SImm4s4Operand; + let DecoderMethod = "DecodeSImm<4>"; +} +def simm4s16 : Operand, ImmLeaf=-128 && Imm <= 112 && (Imm % 16) == 0x0; }]> { + let PrintMethod = "printImmScale<16>"; + let ParserMatchClass = SImm4s16Operand; + let DecoderMethod = "DecodeSImm<4>"; +} + +class AsmImmRange : AsmOperandClass { + let Name = "Imm" # Low # "_" # High; + let DiagnosticType = "InvalidImm" # Low # "_" # High; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isImmInRange<" # Low # "," # High # ">"; +} + +def Imm1_8Operand : AsmImmRange<1, 8>; +def Imm1_16Operand : AsmImmRange<1, 16>; +def Imm1_32Operand : AsmImmRange<1, 32>; +def Imm1_64Operand : AsmImmRange<1, 64>; + +class BranchTarget : AsmOperandClass { + let Name = "BranchTarget" # N; + let DiagnosticType = "InvalidLabel"; + let PredicateMethod = "isBranchTarget<" # N # ">"; +} + +class PCRelLabel : BranchTarget { + let Name = "PCRelLabel" # N; +} + +def BranchTarget14Operand : BranchTarget<14>; +def BranchTarget26Operand : BranchTarget<26>; +def PCRelLabel19Operand : PCRelLabel<19>; + +def MovZSymbolG3AsmOperand : AsmOperandClass { + let Name = "MovZSymbolG3"; + let RenderMethod = "addImmOperands"; +} + +def movz_symbol_g3 : Operand { + let ParserMatchClass = MovZSymbolG3AsmOperand; +} + +def MovZSymbolG2AsmOperand : AsmOperandClass { + let Name = "MovZSymbolG2"; + let RenderMethod = "addImmOperands"; +} + +def movz_symbol_g2 : Operand { + let ParserMatchClass = MovZSymbolG2AsmOperand; +} + +def MovZSymbolG1AsmOperand : AsmOperandClass { + let Name = "MovZSymbolG1"; + let RenderMethod = "addImmOperands"; +} + +def movz_symbol_g1 : Operand { + let ParserMatchClass = MovZSymbolG1AsmOperand; +} + +def MovZSymbolG0AsmOperand : AsmOperandClass { + let Name = "MovZSymbolG0"; + let RenderMethod = "addImmOperands"; +} + +def movz_symbol_g0 : Operand { + let ParserMatchClass = MovZSymbolG0AsmOperand; +} + +def MovKSymbolG3AsmOperand : AsmOperandClass { + let Name = "MovKSymbolG3"; + let RenderMethod = "addImmOperands"; +} + +def movk_symbol_g3 : Operand { + let ParserMatchClass = MovKSymbolG3AsmOperand; +} + +def MovKSymbolG2AsmOperand : AsmOperandClass { + let Name = "MovKSymbolG2"; + let RenderMethod = "addImmOperands"; +} + +def movk_symbol_g2 : Operand { + let ParserMatchClass = MovKSymbolG2AsmOperand; +} + +def MovKSymbolG1AsmOperand : AsmOperandClass { + let Name = "MovKSymbolG1"; + let RenderMethod = "addImmOperands"; +} + +def movk_symbol_g1 : Operand { + let ParserMatchClass = MovKSymbolG1AsmOperand; +} + +def MovKSymbolG0AsmOperand : AsmOperandClass { + let Name = "MovKSymbolG0"; + let RenderMethod = 
"addImmOperands"; +} + +def movk_symbol_g0 : Operand { + let ParserMatchClass = MovKSymbolG0AsmOperand; +} + +class fixedpoint_i32 + : Operand, + ComplexPattern", [fpimm, ld]> { + let EncoderMethod = "getFixedPointScaleOpValue"; + let DecoderMethod = "DecodeFixedPointScaleImm32"; + let ParserMatchClass = Imm1_32Operand; +} + +class fixedpoint_i64 + : Operand, + ComplexPattern", [fpimm, ld]> { + let EncoderMethod = "getFixedPointScaleOpValue"; + let DecoderMethod = "DecodeFixedPointScaleImm64"; + let ParserMatchClass = Imm1_64Operand; +} + +def fixedpoint_f16_i32 : fixedpoint_i32; +def fixedpoint_f32_i32 : fixedpoint_i32; +def fixedpoint_f64_i32 : fixedpoint_i32; + +def fixedpoint_f16_i64 : fixedpoint_i64; +def fixedpoint_f32_i64 : fixedpoint_i64; +def fixedpoint_f64_i64 : fixedpoint_i64; + +def vecshiftR8 : Operand, ImmLeaf 0) && (((uint32_t)Imm) < 9); +}]> { + let EncoderMethod = "getVecShiftR8OpValue"; + let DecoderMethod = "DecodeVecShiftR8Imm"; + let ParserMatchClass = Imm1_8Operand; +} +def vecshiftR16 : Operand, ImmLeaf 0) && (((uint32_t)Imm) < 17); +}]> { + let EncoderMethod = "getVecShiftR16OpValue"; + let DecoderMethod = "DecodeVecShiftR16Imm"; + let ParserMatchClass = Imm1_16Operand; +} +def vecshiftR16Narrow : Operand, ImmLeaf 0) && (((uint32_t)Imm) < 9); +}]> { + let EncoderMethod = "getVecShiftR16OpValue"; + let DecoderMethod = "DecodeVecShiftR16ImmNarrow"; + let ParserMatchClass = Imm1_8Operand; +} +def vecshiftR32 : Operand, ImmLeaf 0) && (((uint32_t)Imm) < 33); +}]> { + let EncoderMethod = "getVecShiftR32OpValue"; + let DecoderMethod = "DecodeVecShiftR32Imm"; + let ParserMatchClass = Imm1_32Operand; +} +def vecshiftR32Narrow : Operand, ImmLeaf 0) && (((uint32_t)Imm) < 17); +}]> { + let EncoderMethod = "getVecShiftR32OpValue"; + let DecoderMethod = "DecodeVecShiftR32ImmNarrow"; + let ParserMatchClass = Imm1_16Operand; +} +def vecshiftR64 : Operand, ImmLeaf 0) && (((uint32_t)Imm) < 65); +}]> { + let EncoderMethod = "getVecShiftR64OpValue"; + let DecoderMethod = "DecodeVecShiftR64Imm"; + let ParserMatchClass = Imm1_64Operand; +} +def vecshiftR64Narrow : Operand, ImmLeaf 0) && (((uint32_t)Imm) < 33); +}]> { + let EncoderMethod = "getVecShiftR64OpValue"; + let DecoderMethod = "DecodeVecShiftR64ImmNarrow"; + let ParserMatchClass = Imm1_32Operand; +} + +def Imm0_1Operand : AsmImmRange<0, 1>; +def Imm0_7Operand : AsmImmRange<0, 7>; +def Imm0_15Operand : AsmImmRange<0, 15>; +def Imm0_31Operand : AsmImmRange<0, 31>; +def Imm0_63Operand : AsmImmRange<0, 63>; + +def vecshiftL8 : Operand, ImmLeaf { + let EncoderMethod = "getVecShiftL8OpValue"; + let DecoderMethod = "DecodeVecShiftL8Imm"; + let ParserMatchClass = Imm0_7Operand; +} +def vecshiftL16 : Operand, ImmLeaf { + let EncoderMethod = "getVecShiftL16OpValue"; + let DecoderMethod = "DecodeVecShiftL16Imm"; + let ParserMatchClass = Imm0_15Operand; +} +def vecshiftL32 : Operand, ImmLeaf { + let EncoderMethod = "getVecShiftL32OpValue"; + let DecoderMethod = "DecodeVecShiftL32Imm"; + let ParserMatchClass = Imm0_31Operand; +} +def vecshiftL64 : Operand, ImmLeaf { + let EncoderMethod = "getVecShiftL64OpValue"; + let DecoderMethod = "DecodeVecShiftL64Imm"; + let ParserMatchClass = Imm0_63Operand; +} + + +// Crazy immediate formats used by 32-bit and 64-bit logical immediate +// instructions for splatting repeating bit patterns across the immediate. 
+def logical_imm32_XFORM : SDNodeXFormgetZExtValue(), 32); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); +}]>; +def logical_imm64_XFORM : SDNodeXFormgetZExtValue(), 64); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); +}]>; + +let DiagnosticType = "LogicalSecondSource" in { + def LogicalImm32Operand : AsmOperandClass { + let Name = "LogicalImm32"; + let PredicateMethod = "isLogicalImm"; + let RenderMethod = "addLogicalImmOperands"; + } + def LogicalImm64Operand : AsmOperandClass { + let Name = "LogicalImm64"; + let PredicateMethod = "isLogicalImm"; + let RenderMethod = "addLogicalImmOperands"; + } + def LogicalImm32NotOperand : AsmOperandClass { + let Name = "LogicalImm32Not"; + let PredicateMethod = "isLogicalImm"; + let RenderMethod = "addLogicalImmNotOperands"; + } + def LogicalImm64NotOperand : AsmOperandClass { + let Name = "LogicalImm64Not"; + let PredicateMethod = "isLogicalImm"; + let RenderMethod = "addLogicalImmNotOperands"; + } +} +def logical_imm32 : Operand, IntImmLeaf { + let PrintMethod = "printLogicalImm"; + let ParserMatchClass = LogicalImm32Operand; +} +def logical_imm64 : Operand, IntImmLeaf { + let PrintMethod = "printLogicalImm"; + let ParserMatchClass = LogicalImm64Operand; +} +def logical_imm32_not : Operand { + let ParserMatchClass = LogicalImm32NotOperand; +} +def logical_imm64_not : Operand { + let ParserMatchClass = LogicalImm64NotOperand; +} + +// imm0_65535 predicate - True if the immediate is in the range [0,65535]. +def Imm0_65535Operand : AsmImmRange<0, 65535>; +def imm0_65535 : Operand, ImmLeaf { + let ParserMatchClass = Imm0_65535Operand; + let PrintMethod = "printImmHex"; +} + +// imm0_255 predicate - True if the immediate is in the range [0,255]. +def Imm0_255Operand : AsmImmRange<0,255>; + +def imm0_255 : Operand, ImmLeaf { + let ParserMatchClass = Imm0_255Operand; + let PrintMethod = "printImm"; +} + +// imm0_127 predicate - True if the immediate is in the range [0,127] +def Imm0_127Operand : AsmImmRange<0, 127>; +def imm0_127 : Operand, ImmLeaf { + let ParserMatchClass = Imm0_127Operand; + let PrintMethod = "printImm"; +} + +// NOTE: These imm0_N operands have to be of type i64 because i64 is the size +// for all shift-amounts. 
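// The shift-amount typing matters because AArch64 register-controlled shifts
// (LSLV/LSRV/ASRV/RORV) only consume the amount modulo the register width, so
// an i64 amount can always be fed to a 32-bit shift by truncation, as the
// Shift patterns further down rely on. A small C++ model of that semantic
// (helper names invented):
#include <cassert>
#include <cstdint>

static uint32_t lslw(uint32_t Value, uint64_t Amount) {
  return Value << (Amount & 31);   // W form: amount mod 32
}
static uint64_t lslx(uint64_t Value, uint64_t Amount) {
  return Value << (Amount & 63);   // X form: amount mod 64
}

int main() {
  assert(lslw(1, 33) == 2);        // 33 mod 32 == 1
  assert(lslx(1, 65) == 2);        // 65 mod 64 == 1
  return 0;
}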
+ +// imm0_63 predicate - True if the immediate is in the range [0,63] +def imm0_63 : Operand, ImmLeaf { + let ParserMatchClass = Imm0_63Operand; +} + +// imm0_31 predicate - True if the immediate is in the range [0,31] +def imm0_31 : Operand, ImmLeaf { + let ParserMatchClass = Imm0_31Operand; +} + +// True if the 32-bit immediate is in the range [0,31] +def imm32_0_31 : Operand, ImmLeaf { + let ParserMatchClass = Imm0_31Operand; +} + +// imm0_1 predicate - True if the immediate is in the range [0,1] +def imm0_1 : Operand, ImmLeaf { + let ParserMatchClass = Imm0_1Operand; +} + +// imm0_15 predicate - True if the immediate is in the range [0,15] +def imm0_15 : Operand, ImmLeaf { + let ParserMatchClass = Imm0_15Operand; +} + +// imm0_7 predicate - True if the immediate is in the range [0,7] +def imm0_7 : Operand, ImmLeaf { + let ParserMatchClass = Imm0_7Operand; +} + +// imm32_0_15 predicate - True if the 32-bit immediate is in the range [0,15] +def imm32_0_15 : Operand, ImmLeaf { + let ParserMatchClass = Imm0_15Operand; +} + +// An arithmetic shifter operand: +// {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr +// {5-0} - imm6 +class arith_shift : Operand { + let PrintMethod = "printShifter"; + let ParserMatchClass = !cast( + "ArithmeticShifterOperand" # width); +} + +def arith_shift32 : arith_shift; +def arith_shift64 : arith_shift; + +class arith_shifted_reg + : Operand, + ComplexPattern { + let PrintMethod = "printShiftedRegister"; + let MIOperandInfo = (ops regclass, !cast("arith_shift" # width)); +} + +def arith_shifted_reg32 : arith_shifted_reg; +def arith_shifted_reg64 : arith_shifted_reg; + +// An arithmetic shifter operand: +// {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr, 11 = ror +// {5-0} - imm6 +class logical_shift : Operand { + let PrintMethod = "printShifter"; + let ParserMatchClass = !cast( + "LogicalShifterOperand" # width); +} + +def logical_shift32 : logical_shift<32>; +def logical_shift64 : logical_shift<64>; + +class logical_shifted_reg + : Operand, + ComplexPattern { + let PrintMethod = "printShiftedRegister"; + let MIOperandInfo = (ops regclass, shiftop); +} + +def logical_shifted_reg32 : logical_shifted_reg; +def logical_shifted_reg64 : logical_shifted_reg; + +// A logical vector shifter operand: +// {7-6} - shift type: 00 = lsl +// {5-0} - imm6: #0, #8, #16, or #24 +def logical_vec_shift : Operand { + let PrintMethod = "printShifter"; + let EncoderMethod = "getVecShifterOpValue"; + let ParserMatchClass = LogicalVecShifterOperand; +} + +// A logical vector half-word shifter operand: +// {7-6} - shift type: 00 = lsl +// {5-0} - imm6: #0 or #8 +def logical_vec_hw_shift : Operand { + let PrintMethod = "printShifter"; + let EncoderMethod = "getVecShifterOpValue"; + let ParserMatchClass = LogicalVecHalfWordShifterOperand; +} + +// A vector move shifter operand: +// {0} - imm1: #8 or #16 +def move_vec_shift : Operand { + let PrintMethod = "printShifter"; + let EncoderMethod = "getMoveVecShifterOpValue"; + let ParserMatchClass = MoveVecShifterOperand; +} + +let DiagnosticType = "AddSubSecondSource" in { + def AddSubImmOperand : AsmOperandClass { + let Name = "AddSubImm"; + let ParserMethod = "tryParseImmWithOptionalShift"; + let RenderMethod = "addImmWithOptionalShiftOperands<12>"; + } + def AddSubImmNegOperand : AsmOperandClass { + let Name = "AddSubImmNeg"; + let ParserMethod = "tryParseImmWithOptionalShift"; + let RenderMethod = "addImmNegWithOptionalShiftOperands<12>"; + } +} +// An ADD/SUB immediate shifter operand: +// second operand: +// {7-6} - shift type: 00 = 
lsl +// {5-0} - imm6: #0 or #12 +class addsub_shifted_imm + : Operand, ComplexPattern { + let PrintMethod = "printAddSubImm"; + let EncoderMethod = "getAddSubImmOpValue"; + let ParserMatchClass = AddSubImmOperand; + let MIOperandInfo = (ops i32imm, i32imm); +} + +class addsub_shifted_imm_neg + : Operand { + let EncoderMethod = "getAddSubImmOpValue"; + let ParserMatchClass = AddSubImmNegOperand; + let MIOperandInfo = (ops i32imm, i32imm); +} + +def addsub_shifted_imm32 : addsub_shifted_imm; +def addsub_shifted_imm64 : addsub_shifted_imm; +def addsub_shifted_imm32_neg : addsub_shifted_imm_neg; +def addsub_shifted_imm64_neg : addsub_shifted_imm_neg; + +def gi_addsub_shifted_imm32 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + +def gi_addsub_shifted_imm64 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + +class neg_addsub_shifted_imm + : Operand, ComplexPattern { + let PrintMethod = "printAddSubImm"; + let EncoderMethod = "getAddSubImmOpValue"; + let ParserMatchClass = AddSubImmOperand; + let MIOperandInfo = (ops i32imm, i32imm); +} + +def neg_addsub_shifted_imm32 : neg_addsub_shifted_imm; +def neg_addsub_shifted_imm64 : neg_addsub_shifted_imm; + +// An extend operand: +// {5-3} - extend type +// {2-0} - imm3 +def arith_extend : Operand { + let PrintMethod = "printArithExtend"; + let ParserMatchClass = ExtendOperand; +} +def arith_extend64 : Operand { + let PrintMethod = "printArithExtend"; + let ParserMatchClass = ExtendOperand64; +} + +// 'extend' that's a lsl of a 64-bit register. +def arith_extendlsl64 : Operand { + let PrintMethod = "printArithExtend"; + let ParserMatchClass = ExtendOperandLSL64; +} + +class arith_extended_reg32 : Operand, + ComplexPattern { + let PrintMethod = "printExtendedRegister"; + let MIOperandInfo = (ops GPR32, arith_extend); +} + +class arith_extended_reg32to64 : Operand, + ComplexPattern { + let PrintMethod = "printExtendedRegister"; + let MIOperandInfo = (ops GPR32, arith_extend64); +} + +// Floating-point immediate. 
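// The fpimm16/32/64 operands that follow only accept constants that fit the
// 8-bit FMOV immediate: +/-(N/16) * 2^E with N in [16,31] and E in [-3,4],
// i.e. magnitudes from 0.125 to 31.0. The in-tree check goes through the
// AArch64_AM::getFP*Imm helpers referenced below; this is just a brute-force
// C++ sketch of the same value set.
#include <cassert>
#include <cmath>

static bool isFMOVImmValue(double V) {
  V = std::fabs(V);
  for (int E = -3; E <= 4; ++E)
    for (int N = 16; N <= 31; ++N)
      if (V == (N / 16.0) * std::ldexp(1.0, E))
        return true;
  return false;
}

int main() {
  assert(isFMOVImmValue(1.0));    // 16/16 * 2^0
  assert(isFMOVImmValue(0.125));  // smallest magnitude
  assert(isFMOVImmValue(31.0));   // largest magnitude
  assert(!isFMOVImmValue(0.1));   // not representable in this form
  return 0;
}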
+def fpimm16 : Operand, + FPImmLeafgetValueAPF(); + uint32_t enc = AArch64_AM::getFP16Imm(InVal); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); + }]>> { + let ParserMatchClass = FPImmOperand; + let PrintMethod = "printFPImmOperand"; +} +def fpimm32 : Operand, + FPImmLeafgetValueAPF(); + uint32_t enc = AArch64_AM::getFP32Imm(InVal); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); + }]>> { + let ParserMatchClass = FPImmOperand; + let PrintMethod = "printFPImmOperand"; +} +def fpimm64 : Operand, + FPImmLeafgetValueAPF(); + uint32_t enc = AArch64_AM::getFP64Imm(InVal); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); + }]>> { + let ParserMatchClass = FPImmOperand; + let PrintMethod = "printFPImmOperand"; +} + +def fpimm8 : Operand { + let ParserMatchClass = FPImmOperand; + let PrintMethod = "printFPImmOperand"; +} + +def fpimm0 : FPImmLeaf; + +// Vector lane operands +class AsmVectorIndex : AsmOperandClass { + let Name = NamePrefix # "IndexRange" # Min # "_" # Max; + let DiagnosticType = "Invalid" # Name; + let PredicateMethod = "isVectorIndex<" # Min # ", " # Max # ">"; + let RenderMethod = "addVectorIndexOperands"; +} + +class AsmVectorIndexOpnd + : Operand, ImmLeaf { + let ParserMatchClass = mc; + let PrintMethod = "printVectorIndex"; +} + +def VectorIndex1Operand : AsmVectorIndex<1, 1>; +def VectorIndexBOperand : AsmVectorIndex<0, 15>; +def VectorIndexHOperand : AsmVectorIndex<0, 7>; +def VectorIndexSOperand : AsmVectorIndex<0, 3>; +def VectorIndexDOperand : AsmVectorIndex<0, 1>; + +def VectorIndex1 : AsmVectorIndexOpnd; +def VectorIndexB : AsmVectorIndexOpnd; +def VectorIndexH : AsmVectorIndexOpnd; +def VectorIndexS : AsmVectorIndexOpnd; +def VectorIndexD : AsmVectorIndexOpnd; + +def SVEVectorIndexExtDupBOperand : AsmVectorIndex<0, 63, "SVE">; +def SVEVectorIndexExtDupHOperand : AsmVectorIndex<0, 31, "SVE">; +def SVEVectorIndexExtDupSOperand : AsmVectorIndex<0, 15, "SVE">; +def SVEVectorIndexExtDupDOperand : AsmVectorIndex<0, 7, "SVE">; +def SVEVectorIndexExtDupQOperand : AsmVectorIndex<0, 3, "SVE">; + +def sve_elm_idx_extdup_b + : AsmVectorIndexOpnd; +def sve_elm_idx_extdup_h + : AsmVectorIndexOpnd; +def sve_elm_idx_extdup_s + : AsmVectorIndexOpnd; +def sve_elm_idx_extdup_d + : AsmVectorIndexOpnd; +def sve_elm_idx_extdup_q + : AsmVectorIndexOpnd; + +// 8-bit immediate for AdvSIMD where 64-bit values of the form: +// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh +// are encoded as the eight bit value 'abcdefgh'. +def simdimmtype10 : Operand, + FPImmLeafgetValueAPF(); + uint32_t enc = AArch64_AM::encodeAdvSIMDModImmType10(N->getValueAPF() + .bitcastToAPInt() + .getZExtValue()); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); + }]>> { + let ParserMatchClass = SIMDImmType10Operand; + let PrintMethod = "printSIMDType10Operand"; +} + + +//--- +// System management +//--- + +// Base encoding for system instruction operands. +let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in +class BaseSystemI pattern = []> + : I { + let Inst{31-22} = 0b1101010100; + let Inst{21} = L; +} + +// System instructions which do not have an Rt register. +class SimpleSystemI pattern = []> + : BaseSystemI { + let Inst{4-0} = 0b11111; +} + +// System instructions which have an Rt register. +class RtSystemI + : BaseSystemI, + Sched<[WriteSys]> { + bits<5> Rt; + let Inst{4-0} = Rt; +} + +// Hint instructions that take both a CRm and a 3-bit immediate. 
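// The 7-bit hint number lands in Inst{11-5} of the HintI class below;
// architecturally that field is the concatenation CRm:op2. A C++ sketch of
// the split (struct and helper invented for illustration); well-known
// assignments include nop=#0, yield=#1, wfe=#2, wfi=#3, sev=#4, sevl=#5 and,
// for the PSBHint operand defined below, psb csync=#17.
#include <cassert>
#include <cstdint>

struct HintFields { uint8_t CRm, Op2; };

static HintFields splitHintImm(uint8_t Imm7) {
  return { static_cast<uint8_t>((Imm7 >> 3) & 0xf),
           static_cast<uint8_t>(Imm7 & 0x7) };
}

int main() {
  HintFields WFI = splitHintImm(3);        // "wfi" == "hint #3"
  assert(WFI.CRm == 0 && WFI.Op2 == 3);
  HintFields PSB = splitHintImm(17);       // "psb csync" == "hint #17"
  assert(PSB.CRm == 2 && PSB.Op2 == 1);
  return 0;
}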
+// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot +// model patterns with sufficiently fine granularity +let mayStore = 1, mayLoad = 1, hasSideEffects = 1 in + class HintI + : SimpleSystemI<0, (ins imm0_127:$imm), mnemonic#"\t$imm", "", + [(int_aarch64_hint imm0_127:$imm)]>, + Sched<[WriteHint]> { + bits <7> imm; + let Inst{20-12} = 0b000110010; + let Inst{11-5} = imm; + } + +// System instructions taking a single literal operand which encodes into +// CRm. op2 differentiates the opcodes. +def BarrierAsmOperand : AsmOperandClass { + let Name = "Barrier"; + let ParserMethod = "tryParseBarrierOperand"; +} +def barrier_op : Operand { + let PrintMethod = "printBarrierOption"; + let ParserMatchClass = BarrierAsmOperand; +} +class CRmSystemI opc, string asm, + list pattern = []> + : SimpleSystemI<0, (ins crmtype:$CRm), asm, "\t$CRm", pattern>, + Sched<[WriteBarrier]> { + bits<4> CRm; + let Inst{20-12} = 0b000110011; + let Inst{11-8} = CRm; + let Inst{7-5} = opc; +} + +class SystemNoOperands op2, string asm, list pattern = []> + : SimpleSystemI<0, (ins), asm, "", pattern>, + Sched<[]> { + bits<4> CRm; + let CRm = 0b0011; + let Inst{31-12} = 0b11010101000000110010; + let Inst{11-8} = CRm; + let Inst{7-5} = op2; + let Inst{4-0} = 0b11111; +} + +// MRS/MSR system instructions. These have different operand classes because +// a different subset of registers can be accessed through each instruction. +def MRSSystemRegisterOperand : AsmOperandClass { + let Name = "MRSSystemRegister"; + let ParserMethod = "tryParseSysReg"; + let DiagnosticType = "MRS"; +} +// concatenation of op0, op1, CRn, CRm, op2. 16-bit immediate. +def mrs_sysreg_op : Operand { + let ParserMatchClass = MRSSystemRegisterOperand; + let DecoderMethod = "DecodeMRSSystemRegister"; + let PrintMethod = "printMRSSystemRegister"; +} + +def MSRSystemRegisterOperand : AsmOperandClass { + let Name = "MSRSystemRegister"; + let ParserMethod = "tryParseSysReg"; + let DiagnosticType = "MSR"; +} +def msr_sysreg_op : Operand { + let ParserMatchClass = MSRSystemRegisterOperand; + let DecoderMethod = "DecodeMSRSystemRegister"; + let PrintMethod = "printMSRSystemRegister"; +} + +def PSBHintOperand : AsmOperandClass { + let Name = "PSBHint"; + let ParserMethod = "tryParsePSBHint"; +} +def psbhint_op : Operand { + let ParserMatchClass = PSBHintOperand; + let PrintMethod = "printPSBHintOp"; + let MCOperandPredicate = [{ + // Check, if operand is valid, to fix exhaustive aliasing in disassembly. + // "psb" is an alias to "hint" only for certain values of CRm:Op2 fields. + if (!MCOp.isImm()) + return false; + return AArch64PSBHint::lookupPSBByEncoding(MCOp.getImm()) != nullptr; + }]; +} + +class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins mrs_sysreg_op:$systemreg), + "mrs", "\t$Rt, $systemreg"> { + bits<16> systemreg; + let Inst{20-5} = systemreg; +} + +// FIXME: Some of these def NZCV, others don't. Best way to model that? +// Explicitly modeling each of the system register as a register class +// would do it, but feels like overkill at this point. 
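// The 16-bit systemreg field is the concatenation op0:op1:CRn:CRm:op2
// (2+3+4+4+3 bits), as the comment above the mrs_sysreg_op operand says.
// A C++ packing sketch with an invented helper name; NZCV, for example, is
// the register usually written S3_3_C4_C2_0.
#include <cassert>
#include <cstdint>

static uint16_t packSysReg(unsigned Op0, unsigned Op1, unsigned CRn,
                           unsigned CRm, unsigned Op2) {
  return static_cast<uint16_t>(((Op0 & 0x3) << 14) | ((Op1 & 0x7) << 11) |
                               ((CRn & 0xf) << 7) | ((CRm & 0xf) << 3) |
                               (Op2 & 0x7));
}

int main() {
  // NZCV: op0=3, op1=3, CRn=4, CRm=2, op2=0.
  assert(packSysReg(3, 3, 4, 2, 0) == 0xDA10);
  return 0;
}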
+class MSRI : RtSystemI<0, (outs), (ins msr_sysreg_op:$systemreg, GPR64:$Rt),
+             "msr", "\t$systemreg, $Rt"> {
+  bits<16> systemreg;
+  let Inst{20-5} = systemreg;
+}
+
+def SystemPStateFieldWithImm0_15Operand : AsmOperandClass {
+  let Name = "SystemPStateFieldWithImm0_15";
+  let ParserMethod = "tryParseSysReg";
+}
+def pstatefield4_op : Operand<i32> {
+  let ParserMatchClass = SystemPStateFieldWithImm0_15Operand;
+  let PrintMethod = "printSystemPStateField";
+}
+
+let Defs = [NZCV] in
+class MSRpstateImm0_15
+  : SimpleSystemI<0, (ins pstatefield4_op:$pstatefield, imm0_15:$imm),
+                  "msr", "\t$pstatefield, $imm">,
+    Sched<[WriteSys]> {
+  bits<6> pstatefield;
+  bits<4> imm;
+  let Inst{20-19} = 0b00;
+  let Inst{18-16} = pstatefield{5-3};
+  let Inst{15-12} = 0b0100;
+  let Inst{11-8} = imm;
+  let Inst{7-5} = pstatefield{2-0};
+
+  let DecoderMethod = "DecodeSystemPStateInstruction";
+  // MSRpstateI aliases with MSRI. When the MSRpstateI decoder method returns
+  // Fail the decoder should attempt to decode the instruction as MSRI.
+  let hasCompleteDecoder = 0;
+}
+
+def SystemPStateFieldWithImm0_1Operand : AsmOperandClass {
+  let Name = "SystemPStateFieldWithImm0_1";
+  let ParserMethod = "tryParseSysReg";
+}
+def pstatefield1_op : Operand<i32> {
+  let ParserMatchClass = SystemPStateFieldWithImm0_1Operand;
+  let PrintMethod = "printSystemPStateField";
+}
+
+let Defs = [NZCV] in
+class MSRpstateImm0_1
+  : SimpleSystemI<0, (ins pstatefield1_op:$pstatefield, imm0_1:$imm),
+                  "msr", "\t$pstatefield, $imm">,
+    Sched<[WriteSys]> {
+  bits<6> pstatefield;
+  bit imm;
+  let Inst{20-19} = 0b00;
+  let Inst{18-16} = pstatefield{5-3};
+  let Inst{15-9} = 0b0100000;
+  let Inst{8} = imm;
+  let Inst{7-5} = pstatefield{2-0};
+
+  let DecoderMethod = "DecodeSystemPStateInstruction";
+  // MSRpstateI aliases with MSRI. When the MSRpstateI decoder method returns
+  // Fail the decoder should attempt to decode the instruction as MSRI.
+  let hasCompleteDecoder = 0;
+}
+
+// SYS and SYSL generic system instructions.
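// SYS takes (op1, Cn, Cm, op2, Xt); the SystemXtI class below drops those
// fields into Inst{18-16}, Inst{15-12}, Inst{11-8} and Inst{7-5}. Cache and
// TLB maintenance operations are aliases onto particular tuples; for example
// the Arm ARM lists "dc cvac" as op1=3, Cn=c7, Cm=c10, op2=1. A C++ sketch of
// the field packing (helper name invented):
#include <cassert>
#include <cstdint>

static uint32_t packSysFields(unsigned Op1, unsigned Cn, unsigned Cm,
                              unsigned Op2) {
  return ((Op1 & 0x7) << 16) | ((Cn & 0xf) << 12) | ((Cm & 0xf) << 8) |
         ((Op2 & 0x7) << 5);
}

int main() {
  // "dc cvac, x0" assembles as "sys #3, c7, c10, #1, x0".
  assert(packSysFields(3, 7, 10, 1) == 0x37A20);
  return 0;
}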
+def SysCRAsmOperand : AsmOperandClass { + let Name = "SysCR"; + let ParserMethod = "tryParseSysCROperand"; +} + +def sys_cr_op : Operand { + let PrintMethod = "printSysCROperand"; + let ParserMatchClass = SysCRAsmOperand; +} + +class SystemXtI + : RtSystemI { + bits<3> op1; + bits<4> Cn; + bits<4> Cm; + bits<3> op2; + let Inst{20-19} = 0b01; + let Inst{18-16} = op1; + let Inst{15-12} = Cn; + let Inst{11-8} = Cm; + let Inst{7-5} = op2; +} + +class SystemLXtI + : RtSystemI { + bits<3> op1; + bits<4> Cn; + bits<4> Cm; + bits<3> op2; + let Inst{20-19} = 0b01; + let Inst{18-16} = op1; + let Inst{15-12} = Cn; + let Inst{11-8} = Cm; + let Inst{7-5} = op2; +} + + +// Branch (register) instructions: +// +// case opc of +// 0001 blr +// 0000 br +// 0101 dret +// 0100 eret +// 0010 ret +// otherwise UNDEFINED +class BaseBranchReg opc, dag oops, dag iops, string asm, + string operands, list pattern> + : I, Sched<[WriteBrReg]> { + let Inst{31-25} = 0b1101011; + let Inst{24-21} = opc; + let Inst{20-16} = 0b11111; + let Inst{15-10} = 0b000000; + let Inst{4-0} = 0b00000; +} + +class BranchReg opc, string asm, list pattern> + : BaseBranchReg { + bits<5> Rn; + let Inst{9-5} = Rn; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 1, isReturn = 1 in +class SpecialReturn opc, string asm> + : BaseBranchReg { + let Inst{9-5} = 0b11111; +} + +let mayLoad = 1 in +class RCPCLoad sz, string asm, RegisterClass RC> + : I<(outs RC:$Rt), (ins GPR64sp0:$Rn), asm, "\t$Rt, [$Rn]", "", []>, + Sched<[]> { + bits<5> Rn; + bits<5> Rt; + let Inst{31-30} = sz; + let Inst{29-10} = 0b11100010111111110000; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +class AuthBase M, dag oops, dag iops, string asm, string operands, + list pattern> + : I, Sched<[]> { + let Inst{31-25} = 0b1101011; + let Inst{20-11} = 0b1111100001; + let Inst{10} = M; + let Inst{4-0} = 0b11111; +} + +class AuthBranchTwoOperands op, bits<1> M, string asm> + : AuthBase { + bits<5> Rn; + bits<5> Rm; + let Inst{24-22} = 0b100; + let Inst{21} = op; + let Inst{9-5} = Rn; + let Inst{4-0} = Rm; +} + +class AuthOneOperand opc, bits<1> M, string asm> + : AuthBase { + bits<5> Rn; + let Inst{24} = 0; + let Inst{23-21} = opc; + let Inst{9-5} = Rn; +} + +class AuthReturn op, bits<1> M, string asm> + : AuthBase { + let Inst{24} = 0; + let Inst{23-21} = op; + let Inst{9-0} = 0b1111111111; +} + +let mayLoad = 1 in +class BaseAuthLoad + : I, Sched<[]> { + bits<10> offset; + bits<5> Rn; + bits<5> Rt; + let Inst{31-24} = 0b11111000; + let Inst{23} = M; + let Inst{22} = offset{9}; + let Inst{21} = 1; + let Inst{20-12} = offset{8-0}; + let Inst{11} = W; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +multiclass AuthLoad { + def indexed : BaseAuthLoad; + def writeback : BaseAuthLoad; + + def : InstAlias(NAME # "indexed") GPR64:$Rt, GPR64sp:$Rn, 0)>; +} + +//--- +// Conditional branch instruction. +//--- + +// Condition code. +// 4-bit immediate. Pretty-printed as +def ccode : Operand { + let PrintMethod = "printCondCode"; + let ParserMatchClass = CondCode; +} +def inv_ccode : Operand { + // AL and NV are invalid in the aliases which use inv_ccode + let PrintMethod = "printInverseCondCode"; + let ParserMatchClass = CondCode; + let MCOperandPredicate = [{ + return MCOp.isImm() && + MCOp.getImm() != AArch64CC::AL && + MCOp.getImm() != AArch64CC::NV; + }]; +} + +// Conditional branch target. 19-bit immediate. The low two bits of the target +// offset are implied zero and so are not part of the immediate. 
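// With 19 immediate bits and two implied zero bits, a conditional branch can
// reach word-aligned offsets in [-2^20, 2^20 - 4] bytes, i.e. roughly +/-1MiB
// around the branch. A standalone C++ reachability check (helper invented):
#include <cassert>
#include <cstdint>

static bool fitsCondBranch(int64_t ByteOffset) {
  return (ByteOffset & 3) == 0 &&
         ByteOffset >= -(1LL << 20) &&
         ByteOffset <= (1LL << 20) - 4;
}

int main() {
  assert(fitsCondBranch(1048572));   // 2^20 - 4: last reachable offset
  assert(!fitsCondBranch(1048576));  // one word too far
  assert(!fitsCondBranch(2));        // not word-aligned
  return 0;
}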
+def am_brcond : Operand { + let EncoderMethod = "getCondBranchTargetOpValue"; + let DecoderMethod = "DecodePCRelLabel19"; + let PrintMethod = "printAlignedLabel"; + let ParserMatchClass = PCRelLabel19Operand; + let OperandType = "OPERAND_PCREL"; +} + +class BranchCond : I<(outs), (ins ccode:$cond, am_brcond:$target), + "b", ".$cond\t$target", "", + [(AArch64brcond bb:$target, imm:$cond, NZCV)]>, + Sched<[WriteBr]> { + let isBranch = 1; + let isTerminator = 1; + let Uses = [NZCV]; + + bits<4> cond; + bits<19> target; + let Inst{31-24} = 0b01010100; + let Inst{23-5} = target; + let Inst{4} = 0; + let Inst{3-0} = cond; +} + +//--- +// Compare-and-branch instructions. +//--- +class BaseCmpBranch + : I<(outs), (ins regtype:$Rt, am_brcond:$target), + asm, "\t$Rt, $target", "", + [(node regtype:$Rt, bb:$target)]>, + Sched<[WriteBr]> { + let isBranch = 1; + let isTerminator = 1; + + bits<5> Rt; + bits<19> target; + let Inst{30-25} = 0b011010; + let Inst{24} = op; + let Inst{23-5} = target; + let Inst{4-0} = Rt; +} + +multiclass CmpBranch { + def W : BaseCmpBranch { + let Inst{31} = 0; + } + def X : BaseCmpBranch { + let Inst{31} = 1; + } +} + +//--- +// Test-bit-and-branch instructions. +//--- +// Test-and-branch target. 14-bit sign-extended immediate. The low two bits of +// the target offset are implied zero and so are not part of the immediate. +def am_tbrcond : Operand { + let EncoderMethod = "getTestBranchTargetOpValue"; + let PrintMethod = "printAlignedLabel"; + let ParserMatchClass = BranchTarget14Operand; + let OperandType = "OPERAND_PCREL"; +} + +// AsmOperand classes to emit (or not) special diagnostics +def TBZImm0_31Operand : AsmOperandClass { + let Name = "TBZImm0_31"; + let PredicateMethod = "isImmInRange<0,31>"; + let RenderMethod = "addImmOperands"; +} +def TBZImm32_63Operand : AsmOperandClass { + let Name = "Imm32_63"; + let PredicateMethod = "isImmInRange<32,63>"; + let DiagnosticType = "InvalidImm0_63"; + let RenderMethod = "addImmOperands"; +} + +class tbz_imm0_31 : Operand, ImmLeaf { + let ParserMatchClass = matcher; +} + +def tbz_imm0_31_diag : tbz_imm0_31; +def tbz_imm0_31_nodiag : tbz_imm0_31; + +def tbz_imm32_63 : Operand, ImmLeaf 31) && (((uint32_t)Imm) < 64); +}]> { + let ParserMatchClass = TBZImm32_63Operand; +} + +class BaseTestBranch + : I<(outs), (ins regtype:$Rt, immtype:$bit_off, am_tbrcond:$target), + asm, "\t$Rt, $bit_off, $target", "", + [(node regtype:$Rt, immtype:$bit_off, bb:$target)]>, + Sched<[WriteBr]> { + let isBranch = 1; + let isTerminator = 1; + + bits<5> Rt; + bits<6> bit_off; + bits<14> target; + + let Inst{30-25} = 0b011011; + let Inst{24} = op; + let Inst{23-19} = bit_off{4-0}; + let Inst{18-5} = target; + let Inst{4-0} = Rt; + + let DecoderMethod = "DecodeTestAndBranch"; +} + +multiclass TestBranch { + def W : BaseTestBranch { + let Inst{31} = 0; + } + + def X : BaseTestBranch { + let Inst{31} = 1; + } + + // Alias X-reg with 0-31 imm to W-Reg. + def : InstAlias(NAME#"W") GPR32as64:$Rd, + tbz_imm0_31_nodiag:$imm, am_tbrcond:$target), 0>; + def : Pat<(node GPR64:$Rn, tbz_imm0_31_diag:$imm, bb:$target), + (!cast(NAME#"W") (EXTRACT_SUBREG GPR64:$Rn, sub_32), + tbz_imm0_31_diag:$imm, bb:$target)>; +} + +//--- +// Unconditional branch (immediate) instructions. 
+//--- +def am_b_target : Operand { + let EncoderMethod = "getBranchTargetOpValue"; + let PrintMethod = "printAlignedLabel"; + let ParserMatchClass = BranchTarget26Operand; + let OperandType = "OPERAND_PCREL"; +} +def am_bl_target : Operand { + let EncoderMethod = "getBranchTargetOpValue"; + let PrintMethod = "printAlignedLabel"; + let ParserMatchClass = BranchTarget26Operand; + let OperandType = "OPERAND_PCREL"; +} + +class BImm pattern> + : I<(outs), iops, asm, "\t$addr", "", pattern>, Sched<[WriteBr]> { + bits<26> addr; + let Inst{31} = op; + let Inst{30-26} = 0b00101; + let Inst{25-0} = addr; + + let DecoderMethod = "DecodeUnconditionalBranch"; +} + +class BranchImm pattern> + : BImm; +class CallImm pattern> + : BImm; + +//--- +// Basic one-operand data processing instructions. +//--- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseOneOperandData opc, RegisterClass regtype, string asm, + SDPatternOperator node> + : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "\t$Rd, $Rn", "", + [(set regtype:$Rd, (node regtype:$Rn))]>, + Sched<[WriteI, ReadI]> { + bits<5> Rd; + bits<5> Rn; + + let Inst{30-13} = 0b101101011000000000; + let Inst{12-10} = opc; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +multiclass OneOperandData opc, string asm, + SDPatternOperator node = null_frag> { + def Wr : BaseOneOperandData { + let Inst{31} = 0; + } + + def Xr : BaseOneOperandData { + let Inst{31} = 1; + } +} + +class OneWRegData opc, string asm, SDPatternOperator node> + : BaseOneOperandData { + let Inst{31} = 0; +} + +class OneXRegData opc, string asm, SDPatternOperator node> + : BaseOneOperandData { + let Inst{31} = 1; +} + +class SignAuthOneData opcode_prefix, bits<2> opcode, string asm> + : I<(outs GPR64:$Rd), (ins GPR64sp:$Rn), asm, "\t$Rd, $Rn", "", + []>, + Sched<[WriteI, ReadI]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31-15} = 0b11011010110000010; + let Inst{14-12} = opcode_prefix; + let Inst{11-10} = opcode; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class SignAuthZero opcode_prefix, bits<2> opcode, string asm> + : I<(outs GPR64:$Rd), (ins), asm, "\t$Rd", "", []>, Sched<[]> { + bits<5> Rd; + let Inst{31-15} = 0b11011010110000010; + let Inst{14-12} = opcode_prefix; + let Inst{11-10} = opcode; + let Inst{9-5} = 0b11111; + let Inst{4-0} = Rd; +} + +class SignAuthTwoOperand opc, string asm, + SDPatternOperator OpNode> + : I<(outs GPR64:$Rd), (ins GPR64:$Rn, GPR64sp:$Rm), + asm, "\t$Rd, $Rn, $Rm", "", + [(set GPR64:$Rd, (OpNode GPR64:$Rn, GPR64sp:$Rm))]>, + Sched<[WriteI, ReadI, ReadI]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31-21} = 0b10011010110; + let Inst{20-16} = Rm; + let Inst{15-14} = 0b00; + let Inst{13-10} = opc; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +// Base class for the Armv8.4-A 8 and 16-bit flag manipulation instructions +class BaseFlagManipulation + : I<(outs), iops, asm, ops, "", []>, + Sched<[WriteI, ReadI, ReadI]> { + let Uses = [NZCV]; + bits<5> Rn; + let Inst{31} = sf; + let Inst{30-15} = 0b0111010000000000; + let Inst{14} = sz; + let Inst{13-10} = 0b0010; + let Inst{9-5} = Rn; + let Inst{4-0} = 0b01101; +} + +class FlagRotate + : BaseFlagManipulation<0b1, 0b0, iops, asm, ops> { + bits<6> imm; + bits<4> mask; + let Inst{20-15} = imm; + let Inst{13-10} = 0b0001; + let Inst{4} = 0b0; + let Inst{3-0} = mask; +} + +//--- +// Basic two-operand data processing instructions. 
+//--- +class BaseBaseAddSubCarry pattern> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), + asm, "\t$Rd, $Rn, $Rm", "", pattern>, + Sched<[WriteI, ReadI, ReadI]> { + let Uses = [NZCV]; + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{30} = isSub; + let Inst{28-21} = 0b11010000; + let Inst{20-16} = Rm; + let Inst{15-10} = 0; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class BaseAddSubCarry + : BaseBaseAddSubCarry; + +class BaseAddSubCarrySetFlags + : BaseBaseAddSubCarry { + let Defs = [NZCV]; +} + +multiclass AddSubCarry { + def Wr : BaseAddSubCarry { + let Inst{31} = 0; + let Inst{29} = 0; + } + def Xr : BaseAddSubCarry { + let Inst{31} = 1; + let Inst{29} = 0; + } + + // Sets flags. + def SWr : BaseAddSubCarrySetFlags { + let Inst{31} = 0; + let Inst{29} = 1; + } + def SXr : BaseAddSubCarrySetFlags { + let Inst{31} = 1; + let Inst{29} = 1; + } +} + +class BaseTwoOperand opc, RegisterClass regtype, string asm, + SDPatternOperator OpNode> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), + asm, "\t$Rd, $Rn, $Rm", "", + [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{30-21} = 0b0011010110; + let Inst{20-16} = Rm; + let Inst{15-14} = 0b00; + let Inst{13-10} = opc; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class BaseDiv + : BaseTwoOperand<{0,0,1,?}, regtype, asm, OpNode> { + let Inst{10} = isSigned; +} + +multiclass Div { + def Wr : BaseDiv, + Sched<[WriteID32, ReadID, ReadID]> { + let Inst{31} = 0; + } + def Xr : BaseDiv, + Sched<[WriteID64, ReadID, ReadID]> { + let Inst{31} = 1; + } +} + +class BaseShift shift_type, RegisterClass regtype, string asm, + SDPatternOperator OpNode = null_frag> + : BaseTwoOperand<{1,0,?,?}, regtype, asm, OpNode>, + Sched<[WriteIS, ReadI]> { + let Inst{11-10} = shift_type; +} + +multiclass Shift shift_type, string asm, SDNode OpNode> { + def Wr : BaseShift { + let Inst{31} = 0; + } + + def Xr : BaseShift { + let Inst{31} = 1; + } + + def : Pat<(i32 (OpNode GPR32:$Rn, i64:$Rm)), + (!cast(NAME # "Wr") GPR32:$Rn, + (EXTRACT_SUBREG i64:$Rm, sub_32))>; + + def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (zext GPR32:$Rm)))), + (!cast(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>; + + def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (anyext GPR32:$Rm)))), + (!cast(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>; + + def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (sext GPR32:$Rm)))), + (!cast(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>; +} + +class ShiftAlias + : InstAlias; + +class BaseMulAccum opc, RegisterClass multype, + RegisterClass addtype, string asm, + list pattern> + : I<(outs addtype:$Rd), (ins multype:$Rn, multype:$Rm, addtype:$Ra), + asm, "\t$Rd, $Rn, $Rm, $Ra", "", pattern> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<5> Ra; + let Inst{30-24} = 0b0011011; + let Inst{23-21} = opc; + let Inst{20-16} = Rm; + let Inst{15} = isSub; + let Inst{14-10} = Ra; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass MulAccum { + // MADD/MSUB generation is decided by MachineCombiner.cpp + def Wrrr : BaseMulAccum, + Sched<[WriteIM32, ReadIM, ReadIM, ReadIMA]> { + let Inst{31} = 0; + } + + def Xrrr : BaseMulAccum, + Sched<[WriteIM64, ReadIM, ReadIM, ReadIMA]> { + let Inst{31} = 1; + } +} + +class WideMulAccum opc, string asm, + SDNode AccNode, SDNode ExtNode> + : BaseMulAccum, + Sched<[WriteIM32, ReadIM, ReadIM, ReadIMA]> { + let Inst{31} = 1; +} + +class MulHi opc, string asm, SDNode OpNode> + : I<(outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm), + asm, "\t$Rd, $Rn, $Rm", "", + [(set GPR64:$Rd, (OpNode GPR64:$Rn, 
GPR64:$Rm))]>, + Sched<[WriteIM64, ReadIM, ReadIM]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31-24} = 0b10011011; + let Inst{23-21} = opc; + let Inst{20-16} = Rm; + let Inst{15} = 0; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; + + // The Ra field of SMULH and UMULH is unused: it should be assembled as 31 + // (i.e. all bits 1) but is ignored by the processor. + let PostEncoderMethod = "fixMulHigh"; +} + +class MulAccumWAlias + : InstAlias; +class MulAccumXAlias + : InstAlias; +class WideMulAccumAlias + : InstAlias; + +class BaseCRC32 sz, bit C, RegisterClass StreamReg, + SDPatternOperator OpNode, string asm> + : I<(outs GPR32:$Rd), (ins GPR32:$Rn, StreamReg:$Rm), + asm, "\t$Rd, $Rn, $Rm", "", + [(set GPR32:$Rd, (OpNode GPR32:$Rn, StreamReg:$Rm))]>, + Sched<[WriteISReg, ReadI, ReadISReg]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + + let Inst{31} = sf; + let Inst{30-21} = 0b0011010110; + let Inst{20-16} = Rm; + let Inst{15-13} = 0b010; + let Inst{12} = C; + let Inst{11-10} = sz; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; + let Predicates = [HasCRC]; +} + +//--- +// Address generation. +//--- + +class ADRI pattern> + : I<(outs GPR64:$Xd), (ins adr:$label), asm, "\t$Xd, $label", "", + pattern>, + Sched<[WriteI]> { + bits<5> Xd; + bits<21> label; + let Inst{31} = page; + let Inst{30-29} = label{1-0}; + let Inst{28-24} = 0b10000; + let Inst{23-5} = label{20-2}; + let Inst{4-0} = Xd; + + let DecoderMethod = "DecodeAdrInstruction"; +} + +//--- +// Move immediate. +//--- + +def movimm32_imm : Operand { + let ParserMatchClass = Imm0_65535Operand; + let EncoderMethod = "getMoveWideImmOpValue"; + let PrintMethod = "printImm"; +} +def movimm32_shift : Operand { + let PrintMethod = "printShifter"; + let ParserMatchClass = MovImm32ShifterOperand; +} +def movimm64_shift : Operand { + let PrintMethod = "printShifter"; + let ParserMatchClass = MovImm64ShifterOperand; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseMoveImmediate opc, RegisterClass regtype, Operand shifter, + string asm> + : I<(outs regtype:$Rd), (ins movimm32_imm:$imm, shifter:$shift), + asm, "\t$Rd, $imm$shift", "", []>, + Sched<[WriteImm]> { + bits<5> Rd; + bits<16> imm; + bits<6> shift; + let Inst{30-29} = opc; + let Inst{28-23} = 0b100101; + let Inst{22-21} = shift{5-4}; + let Inst{20-5} = imm; + let Inst{4-0} = Rd; + + let DecoderMethod = "DecodeMoveImmInstruction"; +} + +multiclass MoveImmediate opc, string asm> { + def Wi : BaseMoveImmediate { + let Inst{31} = 0; + } + + def Xi : BaseMoveImmediate { + let Inst{31} = 1; + } +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseInsertImmediate opc, RegisterClass regtype, Operand shifter, + string asm> + : I<(outs regtype:$Rd), + (ins regtype:$src, movimm32_imm:$imm, shifter:$shift), + asm, "\t$Rd, $imm$shift", "$src = $Rd", []>, + Sched<[WriteI, ReadI]> { + bits<5> Rd; + bits<16> imm; + bits<6> shift; + let Inst{30-29} = opc; + let Inst{28-23} = 0b100101; + let Inst{22-21} = shift{5-4}; + let Inst{20-5} = imm; + let Inst{4-0} = Rd; + + let DecoderMethod = "DecodeMoveImmInstruction"; +} + +multiclass InsertImmediate opc, string asm> { + def Wi : BaseInsertImmediate { + let Inst{31} = 0; + } + + def Xi : BaseInsertImmediate { + let Inst{31} = 1; + } +} + +//--- +// Add/Subtract +//--- + +class BaseAddSubImm + : I<(outs dstRegtype:$Rd), (ins srcRegtype:$Rn, immtype:$imm), + asm, "\t$Rd, $Rn, $imm", "", + [(set dstRegtype:$Rd, (OpNode srcRegtype:$Rn, immtype:$imm))]>, + Sched<[WriteI, ReadI]> { + bits<5> Rd; + bits<5> Rn; + bits<14> 
imm; + let Inst{30} = isSub; + let Inst{29} = setFlags; + let Inst{28-24} = 0b10001; + let Inst{23-22} = imm{13-12}; // '00' => lsl #0, '01' => lsl #12 + let Inst{21-10} = imm{11-0}; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; + let DecoderMethod = "DecodeBaseAddSubImm"; +} + +class BaseAddSubRegPseudo + : Pseudo<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), + [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]>, + Sched<[WriteI, ReadI, ReadI]>; + +class BaseAddSubSReg + : I<(outs regtype:$Rd), (ins regtype:$Rn, shifted_regtype:$Rm), + asm, "\t$Rd, $Rn, $Rm", "", + [(set regtype:$Rd, (OpNode regtype:$Rn, shifted_regtype:$Rm))]>, + Sched<[WriteISReg, ReadI, ReadISReg]> { + // The operands are in order to match the 'addr' MI operands, so we + // don't need an encoder method and by-name matching. Just use the default + // in-order handling. Since we're using by-order, make sure the names + // do not match. + bits<5> dst; + bits<5> src1; + bits<5> src2; + bits<8> shift; + let Inst{30} = isSub; + let Inst{29} = setFlags; + let Inst{28-24} = 0b01011; + let Inst{23-22} = shift{7-6}; + let Inst{21} = 0; + let Inst{20-16} = src2; + let Inst{15-10} = shift{5-0}; + let Inst{9-5} = src1; + let Inst{4-0} = dst; + + let DecoderMethod = "DecodeThreeAddrSRegInstruction"; +} + +class BaseAddSubEReg + : I<(outs dstRegtype:$R1), + (ins src1Regtype:$R2, src2Regtype:$R3), + asm, "\t$R1, $R2, $R3", "", + [(set dstRegtype:$R1, (OpNode src1Regtype:$R2, src2Regtype:$R3))]>, + Sched<[WriteIEReg, ReadI, ReadIEReg]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<6> ext; + let Inst{30} = isSub; + let Inst{29} = setFlags; + let Inst{28-24} = 0b01011; + let Inst{23-21} = 0b001; + let Inst{20-16} = Rm; + let Inst{15-13} = ext{5-3}; + let Inst{12-10} = ext{2-0}; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; + + let DecoderMethod = "DecodeAddSubERegInstruction"; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseAddSubEReg64 + : I<(outs dstRegtype:$Rd), + (ins src1Regtype:$Rn, src2Regtype:$Rm, ext_op:$ext), + asm, "\t$Rd, $Rn, $Rm$ext", "", []>, + Sched<[WriteIEReg, ReadI, ReadIEReg]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<6> ext; + let Inst{30} = isSub; + let Inst{29} = setFlags; + let Inst{28-24} = 0b01011; + let Inst{23-21} = 0b001; + let Inst{20-16} = Rm; + let Inst{15} = ext{5}; + let Inst{12-10} = ext{2-0}; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; + + let DecoderMethod = "DecodeAddSubERegInstruction"; +} + +// Aliases for register+register add/subtract. +class AddSubRegAlias + : InstAlias; + +multiclass AddSub { + let hasSideEffects = 0, isReMaterializable = 1, isAsCheapAsAMove = 1 in { + // Add/Subtract immediate + // Increase the weight of the immediate variant to try to match it before + // the extended register variant. + // We used to match the register variant before the immediate when the + // register argument could be implicitly zero-extended. 
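// What the immediate variant can actually take is a 12-bit unsigned value,
// optionally shifted left by 12 (the '00'/'01' shift field noted in
// BaseAddSubImm above), which is why it is worth matching first. A C++ sketch
// of that encodability test (helper name invented):
#include <cassert>
#include <cstdint>

static bool fitsAddSubImm(uint64_t V) {
  return (V & ~0xfffULL) == 0 ||       // imm12, lsl #0
         (V & ~0xfff000ULL) == 0;      // imm12, lsl #12
}

int main() {
  assert(fitsAddSubImm(4095));     // largest unshifted immediate
  assert(fitsAddSubImm(0x5000));   // 0x5 << 12
  assert(!fitsAddSubImm(0x1001));  // needs more than one add/sub
  return 0;
}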
+ let AddedComplexity = 6 in + def Wri : BaseAddSubImm { + let Inst{31} = 0; + } + let AddedComplexity = 6 in + def Xri : BaseAddSubImm { + let Inst{31} = 1; + } + + // Add/Subtract register - Only used for CodeGen + def Wrr : BaseAddSubRegPseudo; + def Xrr : BaseAddSubRegPseudo; + + // Add/Subtract shifted register + def Wrs : BaseAddSubSReg { + let Inst{31} = 0; + } + def Xrs : BaseAddSubSReg { + let Inst{31} = 1; + } + } + + // Add/Subtract extended register + let AddedComplexity = 1, hasSideEffects = 0 in { + def Wrx : BaseAddSubEReg, mnemonic, OpNode> { + let Inst{31} = 0; + } + def Xrx : BaseAddSubEReg, mnemonic, OpNode> { + let Inst{31} = 1; + } + } + + def Xrx64 : BaseAddSubEReg64 { + // UXTX and SXTX only. + let Inst{14-13} = 0b11; + let Inst{31} = 1; + } + + // add Rd, Rb, -imm -> sub Rd, Rn, imm + def : InstSubst(NAME # "Wri") GPR32sp:$Rd, GPR32sp:$Rn, + addsub_shifted_imm32_neg:$imm), 0>; + def : InstSubst(NAME # "Xri") GPR64sp:$Rd, GPR64sp:$Rn, + addsub_shifted_imm64_neg:$imm), 0>; + + // Register/register aliases with no shift when SP is not used. + def : AddSubRegAlias(NAME#"Wrs"), + GPR32, GPR32, GPR32, 0>; + def : AddSubRegAlias(NAME#"Xrs"), + GPR64, GPR64, GPR64, 0>; + + // Register/register aliases with no shift when either the destination or + // first source register is SP. + def : AddSubRegAlias(NAME#"Wrx"), + GPR32sponly, GPR32sp, GPR32, 16>; // UXTW #0 + def : AddSubRegAlias(NAME#"Wrx"), + GPR32sp, GPR32sponly, GPR32, 16>; // UXTW #0 + def : AddSubRegAlias(NAME#"Xrx64"), + GPR64sponly, GPR64sp, GPR64, 24>; // UXTX #0 + def : AddSubRegAlias(NAME#"Xrx64"), + GPR64sp, GPR64sponly, GPR64, 24>; // UXTX #0 +} + +multiclass AddSubS { + let isCompare = 1, Defs = [NZCV] in { + // Add/Subtract immediate + def Wri : BaseAddSubImm { + let Inst{31} = 0; + } + def Xri : BaseAddSubImm { + let Inst{31} = 1; + } + + // Add/Subtract register + def Wrr : BaseAddSubRegPseudo; + def Xrr : BaseAddSubRegPseudo; + + // Add/Subtract shifted register + def Wrs : BaseAddSubSReg { + let Inst{31} = 0; + } + def Xrs : BaseAddSubSReg { + let Inst{31} = 1; + } + + // Add/Subtract extended register + let AddedComplexity = 1 in { + def Wrx : BaseAddSubEReg, mnemonic, OpNode> { + let Inst{31} = 0; + } + def Xrx : BaseAddSubEReg, mnemonic, OpNode> { + let Inst{31} = 1; + } + } + + def Xrx64 : BaseAddSubEReg64 { + // UXTX and SXTX only. + let Inst{14-13} = 0b11; + let Inst{31} = 1; + } + } // Defs = [NZCV] + + // Support negative immediates, e.g. adds Rd, Rn, -imm -> subs Rd, Rn, imm + def : InstSubst(NAME # "Wri") GPR32:$Rd, GPR32sp:$Rn, + addsub_shifted_imm32_neg:$imm), 0>; + def : InstSubst(NAME # "Xri") GPR64:$Rd, GPR64sp:$Rn, + addsub_shifted_imm64_neg:$imm), 0>; + + // Compare aliases + def : InstAlias(NAME#"Wri") + WZR, GPR32sp:$src, addsub_shifted_imm32:$imm), 5>; + def : InstAlias(NAME#"Xri") + XZR, GPR64sp:$src, addsub_shifted_imm64:$imm), 5>; + def : InstAlias(NAME#"Wrx") + WZR, GPR32sp:$src1, GPR32:$src2, arith_extend:$sh), 4>; + def : InstAlias(NAME#"Xrx") + XZR, GPR64sp:$src1, GPR32:$src2, arith_extend:$sh), 4>; + def : InstAlias(NAME#"Xrx64") + XZR, GPR64sp:$src1, GPR64:$src2, arith_extendlsl64:$sh), 4>; + def : InstAlias(NAME#"Wrs") + WZR, GPR32:$src1, GPR32:$src2, arith_shift32:$sh), 4>; + def : InstAlias(NAME#"Xrs") + XZR, GPR64:$src1, GPR64:$src2, arith_shift64:$sh), 4>; + + // Support negative immediates, e.g. 
cmp Rn, -imm -> cmn Rn, imm + def : InstSubst(NAME#"Wri") + WZR, GPR32sp:$src, addsub_shifted_imm32_neg:$imm), 0>; + def : InstSubst(NAME#"Xri") + XZR, GPR64sp:$src, addsub_shifted_imm64_neg:$imm), 0>; + + // Compare shorthands + def : InstAlias(NAME#"Wrs") + WZR, GPR32:$src1, GPR32:$src2, 0), 5>; + def : InstAlias(NAME#"Xrs") + XZR, GPR64:$src1, GPR64:$src2, 0), 5>; + def : InstAlias(NAME#"Wrx") + WZR, GPR32sponly:$src1, GPR32:$src2, 16), 5>; + def : InstAlias(NAME#"Xrx64") + XZR, GPR64sponly:$src1, GPR64:$src2, 24), 5>; + + // Register/register aliases with no shift when SP is not used. + def : AddSubRegAlias(NAME#"Wrs"), + GPR32, GPR32, GPR32, 0>; + def : AddSubRegAlias(NAME#"Xrs"), + GPR64, GPR64, GPR64, 0>; + + // Register/register aliases with no shift when the first source register + // is SP. + def : AddSubRegAlias(NAME#"Wrx"), + GPR32, GPR32sponly, GPR32, 16>; // UXTW #0 + def : AddSubRegAlias(NAME#"Xrx64"), + GPR64, GPR64sponly, GPR64, 24>; // UXTX #0 +} + +//--- +// Extract +//--- +def SDTA64EXTR : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisPtrTy<3>]>; +def AArch64Extr : SDNode<"AArch64ISD::EXTR", SDTA64EXTR>; + +class BaseExtractImm patterns> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, imm_type:$imm), + asm, "\t$Rd, $Rn, $Rm, $imm", "", patterns>, + Sched<[WriteExtr, ReadExtrHi]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<6> imm; + + let Inst{30-23} = 0b00100111; + let Inst{21} = 0; + let Inst{20-16} = Rm; + let Inst{15-10} = imm; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass ExtractImm { + def Wrri : BaseExtractImm { + let Inst{31} = 0; + let Inst{22} = 0; + // imm<5> must be zero. + let imm{5} = 0; + } + def Xrri : BaseExtractImm { + + let Inst{31} = 1; + let Inst{22} = 1; + } +} + +//--- +// Bitfield +//--- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseBitfieldImm opc, + RegisterClass regtype, Operand imm_type, string asm> + : I<(outs regtype:$Rd), (ins regtype:$Rn, imm_type:$immr, imm_type:$imms), + asm, "\t$Rd, $Rn, $immr, $imms", "", []>, + Sched<[WriteIS, ReadI]> { + bits<5> Rd; + bits<5> Rn; + bits<6> immr; + bits<6> imms; + + let Inst{30-29} = opc; + let Inst{28-23} = 0b100110; + let Inst{21-16} = immr; + let Inst{15-10} = imms; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass BitfieldImm opc, string asm> { + def Wri : BaseBitfieldImm { + let Inst{31} = 0; + let Inst{22} = 0; + // imms<5> and immr<5> must be zero, else ReservedValue(). + let Inst{21} = 0; + let Inst{15} = 0; + } + def Xri : BaseBitfieldImm { + let Inst{31} = 1; + let Inst{22} = 1; + } +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseBitfieldImmWith2RegArgs opc, + RegisterClass regtype, Operand imm_type, string asm> + : I<(outs regtype:$Rd), (ins regtype:$src, regtype:$Rn, imm_type:$immr, + imm_type:$imms), + asm, "\t$Rd, $Rn, $immr, $imms", "$src = $Rd", []>, + Sched<[WriteIS, ReadI]> { + bits<5> Rd; + bits<5> Rn; + bits<6> immr; + bits<6> imms; + + let Inst{30-29} = opc; + let Inst{28-23} = 0b100110; + let Inst{21-16} = immr; + let Inst{15-10} = imms; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass BitfieldImmWith2RegArgs opc, string asm> { + def Wri : BaseBitfieldImmWith2RegArgs { + let Inst{31} = 0; + let Inst{22} = 0; + // imms<5> and immr<5> must be zero, else ReservedValue(). 
+ let Inst{21} = 0; + let Inst{15} = 0; + } + def Xri : BaseBitfieldImmWith2RegArgs { + let Inst{31} = 1; + let Inst{22} = 1; + } +} + +//--- +// Logical +//--- + +// Logical (immediate) +class BaseLogicalImm opc, RegisterClass dregtype, + RegisterClass sregtype, Operand imm_type, string asm, + list pattern> + : I<(outs dregtype:$Rd), (ins sregtype:$Rn, imm_type:$imm), + asm, "\t$Rd, $Rn, $imm", "", pattern>, + Sched<[WriteI, ReadI]> { + bits<5> Rd; + bits<5> Rn; + bits<13> imm; + let Inst{30-29} = opc; + let Inst{28-23} = 0b100100; + let Inst{22} = imm{12}; + let Inst{21-16} = imm{11-6}; + let Inst{15-10} = imm{5-0}; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; + + let DecoderMethod = "DecodeLogicalImmInstruction"; +} + +// Logical (shifted register) +class BaseLogicalSReg opc, bit N, RegisterClass regtype, + logical_shifted_reg shifted_regtype, string asm, + list pattern> + : I<(outs regtype:$Rd), (ins regtype:$Rn, shifted_regtype:$Rm), + asm, "\t$Rd, $Rn, $Rm", "", pattern>, + Sched<[WriteISReg, ReadI, ReadISReg]> { + // The operands are in order to match the 'addr' MI operands, so we + // don't need an encoder method and by-name matching. Just use the default + // in-order handling. Since we're using by-order, make sure the names + // do not match. + bits<5> dst; + bits<5> src1; + bits<5> src2; + bits<8> shift; + let Inst{30-29} = opc; + let Inst{28-24} = 0b01010; + let Inst{23-22} = shift{7-6}; + let Inst{21} = N; + let Inst{20-16} = src2; + let Inst{15-10} = shift{5-0}; + let Inst{9-5} = src1; + let Inst{4-0} = dst; + + let DecoderMethod = "DecodeThreeAddrSRegInstruction"; +} + +// Aliases for register+register logical instructions. +class LogicalRegAlias + : InstAlias; + +multiclass LogicalImm opc, string mnemonic, SDNode OpNode, + string Alias> { + let AddedComplexity = 6, isReMaterializable = 1, isAsCheapAsAMove = 1 in + def Wri : BaseLogicalImm { + let Inst{31} = 0; + let Inst{22} = 0; // 64-bit version has an additional bit of immediate. + } + let AddedComplexity = 6, isReMaterializable = 1, isAsCheapAsAMove = 1 in + def Xri : BaseLogicalImm { + let Inst{31} = 1; + } + + def : InstSubst(NAME # "Wri") GPR32sp:$Rd, GPR32:$Rn, + logical_imm32_not:$imm), 0>; + def : InstSubst(NAME # "Xri") GPR64sp:$Rd, GPR64:$Rn, + logical_imm64_not:$imm), 0>; +} + +multiclass LogicalImmS opc, string mnemonic, SDNode OpNode, + string Alias> { + let isCompare = 1, Defs = [NZCV] in { + def Wri : BaseLogicalImm { + let Inst{31} = 0; + let Inst{22} = 0; // 64-bit version has an additional bit of immediate. + } + def Xri : BaseLogicalImm { + let Inst{31} = 1; + } + } // end Defs = [NZCV] + + def : InstSubst(NAME # "Wri") GPR32:$Rd, GPR32:$Rn, + logical_imm32_not:$imm), 0>; + def : InstSubst(NAME # "Xri") GPR64:$Rd, GPR64:$Rn, + logical_imm64_not:$imm), 0>; +} + +class BaseLogicalRegPseudo + : Pseudo<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), + [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]>, + Sched<[WriteI, ReadI, ReadI]>; + +// Split from LogicalImm as not all instructions have both. 
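// As a closing note on the bitfield classes above: the constant-shift aliases
// are UBFM/SBFM with computed immr/imms fields. For the 64-bit forms,
//   lsr Xd, Xn, #s  ==  ubfm Xd, Xn, #s,             #63
//   lsl Xd, Xn, #s  ==  ubfm Xd, Xn, #((64-s) % 64), #(63-s)
// A C++ sketch of that mapping (types and helpers invented):
#include <cassert>

struct BitfieldImms { unsigned immr, imms; };

static BitfieldImms lsrToUBFM(unsigned Shift) { return { Shift, 63 }; }
static BitfieldImms lslToUBFM(unsigned Shift) {
  return { (64 - Shift) % 64, 63 - Shift };
}

int main() {
  BitfieldImms L = lslToUBFM(4);         // "lsl x0, x1, #4"
  assert(L.immr == 60 && L.imms == 59);  // == "ubfm x0, x1, #60, #59"
  BitfieldImms R = lsrToUBFM(4);         // "lsr x0, x1, #4"
  assert(R.immr == 4 && R.imms == 63);
  return 0;
}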
+multiclass LogicalReg opc, bit N, string mnemonic, + SDPatternOperator OpNode> { + let isReMaterializable = 1, isAsCheapAsAMove = 1 in { + def Wrr : BaseLogicalRegPseudo; + def Xrr : BaseLogicalRegPseudo; + } + + def Wrs : BaseLogicalSReg { + let Inst{31} = 0; + } + def Xrs : BaseLogicalSReg { + let Inst{31} = 1; + } + + def : LogicalRegAlias(NAME#"Wrs"), GPR32>; + def : LogicalRegAlias(NAME#"Xrs"), GPR64>; +} + +// Split from LogicalReg to allow setting NZCV Defs +multiclass LogicalRegS opc, bit N, string mnemonic, + SDPatternOperator OpNode = null_frag> { + let Defs = [NZCV], mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { + def Wrr : BaseLogicalRegPseudo; + def Xrr : BaseLogicalRegPseudo; + + def Wrs : BaseLogicalSReg { + let Inst{31} = 0; + } + def Xrs : BaseLogicalSReg { + let Inst{31} = 1; + } + } // Defs = [NZCV] + + def : LogicalRegAlias(NAME#"Wrs"), GPR32>; + def : LogicalRegAlias(NAME#"Xrs"), GPR64>; +} + +//--- +// Conditionally set flags +//--- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseCondComparisonImm + : I<(outs), (ins regtype:$Rn, immtype:$imm, imm32_0_15:$nzcv, ccode:$cond), + mnemonic, "\t$Rn, $imm, $nzcv, $cond", "", + [(set NZCV, (OpNode regtype:$Rn, immtype:$imm, (i32 imm:$nzcv), + (i32 imm:$cond), NZCV))]>, + Sched<[WriteI, ReadI]> { + let Uses = [NZCV]; + let Defs = [NZCV]; + + bits<5> Rn; + bits<5> imm; + bits<4> nzcv; + bits<4> cond; + + let Inst{30} = op; + let Inst{29-21} = 0b111010010; + let Inst{20-16} = imm; + let Inst{15-12} = cond; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4} = 0b0; + let Inst{3-0} = nzcv; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseCondComparisonReg + : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond), + mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "", + [(set NZCV, (OpNode regtype:$Rn, regtype:$Rm, (i32 imm:$nzcv), + (i32 imm:$cond), NZCV))]>, + Sched<[WriteI, ReadI, ReadI]> { + let Uses = [NZCV]; + let Defs = [NZCV]; + + bits<5> Rn; + bits<5> Rm; + bits<4> nzcv; + bits<4> cond; + + let Inst{30} = op; + let Inst{29-21} = 0b111010010; + let Inst{20-16} = Rm; + let Inst{15-12} = cond; + let Inst{11-10} = 0b00; + let Inst{9-5} = Rn; + let Inst{4} = 0b0; + let Inst{3-0} = nzcv; +} + +multiclass CondComparison { + // immediate operand variants + def Wi : BaseCondComparisonImm { + let Inst{31} = 0; + } + def Xi : BaseCondComparisonImm { + let Inst{31} = 1; + } + // register operand variants + def Wr : BaseCondComparisonReg { + let Inst{31} = 0; + } + def Xr : BaseCondComparisonReg { + let Inst{31} = 1; + } +} + +//--- +// Conditional select +//--- + +class BaseCondSelect op2, RegisterClass regtype, string asm> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond), + asm, "\t$Rd, $Rn, $Rm, $cond", "", + [(set regtype:$Rd, + (AArch64csel regtype:$Rn, regtype:$Rm, (i32 imm:$cond), NZCV))]>, + Sched<[WriteI, ReadI, ReadI]> { + let Uses = [NZCV]; + + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<4> cond; + + let Inst{30} = op; + let Inst{29-21} = 0b011010100; + let Inst{20-16} = Rm; + let Inst{15-12} = cond; + let Inst{11-10} = op2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass CondSelect op2, string asm> { + def Wr : BaseCondSelect { + let Inst{31} = 0; + } + def Xr : BaseCondSelect { + let Inst{31} = 1; + } +} + +class BaseCondSelectOp op2, RegisterClass regtype, string asm, + PatFrag frag> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond), + asm, "\t$Rd, $Rn, $Rm, $cond", "", + [(set regtype:$Rd, + 
(AArch64csel regtype:$Rn, (frag regtype:$Rm), + (i32 imm:$cond), NZCV))]>, + Sched<[WriteI, ReadI, ReadI]> { + let Uses = [NZCV]; + + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<4> cond; + + let Inst{30} = op; + let Inst{29-21} = 0b011010100; + let Inst{20-16} = Rm; + let Inst{15-12} = cond; + let Inst{11-10} = op2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +def inv_cond_XFORM : SDNodeXForm(N->getZExtValue()); + return CurDAG->getTargetConstant(AArch64CC::getInvertedCondCode(CC), SDLoc(N), + MVT::i32); +}]>; + +multiclass CondSelectOp op2, string asm, PatFrag frag> { + def Wr : BaseCondSelectOp { + let Inst{31} = 0; + } + def Xr : BaseCondSelectOp { + let Inst{31} = 1; + } + + def : Pat<(AArch64csel (frag GPR32:$Rm), GPR32:$Rn, (i32 imm:$cond), NZCV), + (!cast(NAME # Wr) GPR32:$Rn, GPR32:$Rm, + (inv_cond_XFORM imm:$cond))>; + + def : Pat<(AArch64csel (frag GPR64:$Rm), GPR64:$Rn, (i32 imm:$cond), NZCV), + (!cast(NAME # Xr) GPR64:$Rn, GPR64:$Rm, + (inv_cond_XFORM imm:$cond))>; +} + +//--- +// Special Mask Value +//--- +def maski8_or_more : Operand, + ImmLeaf { +} +def maski16_or_more : Operand, + ImmLeaf { +} + + +//--- +// Load/store +//--- + +// (unsigned immediate) +// Indexed for 8-bit registers. offset is in range [0,4095]. +def am_indexed8 : ComplexPattern; +def am_indexed16 : ComplexPattern; +def am_indexed32 : ComplexPattern; +def am_indexed64 : ComplexPattern; +def am_indexed128 : ComplexPattern; + +def gi_am_indexed8 : + GIComplexOperandMatcher">, + GIComplexPatternEquiv; +def gi_am_indexed16 : + GIComplexOperandMatcher">, + GIComplexPatternEquiv; +def gi_am_indexed32 : + GIComplexOperandMatcher">, + GIComplexPatternEquiv; +def gi_am_indexed64 : + GIComplexOperandMatcher">, + GIComplexPatternEquiv; +def gi_am_indexed128 : + GIComplexOperandMatcher">, + GIComplexPatternEquiv; + +class UImm12OffsetOperand : AsmOperandClass { + let Name = "UImm12Offset" # Scale; + let RenderMethod = "addUImm12OffsetOperands<" # Scale # ">"; + let PredicateMethod = "isUImm12Offset<" # Scale # ">"; + let DiagnosticType = "InvalidMemoryIndexed" # Scale; +} + +def UImm12OffsetScale1Operand : UImm12OffsetOperand<1>; +def UImm12OffsetScale2Operand : UImm12OffsetOperand<2>; +def UImm12OffsetScale4Operand : UImm12OffsetOperand<4>; +def UImm12OffsetScale8Operand : UImm12OffsetOperand<8>; +def UImm12OffsetScale16Operand : UImm12OffsetOperand<16>; + +class uimm12_scaled : Operand { + let ParserMatchClass + = !cast("UImm12OffsetScale" # Scale # "Operand"); + let EncoderMethod + = "getLdStUImm12OpValue"; + let PrintMethod = "printUImm12Offset<" # Scale # ">"; +} + +def uimm12s1 : uimm12_scaled<1>; +def uimm12s2 : uimm12_scaled<2>; +def uimm12s4 : uimm12_scaled<4>; +def uimm12s8 : uimm12_scaled<8>; +def uimm12s16 : uimm12_scaled<16>; + +class BaseLoadStoreUI sz, bit V, bits<2> opc, dag oops, dag iops, + string asm, list pattern> + : I { + bits<5> Rt; + + bits<5> Rn; + bits<12> offset; + + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0b01; + let Inst{23-22} = opc; + let Inst{21-10} = offset; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let DecoderMethod = "DecodeUnsignedLdStInstruction"; +} + +multiclass LoadUI sz, bit V, bits<2> opc, RegisterOperand regtype, + Operand indextype, string asm, list pattern> { + let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in + def ui : BaseLoadStoreUI, + Sched<[WriteLD]>; + + def : InstAlias(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>; +} + +multiclass StoreUI sz, bit V, bits<2> opc, RegisterOperand 
regtype, + Operand indextype, string asm, list pattern> { + let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in + def ui : BaseLoadStoreUI, + Sched<[WriteST]>; + + def : InstAlias(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>; +} + +// Same as StoreUI, but take a RegisterOperand. This is used by GlobalISel to +// substitute zero-registers automatically. +// +// TODO: Roll out zero-register subtitution to GPR32/GPR64 and fold this back +// into StoreUI. +multiclass StoreUIz sz, bit V, bits<2> opc, RegisterOperand regtype, + Operand indextype, string asm, list pattern> { + let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in + def ui : BaseLoadStoreUI, + Sched<[WriteST]>; + + def : InstAlias(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>; +} + +def PrefetchOperand : AsmOperandClass { + let Name = "Prefetch"; + let ParserMethod = "tryParsePrefetch"; +} +def prfop : Operand { + let PrintMethod = "printPrefetchOp"; + let ParserMatchClass = PrefetchOperand; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in +class PrefetchUI sz, bit V, bits<2> opc, string asm, list pat> + : BaseLoadStoreUI, + Sched<[WriteLD]>; + +//--- +// Load literal +//--- + +// Load literal address: 19-bit immediate. The low two bits of the target +// offset are implied zero and so are not part of the immediate. +def am_ldrlit : Operand { + let EncoderMethod = "getLoadLiteralOpValue"; + let DecoderMethod = "DecodePCRelLabel19"; + let PrintMethod = "printAlignedLabel"; + let ParserMatchClass = PCRelLabel19Operand; + let OperandType = "OPERAND_PCREL"; +} + +let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in +class LoadLiteral opc, bit V, RegisterOperand regtype, string asm> + : I<(outs regtype:$Rt), (ins am_ldrlit:$label), + asm, "\t$Rt, $label", "", []>, + Sched<[WriteLD]> { + bits<5> Rt; + bits<19> label; + let Inst{31-30} = opc; + let Inst{29-27} = 0b011; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-5} = label; + let Inst{4-0} = Rt; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in +class PrefetchLiteral opc, bit V, string asm, list pat> + : I<(outs), (ins prfop:$Rt, am_ldrlit:$label), + asm, "\t$Rt, $label", "", pat>, + Sched<[WriteLD]> { + bits<5> Rt; + bits<19> label; + let Inst{31-30} = opc; + let Inst{29-27} = 0b011; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-5} = label; + let Inst{4-0} = Rt; +} + +//--- +// Load/store register offset +//--- + +def ro_Xindexed8 : ComplexPattern", []>; +def ro_Xindexed16 : ComplexPattern", []>; +def ro_Xindexed32 : ComplexPattern", []>; +def ro_Xindexed64 : ComplexPattern", []>; +def ro_Xindexed128 : ComplexPattern", []>; + +def ro_Windexed8 : ComplexPattern", []>; +def ro_Windexed16 : ComplexPattern", []>; +def ro_Windexed32 : ComplexPattern", []>; +def ro_Windexed64 : ComplexPattern", []>; +def ro_Windexed128 : ComplexPattern", []>; + +class MemExtendOperand : AsmOperandClass { + let Name = "Mem" # Reg # "Extend" # Width; + let PredicateMethod = "isMem" # Reg # "Extend<" # Width # ">"; + let RenderMethod = "addMemExtendOperands"; + let DiagnosticType = "InvalidMemory" # Reg # "Extend" # Width; +} + +def MemWExtend8Operand : MemExtendOperand<"W", 8> { + // The address "[x0, x1, lsl #0]" actually maps to the variant which performs + // the trivial shift. 
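+  // Illustrative note: per the comment above, spelling the shift explicitly,
+  //   ldrb w0, [x0, x1, lsl #0]
+  // selects this "shift present" form, while plain "ldrb w0, [x0, x1]" does
+  // not; both describe the same address.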
+ let RenderMethod = "addMemExtend8Operands"; +} +def MemWExtend16Operand : MemExtendOperand<"W", 16>; +def MemWExtend32Operand : MemExtendOperand<"W", 32>; +def MemWExtend64Operand : MemExtendOperand<"W", 64>; +def MemWExtend128Operand : MemExtendOperand<"W", 128>; + +def MemXExtend8Operand : MemExtendOperand<"X", 8> { + // The address "[x0, x1, lsl #0]" actually maps to the variant which performs + // the trivial shift. + let RenderMethod = "addMemExtend8Operands"; +} +def MemXExtend16Operand : MemExtendOperand<"X", 16>; +def MemXExtend32Operand : MemExtendOperand<"X", 32>; +def MemXExtend64Operand : MemExtendOperand<"X", 64>; +def MemXExtend128Operand : MemExtendOperand<"X", 128>; + +class ro_extend + : Operand { + let ParserMatchClass = ParserClass; + let PrintMethod = "printMemExtend<'" # Reg # "', " # Width # ">"; + let DecoderMethod = "DecodeMemExtend"; + let EncoderMethod = "getMemExtendOpValue"; + let MIOperandInfo = (ops i32imm:$signed, i32imm:$doshift); +} + +def ro_Wextend8 : ro_extend; +def ro_Wextend16 : ro_extend; +def ro_Wextend32 : ro_extend; +def ro_Wextend64 : ro_extend; +def ro_Wextend128 : ro_extend; + +def ro_Xextend8 : ro_extend; +def ro_Xextend16 : ro_extend; +def ro_Xextend32 : ro_extend; +def ro_Xextend64 : ro_extend; +def ro_Xextend128 : ro_extend; + +class ROAddrMode { + // CodeGen-level pattern covering the entire addressing mode. + ComplexPattern Wpat = windex; + ComplexPattern Xpat = xindex; + + // Asm-level Operand covering the valid "uxtw #3" style syntax. + Operand Wext = wextend; + Operand Xext = xextend; +} + +def ro8 : ROAddrMode; +def ro16 : ROAddrMode; +def ro32 : ROAddrMode; +def ro64 : ROAddrMode; +def ro128 : ROAddrMode; + +class LoadStore8RO sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, dag ins, dag outs, list pat> + : I { + bits<5> Rt; + bits<5> Rn; + bits<5> Rm; + bits<2> extend; + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15} = extend{1}; // sign extend Rm? + let Inst{14} = 1; + let Inst{12} = extend{0}; // do shift? + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +class ROInstAlias + : InstAlias; + +multiclass Load8RO sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, ValueType Ty, SDPatternOperator loadop> { + let AddedComplexity = 10 in + def roW : LoadStore8RO, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10 in + def roX : LoadStore8RO, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias(NAME # "roX")>; +} + +multiclass Store8RO sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, ValueType Ty, SDPatternOperator storeop> { + let AddedComplexity = 10 in + def roW : LoadStore8RO, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10 in + def roX : LoadStore8RO, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias(NAME # "roX")>; +} + +class LoadStore16RO sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, dag ins, dag outs, list pat> + : I { + bits<5> Rt; + bits<5> Rn; + bits<5> Rm; + bits<2> extend; + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15} = extend{1}; // sign extend Rm? + let Inst{14} = 1; + let Inst{12} = extend{0}; // do shift? 
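+  // For orientation: together with Inst{13}, which the Load16RO/Store16RO
+  // multiclasses below set to pick a W or X index register, these bits form
+  // the register-offset "option"/"shift" fields. For example, something like
+  //   ldrh w0, [x1, w2, sxtw #1]
+  // would have extend{1} = 1 (sign-extended index) and extend{0} = 1 (scale
+  // by the access size), assuming the usual AArch64 register-offset encoding.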
+ let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +multiclass Load16RO sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, ValueType Ty, SDPatternOperator loadop> { + let AddedComplexity = 10 in + def roW : LoadStore16RO, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10 in + def roX : LoadStore16RO, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias(NAME # "roX")>; +} + +multiclass Store16RO sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, ValueType Ty, SDPatternOperator storeop> { + let AddedComplexity = 10 in + def roW : LoadStore16RO, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10 in + def roX : LoadStore16RO, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias(NAME # "roX")>; +} + +class LoadStore32RO sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, dag ins, dag outs, list pat> + : I { + bits<5> Rt; + bits<5> Rn; + bits<5> Rm; + bits<2> extend; + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15} = extend{1}; // sign extend Rm? + let Inst{14} = 1; + let Inst{12} = extend{0}; // do shift? + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +multiclass Load32RO sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, ValueType Ty, SDPatternOperator loadop> { + let AddedComplexity = 10 in + def roW : LoadStore32RO, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10 in + def roX : LoadStore32RO, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias(NAME # "roX")>; +} + +multiclass Store32RO sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, ValueType Ty, SDPatternOperator storeop> { + let AddedComplexity = 10 in + def roW : LoadStore32RO, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10 in + def roX : LoadStore32RO, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias(NAME # "roX")>; +} + +class LoadStore64RO sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, dag ins, dag outs, list pat> + : I { + bits<5> Rt; + bits<5> Rn; + bits<5> Rm; + bits<2> extend; + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15} = extend{1}; // sign extend Rm? + let Inst{14} = 1; + let Inst{12} = extend{0}; // do shift? 
+ let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +multiclass Load64RO sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, ValueType Ty, SDPatternOperator loadop> { + let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in + def roW : LoadStore64RO, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in + def roX : LoadStore64RO, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias(NAME # "roX")>; +} + +multiclass Store64RO sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, ValueType Ty, SDPatternOperator storeop> { + let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in + def roW : LoadStore64RO, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in + def roX : LoadStore64RO, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias(NAME # "roX")>; +} + +class LoadStore128RO sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, dag ins, dag outs, list pat> + : I { + bits<5> Rt; + bits<5> Rn; + bits<5> Rm; + bits<2> extend; + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15} = extend{1}; // sign extend Rm? + let Inst{14} = 1; + let Inst{12} = extend{0}; // do shift? + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +multiclass Load128RO sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, ValueType Ty, SDPatternOperator loadop> { + let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in + def roW : LoadStore128RO, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in + def roX : LoadStore128RO, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias(NAME # "roX")>; +} + +multiclass Store128RO sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, ValueType Ty, SDPatternOperator storeop> { + let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in + def roW : LoadStore128RO, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in + def roX : LoadStore128RO, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias(NAME # "roX")>; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in +class BasePrefetchRO sz, bit V, bits<2> opc, dag outs, dag ins, + string asm, list pat> + : I, + Sched<[WriteLD]> { + bits<5> Rt; + bits<5> Rn; + bits<5> Rm; + bits<2> extend; + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15} = extend{1}; // sign extend Rm? + let Inst{14} = 1; + let Inst{12} = extend{0}; // do shift? 
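+  // Note: for prefetches the Rt slot (Inst{4-0}) carries the prfop selector
+  // (e.g. pldl1keep) rather than a destination register.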
+ let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +multiclass PrefetchRO sz, bit V, bits<2> opc, string asm> { + def roW : BasePrefetchRO { + let Inst{13} = 0b0; + } + + def roX : BasePrefetchRO { + let Inst{13} = 0b1; + } + + def : InstAlias<"prfm $Rt, [$Rn, $Rm]", + (!cast(NAME # "roX") prfop:$Rt, + GPR64sp:$Rn, GPR64:$Rm, 0, 0)>; +} + +//--- +// Load/store unscaled immediate +//--- + +def am_unscaled8 : ComplexPattern; +def am_unscaled16 : ComplexPattern; +def am_unscaled32 : ComplexPattern; +def am_unscaled64 : ComplexPattern; +def am_unscaled128 :ComplexPattern; + +def gi_am_unscaled8 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; +def gi_am_unscaled16 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; +def gi_am_unscaled32 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; +def gi_am_unscaled64 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; +def gi_am_unscaled128 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + + +class BaseLoadStoreUnscale sz, bit V, bits<2> opc, dag oops, dag iops, + string asm, list pattern> + : I { + bits<5> Rt; + bits<5> Rn; + bits<9> offset; + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 0; + let Inst{20-12} = offset; + let Inst{11-10} = 0b00; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let DecoderMethod = "DecodeSignedLdStInstruction"; +} + +// Armv8.4 LDAPR & STLR with Immediate Offset instruction +multiclass BaseLoadUnscaleV84 sz, bits<2> opc, + RegisterOperand regtype > { + def i : BaseLoadStoreUnscale, + Sched<[WriteST]> { + let Inst{29} = 0; + let Inst{24} = 1; + } + def : InstAlias(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; +} + +multiclass BaseStoreUnscaleV84 sz, bits<2> opc, + RegisterOperand regtype > { + def i : BaseLoadStoreUnscale, + Sched<[WriteST]> { + let Inst{29} = 0; + let Inst{24} = 1; + } + def : InstAlias(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; +} + +multiclass LoadUnscaled sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, list pattern> { + let AddedComplexity = 1 in // try this before LoadUI + def i : BaseLoadStoreUnscale, + Sched<[WriteLD]>; + + def : InstAlias(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; +} + +multiclass StoreUnscaled sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, list pattern> { + let AddedComplexity = 1 in // try this before StoreUI + def i : BaseLoadStoreUnscale, + Sched<[WriteST]>; + + def : InstAlias(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; +} + +multiclass PrefetchUnscaled sz, bit V, bits<2> opc, string asm, + list pat> { + let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in + def i : BaseLoadStoreUnscale, + Sched<[WriteLD]>; + + def : InstAlias(NAME # "i") prfop:$Rt, GPR64sp:$Rn, 0)>; +} + +//--- +// Load/store unscaled immediate, unprivileged +//--- + +class BaseLoadStoreUnprivileged sz, bit V, bits<2> opc, + dag oops, dag iops, string asm> + : I { + bits<5> Rt; + bits<5> Rn; + bits<9> offset; + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 0; + let Inst{20-12} = offset; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let DecoderMethod = "DecodeSignedLdStInstruction"; +} + +multiclass LoadUnprivileged sz, bit V, bits<2> opc, + RegisterClass regtype, string asm> { + let mayStore = 0, mayLoad = 1, hasSideEffects = 0 in + def i : BaseLoadStoreUnprivileged, + Sched<[WriteLD]>; + + def : InstAlias(NAME # "i") regtype:$Rt, 
GPR64sp:$Rn, 0)>; +} + +multiclass StoreUnprivileged sz, bit V, bits<2> opc, + RegisterClass regtype, string asm> { + let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in + def i : BaseLoadStoreUnprivileged, + Sched<[WriteST]>; + + def : InstAlias(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; +} + +//--- +// Load/store pre-indexed +//--- + +class BaseLoadStorePreIdx sz, bit V, bits<2> opc, dag oops, dag iops, + string asm, string cstr, list pat> + : I { + bits<5> Rt; + bits<5> Rn; + bits<9> offset; + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0; + let Inst{23-22} = opc; + let Inst{21} = 0; + let Inst{20-12} = offset; + let Inst{11-10} = 0b11; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let DecoderMethod = "DecodeSignedLdStInstruction"; +} + +let hasSideEffects = 0 in { +let mayStore = 0, mayLoad = 1 in +class LoadPreIdx sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm> + : BaseLoadStorePreIdx, + Sched<[WriteLD, WriteAdr]>; + +let mayStore = 1, mayLoad = 0 in +class StorePreIdx sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, SDPatternOperator storeop, ValueType Ty> + : BaseLoadStorePreIdx, + Sched<[WriteAdr, WriteST]>; +} // hasSideEffects = 0 + +//--- +// Load/store post-indexed +//--- + +class BaseLoadStorePostIdx sz, bit V, bits<2> opc, dag oops, dag iops, + string asm, string cstr, list pat> + : I { + bits<5> Rt; + bits<5> Rn; + bits<9> offset; + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 0b0; + let Inst{20-12} = offset; + let Inst{11-10} = 0b01; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let DecoderMethod = "DecodeSignedLdStInstruction"; +} + +let hasSideEffects = 0 in { +let mayStore = 0, mayLoad = 1 in +class LoadPostIdx sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm> + : BaseLoadStorePostIdx, + Sched<[WriteLD, WriteAdr]>; + +let mayStore = 1, mayLoad = 0 in +class StorePostIdx sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, SDPatternOperator storeop, ValueType Ty> + : BaseLoadStorePostIdx, + Sched<[WriteAdr, WriteST]>; +} // hasSideEffects = 0 + + +//--- +// Load/store pair +//--- + +// (indexed, offset) + +class BaseLoadStorePairOffset opc, bit V, bit L, dag oops, dag iops, + string asm> + : I { + bits<5> Rt; + bits<5> Rt2; + bits<5> Rn; + bits<7> offset; + let Inst{31-30} = opc; + let Inst{29-27} = 0b101; + let Inst{26} = V; + let Inst{25-23} = 0b010; + let Inst{22} = L; + let Inst{21-15} = offset; + let Inst{14-10} = Rt2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let DecoderMethod = "DecodePairLdStInstruction"; +} + +multiclass LoadPairOffset opc, bit V, RegisterOperand regtype, + Operand indextype, string asm> { + let hasSideEffects = 0, mayStore = 0, mayLoad = 1 in + def i : BaseLoadStorePairOffset, + Sched<[WriteLD, WriteLDHi]>; + + def : InstAlias(NAME # "i") regtype:$Rt, regtype:$Rt2, + GPR64sp:$Rn, 0)>; +} + + +multiclass StorePairOffset opc, bit V, RegisterOperand regtype, + Operand indextype, string asm> { + let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in + def i : BaseLoadStorePairOffset, + Sched<[WriteSTP]>; + + def : InstAlias(NAME # "i") regtype:$Rt, regtype:$Rt2, + GPR64sp:$Rn, 0)>; +} + +// (pre-indexed) +class BaseLoadStorePairPreIdx opc, bit V, bit L, dag oops, dag iops, + string asm> + : I { + bits<5> Rt; + bits<5> Rt2; + bits<5> Rn; + bits<7> offset; + let Inst{31-30} = opc; + let Inst{29-27} = 0b101; + let Inst{26} = V; + let Inst{25-23} = 0b011; + 
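+  // Bits 25-23 select the pair addressing mode in this family: 0b010 signed
+  // offset (above), 0b011 pre-indexed (this class), 0b001 post-indexed and
+  // 0b000 no-allocate (below).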
let Inst{22} = L; + let Inst{21-15} = offset; + let Inst{14-10} = Rt2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let DecoderMethod = "DecodePairLdStInstruction"; +} + +let hasSideEffects = 0 in { +let mayStore = 0, mayLoad = 1 in +class LoadPairPreIdx opc, bit V, RegisterOperand regtype, + Operand indextype, string asm> + : BaseLoadStorePairPreIdx, + Sched<[WriteLD, WriteLDHi, WriteAdr]>; + +let mayStore = 1, mayLoad = 0 in +class StorePairPreIdx opc, bit V, RegisterOperand regtype, + Operand indextype, string asm> + : BaseLoadStorePairPreIdx, + Sched<[WriteAdr, WriteSTP]>; +} // hasSideEffects = 0 + +// (post-indexed) + +class BaseLoadStorePairPostIdx opc, bit V, bit L, dag oops, dag iops, + string asm> + : I { + bits<5> Rt; + bits<5> Rt2; + bits<5> Rn; + bits<7> offset; + let Inst{31-30} = opc; + let Inst{29-27} = 0b101; + let Inst{26} = V; + let Inst{25-23} = 0b001; + let Inst{22} = L; + let Inst{21-15} = offset; + let Inst{14-10} = Rt2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let DecoderMethod = "DecodePairLdStInstruction"; +} + +let hasSideEffects = 0 in { +let mayStore = 0, mayLoad = 1 in +class LoadPairPostIdx opc, bit V, RegisterOperand regtype, + Operand idxtype, string asm> + : BaseLoadStorePairPostIdx, + Sched<[WriteLD, WriteLDHi, WriteAdr]>; + +let mayStore = 1, mayLoad = 0 in +class StorePairPostIdx opc, bit V, RegisterOperand regtype, + Operand idxtype, string asm> + : BaseLoadStorePairPostIdx, + Sched<[WriteAdr, WriteSTP]>; +} // hasSideEffects = 0 + +// (no-allocate) + +class BaseLoadStorePairNoAlloc opc, bit V, bit L, dag oops, dag iops, + string asm> + : I { + bits<5> Rt; + bits<5> Rt2; + bits<5> Rn; + bits<7> offset; + let Inst{31-30} = opc; + let Inst{29-27} = 0b101; + let Inst{26} = V; + let Inst{25-23} = 0b000; + let Inst{22} = L; + let Inst{21-15} = offset; + let Inst{14-10} = Rt2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let DecoderMethod = "DecodePairLdStInstruction"; +} + +multiclass LoadPairNoAlloc opc, bit V, RegisterClass regtype, + Operand indextype, string asm> { + let hasSideEffects = 0, mayStore = 0, mayLoad = 1 in + def i : BaseLoadStorePairNoAlloc, + Sched<[WriteLD, WriteLDHi]>; + + + def : InstAlias(NAME # "i") regtype:$Rt, regtype:$Rt2, + GPR64sp:$Rn, 0)>; +} + +multiclass StorePairNoAlloc opc, bit V, RegisterClass regtype, + Operand indextype, string asm> { + let hasSideEffects = 0, mayStore = 1, mayLoad = 0 in + def i : BaseLoadStorePairNoAlloc, + Sched<[WriteSTP]>; + + def : InstAlias(NAME # "i") regtype:$Rt, regtype:$Rt2, + GPR64sp:$Rn, 0)>; +} + +//--- +// Load/store exclusive +//--- + +// True exclusive operations write to and/or read from the system's exclusive +// monitors, which as far as a compiler is concerned can be modelled as a +// random shared memory address. Hence LoadExclusive mayStore. +// +// Since these instructions have the undefined register bits set to 1 in +// their canonical form, we need a post encoder method to set those bits +// to 1 when encoding these instructions. We do this using the +// fixLoadStoreExclusive function. This function has template parameters: +// +// fixLoadStoreExclusive +// +// hasRs indicates that the instruction uses the Rs field, so we won't set +// it to 1 (and the same for Rt2). We don't need template parameters for +// the other register fields since Rt and Rn are always used. 
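+// That is, fixLoadStoreExclusive<hasRs, hasRt2>: the simple forms below use
+// fixLoadStoreExclusive<0,0>, LoadExclusivePair uses <0,1> and StoreExclusive
+// uses <1,0>, so only the unused Rs/Rt2 fields are forced to all-ones.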
+// +let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in +class BaseLoadStoreExclusive sz, bit o2, bit L, bit o1, bit o0, + dag oops, dag iops, string asm, string operands> + : I { + let Inst{31-30} = sz; + let Inst{29-24} = 0b001000; + let Inst{23} = o2; + let Inst{22} = L; + let Inst{21} = o1; + let Inst{15} = o0; + + let DecoderMethod = "DecodeExclusiveLdStInstruction"; +} + +// Neither Rs nor Rt2 operands. +class LoadStoreExclusiveSimple sz, bit o2, bit L, bit o1, bit o0, + dag oops, dag iops, string asm, string operands> + : BaseLoadStoreExclusive { + bits<5> Rt; + bits<5> Rn; + let Inst{20-16} = 0b11111; + let Unpredictable{20-16} = 0b11111; + let Inst{14-10} = 0b11111; + let Unpredictable{14-10} = 0b11111; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let PostEncoderMethod = "fixLoadStoreExclusive<0,0>"; +} + +// Simple load acquires don't set the exclusive monitor +let mayLoad = 1, mayStore = 0 in +class LoadAcquire sz, bit o2, bit L, bit o1, bit o0, + RegisterClass regtype, string asm> + : LoadStoreExclusiveSimple, + Sched<[WriteLD]>; + +class LoadExclusive sz, bit o2, bit L, bit o1, bit o0, + RegisterClass regtype, string asm> + : LoadStoreExclusiveSimple, + Sched<[WriteLD]>; + +class LoadExclusivePair sz, bit o2, bit L, bit o1, bit o0, + RegisterClass regtype, string asm> + : BaseLoadStoreExclusive, + Sched<[WriteLD, WriteLDHi]> { + bits<5> Rt; + bits<5> Rt2; + bits<5> Rn; + let Inst{14-10} = Rt2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let PostEncoderMethod = "fixLoadStoreExclusive<0,1>"; +} + +// Simple store release operations do not check the exclusive monitor. +let mayLoad = 0, mayStore = 1 in +class StoreRelease sz, bit o2, bit L, bit o1, bit o0, + RegisterClass regtype, string asm> + : LoadStoreExclusiveSimple, + Sched<[WriteST]>; + +let mayLoad = 1, mayStore = 1 in +class StoreExclusive sz, bit o2, bit L, bit o1, bit o0, + RegisterClass regtype, string asm> + : BaseLoadStoreExclusive, + Sched<[WriteSTX]> { + bits<5> Ws; + bits<5> Rt; + bits<5> Rn; + let Inst{20-16} = Ws; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let Constraints = "@earlyclobber $Ws"; + let PostEncoderMethod = "fixLoadStoreExclusive<1,0>"; +} + +class StoreExclusivePair sz, bit o2, bit L, bit o1, bit o0, + RegisterClass regtype, string asm> + : BaseLoadStoreExclusive, + Sched<[WriteSTX]> { + bits<5> Ws; + bits<5> Rt; + bits<5> Rt2; + bits<5> Rn; + let Inst{20-16} = Ws; + let Inst{14-10} = Rt2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let Constraints = "@earlyclobber $Ws"; +} + +//--- +// Exception generation +//--- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in +class ExceptionGeneration op1, bits<2> ll, string asm> + : I<(outs), (ins imm0_65535:$imm), asm, "\t$imm", "", []>, + Sched<[WriteSys]> { + bits<16> imm; + let Inst{31-24} = 0b11010100; + let Inst{23-21} = op1; + let Inst{20-5} = imm; + let Inst{4-2} = 0b000; + let Inst{1-0} = ll; +} + +let Predicates = [HasFPARMv8] in { + +//--- +// Floating point to integer conversion +//--- + +class BaseFPToIntegerUnscaled type, bits<2> rmode, bits<3> opcode, + RegisterClass srcType, RegisterClass dstType, + string asm, list pattern> + : I<(outs dstType:$Rd), (ins srcType:$Rn), + asm, "\t$Rd, $Rn", "", pattern>, + Sched<[WriteFCvt]> { + bits<5> Rd; + bits<5> Rn; + let Inst{30-29} = 0b00; + let Inst{28-24} = 0b11110; + let Inst{23-22} = type; + let Inst{21} = 1; + let Inst{20-19} = rmode; + let Inst{18-16} = opcode; + let Inst{15-10} = 0; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayLoad = 0, mayStore = 0, 
hasSideEffects = 0 in +class BaseFPToInteger type, bits<2> rmode, bits<3> opcode, + RegisterClass srcType, RegisterClass dstType, + Operand immType, string asm, list pattern> + : I<(outs dstType:$Rd), (ins srcType:$Rn, immType:$scale), + asm, "\t$Rd, $Rn, $scale", "", pattern>, + Sched<[WriteFCvt]> { + bits<5> Rd; + bits<5> Rn; + bits<6> scale; + let Inst{30-29} = 0b00; + let Inst{28-24} = 0b11110; + let Inst{23-22} = type; + let Inst{21} = 0; + let Inst{20-19} = rmode; + let Inst{18-16} = opcode; + let Inst{15-10} = scale; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass FPToIntegerUnscaled rmode, bits<3> opcode, string asm, + SDPatternOperator OpN> { + // Unscaled half-precision to 32-bit + def UWHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, GPR32, asm, + [(set GPR32:$Rd, (OpN FPR16:$Rn))]> { + let Inst{31} = 0; // 32-bit GPR flag + let Predicates = [HasFullFP16]; + } + + // Unscaled half-precision to 64-bit + def UXHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, GPR64, asm, + [(set GPR64:$Rd, (OpN FPR16:$Rn))]> { + let Inst{31} = 1; // 64-bit GPR flag + let Predicates = [HasFullFP16]; + } + + // Unscaled single-precision to 32-bit + def UWSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, GPR32, asm, + [(set GPR32:$Rd, (OpN FPR32:$Rn))]> { + let Inst{31} = 0; // 32-bit GPR flag + } + + // Unscaled single-precision to 64-bit + def UXSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, GPR64, asm, + [(set GPR64:$Rd, (OpN FPR32:$Rn))]> { + let Inst{31} = 1; // 64-bit GPR flag + } + + // Unscaled double-precision to 32-bit + def UWDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, GPR32, asm, + [(set GPR32:$Rd, (OpN (f64 FPR64:$Rn)))]> { + let Inst{31} = 0; // 32-bit GPR flag + } + + // Unscaled double-precision to 64-bit + def UXDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, GPR64, asm, + [(set GPR64:$Rd, (OpN (f64 FPR64:$Rn)))]> { + let Inst{31} = 1; // 64-bit GPR flag + } +} + +multiclass FPToIntegerScaled rmode, bits<3> opcode, string asm, + SDPatternOperator OpN> { + // Scaled half-precision to 32-bit + def SWHri : BaseFPToInteger<0b11, rmode, opcode, FPR16, GPR32, + fixedpoint_f16_i32, asm, + [(set GPR32:$Rd, (OpN (fmul FPR16:$Rn, + fixedpoint_f16_i32:$scale)))]> { + let Inst{31} = 0; // 32-bit GPR flag + let scale{5} = 1; + let Predicates = [HasFullFP16]; + } + + // Scaled half-precision to 64-bit + def SXHri : BaseFPToInteger<0b11, rmode, opcode, FPR16, GPR64, + fixedpoint_f16_i64, asm, + [(set GPR64:$Rd, (OpN (fmul FPR16:$Rn, + fixedpoint_f16_i64:$scale)))]> { + let Inst{31} = 1; // 64-bit GPR flag + let Predicates = [HasFullFP16]; + } + + // Scaled single-precision to 32-bit + def SWSri : BaseFPToInteger<0b00, rmode, opcode, FPR32, GPR32, + fixedpoint_f32_i32, asm, + [(set GPR32:$Rd, (OpN (fmul FPR32:$Rn, + fixedpoint_f32_i32:$scale)))]> { + let Inst{31} = 0; // 32-bit GPR flag + let scale{5} = 1; + } + + // Scaled single-precision to 64-bit + def SXSri : BaseFPToInteger<0b00, rmode, opcode, FPR32, GPR64, + fixedpoint_f32_i64, asm, + [(set GPR64:$Rd, (OpN (fmul FPR32:$Rn, + fixedpoint_f32_i64:$scale)))]> { + let Inst{31} = 1; // 64-bit GPR flag + } + + // Scaled double-precision to 32-bit + def SWDri : BaseFPToInteger<0b01, rmode, opcode, FPR64, GPR32, + fixedpoint_f64_i32, asm, + [(set GPR32:$Rd, (OpN (fmul FPR64:$Rn, + fixedpoint_f64_i32:$scale)))]> { + let Inst{31} = 0; // 32-bit GPR flag + let scale{5} = 1; + } + + // Scaled double-precision to 64-bit + def SXDri : BaseFPToInteger<0b01, rmode, opcode, FPR64, GPR64, + 
fixedpoint_f64_i64, asm, + [(set GPR64:$Rd, (OpN (fmul FPR64:$Rn, + fixedpoint_f64_i64:$scale)))]> { + let Inst{31} = 1; // 64-bit GPR flag + } +} + +//--- +// Integer to floating point conversion +//--- + +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseIntegerToFP pattern> + : I<(outs dstType:$Rd), (ins srcType:$Rn, immType:$scale), + asm, "\t$Rd, $Rn, $scale", "", pattern>, + Sched<[WriteFCvt]> { + bits<5> Rd; + bits<5> Rn; + bits<6> scale; + let Inst{30-24} = 0b0011110; + let Inst{21-17} = 0b00001; + let Inst{16} = isUnsigned; + let Inst{15-10} = scale; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class BaseIntegerToFPUnscaled + : I<(outs dstType:$Rd), (ins srcType:$Rn), + asm, "\t$Rd, $Rn", "", [(set (dvt dstType:$Rd), (node srcType:$Rn))]>, + Sched<[WriteFCvt]> { + bits<5> Rd; + bits<5> Rn; + bits<6> scale; + let Inst{30-24} = 0b0011110; + let Inst{21-17} = 0b10001; + let Inst{16} = isUnsigned; + let Inst{15-10} = 0b000000; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass IntegerToFP { + // Unscaled + def UWHri: BaseIntegerToFPUnscaled { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; + } + + def UWSri: BaseIntegerToFPUnscaled { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag + } + + def UWDri: BaseIntegerToFPUnscaled { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag + } + + def UXHri: BaseIntegerToFPUnscaled { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; + } + + def UXSri: BaseIntegerToFPUnscaled { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag + } + + def UXDri: BaseIntegerToFPUnscaled { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag + } + + // Scaled + def SWHri: BaseIntegerToFP { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let scale{5} = 1; + let Predicates = [HasFullFP16]; + } + + def SWSri: BaseIntegerToFP { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag + let scale{5} = 1; + } + + def SWDri: BaseIntegerToFP { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag + let scale{5} = 1; + } + + def SXHri: BaseIntegerToFP { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; + } + + def SXSri: BaseIntegerToFP { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag + } + + def SXDri: BaseIntegerToFP { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag + } +} + +//--- +// Unscaled integer <-> floating point conversion (i.e. FMOV) +//--- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseUnscaledConversion rmode, bits<3> opcode, + RegisterClass srcType, RegisterClass dstType, + string asm> + : I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd, $Rn", "", + // We use COPY_TO_REGCLASS for these bitconvert operations. + // copyPhysReg() expands the resultant COPY instructions after + // regalloc is done. This gives greater freedom for the allocator + // and related passes (coalescing, copy propagation, et. al.) to + // be more effective. 
+ [/*(set (dvt dstType:$Rd), (bitconvert (svt srcType:$Rn)))*/]>, + Sched<[WriteFCopy]> { + bits<5> Rd; + bits<5> Rn; + let Inst{30-24} = 0b0011110; + let Inst{21} = 1; + let Inst{20-19} = rmode; + let Inst{18-16} = opcode; + let Inst{15-10} = 0b000000; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseUnscaledConversionToHigh rmode, bits<3> opcode, + RegisterClass srcType, RegisterOperand dstType, string asm, + string kind> + : I<(outs dstType:$Rd), (ins srcType:$Rn, VectorIndex1:$idx), asm, + "{\t$Rd"#kind#"$idx, $Rn|"#kind#"\t$Rd$idx, $Rn}", "", []>, + Sched<[WriteFCopy]> { + bits<5> Rd; + bits<5> Rn; + let Inst{30-23} = 0b00111101; + let Inst{21} = 1; + let Inst{20-19} = rmode; + let Inst{18-16} = opcode; + let Inst{15-10} = 0b000000; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; + + let DecoderMethod = "DecodeFMOVLaneInstruction"; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseUnscaledConversionFromHigh rmode, bits<3> opcode, + RegisterOperand srcType, RegisterClass dstType, string asm, + string kind> + : I<(outs dstType:$Rd), (ins srcType:$Rn, VectorIndex1:$idx), asm, + "{\t$Rd, $Rn"#kind#"$idx|"#kind#"\t$Rd, $Rn$idx}", "", []>, + Sched<[WriteFCopy]> { + bits<5> Rd; + bits<5> Rn; + let Inst{30-23} = 0b00111101; + let Inst{21} = 1; + let Inst{20-19} = rmode; + let Inst{18-16} = opcode; + let Inst{15-10} = 0b000000; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; + + let DecoderMethod = "DecodeFMOVLaneInstruction"; +} + + +multiclass UnscaledConversion { + def WHr : BaseUnscaledConversion<0b00, 0b111, GPR32, FPR16, asm> { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; + } + + def XHr : BaseUnscaledConversion<0b00, 0b111, GPR64, FPR16, asm> { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; + } + + def WSr : BaseUnscaledConversion<0b00, 0b111, GPR32, FPR32, asm> { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag + } + + def XDr : BaseUnscaledConversion<0b00, 0b111, GPR64, FPR64, asm> { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag + } + + def HWr : BaseUnscaledConversion<0b00, 0b110, FPR16, GPR32, asm> { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; + } + + def HXr : BaseUnscaledConversion<0b00, 0b110, FPR16, GPR64, asm> { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; + } + + def SWr : BaseUnscaledConversion<0b00, 0b110, FPR32, GPR32, asm> { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag + } + + def DXr : BaseUnscaledConversion<0b00, 0b110, FPR64, GPR64, asm> { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag + } + + def XDHighr : BaseUnscaledConversionToHigh<0b01, 0b111, GPR64, V128, + asm, ".d"> { + let Inst{31} = 1; + let Inst{22} = 0; + } + + def DXHighr : BaseUnscaledConversionFromHigh<0b01, 0b110, V128, GPR64, + asm, ".d"> { + let Inst{31} = 1; + let Inst{22} = 0; + } +} + +//--- +// Floating point conversion +//--- + +class BaseFPConversion type, bits<2> opcode, RegisterClass dstType, + RegisterClass srcType, string asm, list pattern> + : I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd, $Rn", "", pattern>, + Sched<[WriteFCvt]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31-24} 
= 0b00011110; + let Inst{23-22} = type; + let Inst{21-17} = 0b10001; + let Inst{16-15} = opcode; + let Inst{14-10} = 0b10000; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass FPConversion { + // Double-precision to Half-precision + def HDr : BaseFPConversion<0b01, 0b11, FPR16, FPR64, asm, + [(set FPR16:$Rd, (fpround FPR64:$Rn))]>; + + // Double-precision to Single-precision + def SDr : BaseFPConversion<0b01, 0b00, FPR32, FPR64, asm, + [(set FPR32:$Rd, (fpround FPR64:$Rn))]>; + + // Half-precision to Double-precision + def DHr : BaseFPConversion<0b11, 0b01, FPR64, FPR16, asm, + [(set FPR64:$Rd, (fpextend FPR16:$Rn))]>; + + // Half-precision to Single-precision + def SHr : BaseFPConversion<0b11, 0b00, FPR32, FPR16, asm, + [(set FPR32:$Rd, (fpextend FPR16:$Rn))]>; + + // Single-precision to Double-precision + def DSr : BaseFPConversion<0b00, 0b01, FPR64, FPR32, asm, + [(set FPR64:$Rd, (fpextend FPR32:$Rn))]>; + + // Single-precision to Half-precision + def HSr : BaseFPConversion<0b00, 0b11, FPR16, FPR32, asm, + [(set FPR16:$Rd, (fpround FPR32:$Rn))]>; +} + +//--- +// Single operand floating point data processing +//--- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSingleOperandFPData opcode, RegisterClass regtype, + ValueType vt, string asm, SDPatternOperator node> + : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "\t$Rd, $Rn", "", + [(set (vt regtype:$Rd), (node (vt regtype:$Rn)))]>, + Sched<[WriteF]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31-24} = 0b00011110; + let Inst{21-19} = 0b100; + let Inst{18-15} = opcode; + let Inst{14-10} = 0b10000; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SingleOperandFPData opcode, string asm, + SDPatternOperator node = null_frag> { + def Hr : BaseSingleOperandFPData { + let Inst{23-22} = 0b11; // 16-bit size flag + let Predicates = [HasFullFP16]; + } + + def Sr : BaseSingleOperandFPData { + let Inst{23-22} = 0b00; // 32-bit size flag + } + + def Dr : BaseSingleOperandFPData { + let Inst{23-22} = 0b01; // 64-bit size flag + } +} + +//--- +// Two operand floating point data processing +//--- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseTwoOperandFPData opcode, RegisterClass regtype, + string asm, list pat> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), + asm, "\t$Rd, $Rn, $Rm", "", pat>, + Sched<[WriteF]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31-24} = 0b00011110; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass TwoOperandFPData opcode, string asm, + SDPatternOperator node = null_frag> { + def Hrr : BaseTwoOperandFPData { + let Inst{23-22} = 0b11; // 16-bit size flag + let Predicates = [HasFullFP16]; + } + + def Srr : BaseTwoOperandFPData { + let Inst{23-22} = 0b00; // 32-bit size flag + } + + def Drr : BaseTwoOperandFPData { + let Inst{23-22} = 0b01; // 64-bit size flag + } +} + +multiclass TwoOperandFPDataNeg opcode, string asm, SDNode node> { + def Hrr : BaseTwoOperandFPData { + let Inst{23-22} = 0b11; // 16-bit size flag + let Predicates = [HasFullFP16]; + } + + def Srr : BaseTwoOperandFPData { + let Inst{23-22} = 0b00; // 32-bit size flag + } + + def Drr : BaseTwoOperandFPData { + let Inst{23-22} = 0b01; // 64-bit size flag + } +} + + +//--- +// Three operand floating point data processing +//--- + +class BaseThreeOperandFPData pat> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, regtype: $Ra), + asm, "\t$Rd, $Rn, $Rm, $Ra", "", pat>, + 
Sched<[WriteFMul]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<5> Ra; + let Inst{31-24} = 0b00011111; + let Inst{21} = isNegated; + let Inst{20-16} = Rm; + let Inst{15} = isSub; + let Inst{14-10} = Ra; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass ThreeOperandFPData { + def Hrrr : BaseThreeOperandFPData { + let Inst{23-22} = 0b11; // 16-bit size flag + let Predicates = [HasFullFP16]; + } + + def Srrr : BaseThreeOperandFPData { + let Inst{23-22} = 0b00; // 32-bit size flag + } + + def Drrr : BaseThreeOperandFPData { + let Inst{23-22} = 0b01; // 64-bit size flag + } +} + +//--- +// Floating point data comparisons +//--- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseOneOperandFPComparison pat> + : I<(outs), (ins regtype:$Rn), asm, "\t$Rn, #0.0", "", pat>, + Sched<[WriteFCmp]> { + bits<5> Rn; + let Inst{31-24} = 0b00011110; + let Inst{21} = 1; + + let Inst{15-10} = 0b001000; + let Inst{9-5} = Rn; + let Inst{4} = signalAllNans; + let Inst{3-0} = 0b1000; + + // Rm should be 0b00000 canonically, but we need to accept any value. + let PostEncoderMethod = "fixOneOperandFPComparison"; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseTwoOperandFPComparison pat> + : I<(outs), (ins regtype:$Rn, regtype:$Rm), asm, "\t$Rn, $Rm", "", pat>, + Sched<[WriteFCmp]> { + bits<5> Rm; + bits<5> Rn; + let Inst{31-24} = 0b00011110; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15-10} = 0b001000; + let Inst{9-5} = Rn; + let Inst{4} = signalAllNans; + let Inst{3-0} = 0b0000; +} + +multiclass FPComparison { + let Defs = [NZCV] in { + def Hrr : BaseTwoOperandFPComparison { + let Inst{23-22} = 0b11; + let Predicates = [HasFullFP16]; + } + + def Hri : BaseOneOperandFPComparison { + let Inst{23-22} = 0b11; + let Predicates = [HasFullFP16]; + } + + def Srr : BaseTwoOperandFPComparison { + let Inst{23-22} = 0b00; + } + + def Sri : BaseOneOperandFPComparison { + let Inst{23-22} = 0b00; + } + + def Drr : BaseTwoOperandFPComparison { + let Inst{23-22} = 0b01; + } + + def Dri : BaseOneOperandFPComparison { + let Inst{23-22} = 0b01; + } + } // Defs = [NZCV] +} + +//--- +// Floating point conditional comparisons +//--- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseFPCondComparison pat> + : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond), + mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "", pat>, + Sched<[WriteFCmp]> { + let Uses = [NZCV]; + let Defs = [NZCV]; + + bits<5> Rn; + bits<5> Rm; + bits<4> nzcv; + bits<4> cond; + + let Inst{31-24} = 0b00011110; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15-12} = cond; + let Inst{11-10} = 0b01; + let Inst{9-5} = Rn; + let Inst{4} = signalAllNans; + let Inst{3-0} = nzcv; +} + +multiclass FPCondComparison { + def Hrr : BaseFPCondComparison { + let Inst{23-22} = 0b11; + let Predicates = [HasFullFP16]; + } + + def Srr : BaseFPCondComparison { + let Inst{23-22} = 0b00; + } + + def Drr : BaseFPCondComparison { + let Inst{23-22} = 0b01; + } +} + +//--- +// Floating point conditional select +//--- + +class BaseFPCondSelect + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond), + asm, "\t$Rd, $Rn, $Rm, $cond", "", + [(set regtype:$Rd, + (AArch64csel (vt regtype:$Rn), regtype:$Rm, + (i32 imm:$cond), NZCV))]>, + Sched<[WriteF]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<4> cond; + + let Inst{31-24} = 0b00011110; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15-12} = cond; + let Inst{11-10} = 0b11; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + 
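+// For illustration, the only user of this format is fcsel; its definition in
+// AArch64InstrInfo.td is roughly
+//   defm FCSEL : FPCondSelect<"fcsel">;
+// (quoted from memory, not verbatim).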
+multiclass FPCondSelect { + let Uses = [NZCV] in { + def Hrrr : BaseFPCondSelect { + let Inst{23-22} = 0b11; + let Predicates = [HasFullFP16]; + } + + def Srrr : BaseFPCondSelect { + let Inst{23-22} = 0b00; + } + + def Drrr : BaseFPCondSelect { + let Inst{23-22} = 0b01; + } + } // Uses = [NZCV] +} + +//--- +// Floating move immediate +//--- + +class BaseFPMoveImmediate + : I<(outs regtype:$Rd), (ins fpimmtype:$imm), asm, "\t$Rd, $imm", "", + [(set regtype:$Rd, fpimmtype:$imm)]>, + Sched<[WriteFImm]> { + bits<5> Rd; + bits<8> imm; + let Inst{31-24} = 0b00011110; + let Inst{21} = 1; + let Inst{20-13} = imm; + let Inst{12-5} = 0b10000000; + let Inst{4-0} = Rd; +} + +multiclass FPMoveImmediate { + def Hi : BaseFPMoveImmediate { + let Inst{23-22} = 0b11; + let Predicates = [HasFullFP16]; + } + + def Si : BaseFPMoveImmediate { + let Inst{23-22} = 0b00; + } + + def Di : BaseFPMoveImmediate { + let Inst{23-22} = 0b01; + } +} +} // end of 'let Predicates = [HasFPARMv8]' + +//---------------------------------------------------------------------------- +// AdvSIMD +//---------------------------------------------------------------------------- + +let Predicates = [HasNEON] in { + +//---------------------------------------------------------------------------- +// AdvSIMD three register vector instructions +//---------------------------------------------------------------------------- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDThreeSameVector size, bits<5> opcode, + RegisterOperand regtype, string asm, string kind, + list pattern> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm, + "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # + "|" # kind # "\t$Rd, $Rn, $Rm|}", "", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-21} = size; + let Inst{20-16} = Rm; + let Inst{15-11} = opcode; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDThreeSameVectorTied size, bits<5> opcode, + RegisterOperand regtype, string asm, string kind, + list pattern> + : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), asm, + "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # + "|" # kind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-21} = size; + let Inst{20-16} = Rm; + let Inst{15-11} = opcode; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class BaseSIMDThreeSameVectorDot : + BaseSIMDThreeSameVectorTied { + let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 # "}"); +} + +multiclass SIMDThreeSameVectorDot { + def v8i8 : BaseSIMDThreeSameVectorDot<0, U, asm, ".2s", ".8b", V64, + v2i32, v8i8, OpNode>; + def v16i8 : BaseSIMDThreeSameVectorDot<1, U, asm, ".4s", ".16b", V128, + v4i32, v16i8, OpNode>; +} + +// All operand sizes distinguished in the encoding. 
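+// For instance, the plain integer vector add/sub instructions, which exist
+// for every element size up to .2d, are defined through this multiclass.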
+multiclass SIMDThreeSameVector opc, string asm, + SDPatternOperator OpNode> { + def v8i8 : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64, + asm, ".8b", + [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; + def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128, + asm, ".16b", + [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>; + def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64, + asm, ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; + def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128, + asm, ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; + def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64, + asm, ".2s", + [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; + def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128, + asm, ".4s", + [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; + def v2i64 : BaseSIMDThreeSameVector<1, U, 0b111, opc, V128, + asm, ".2d", + [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (v2i64 V128:$Rm)))]>; +} + +// As above, but D sized elements unsupported. +multiclass SIMDThreeSameVectorBHS opc, string asm, + SDPatternOperator OpNode> { + def v8i8 : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64, + asm, ".8b", + [(set V64:$Rd, (v8i8 (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm))))]>; + def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128, + asm, ".16b", + [(set V128:$Rd, (v16i8 (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm))))]>; + def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64, + asm, ".4h", + [(set V64:$Rd, (v4i16 (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm))))]>; + def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128, + asm, ".8h", + [(set V128:$Rd, (v8i16 (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm))))]>; + def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64, + asm, ".2s", + [(set V64:$Rd, (v2i32 (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm))))]>; + def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128, + asm, ".4s", + [(set V128:$Rd, (v4i32 (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm))))]>; +} + +multiclass SIMDThreeSameVectorBHSTied opc, string asm, + SDPatternOperator OpNode> { + def v8i8 : BaseSIMDThreeSameVectorTied<0, U, 0b001, opc, V64, + asm, ".8b", + [(set (v8i8 V64:$dst), + (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; + def v16i8 : BaseSIMDThreeSameVectorTied<1, U, 0b001, opc, V128, + asm, ".16b", + [(set (v16i8 V128:$dst), + (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>; + def v4i16 : BaseSIMDThreeSameVectorTied<0, U, 0b011, opc, V64, + asm, ".4h", + [(set (v4i16 V64:$dst), + (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; + def v8i16 : BaseSIMDThreeSameVectorTied<1, U, 0b011, opc, V128, + asm, ".8h", + [(set (v8i16 V128:$dst), + (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; + def v2i32 : BaseSIMDThreeSameVectorTied<0, U, 0b101, opc, V64, + asm, ".2s", + [(set (v2i32 V64:$dst), + (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; + def v4i32 : BaseSIMDThreeSameVectorTied<1, U, 0b101, opc, V128, + asm, ".4s", + [(set (v4i32 V128:$dst), + (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; +} + +// As above, but only B sized elements supported. 
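+// For instance, pmul, which only has .8b and .16b forms, is defined through
+// this byte-only multiclass.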
+multiclass SIMDThreeSameVectorB opc, string asm, + SDPatternOperator OpNode> { + def v8i8 : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64, + asm, ".8b", + [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; + def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128, + asm, ".16b", + [(set (v16i8 V128:$Rd), + (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>; +} + +// As above, but only floating point elements supported. +multiclass SIMDThreeSameVectorFP opc, + string asm, SDPatternOperator OpNode> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDThreeSameVector<0, U, {S,0b10}, {0b00,opc}, V64, + asm, ".4h", + [(set (v4f16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>; + def v8f16 : BaseSIMDThreeSameVector<1, U, {S,0b10}, {0b00,opc}, V128, + asm, ".8h", + [(set (v8f16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0b01}, {0b11,opc}, V64, + asm, ".2s", + [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>; + def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0b01}, {0b11,opc}, V128, + asm, ".4s", + [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>; + def v2f64 : BaseSIMDThreeSameVector<1, U, {S,0b11}, {0b11,opc}, V128, + asm, ".2d", + [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; +} + +multiclass SIMDThreeSameVectorFPCmp opc, + string asm, + SDPatternOperator OpNode> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDThreeSameVector<0, U, {S,0b10}, {0b00,opc}, V64, + asm, ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>; + def v8f16 : BaseSIMDThreeSameVector<1, U, {S,0b10}, {0b00,opc}, V128, + asm, ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0b01}, {0b11,opc}, V64, + asm, ".2s", + [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>; + def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0b01}, {0b11,opc}, V128, + asm, ".4s", + [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>; + def v2f64 : BaseSIMDThreeSameVector<1, U, {S,0b11}, {0b11,opc}, V128, + asm, ".2d", + [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; +} + +multiclass SIMDThreeSameVectorFPTied opc, + string asm, SDPatternOperator OpNode> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDThreeSameVectorTied<0, U, {S,0b10}, {0b00,opc}, V64, + asm, ".4h", + [(set (v4f16 V64:$dst), + (OpNode (v4f16 V64:$Rd), (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>; + def v8f16 : BaseSIMDThreeSameVectorTied<1, U, {S,0b10}, {0b00,opc}, V128, + asm, ".8h", + [(set (v8f16 V128:$dst), + (OpNode (v8f16 V128:$Rd), (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDThreeSameVectorTied<0, U, {S,0b01}, {0b11,opc}, V64, + asm, ".2s", + [(set (v2f32 V64:$dst), + (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>; + def v4f32 : BaseSIMDThreeSameVectorTied<1, U, {S,0b01}, {0b11,opc}, V128, + asm, ".4s", + [(set (v4f32 V128:$dst), + (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>; + def v2f64 : BaseSIMDThreeSameVectorTied<1, U, {S,0b11}, {0b11,opc}, V128, + asm, ".2d", + [(set (v2f64 V128:$dst), + (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; +} + +// As above, but D and B sized elements unsupported. 
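+// For instance, sqdmulh and sqrdmulh, which only provide .4h/.8h/.2s/.4s
+// forms, use this H/S-only multiclass.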
+multiclass SIMDThreeSameVectorHS opc, string asm, + SDPatternOperator OpNode> { + def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64, + asm, ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; + def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128, + asm, ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; + def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64, + asm, ".2s", + [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; + def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128, + asm, ".4s", + [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; +} + +// Logical three vector ops share opcode bits, and only use B sized elements. +multiclass SIMDLogicalThreeVector size, string asm, + SDPatternOperator OpNode = null_frag> { + def v8i8 : BaseSIMDThreeSameVector<0, U, {size,1}, 0b00011, V64, + asm, ".8b", + [(set (v8i8 V64:$Rd), (OpNode V64:$Rn, V64:$Rm))]>; + def v16i8 : BaseSIMDThreeSameVector<1, U, {size,1}, 0b00011, V128, + asm, ".16b", + [(set (v16i8 V128:$Rd), (OpNode V128:$Rn, V128:$Rm))]>; + + def : Pat<(v4i16 (OpNode V64:$LHS, V64:$RHS)), + (!cast(NAME#"v8i8") V64:$LHS, V64:$RHS)>; + def : Pat<(v2i32 (OpNode V64:$LHS, V64:$RHS)), + (!cast(NAME#"v8i8") V64:$LHS, V64:$RHS)>; + def : Pat<(v1i64 (OpNode V64:$LHS, V64:$RHS)), + (!cast(NAME#"v8i8") V64:$LHS, V64:$RHS)>; + + def : Pat<(v8i16 (OpNode V128:$LHS, V128:$RHS)), + (!cast(NAME#"v16i8") V128:$LHS, V128:$RHS)>; + def : Pat<(v4i32 (OpNode V128:$LHS, V128:$RHS)), + (!cast(NAME#"v16i8") V128:$LHS, V128:$RHS)>; + def : Pat<(v2i64 (OpNode V128:$LHS, V128:$RHS)), + (!cast(NAME#"v16i8") V128:$LHS, V128:$RHS)>; +} + +multiclass SIMDLogicalThreeVectorTied size, + string asm, SDPatternOperator OpNode> { + def v8i8 : BaseSIMDThreeSameVectorTied<0, U, {size,1}, 0b00011, V64, + asm, ".8b", + [(set (v8i8 V64:$dst), + (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; + def v16i8 : BaseSIMDThreeSameVectorTied<1, U, {size,1}, 0b00011, V128, + asm, ".16b", + [(set (v16i8 V128:$dst), + (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), + (v16i8 V128:$Rm)))]>; + + def : Pat<(v4i16 (OpNode (v4i16 V64:$LHS), (v4i16 V64:$MHS), + (v4i16 V64:$RHS))), + (!cast(NAME#"v8i8") + V64:$LHS, V64:$MHS, V64:$RHS)>; + def : Pat<(v2i32 (OpNode (v2i32 V64:$LHS), (v2i32 V64:$MHS), + (v2i32 V64:$RHS))), + (!cast(NAME#"v8i8") + V64:$LHS, V64:$MHS, V64:$RHS)>; + def : Pat<(v1i64 (OpNode (v1i64 V64:$LHS), (v1i64 V64:$MHS), + (v1i64 V64:$RHS))), + (!cast(NAME#"v8i8") + V64:$LHS, V64:$MHS, V64:$RHS)>; + + def : Pat<(v8i16 (OpNode (v8i16 V128:$LHS), (v8i16 V128:$MHS), + (v8i16 V128:$RHS))), + (!cast(NAME#"v16i8") + V128:$LHS, V128:$MHS, V128:$RHS)>; + def : Pat<(v4i32 (OpNode (v4i32 V128:$LHS), (v4i32 V128:$MHS), + (v4i32 V128:$RHS))), + (!cast(NAME#"v16i8") + V128:$LHS, V128:$MHS, V128:$RHS)>; + def : Pat<(v2i64 (OpNode (v2i64 V128:$LHS), (v2i64 V128:$MHS), + (v2i64 V128:$RHS))), + (!cast(NAME#"v16i8") + V128:$LHS, V128:$MHS, V128:$RHS)>; +} + + +//---------------------------------------------------------------------------- +// AdvSIMD two register vector instructions. 
+//---------------------------------------------------------------------------- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDTwoSameVector size, bits<5> opcode, + bits<2> size2, RegisterOperand regtype, string asm, + string dstkind, string srckind, list pattern> + : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, + "{\t$Rd" # dstkind # ", $Rn" # srckind # + "|" # dstkind # "\t$Rd, $Rn}", "", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size; + let Inst{21} = 0b1; + let Inst{20-19} = size2; + let Inst{18-17} = 0b00; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDTwoSameVectorTied size, bits<5> opcode, + bits<2> size2, RegisterOperand regtype, + string asm, string dstkind, string srckind, + list pattern> + : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn), asm, + "{\t$Rd" # dstkind # ", $Rn" # srckind # + "|" # dstkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size; + let Inst{21} = 0b1; + let Inst{20-19} = size2; + let Inst{18-17} = 0b00; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +// Supports B, H, and S element sizes. +multiclass SIMDTwoVectorBHS opc, string asm, + SDPatternOperator OpNode> { + def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64, + asm, ".8b", ".8b", + [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; + def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128, + asm, ".16b", ".16b", + [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; + def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64, + asm, ".4h", ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; + def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128, + asm, ".8h", ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; + def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64, + asm, ".2s", ".2s", + [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; + def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128, + asm, ".4s", ".4s", + [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; +} + +class BaseSIMDVectorLShiftLongBySize size, + RegisterOperand regtype, string asm, string dstkind, + string srckind, string amount> + : I<(outs V128:$Rd), (ins regtype:$Rn), asm, + "{\t$Rd" # dstkind # ", $Rn" # srckind # ", #" # amount # + "|" # dstkind # "\t$Rd, $Rn, #" # amount # "}", "", []>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29-24} = 0b101110; + let Inst{23-22} = size; + let Inst{21-10} = 0b100001001110; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SIMDVectorLShiftLongBySizeBHS { + let hasSideEffects = 0 in { + def v8i8 : BaseSIMDVectorLShiftLongBySize<0, 0b00, V64, + "shll", ".8h", ".8b", "8">; + def v16i8 : BaseSIMDVectorLShiftLongBySize<1, 0b00, V128, + "shll2", ".8h", ".16b", "8">; + def v4i16 : BaseSIMDVectorLShiftLongBySize<0, 0b01, V64, + "shll", ".4s", ".4h", "16">; + def v8i16 : BaseSIMDVectorLShiftLongBySize<1, 0b01, V128, + "shll2", ".4s", ".8h", "16">; + def v2i32 : BaseSIMDVectorLShiftLongBySize<0, 0b10, V64, + "shll", ".2d", ".2s", "32">; + def v4i32 : BaseSIMDVectorLShiftLongBySize<1, 
0b10, V128, + "shll2", ".2d", ".4s", "32">; + } +} + +// Supports all element sizes. +multiclass SIMDLongTwoVector opc, string asm, + SDPatternOperator OpNode> { + def v8i8_v4i16 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64, + asm, ".4h", ".8b", + [(set (v4i16 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; + def v16i8_v8i16 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128, + asm, ".8h", ".16b", + [(set (v8i16 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; + def v4i16_v2i32 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64, + asm, ".2s", ".4h", + [(set (v2i32 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; + def v8i16_v4i32 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128, + asm, ".4s", ".8h", + [(set (v4i32 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; + def v2i32_v1i64 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64, + asm, ".1d", ".2s", + [(set (v1i64 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; + def v4i32_v2i64 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128, + asm, ".2d", ".4s", + [(set (v2i64 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; +} + +multiclass SIMDLongTwoVectorTied opc, string asm, + SDPatternOperator OpNode> { + def v8i8_v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, 0b00, V64, + asm, ".4h", ".8b", + [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd), + (v8i8 V64:$Rn)))]>; + def v16i8_v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, 0b00, V128, + asm, ".8h", ".16b", + [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), + (v16i8 V128:$Rn)))]>; + def v4i16_v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, 0b00, V64, + asm, ".2s", ".4h", + [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), + (v4i16 V64:$Rn)))]>; + def v8i16_v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, 0b00, V128, + asm, ".4s", ".8h", + [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), + (v8i16 V128:$Rn)))]>; + def v2i32_v1i64 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, 0b00, V64, + asm, ".1d", ".2s", + [(set (v1i64 V64:$dst), (OpNode (v1i64 V64:$Rd), + (v2i32 V64:$Rn)))]>; + def v4i32_v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, 0b00, V128, + asm, ".2d", ".4s", + [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), + (v4i32 V128:$Rn)))]>; +} + +// Supports all element sizes, except 1xD. 
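A usage sketch for the lengthening two-operand multiclasses above; the opcode bits and the intrinsic chosen are illustrative placeholders:

defm SADDLP_sketch : SIMDLongTwoVector<0, 0b00010, "saddlp",
                                       int_aarch64_neon_saddlp>;
// Each vXiY_vMiN record pairs a source arrangement with its widened
// destination, so SADDLP_sketchv8i8_v4i16 is "saddlp Vd.4h, Vn.8b"; the
// Tied variant of the multiclass is for instructions that also accumulate
// into Vd.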
+multiclass SIMDTwoVectorBHSDTied opc, string asm, + SDPatternOperator OpNode> { + def v8i8 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, 0b00, V64, + asm, ".8b", ".8b", + [(set (v8i8 V64:$dst), (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn)))]>; + def v16i8 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, 0b00, V128, + asm, ".16b", ".16b", + [(set (v16i8 V128:$dst), (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn)))]>; + def v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, 0b00, V64, + asm, ".4h", ".4h", + [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn)))]>; + def v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, 0b00, V128, + asm, ".8h", ".8h", + [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn)))]>; + def v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, 0b00, V64, + asm, ".2s", ".2s", + [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn)))]>; + def v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, 0b00, V128, + asm, ".4s", ".4s", + [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn)))]>; + def v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b11, opc, 0b00, V128, + asm, ".2d", ".2d", + [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn)))]>; +} + +multiclass SIMDTwoVectorBHSD opc, string asm, + SDPatternOperator OpNode = null_frag> { + def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64, + asm, ".8b", ".8b", + [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; + def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128, + asm, ".16b", ".16b", + [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; + def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64, + asm, ".4h", ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; + def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128, + asm, ".8h", ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; + def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64, + asm, ".2s", ".2s", + [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; + def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128, + asm, ".4s", ".4s", + [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; + def v2i64 : BaseSIMDTwoSameVector<1, U, 0b11, opc, 0b00, V128, + asm, ".2d", ".2d", + [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>; +} + + +// Supports only B element sizes. +multiclass SIMDTwoVectorB size, bits<5> opc, string asm, + SDPatternOperator OpNode> { + def v8i8 : BaseSIMDTwoSameVector<0, U, size, opc, 0b00, V64, + asm, ".8b", ".8b", + [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; + def v16i8 : BaseSIMDTwoSameVector<1, U, size, opc, 0b00, V128, + asm, ".16b", ".16b", + [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; + +} + +// Supports only B and H element sizes. +multiclass SIMDTwoVectorBH opc, string asm, + SDPatternOperator OpNode> { + def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64, + asm, ".8b", ".8b", + [(set (v8i8 V64:$Rd), (OpNode V64:$Rn))]>; + def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128, + asm, ".16b", ".16b", + [(set (v16i8 V128:$Rd), (OpNode V128:$Rn))]>; + def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64, + asm, ".4h", ".4h", + [(set (v4i16 V64:$Rd), (OpNode V64:$Rn))]>; + def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128, + asm, ".8h", ".8h", + [(set (v8i16 V128:$Rd), (OpNode V128:$Rn))]>; +} + +// Supports only S and D element sizes, uses high bit of the size field +// as an extra opcode bit. 
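A usage sketch for the plain same-arrangement unary multiclass above; the opcode bits are a placeholder and the pattern operator is left at its null_frag default:

defm ABS_sketch : SIMDTwoVectorBHSD<0, 0b01011, "abs">;
// Expands to .8b/.16b/.4h/.8h/.2s/.4s/.2d variants of a unary operation of
// the form "abs Vd.4s, Vn.4s"; with OpNode left as null_frag no selection
// patterns are emitted, and Pat<> records can be attached separately.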
+multiclass SIMDTwoVectorFP opc, string asm, + SDPatternOperator OpNode> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64, + asm, ".4h", ".4h", + [(set (v4f16 V64:$Rd), (OpNode (v4f16 V64:$Rn)))]>; + def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128, + asm, ".8h", ".8h", + [(set (v8f16 V128:$Rd), (OpNode (v8f16 V128:$Rn)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64, + asm, ".2s", ".2s", + [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>; + def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128, + asm, ".4s", ".4s", + [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>; + def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128, + asm, ".2d", ".2d", + [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>; +} + +// Supports only S element size. +multiclass SIMDTwoVectorS opc, string asm, + SDPatternOperator OpNode> { + def v2i32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64, + asm, ".2s", ".2s", + [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; + def v4i32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128, + asm, ".4s", ".4s", + [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; +} + + +multiclass SIMDTwoVectorFPToInt opc, string asm, + SDPatternOperator OpNode> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64, + asm, ".4h", ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn)))]>; + def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128, + asm, ".8h", ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64, + asm, ".2s", ".2s", + [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>; + def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128, + asm, ".4s", ".4s", + [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>; + def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128, + asm, ".2d", ".2d", + [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>; +} + +multiclass SIMDTwoVectorIntToFP opc, string asm, + SDPatternOperator OpNode> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64, + asm, ".4h", ".4h", + [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; + def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128, + asm, ".8h", ".8h", + [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64, + asm, ".2s", ".2s", + [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; + def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128, + asm, ".4s", ".4s", + [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; + def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128, + asm, ".2d", ".2d", + [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>; +} + + +class BaseSIMDMixedTwoVector size, bits<5> opcode, + RegisterOperand inreg, RegisterOperand outreg, + string asm, string outkind, string inkind, + list pattern> + : I<(outs outreg:$Rd), (ins inreg:$Rn), asm, + "{\t$Rd" # outkind # ", $Rn" # inkind # + "|" # outkind # "\t$Rd, $Rn}", "", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size; + let Inst{21-17} = 0b10000; + let Inst{16-12} 
= opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class BaseSIMDMixedTwoVectorTied size, bits<5> opcode, + RegisterOperand inreg, RegisterOperand outreg, + string asm, string outkind, string inkind, + list pattern> + : I<(outs outreg:$dst), (ins outreg:$Rd, inreg:$Rn), asm, + "{\t$Rd" # outkind # ", $Rn" # inkind # + "|" # outkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size; + let Inst{21-17} = 0b10000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SIMDMixedTwoVector opc, string asm, + SDPatternOperator OpNode> { + def v8i8 : BaseSIMDMixedTwoVector<0, U, 0b00, opc, V128, V64, + asm, ".8b", ".8h", + [(set (v8i8 V64:$Rd), (OpNode (v8i16 V128:$Rn)))]>; + def v16i8 : BaseSIMDMixedTwoVectorTied<1, U, 0b00, opc, V128, V128, + asm#"2", ".16b", ".8h", []>; + def v4i16 : BaseSIMDMixedTwoVector<0, U, 0b01, opc, V128, V64, + asm, ".4h", ".4s", + [(set (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn)))]>; + def v8i16 : BaseSIMDMixedTwoVectorTied<1, U, 0b01, opc, V128, V128, + asm#"2", ".8h", ".4s", []>; + def v2i32 : BaseSIMDMixedTwoVector<0, U, 0b10, opc, V128, V64, + asm, ".2s", ".2d", + [(set (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn)))]>; + def v4i32 : BaseSIMDMixedTwoVectorTied<1, U, 0b10, opc, V128, V128, + asm#"2", ".4s", ".2d", []>; + + def : Pat<(concat_vectors (v8i8 V64:$Rd), (OpNode (v8i16 V128:$Rn))), + (!cast(NAME # "v16i8") + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; + def : Pat<(concat_vectors (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn))), + (!cast(NAME # "v8i16") + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; + def : Pat<(concat_vectors (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn))), + (!cast(NAME # "v4i32") + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; +} + +class BaseSIMDCmpTwoVector size, bits<2> size2, + bits<5> opcode, RegisterOperand regtype, string asm, + string kind, string zero, ValueType dty, + ValueType sty, SDNode OpNode> + : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, + "{\t$Rd" # kind # ", $Rn" # kind # ", #" # zero # + "|" # kind # "\t$Rd, $Rn, #" # zero # "}", "", + [(set (dty regtype:$Rd), (OpNode (sty regtype:$Rn)))]>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size; + let Inst{21} = 0b1; + let Inst{20-19} = size2; + let Inst{18-17} = 0b00; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +// Comparisons support all element sizes, except 1xD. 
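A usage sketch for the narrowing mixed two-vector multiclass above; the opcode bits are a placeholder:

defm XTN_sketch : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>;
// XTN_sketchv8i8 narrows .8h to .8b ("xtn v0.8b, v1.8h"), while the tied
// "xtn2" records only overwrite the high half of a 128-bit destination,
// which is what the concat_vectors patterns above take care of.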
+multiclass SIMDCmpTwoVector opc, string asm, + SDNode OpNode> { + def v8i8rz : BaseSIMDCmpTwoVector<0, U, 0b00, 0b00, opc, V64, + asm, ".8b", "0", + v8i8, v8i8, OpNode>; + def v16i8rz : BaseSIMDCmpTwoVector<1, U, 0b00, 0b00, opc, V128, + asm, ".16b", "0", + v16i8, v16i8, OpNode>; + def v4i16rz : BaseSIMDCmpTwoVector<0, U, 0b01, 0b00, opc, V64, + asm, ".4h", "0", + v4i16, v4i16, OpNode>; + def v8i16rz : BaseSIMDCmpTwoVector<1, U, 0b01, 0b00, opc, V128, + asm, ".8h", "0", + v8i16, v8i16, OpNode>; + def v2i32rz : BaseSIMDCmpTwoVector<0, U, 0b10, 0b00, opc, V64, + asm, ".2s", "0", + v2i32, v2i32, OpNode>; + def v4i32rz : BaseSIMDCmpTwoVector<1, U, 0b10, 0b00, opc, V128, + asm, ".4s", "0", + v4i32, v4i32, OpNode>; + def v2i64rz : BaseSIMDCmpTwoVector<1, U, 0b11, 0b00, opc, V128, + asm, ".2d", "0", + v2i64, v2i64, OpNode>; +} + +// FP Comparisons support only S and D element sizes (and H for v8.2a). +multiclass SIMDFPCmpTwoVector opc, + string asm, SDNode OpNode> { + + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16rz : BaseSIMDCmpTwoVector<0, U, {S,1}, 0b11, opc, V64, + asm, ".4h", "0.0", + v4i16, v4f16, OpNode>; + def v8i16rz : BaseSIMDCmpTwoVector<1, U, {S,1}, 0b11, opc, V128, + asm, ".8h", "0.0", + v8i16, v8f16, OpNode>; + } // Predicates = [HasNEON, HasFullFP16] + def v2i32rz : BaseSIMDCmpTwoVector<0, U, {S,0}, 0b00, opc, V64, + asm, ".2s", "0.0", + v2i32, v2f32, OpNode>; + def v4i32rz : BaseSIMDCmpTwoVector<1, U, {S,0}, 0b00, opc, V128, + asm, ".4s", "0.0", + v4i32, v4f32, OpNode>; + def v2i64rz : BaseSIMDCmpTwoVector<1, U, {S,1}, 0b00, opc, V128, + asm, ".2d", "0.0", + v2i64, v2f64, OpNode>; + + let Predicates = [HasNEON, HasFullFP16] in { + def : InstAlias(NAME # v4i16rz) V64:$Vd, V64:$Vn), 0>; + def : InstAlias(NAME # v8i16rz) V128:$Vd, V128:$Vn), 0>; + } + def : InstAlias(NAME # v2i32rz) V64:$Vd, V64:$Vn), 0>; + def : InstAlias(NAME # v4i32rz) V128:$Vd, V128:$Vn), 0>; + def : InstAlias(NAME # v2i64rz) V128:$Vd, V128:$Vn), 0>; + let Predicates = [HasNEON, HasFullFP16] in { + def : InstAlias(NAME # v4i16rz) V64:$Vd, V64:$Vn), 0>; + def : InstAlias(NAME # v8i16rz) V128:$Vd, V128:$Vn), 0>; + } + def : InstAlias(NAME # v2i32rz) V64:$Vd, V64:$Vn), 0>; + def : InstAlias(NAME # v4i32rz) V128:$Vd, V128:$Vn), 0>; + def : InstAlias(NAME # v2i64rz) V128:$Vd, V128:$Vn), 0>; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDFPCvtTwoVector size, bits<5> opcode, + RegisterOperand outtype, RegisterOperand intype, + string asm, string VdTy, string VnTy, + list pattern> + : I<(outs outtype:$Rd), (ins intype:$Rn), asm, + !strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size; + let Inst{21-17} = 0b10000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class BaseSIMDFPCvtTwoVectorTied size, bits<5> opcode, + RegisterOperand outtype, RegisterOperand intype, + string asm, string VdTy, string VnTy, + list pattern> + : I<(outs outtype:$dst), (ins outtype:$Rd, intype:$Rn), asm, + !strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "$Rd = $dst", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size; + let Inst{21-17} = 0b10000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass 
SIMDFPWidenTwoVector opc, string asm> { + def v4i16 : BaseSIMDFPCvtTwoVector<0, U, {S,0}, opc, V128, V64, + asm, ".4s", ".4h", []>; + def v8i16 : BaseSIMDFPCvtTwoVector<1, U, {S,0}, opc, V128, V128, + asm#"2", ".4s", ".8h", []>; + def v2i32 : BaseSIMDFPCvtTwoVector<0, U, {S,1}, opc, V128, V64, + asm, ".2d", ".2s", []>; + def v4i32 : BaseSIMDFPCvtTwoVector<1, U, {S,1}, opc, V128, V128, + asm#"2", ".2d", ".4s", []>; +} + +multiclass SIMDFPNarrowTwoVector opc, string asm> { + def v4i16 : BaseSIMDFPCvtTwoVector<0, U, {S,0}, opc, V64, V128, + asm, ".4h", ".4s", []>; + def v8i16 : BaseSIMDFPCvtTwoVectorTied<1, U, {S,0}, opc, V128, V128, + asm#"2", ".8h", ".4s", []>; + def v2i32 : BaseSIMDFPCvtTwoVector<0, U, {S,1}, opc, V64, V128, + asm, ".2s", ".2d", []>; + def v4i32 : BaseSIMDFPCvtTwoVectorTied<1, U, {S,1}, opc, V128, V128, + asm#"2", ".4s", ".2d", []>; +} + +multiclass SIMDFPInexactCvtTwoVector opc, string asm, + Intrinsic OpNode> { + def v2f32 : BaseSIMDFPCvtTwoVector<0, U, {S,1}, opc, V64, V128, + asm, ".2s", ".2d", + [(set (v2f32 V64:$Rd), (OpNode (v2f64 V128:$Rn)))]>; + def v4f32 : BaseSIMDFPCvtTwoVectorTied<1, U, {S,1}, opc, V128, V128, + asm#"2", ".4s", ".2d", []>; + + def : Pat<(concat_vectors (v2f32 V64:$Rd), (OpNode (v2f64 V128:$Rn))), + (!cast(NAME # "v4f32") + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; +} + +//---------------------------------------------------------------------------- +// AdvSIMD three register different-size vector instructions. +//---------------------------------------------------------------------------- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDDifferentThreeVector size, bits<4> opcode, + RegisterOperand outtype, RegisterOperand intype1, + RegisterOperand intype2, string asm, + string outkind, string inkind1, string inkind2, + list pattern> + : I<(outs outtype:$Rd), (ins intype1:$Rn, intype2:$Rm), asm, + "{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 # + "|" # outkind # "\t$Rd, $Rn, $Rm}", "", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31} = 0; + let Inst{30} = size{0}; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size{2-1}; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15-12} = opcode; + let Inst{11-10} = 0b00; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDDifferentThreeVectorTied size, bits<4> opcode, + RegisterOperand outtype, RegisterOperand intype1, + RegisterOperand intype2, string asm, + string outkind, string inkind1, string inkind2, + list pattern> + : I<(outs outtype:$dst), (ins outtype:$Rd, intype1:$Rn, intype2:$Rm), asm, + "{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 # + "|" # outkind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31} = 0; + let Inst{30} = size{0}; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size{2-1}; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15-12} = opcode; + let Inst{11-10} = 0b00; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +// FIXME: TableGen doesn't know how to deal with expanded types that also +// change the element count (in this case, placing the results in +// the high elements of the result register rather than the low +// elements). Until that's fixed, we can't code-gen those. 
+multiclass SIMDNarrowThreeVectorBHS opc, string asm, + Intrinsic IntOp> { + def v8i16_v8i8 : BaseSIMDDifferentThreeVector; + def v8i16_v16i8 : BaseSIMDDifferentThreeVectorTied; + def v4i32_v4i16 : BaseSIMDDifferentThreeVector; + def v4i32_v8i16 : BaseSIMDDifferentThreeVectorTied; + def v2i64_v2i32 : BaseSIMDDifferentThreeVector; + def v2i64_v4i32 : BaseSIMDDifferentThreeVectorTied; + + + // Patterns for the '2' variants involve INSERT_SUBREG, which you can't put in + // a version attached to an instruction. + def : Pat<(concat_vectors (v8i8 V64:$Rd), (IntOp (v8i16 V128:$Rn), + (v8i16 V128:$Rm))), + (!cast(NAME # "v8i16_v16i8") + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), + V128:$Rn, V128:$Rm)>; + def : Pat<(concat_vectors (v4i16 V64:$Rd), (IntOp (v4i32 V128:$Rn), + (v4i32 V128:$Rm))), + (!cast(NAME # "v4i32_v8i16") + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), + V128:$Rn, V128:$Rm)>; + def : Pat<(concat_vectors (v2i32 V64:$Rd), (IntOp (v2i64 V128:$Rn), + (v2i64 V128:$Rm))), + (!cast(NAME # "v2i64_v4i32") + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), + V128:$Rn, V128:$Rm)>; +} + +multiclass SIMDDifferentThreeVectorBD opc, string asm, + Intrinsic IntOp> { + def v8i8 : BaseSIMDDifferentThreeVector; + def v16i8 : BaseSIMDDifferentThreeVector; + let Predicates = [HasAES] in { + def v1i64 : BaseSIMDDifferentThreeVector; + def v2i64 : BaseSIMDDifferentThreeVector; + } + + def : Pat<(v8i16 (IntOp (v8i8 (extract_high_v16i8 V128:$Rn)), + (v8i8 (extract_high_v16i8 V128:$Rm)))), + (!cast(NAME#"v16i8") V128:$Rn, V128:$Rm)>; +} + +multiclass SIMDLongThreeVectorHS opc, string asm, + SDPatternOperator OpNode> { + def v4i16_v4i32 : BaseSIMDDifferentThreeVector; + def v8i16_v4i32 : BaseSIMDDifferentThreeVector; + def v2i32_v2i64 : BaseSIMDDifferentThreeVector; + def v4i32_v2i64 : BaseSIMDDifferentThreeVector; +} + +multiclass SIMDLongThreeVectorBHSabdl opc, string asm, + SDPatternOperator OpNode = null_frag> { + def v8i8_v8i16 : BaseSIMDDifferentThreeVector; + def v16i8_v8i16 : BaseSIMDDifferentThreeVector; + def v4i16_v4i32 : BaseSIMDDifferentThreeVector; + def v8i16_v4i32 : BaseSIMDDifferentThreeVector; + def v2i32_v2i64 : BaseSIMDDifferentThreeVector; + def v4i32_v2i64 : BaseSIMDDifferentThreeVector; +} + +multiclass SIMDLongThreeVectorTiedBHSabal opc, + string asm, + SDPatternOperator OpNode> { + def v8i8_v8i16 : BaseSIMDDifferentThreeVectorTied; + def v16i8_v8i16 : BaseSIMDDifferentThreeVectorTied; + def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied; + def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied; + def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied; + def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied; +} + +multiclass SIMDLongThreeVectorBHS opc, string asm, + SDPatternOperator OpNode = null_frag> { + def v8i8_v8i16 : BaseSIMDDifferentThreeVector; + def v16i8_v8i16 : BaseSIMDDifferentThreeVector; + def v4i16_v4i32 : BaseSIMDDifferentThreeVector; + def v8i16_v4i32 : BaseSIMDDifferentThreeVector; + def v2i32_v2i64 : BaseSIMDDifferentThreeVector; + def v4i32_v2i64 : BaseSIMDDifferentThreeVector; +} + +multiclass SIMDLongThreeVectorTiedBHS opc, + string asm, + SDPatternOperator OpNode> { + def v8i8_v8i16 : BaseSIMDDifferentThreeVectorTied; + def v16i8_v8i16 : BaseSIMDDifferentThreeVectorTied; + def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied; + def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied; + def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied; + def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied; +} + +multiclass SIMDLongThreeVectorSQDMLXTiedHS opc, string asm, + 
SDPatternOperator Accum> { + def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied; + def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied; + def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied; + def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied; +} + +multiclass SIMDWideThreeVectorBHS opc, string asm, + SDPatternOperator OpNode> { + def v8i8_v8i16 : BaseSIMDDifferentThreeVector; + def v16i8_v8i16 : BaseSIMDDifferentThreeVector; + def v4i16_v4i32 : BaseSIMDDifferentThreeVector; + def v8i16_v4i32 : BaseSIMDDifferentThreeVector; + def v2i32_v2i64 : BaseSIMDDifferentThreeVector; + def v4i32_v2i64 : BaseSIMDDifferentThreeVector; +} + +//---------------------------------------------------------------------------- +// AdvSIMD bitwise extract from vector +//---------------------------------------------------------------------------- + +class BaseSIMDBitwiseExtract + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, i32imm:$imm), asm, + "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $imm" # + "|" # kind # "\t$Rd, $Rn, $Rm, $imm}", "", + [(set (vty regtype:$Rd), + (AArch64ext regtype:$Rn, regtype:$Rm, (i32 imm:$imm)))]>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<4> imm; + let Inst{31} = 0; + let Inst{30} = size; + let Inst{29-21} = 0b101110000; + let Inst{20-16} = Rm; + let Inst{15} = 0; + let Inst{14-11} = imm; + let Inst{10} = 0; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + + +multiclass SIMDBitwiseExtract { + def v8i8 : BaseSIMDBitwiseExtract<0, V64, v8i8, asm, ".8b"> { + let imm{3} = 0; + } + def v16i8 : BaseSIMDBitwiseExtract<1, V128, v16i8, asm, ".16b">; +} + +//---------------------------------------------------------------------------- +// AdvSIMD zip vector +//---------------------------------------------------------------------------- + +class BaseSIMDZipVector size, bits<3> opc, RegisterOperand regtype, + string asm, string kind, SDNode OpNode, ValueType valty> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm, + "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # + "|" # kind # "\t$Rd, $Rn, $Rm}", "", + [(set (valty regtype:$Rd), (OpNode regtype:$Rn, regtype:$Rm))]>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31} = 0; + let Inst{30} = size{0}; + let Inst{29-24} = 0b001110; + let Inst{23-22} = size{2-1}; + let Inst{21} = 0; + let Inst{20-16} = Rm; + let Inst{15} = 0; + let Inst{14-12} = opc; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SIMDZipVectoropc, string asm, + SDNode OpNode> { + def v8i8 : BaseSIMDZipVector<0b000, opc, V64, + asm, ".8b", OpNode, v8i8>; + def v16i8 : BaseSIMDZipVector<0b001, opc, V128, + asm, ".16b", OpNode, v16i8>; + def v4i16 : BaseSIMDZipVector<0b010, opc, V64, + asm, ".4h", OpNode, v4i16>; + def v8i16 : BaseSIMDZipVector<0b011, opc, V128, + asm, ".8h", OpNode, v8i16>; + def v2i32 : BaseSIMDZipVector<0b100, opc, V64, + asm, ".2s", OpNode, v2i32>; + def v4i32 : BaseSIMDZipVector<0b101, opc, V128, + asm, ".4s", OpNode, v4i32>; + def v2i64 : BaseSIMDZipVector<0b111, opc, V128, + asm, ".2d", OpNode, v2i64>; + + def : Pat<(v4f16 (OpNode V64:$Rn, V64:$Rm)), + (!cast(NAME#"v4i16") V64:$Rn, V64:$Rm)>; + def : Pat<(v8f16 (OpNode V128:$Rn, V128:$Rm)), + (!cast(NAME#"v8i16") V128:$Rn, V128:$Rm)>; + def : Pat<(v2f32 (OpNode V64:$Rn, V64:$Rm)), + (!cast(NAME#"v2i32") V64:$Rn, V64:$Rm)>; + def : Pat<(v4f32 (OpNode V128:$Rn, V128:$Rm)), + (!cast(NAME#"v4i32") V128:$Rn, V128:$Rm)>; + def : Pat<(v2f64 (OpNode V128:$Rn, V128:$Rm)), + (!cast(NAME#"v2i64") 
V128:$Rn, V128:$Rm)>; +} + +//---------------------------------------------------------------------------- +// AdvSIMD three register scalar instructions +//---------------------------------------------------------------------------- + +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseSIMDThreeScalar size, bits<5> opcode, + RegisterClass regtype, string asm, + list pattern> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm, + "\t$Rd, $Rn, $Rm", "", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-24} = 0b11110; + let Inst{23-21} = size; + let Inst{20-16} = Rm; + let Inst{15-11} = opcode; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseSIMDThreeScalarTied size, bit R, bits<5> opcode, + dag oops, dag iops, string asm, + list pattern> + : I, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-24} = 0b11110; + let Inst{23-22} = size; + let Inst{21} = R; + let Inst{20-16} = Rm; + let Inst{15-11} = opcode; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SIMDThreeScalarD opc, string asm, + SDPatternOperator OpNode> { + def v1i64 : BaseSIMDThreeScalar; +} + +multiclass SIMDThreeScalarBHSD opc, string asm, + SDPatternOperator OpNode> { + def v1i64 : BaseSIMDThreeScalar; + def v1i32 : BaseSIMDThreeScalar; + def v1i16 : BaseSIMDThreeScalar; + def v1i8 : BaseSIMDThreeScalar; + + def : Pat<(i64 (OpNode (i64 FPR64:$Rn), (i64 FPR64:$Rm))), + (!cast(NAME#"v1i64") FPR64:$Rn, FPR64:$Rm)>; + def : Pat<(i32 (OpNode (i32 FPR32:$Rn), (i32 FPR32:$Rm))), + (!cast(NAME#"v1i32") FPR32:$Rn, FPR32:$Rm)>; +} + +multiclass SIMDThreeScalarHS opc, string asm, + SDPatternOperator OpNode> { + def v1i32 : BaseSIMDThreeScalar; + def v1i16 : BaseSIMDThreeScalar; +} + +multiclass SIMDThreeScalarHSTied opc, string asm, + SDPatternOperator OpNode = null_frag> { + def v1i32: BaseSIMDThreeScalarTied; + def v1i16: BaseSIMDThreeScalarTied; +} + +multiclass SIMDFPThreeScalar opc, string asm, + SDPatternOperator OpNode = null_frag> { + let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { + def #NAME#64 : BaseSIMDThreeScalar; + def #NAME#32 : BaseSIMDThreeScalar; + let Predicates = [HasNEON, HasFullFP16] in { + def #NAME#16 : BaseSIMDThreeScalar; + } // Predicates = [HasNEON, HasFullFP16] + } + + def : Pat<(v1f64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), + (!cast(NAME # "64") FPR64:$Rn, FPR64:$Rm)>; +} + +multiclass SIMDThreeScalarFPCmp opc, string asm, + SDPatternOperator OpNode = null_frag> { + let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { + def #NAME#64 : BaseSIMDThreeScalar; + def #NAME#32 : BaseSIMDThreeScalar; + let Predicates = [HasNEON, HasFullFP16] in { + def #NAME#16 : BaseSIMDThreeScalar; + } // Predicates = [HasNEON, HasFullFP16] + } + + def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), + (!cast(NAME # "64") FPR64:$Rn, FPR64:$Rm)>; +} + +class BaseSIMDThreeScalarMixed size, bits<5> opcode, + dag oops, dag iops, string asm, string cstr, list pat> + : I, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-24} = 0b11110; + let Inst{23-22} = size; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15-11} = opcode; + let Inst{10} = 0; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 
0 in +multiclass SIMDThreeScalarMixedHS opc, string asm, + SDPatternOperator OpNode = null_frag> { + def i16 : BaseSIMDThreeScalarMixed; + def i32 : BaseSIMDThreeScalarMixed; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +multiclass SIMDThreeScalarMixedTiedHS opc, string asm, + SDPatternOperator OpNode = null_frag> { + def i16 : BaseSIMDThreeScalarMixed; + def i32 : BaseSIMDThreeScalarMixed; +} + +//---------------------------------------------------------------------------- +// AdvSIMD two register scalar instructions +//---------------------------------------------------------------------------- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDTwoScalar size, bits<2> size2, bits<5> opcode, + RegisterClass regtype, RegisterClass regtype2, + string asm, list pat> + : I<(outs regtype:$Rd), (ins regtype2:$Rn), asm, + "\t$Rd, $Rn", "", pat>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-24} = 0b11110; + let Inst{23-22} = size; + let Inst{21} = 0b1; + let Inst{20-19} = size2; + let Inst{18-17} = 0b00; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDTwoScalarTied size, bits<5> opcode, + RegisterClass regtype, RegisterClass regtype2, + string asm, list pat> + : I<(outs regtype:$dst), (ins regtype:$Rd, regtype2:$Rn), asm, + "\t$Rd, $Rn", "$Rd = $dst", pat>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-24} = 0b11110; + let Inst{23-22} = size; + let Inst{21-17} = 0b10000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDCmpTwoScalar size, bits<2> size2, bits<5> opcode, + RegisterClass regtype, string asm, string zero> + : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, + "\t$Rd, $Rn, #" # zero, "", []>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-24} = 0b11110; + let Inst{23-22} = size; + let Inst{21} = 0b1; + let Inst{20-19} = size2; + let Inst{18-17} = 0b00; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class SIMDInexactCvtTwoScalar opcode, string asm> + : I<(outs FPR32:$Rd), (ins FPR64:$Rn), asm, "\t$Rd, $Rn", "", + [(set (f32 FPR32:$Rd), (int_aarch64_sisd_fcvtxn (f64 FPR64:$Rn)))]>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31-17} = 0b011111100110000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SIMDCmpTwoScalarD opc, string asm, + SDPatternOperator OpNode> { + def v1i64rz : BaseSIMDCmpTwoScalar; + + def : Pat<(v1i64 (OpNode FPR64:$Rn)), + (!cast(NAME # v1i64rz) FPR64:$Rn)>; +} + +multiclass SIMDFPCmpTwoScalar opc, string asm, + SDPatternOperator OpNode> { + def v1i64rz : BaseSIMDCmpTwoScalar; + def v1i32rz : BaseSIMDCmpTwoScalar; + let Predicates = [HasNEON, HasFullFP16] in { + def v1i16rz : BaseSIMDCmpTwoScalar; + } + + def : InstAlias(NAME # v1i64rz) FPR64:$Rd, FPR64:$Rn), 0>; + def : InstAlias(NAME # v1i32rz) FPR32:$Rd, FPR32:$Rn), 0>; + let Predicates = [HasNEON, HasFullFP16] in { + def : InstAlias(NAME # v1i16rz) FPR16:$Rd, FPR16:$Rn), 0>; + } + + def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn))), + (!cast(NAME # v1i64rz) FPR64:$Rn)>; +} + +multiclass SIMDTwoScalarD opc, string asm, + 
SDPatternOperator OpNode = null_frag> { + def v1i64 : BaseSIMDTwoScalar; + + def : Pat<(i64 (OpNode (i64 FPR64:$Rn))), + (!cast(NAME # "v1i64") FPR64:$Rn)>; +} + +multiclass SIMDFPTwoScalar opc, string asm> { + def v1i64 : BaseSIMDTwoScalar; + def v1i32 : BaseSIMDTwoScalar; + let Predicates = [HasNEON, HasFullFP16] in { + def v1f16 : BaseSIMDTwoScalar; + } +} + +multiclass SIMDFPTwoScalarCVT opc, string asm, + SDPatternOperator OpNode> { + def v1i64 : BaseSIMDTwoScalar; + def v1i32 : BaseSIMDTwoScalar; + let Predicates = [HasNEON, HasFullFP16] in { + def v1i16 : BaseSIMDTwoScalar; + } +} + +multiclass SIMDTwoScalarBHSD opc, string asm, + SDPatternOperator OpNode = null_frag> { + let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { + def v1i64 : BaseSIMDTwoScalar; + def v1i32 : BaseSIMDTwoScalar; + def v1i16 : BaseSIMDTwoScalar; + def v1i8 : BaseSIMDTwoScalar; + } + + def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn))), + (!cast(NAME # v1i64) FPR64:$Rn)>; +} + +multiclass SIMDTwoScalarBHSDTied opc, string asm, + Intrinsic OpNode> { + let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { + def v1i64 : BaseSIMDTwoScalarTied; + def v1i32 : BaseSIMDTwoScalarTied; + def v1i16 : BaseSIMDTwoScalarTied; + def v1i8 : BaseSIMDTwoScalarTied; + } + + def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn))), + (!cast(NAME # v1i64) FPR64:$Rd, FPR64:$Rn)>; +} + + + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +multiclass SIMDTwoScalarMixedBHS opc, string asm, + SDPatternOperator OpNode = null_frag> { + def v1i32 : BaseSIMDTwoScalar; + def v1i16 : BaseSIMDTwoScalar; + def v1i8 : BaseSIMDTwoScalar; +} + +//---------------------------------------------------------------------------- +// AdvSIMD scalar pairwise instructions +//---------------------------------------------------------------------------- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDPairwiseScalar size, bits<5> opcode, + RegisterOperand regtype, RegisterOperand vectype, + string asm, string kind> + : I<(outs regtype:$Rd), (ins vectype:$Rn), asm, + "{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", []>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-24} = 0b11110; + let Inst{23-22} = size; + let Inst{21-17} = 0b11000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SIMDPairwiseScalarD opc, string asm> { + def v2i64p : BaseSIMDPairwiseScalar; +} + +multiclass SIMDFPPairwiseScalar opc, string asm> { + let Predicates = [HasNEON, HasFullFP16] in { + def v2i16p : BaseSIMDPairwiseScalar<0, {S,0}, opc, FPR16Op, V64, + asm, ".2h">; + } + def v2i32p : BaseSIMDPairwiseScalar<1, {S,0}, opc, FPR32Op, V64, + asm, ".2s">; + def v2i64p : BaseSIMDPairwiseScalar<1, {S,1}, opc, FPR64Op, V128, + asm, ".2d">; +} + +//---------------------------------------------------------------------------- +// AdvSIMD across lanes instructions +//---------------------------------------------------------------------------- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDAcrossLanes size, bits<5> opcode, + RegisterClass regtype, RegisterOperand vectype, + string asm, string kind, list pattern> + : I<(outs regtype:$Rd), (ins vectype:$Rn), asm, + "{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size; + let Inst{21-17} = 
0b11000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SIMDAcrossLanesBHS opcode, + string asm> { + def v8i8v : BaseSIMDAcrossLanes<0, U, 0b00, opcode, FPR8, V64, + asm, ".8b", []>; + def v16i8v : BaseSIMDAcrossLanes<1, U, 0b00, opcode, FPR8, V128, + asm, ".16b", []>; + def v4i16v : BaseSIMDAcrossLanes<0, U, 0b01, opcode, FPR16, V64, + asm, ".4h", []>; + def v8i16v : BaseSIMDAcrossLanes<1, U, 0b01, opcode, FPR16, V128, + asm, ".8h", []>; + def v4i32v : BaseSIMDAcrossLanes<1, U, 0b10, opcode, FPR32, V128, + asm, ".4s", []>; +} + +multiclass SIMDAcrossLanesHSD opcode, string asm> { + def v8i8v : BaseSIMDAcrossLanes<0, U, 0b00, opcode, FPR16, V64, + asm, ".8b", []>; + def v16i8v : BaseSIMDAcrossLanes<1, U, 0b00, opcode, FPR16, V128, + asm, ".16b", []>; + def v4i16v : BaseSIMDAcrossLanes<0, U, 0b01, opcode, FPR32, V64, + asm, ".4h", []>; + def v8i16v : BaseSIMDAcrossLanes<1, U, 0b01, opcode, FPR32, V128, + asm, ".8h", []>; + def v4i32v : BaseSIMDAcrossLanes<1, U, 0b10, opcode, FPR64, V128, + asm, ".4s", []>; +} + +multiclass SIMDFPAcrossLanes opcode, bit sz1, string asm, + Intrinsic intOp> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16v : BaseSIMDAcrossLanes<0, 0, {sz1, 0}, opcode, FPR16, V64, + asm, ".4h", + [(set FPR16:$Rd, (intOp (v4f16 V64:$Rn)))]>; + def v8i16v : BaseSIMDAcrossLanes<1, 0, {sz1, 0}, opcode, FPR16, V128, + asm, ".8h", + [(set FPR16:$Rd, (intOp (v8f16 V128:$Rn)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v4i32v : BaseSIMDAcrossLanes<1, 1, {sz1, 0}, opcode, FPR32, V128, + asm, ".4s", + [(set FPR32:$Rd, (intOp (v4f32 V128:$Rn)))]>; +} + +//---------------------------------------------------------------------------- +// AdvSIMD INS/DUP instructions +//---------------------------------------------------------------------------- + +// FIXME: There has got to be a better way to factor these. ugh. 
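Before the INS/DUP definitions, a brief usage sketch for the across-lanes reduction multiclasses just defined; the opcode bits are a placeholder:

defm ADDV_sketch : SIMDAcrossLanesBHS<0, 0b11011, "addv">;
// An across-lanes reduction reads a whole vector and writes a single scalar
// register whose class matches the element size, e.g. "addv h0, v1.8h"
// produces an FPR16 result for the .8h form.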
+ +class BaseSIMDInsDup pattern> + : I, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = op; + let Inst{28-21} = 0b01110000; + let Inst{15} = 0; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class SIMDDupFromMain imm5, string size, ValueType vectype, + RegisterOperand vecreg, RegisterClass regtype> + : BaseSIMDInsDup { + let Inst{20-16} = imm5; + let Inst{14-11} = 0b0001; +} + +class SIMDDupFromElement + : BaseSIMDInsDup { + let Inst{14-11} = 0b0000; +} + +class SIMDDup64FromElement + : SIMDDupFromElement<1, ".2d", ".d", v2i64, v2i64, V128, + VectorIndexD, i64, AArch64duplane64> { + bits<1> idx; + let Inst{20} = idx; + let Inst{19-16} = 0b1000; +} + +class SIMDDup32FromElement + : SIMDDupFromElement { + bits<2> idx; + let Inst{20-19} = idx; + let Inst{18-16} = 0b100; +} + +class SIMDDup16FromElement + : SIMDDupFromElement { + bits<3> idx; + let Inst{20-18} = idx; + let Inst{17-16} = 0b10; +} + +class SIMDDup8FromElement + : SIMDDupFromElement { + bits<4> idx; + let Inst{20-17} = idx; + let Inst{16} = 1; +} + +class BaseSIMDMov imm4, RegisterClass regtype, + Operand idxtype, string asm, list pattern> + : BaseSIMDInsDup { + let Inst{14-11} = imm4; +} + +class SIMDSMov + : BaseSIMDMov; +class SIMDUMov + : BaseSIMDMov; + +class SIMDMovAlias + : InstAlias; + +multiclass SMov { + def vi8to32 : SIMDSMov<0, ".b", GPR32, VectorIndexB> { + bits<4> idx; + let Inst{20-17} = idx; + let Inst{16} = 1; + } + def vi8to64 : SIMDSMov<1, ".b", GPR64, VectorIndexB> { + bits<4> idx; + let Inst{20-17} = idx; + let Inst{16} = 1; + } + def vi16to32 : SIMDSMov<0, ".h", GPR32, VectorIndexH> { + bits<3> idx; + let Inst{20-18} = idx; + let Inst{17-16} = 0b10; + } + def vi16to64 : SIMDSMov<1, ".h", GPR64, VectorIndexH> { + bits<3> idx; + let Inst{20-18} = idx; + let Inst{17-16} = 0b10; + } + def vi32to64 : SIMDSMov<1, ".s", GPR64, VectorIndexS> { + bits<2> idx; + let Inst{20-19} = idx; + let Inst{18-16} = 0b100; + } +} + +multiclass UMov { + def vi8 : SIMDUMov<0, ".b", v16i8, GPR32, VectorIndexB> { + bits<4> idx; + let Inst{20-17} = idx; + let Inst{16} = 1; + } + def vi16 : SIMDUMov<0, ".h", v8i16, GPR32, VectorIndexH> { + bits<3> idx; + let Inst{20-18} = idx; + let Inst{17-16} = 0b10; + } + def vi32 : SIMDUMov<0, ".s", v4i32, GPR32, VectorIndexS> { + bits<2> idx; + let Inst{20-19} = idx; + let Inst{18-16} = 0b100; + } + def vi64 : SIMDUMov<1, ".d", v2i64, GPR64, VectorIndexD> { + bits<1> idx; + let Inst{20} = idx; + let Inst{19-16} = 0b1000; + } + def : SIMDMovAlias<"mov", ".s", + !cast(NAME#"vi32"), + GPR32, VectorIndexS>; + def : SIMDMovAlias<"mov", ".d", + !cast(NAME#"vi64"), + GPR64, VectorIndexD>; +} + +class SIMDInsFromMain + : BaseSIMDInsDup<1, 0, (outs V128:$dst), + (ins V128:$Rd, idxtype:$idx, regtype:$Rn), "ins", + "{\t$Rd" # size # "$idx, $Rn" # + "|" # size # "\t$Rd$idx, $Rn}", + "$Rd = $dst", + [(set V128:$dst, + (vector_insert (vectype V128:$Rd), regtype:$Rn, idxtype:$idx))]> { + let Inst{14-11} = 0b0011; +} + +class SIMDInsFromElement + : BaseSIMDInsDup<1, 1, (outs V128:$dst), + (ins V128:$Rd, idxtype:$idx, V128:$Rn, idxtype:$idx2), "ins", + "{\t$Rd" # size # "$idx, $Rn" # size # "$idx2" # + "|" # size # "\t$Rd$idx, $Rn$idx2}", + "$Rd = $dst", + [(set V128:$dst, + (vector_insert + (vectype V128:$Rd), + (elttype (vector_extract (vectype V128:$Rn), idxtype:$idx2)), + idxtype:$idx))]>; + +class SIMDInsMainMovAlias + : InstAlias<"mov" # "{\t$dst" # size # "$idx, $src" # + "|" # size #"\t$dst$idx, $src}", + (inst V128:$dst, 
idxtype:$idx, regtype:$src)>; +class SIMDInsElementMovAlias + : InstAlias<"mov" # "{\t$dst" # size # "$idx, $src" # size # "$idx2" # + # "|" # size #"\t$dst$idx, $src$idx2}", + (inst V128:$dst, idxtype:$idx, V128:$src, idxtype:$idx2)>; + + +multiclass SIMDIns { + def vi8gpr : SIMDInsFromMain<".b", v16i8, GPR32, VectorIndexB> { + bits<4> idx; + let Inst{20-17} = idx; + let Inst{16} = 1; + } + def vi16gpr : SIMDInsFromMain<".h", v8i16, GPR32, VectorIndexH> { + bits<3> idx; + let Inst{20-18} = idx; + let Inst{17-16} = 0b10; + } + def vi32gpr : SIMDInsFromMain<".s", v4i32, GPR32, VectorIndexS> { + bits<2> idx; + let Inst{20-19} = idx; + let Inst{18-16} = 0b100; + } + def vi64gpr : SIMDInsFromMain<".d", v2i64, GPR64, VectorIndexD> { + bits<1> idx; + let Inst{20} = idx; + let Inst{19-16} = 0b1000; + } + + def vi8lane : SIMDInsFromElement<".b", v16i8, i32, VectorIndexB> { + bits<4> idx; + bits<4> idx2; + let Inst{20-17} = idx; + let Inst{16} = 1; + let Inst{14-11} = idx2; + } + def vi16lane : SIMDInsFromElement<".h", v8i16, i32, VectorIndexH> { + bits<3> idx; + bits<3> idx2; + let Inst{20-18} = idx; + let Inst{17-16} = 0b10; + let Inst{14-12} = idx2; + let Inst{11} = {?}; + } + def vi32lane : SIMDInsFromElement<".s", v4i32, i32, VectorIndexS> { + bits<2> idx; + bits<2> idx2; + let Inst{20-19} = idx; + let Inst{18-16} = 0b100; + let Inst{14-13} = idx2; + let Inst{12-11} = {?,?}; + } + def vi64lane : SIMDInsFromElement<".d", v2i64, i64, VectorIndexD> { + bits<1> idx; + bits<1> idx2; + let Inst{20} = idx; + let Inst{19-16} = 0b1000; + let Inst{14} = idx2; + let Inst{13-11} = {?,?,?}; + } + + // For all forms of the INS instruction, the "mov" mnemonic is the + // preferred alias. Why they didn't just call the instruction "mov" in + // the first place is a very good question indeed... 
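+  // Worked encoding example (editorial sketch, derived from the bit
+  // assignments in the defs above): the element size is encoded by the
+  // position of the lowest set bit of imm5 and the lane index sits in the
+  // bits above it. For "ins v0.s[2], w1", the S element sets
+  // Inst{18-16} = 0b100 and idx = 2 gives Inst{20-19} = 0b10, so
+  // imm5 = 0b10100.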
+ def : SIMDInsMainMovAlias<".b", !cast(NAME#"vi8gpr"), + GPR32, VectorIndexB>; + def : SIMDInsMainMovAlias<".h", !cast(NAME#"vi16gpr"), + GPR32, VectorIndexH>; + def : SIMDInsMainMovAlias<".s", !cast(NAME#"vi32gpr"), + GPR32, VectorIndexS>; + def : SIMDInsMainMovAlias<".d", !cast(NAME#"vi64gpr"), + GPR64, VectorIndexD>; + + def : SIMDInsElementMovAlias<".b", !cast(NAME#"vi8lane"), + VectorIndexB>; + def : SIMDInsElementMovAlias<".h", !cast(NAME#"vi16lane"), + VectorIndexH>; + def : SIMDInsElementMovAlias<".s", !cast(NAME#"vi32lane"), + VectorIndexS>; + def : SIMDInsElementMovAlias<".d", !cast(NAME#"vi64lane"), + VectorIndexD>; +} + +//---------------------------------------------------------------------------- +// AdvSIMD TBL/TBX +//---------------------------------------------------------------------------- + +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseSIMDTableLookup len, bit op, RegisterOperand vectype, + RegisterOperand listtype, string asm, string kind> + : I<(outs vectype:$Vd), (ins listtype:$Vn, vectype:$Vm), asm, + "\t$Vd" # kind # ", $Vn, $Vm" # kind, "", []>, + Sched<[WriteV]> { + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29-21} = 0b001110000; + let Inst{20-16} = Vm; + let Inst{15} = 0; + let Inst{14-13} = len; + let Inst{12} = op; + let Inst{11-10} = 0b00; + let Inst{9-5} = Vn; + let Inst{4-0} = Vd; +} + +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseSIMDTableLookupTied len, bit op, RegisterOperand vectype, + RegisterOperand listtype, string asm, string kind> + : I<(outs vectype:$dst), (ins vectype:$Vd, listtype:$Vn, vectype:$Vm), asm, + "\t$Vd" # kind # ", $Vn, $Vm" # kind, "$Vd = $dst", []>, + Sched<[WriteV]> { + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29-21} = 0b001110000; + let Inst{20-16} = Vm; + let Inst{15} = 0; + let Inst{14-13} = len; + let Inst{12} = op; + let Inst{11-10} = 0b00; + let Inst{9-5} = Vn; + let Inst{4-0} = Vd; +} + +class SIMDTableLookupAlias + : InstAlias; + +multiclass SIMDTableLookup { + def v8i8One : BaseSIMDTableLookup<0, 0b00, op, V64, VecListOne16b, + asm, ".8b">; + def v8i8Two : BaseSIMDTableLookup<0, 0b01, op, V64, VecListTwo16b, + asm, ".8b">; + def v8i8Three : BaseSIMDTableLookup<0, 0b10, op, V64, VecListThree16b, + asm, ".8b">; + def v8i8Four : BaseSIMDTableLookup<0, 0b11, op, V64, VecListFour16b, + asm, ".8b">; + def v16i8One : BaseSIMDTableLookup<1, 0b00, op, V128, VecListOne16b, + asm, ".16b">; + def v16i8Two : BaseSIMDTableLookup<1, 0b01, op, V128, VecListTwo16b, + asm, ".16b">; + def v16i8Three: BaseSIMDTableLookup<1, 0b10, op, V128, VecListThree16b, + asm, ".16b">; + def v16i8Four : BaseSIMDTableLookup<1, 0b11, op, V128, VecListFour16b, + asm, ".16b">; + + def : SIMDTableLookupAlias(NAME#"v8i8One"), + V64, VecListOne128>; + def : SIMDTableLookupAlias(NAME#"v8i8Two"), + V64, VecListTwo128>; + def : SIMDTableLookupAlias(NAME#"v8i8Three"), + V64, VecListThree128>; + def : SIMDTableLookupAlias(NAME#"v8i8Four"), + V64, VecListFour128>; + def : SIMDTableLookupAlias(NAME#"v16i8One"), + V128, VecListOne128>; + def : SIMDTableLookupAlias(NAME#"v16i8Two"), + V128, VecListTwo128>; + def : SIMDTableLookupAlias(NAME#"v16i8Three"), + V128, VecListThree128>; + def : SIMDTableLookupAlias(NAME#"v16i8Four"), + V128, VecListFour128>; +} + +multiclass SIMDTableLookupTied { + def v8i8One : BaseSIMDTableLookupTied<0, 0b00, op, V64, VecListOne16b, + asm, ".8b">; + def v8i8Two : BaseSIMDTableLookupTied<0, 0b01, op, V64, 
VecListTwo16b, + asm, ".8b">; + def v8i8Three : BaseSIMDTableLookupTied<0, 0b10, op, V64, VecListThree16b, + asm, ".8b">; + def v8i8Four : BaseSIMDTableLookupTied<0, 0b11, op, V64, VecListFour16b, + asm, ".8b">; + def v16i8One : BaseSIMDTableLookupTied<1, 0b00, op, V128, VecListOne16b, + asm, ".16b">; + def v16i8Two : BaseSIMDTableLookupTied<1, 0b01, op, V128, VecListTwo16b, + asm, ".16b">; + def v16i8Three: BaseSIMDTableLookupTied<1, 0b10, op, V128, VecListThree16b, + asm, ".16b">; + def v16i8Four : BaseSIMDTableLookupTied<1, 0b11, op, V128, VecListFour16b, + asm, ".16b">; + + def : SIMDTableLookupAlias(NAME#"v8i8One"), + V64, VecListOne128>; + def : SIMDTableLookupAlias(NAME#"v8i8Two"), + V64, VecListTwo128>; + def : SIMDTableLookupAlias(NAME#"v8i8Three"), + V64, VecListThree128>; + def : SIMDTableLookupAlias(NAME#"v8i8Four"), + V64, VecListFour128>; + def : SIMDTableLookupAlias(NAME#"v16i8One"), + V128, VecListOne128>; + def : SIMDTableLookupAlias(NAME#"v16i8Two"), + V128, VecListTwo128>; + def : SIMDTableLookupAlias(NAME#"v16i8Three"), + V128, VecListThree128>; + def : SIMDTableLookupAlias(NAME#"v16i8Four"), + V128, VecListFour128>; +} + + +//---------------------------------------------------------------------------- +// AdvSIMD scalar CPY +//---------------------------------------------------------------------------- +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDScalarCPY + : I<(outs regtype:$dst), (ins vectype:$src, idxtype:$idx), "mov", + "{\t$dst, $src" # kind # "$idx" # + "|\t$dst, $src$idx}", "", []>, + Sched<[WriteV]> { + bits<5> dst; + bits<5> src; + let Inst{31-21} = 0b01011110000; + let Inst{15-10} = 0b000001; + let Inst{9-5} = src; + let Inst{4-0} = dst; +} + +class SIMDScalarCPYAlias + : InstAlias; + + +multiclass SIMDScalarCPY { + def i8 : BaseSIMDScalarCPY { + bits<4> idx; + let Inst{20-17} = idx; + let Inst{16} = 1; + } + def i16 : BaseSIMDScalarCPY { + bits<3> idx; + let Inst{20-18} = idx; + let Inst{17-16} = 0b10; + } + def i32 : BaseSIMDScalarCPY { + bits<2> idx; + let Inst{20-19} = idx; + let Inst{18-16} = 0b100; + } + def i64 : BaseSIMDScalarCPY { + bits<1> idx; + let Inst{20} = idx; + let Inst{19-16} = 0b1000; + } + + def : Pat<(v1i64 (scalar_to_vector (i64 (vector_extract (v2i64 V128:$src), + VectorIndexD:$idx)))), + (!cast(NAME # i64) V128:$src, VectorIndexD:$idx)>; + + // 'DUP' mnemonic aliases. 
+ def : SIMDScalarCPYAlias<"dup", ".b", + !cast(NAME#"i8"), + FPR8, V128, VectorIndexB>; + def : SIMDScalarCPYAlias<"dup", ".h", + !cast(NAME#"i16"), + FPR16, V128, VectorIndexH>; + def : SIMDScalarCPYAlias<"dup", ".s", + !cast(NAME#"i32"), + FPR32, V128, VectorIndexS>; + def : SIMDScalarCPYAlias<"dup", ".d", + !cast(NAME#"i64"), + FPR64, V128, VectorIndexD>; +} + +//---------------------------------------------------------------------------- +// AdvSIMD modified immediate instructions +//---------------------------------------------------------------------------- + +class BaseSIMDModifiedImm pattern> + : I, + Sched<[WriteV]> { + bits<5> Rd; + bits<8> imm8; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = op; + let Inst{28-19} = 0b0111100000; + let Inst{18-16} = imm8{7-5}; + let Inst{11} = op2; + let Inst{10} = 1; + let Inst{9-5} = imm8{4-0}; + let Inst{4-0} = Rd; +} + +class BaseSIMDModifiedImmVector pattern> + : BaseSIMDModifiedImm { + let DecoderMethod = "DecodeModImmInstruction"; +} + +class BaseSIMDModifiedImmVectorTied pattern> + : BaseSIMDModifiedImm { + let DecoderMethod = "DecodeModImmTiedInstruction"; +} + +class BaseSIMDModifiedImmVectorShift b15_b12, + RegisterOperand vectype, string asm, + string kind, list pattern> + : BaseSIMDModifiedImmVector { + bits<2> shift; + let Inst{15} = b15_b12{1}; + let Inst{14-13} = shift; + let Inst{12} = b15_b12{0}; +} + +class BaseSIMDModifiedImmVectorShiftTied b15_b12, + RegisterOperand vectype, string asm, + string kind, list pattern> + : BaseSIMDModifiedImmVectorTied { + bits<2> shift; + let Inst{15} = b15_b12{1}; + let Inst{14-13} = shift; + let Inst{12} = b15_b12{0}; +} + + +class BaseSIMDModifiedImmVectorShiftHalf b15_b12, + RegisterOperand vectype, string asm, + string kind, list pattern> + : BaseSIMDModifiedImmVector { + bits<2> shift; + let Inst{15} = b15_b12{1}; + let Inst{14} = 0; + let Inst{13} = shift{0}; + let Inst{12} = b15_b12{0}; +} + +class BaseSIMDModifiedImmVectorShiftHalfTied b15_b12, + RegisterOperand vectype, string asm, + string kind, list pattern> + : BaseSIMDModifiedImmVectorTied { + bits<2> shift; + let Inst{15} = b15_b12{1}; + let Inst{14} = 0; + let Inst{13} = shift{0}; + let Inst{12} = b15_b12{0}; +} + +multiclass SIMDModifiedImmVectorShift hw_cmode, bits<2> w_cmode, + string asm> { + def v4i16 : BaseSIMDModifiedImmVectorShiftHalf<0, op, hw_cmode, V64, + asm, ".4h", []>; + def v8i16 : BaseSIMDModifiedImmVectorShiftHalf<1, op, hw_cmode, V128, + asm, ".8h", []>; + + def v2i32 : BaseSIMDModifiedImmVectorShift<0, op, w_cmode, V64, + asm, ".2s", []>; + def v4i32 : BaseSIMDModifiedImmVectorShift<1, op, w_cmode, V128, + asm, ".4s", []>; +} + +multiclass SIMDModifiedImmVectorShiftTied hw_cmode, + bits<2> w_cmode, string asm, + SDNode OpNode> { + def v4i16 : BaseSIMDModifiedImmVectorShiftHalfTied<0, op, hw_cmode, V64, + asm, ".4h", + [(set (v4i16 V64:$dst), (OpNode V64:$Rd, + imm0_255:$imm8, + (i32 imm:$shift)))]>; + def v8i16 : BaseSIMDModifiedImmVectorShiftHalfTied<1, op, hw_cmode, V128, + asm, ".8h", + [(set (v8i16 V128:$dst), (OpNode V128:$Rd, + imm0_255:$imm8, + (i32 imm:$shift)))]>; + + def v2i32 : BaseSIMDModifiedImmVectorShiftTied<0, op, w_cmode, V64, + asm, ".2s", + [(set (v2i32 V64:$dst), (OpNode V64:$Rd, + imm0_255:$imm8, + (i32 imm:$shift)))]>; + def v4i32 : BaseSIMDModifiedImmVectorShiftTied<1, op, w_cmode, V128, + asm, ".4s", + [(set (v4i32 V128:$dst), (OpNode V128:$Rd, + imm0_255:$imm8, + (i32 imm:$shift)))]>; +} + +class SIMDModifiedImmMoveMSL cmode, + RegisterOperand vectype, string asm, + string 
kind, list pattern> + : BaseSIMDModifiedImmVector { + bits<1> shift; + let Inst{15-13} = cmode{3-1}; + let Inst{12} = shift; +} + +class SIMDModifiedImmVectorNoShift cmode, + RegisterOperand vectype, + Operand imm_type, string asm, + string kind, list pattern> + : BaseSIMDModifiedImmVector { + let Inst{15-12} = cmode; +} + +class SIMDModifiedImmScalarNoShift cmode, string asm, + list pattern> + : BaseSIMDModifiedImm { + let Inst{15-12} = cmode; + let DecoderMethod = "DecodeModImmInstruction"; +} + +//---------------------------------------------------------------------------- +// AdvSIMD indexed element +//---------------------------------------------------------------------------- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDIndexed size, bits<4> opc, + RegisterOperand dst_reg, RegisterOperand lhs_reg, + RegisterOperand rhs_reg, Operand vec_idx, string asm, + string apple_kind, string dst_kind, string lhs_kind, + string rhs_kind, list pattern> + : I<(outs dst_reg:$Rd), (ins lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx), + asm, + "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" # + "|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28} = Scalar; + let Inst{27-24} = 0b1111; + let Inst{23-22} = size; + // Bit 21 must be set by the derived class. + let Inst{20-16} = Rm; + let Inst{15-12} = opc; + // Bit 11 must be set by the derived class. + let Inst{10} = 0; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDIndexedTied size, bits<4> opc, + RegisterOperand dst_reg, RegisterOperand lhs_reg, + RegisterOperand rhs_reg, Operand vec_idx, string asm, + string apple_kind, string dst_kind, string lhs_kind, + string rhs_kind, list pattern> + : I<(outs dst_reg:$dst), + (ins dst_reg:$Rd, lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx), asm, + "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" # + "|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "$Rd = $dst", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28} = Scalar; + let Inst{27-24} = 0b1111; + let Inst{23-22} = size; + // Bit 21 must be set by the derived class. + let Inst{20-16} = Rm; + let Inst{15-12} = opc; + // Bit 11 must be set by the derived class. 
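+ // (The derived multiclasses below route the lane index through bits 21
+ // and 11: e.g. for a ".s" element, idx{1} goes to Inst{11} and idx{0}
+ // to Inst{21}, so lane 2 encodes as Inst{11}=1, Inst{21}=0.)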
+ let Inst{10} = 0; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +// ARMv8.2 Index Dot product instructions +class BaseSIMDThreeSameVectorDotIndex : + BaseSIMDIndexedTied { + bits<2> idx; + let Inst{21} = idx{0}; // L + let Inst{11} = idx{1}; // H +} + +multiclass SIMDThreeSameVectorDotIndex { + def v8i8 : BaseSIMDThreeSameVectorDotIndex<0, U, asm, ".2s", ".8b", ".4b", V64, + v2i32, v8i8, OpNode>; + def v16i8 : BaseSIMDThreeSameVectorDotIndex<1, U, asm, ".4s", ".16b", ".4b", V128, + v4i32, v16i8, OpNode>; +} + +multiclass SIMDFPIndexed opc, string asm, + SDPatternOperator OpNode> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b00, opc, + V64, V64, + V128_lo, VectorIndexH, + asm, ".4h", ".4h", ".4h", ".h", + [(set (v4f16 V64:$Rd), + (OpNode (v4f16 V64:$Rn), + (v4f16 (AArch64duplane16 (v8f16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b00, opc, + V128, V128, + V128_lo, VectorIndexH, + asm, ".8h", ".8h", ".8h", ".h", + [(set (v8f16 V128:$Rd), + (OpNode (v8f16 V128:$Rn), + (v8f16 (AArch64duplane16 (v8f16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + } // Predicates = [HasNEON, HasFullFP16] + + def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, + V64, V64, + V128, VectorIndexS, + asm, ".2s", ".2s", ".2s", ".s", + [(set (v2f32 V64:$Rd), + (OpNode (v2f32 V64:$Rn), + (v2f32 (AArch64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, + V128, V128, + V128, VectorIndexS, + asm, ".4s", ".4s", ".4s", ".s", + [(set (v4f32 V128:$Rd), + (OpNode (v4f32 V128:$Rn), + (v4f32 (AArch64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v2i64_indexed : BaseSIMDIndexed<1, U, 0, 0b11, opc, + V128, V128, + V128, VectorIndexD, + asm, ".2d", ".2d", ".2d", ".d", + [(set (v2f64 V128:$Rd), + (OpNode (v2f64 V128:$Rn), + (v2f64 (AArch64duplane64 (v2f64 V128:$Rm), VectorIndexD:$idx))))]> { + bits<1> idx; + let Inst{11} = idx{0}; + let Inst{21} = 0; + } + + let Predicates = [HasNEON, HasFullFP16] in { + def v1i16_indexed : BaseSIMDIndexed<1, U, 1, 0b00, opc, + FPR16Op, FPR16Op, V128_lo, VectorIndexH, + asm, ".h", "", "", ".h", + [(set (f16 FPR16Op:$Rd), + (OpNode (f16 FPR16Op:$Rn), + (f16 (vector_extract (v8f16 V128_lo:$Rm), + VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + } // Predicates = [HasNEON, HasFullFP16] + + def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc, + FPR32Op, FPR32Op, V128, VectorIndexS, + asm, ".s", "", "", ".s", + [(set (f32 FPR32Op:$Rd), + (OpNode (f32 FPR32Op:$Rn), + (f32 (vector_extract (v4f32 V128:$Rm), + VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v1i64_indexed : BaseSIMDIndexed<1, U, 1, 0b11, opc, + FPR64Op, FPR64Op, V128, VectorIndexD, + asm, ".d", "", "", ".d", + [(set (f64 FPR64Op:$Rd), + (OpNode (f64 FPR64Op:$Rn), + (f64 (vector_extract (v2f64 V128:$Rm), + VectorIndexD:$idx))))]> { + bits<1> idx; + let Inst{11} = idx{0}; + let Inst{21} = 0; + } +} + +multiclass SIMDFPIndexedTiedPatterns { + // 2 variants for the .2s version: DUPLANE from 128-bit and DUP scalar. 
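+ // E.g. an indexed FP multiply-add such as "fmla v0.2s, v1.2s, v2.s[1]"
+ // matches the duplane form directly; when the multiplicand is instead a
+ // splat of a plain f32 value, the FPR32 source is first wrapped into a
+ // 128-bit register with SUBREG_TO_REG and lane 0 of that register is used.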
+ def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), + (AArch64duplane32 (v4f32 V128:$Rm), + VectorIndexS:$idx))), + (!cast(INST # v2i32_indexed) + V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>; + def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), + (AArch64dup (f32 FPR32Op:$Rm)))), + (!cast(INST # "v2i32_indexed") V64:$Rd, V64:$Rn, + (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; + + + // 2 variants for the .4s version: DUPLANE from 128-bit and DUP scalar. + def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), + (AArch64duplane32 (v4f32 V128:$Rm), + VectorIndexS:$idx))), + (!cast(INST # "v4i32_indexed") + V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; + def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), + (AArch64dup (f32 FPR32Op:$Rm)))), + (!cast(INST # "v4i32_indexed") V128:$Rd, V128:$Rn, + (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; + + // 2 variants for the .2d version: DUPLANE from 128-bit and DUP scalar. + def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), + (AArch64duplane64 (v2f64 V128:$Rm), + VectorIndexD:$idx))), + (!cast(INST # "v2i64_indexed") + V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; + def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), + (AArch64dup (f64 FPR64Op:$Rm)))), + (!cast(INST # "v2i64_indexed") V128:$Rd, V128:$Rn, + (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>; + + // 2 variants for 32-bit scalar version: extract from .2s or from .4s + def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), + (vector_extract (v4f32 V128:$Rm), VectorIndexS:$idx))), + (!cast(INST # "v1i32_indexed") FPR32:$Rd, FPR32:$Rn, + V128:$Rm, VectorIndexS:$idx)>; + def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), + (vector_extract (v2f32 V64:$Rm), VectorIndexS:$idx))), + (!cast(INST # "v1i32_indexed") FPR32:$Rd, FPR32:$Rn, + (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>; + + // 1 variant for 64-bit scalar version: extract from .1d or from .2d + def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn), + (vector_extract (v2f64 V128:$Rm), VectorIndexD:$idx))), + (!cast(INST # "v1i64_indexed") FPR64:$Rd, FPR64:$Rn, + V128:$Rm, VectorIndexD:$idx)>; +} + +multiclass SIMDFPIndexedTied opc, string asm> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b00, opc, V64, V64, + V128_lo, VectorIndexH, + asm, ".4h", ".4h", ".4h", ".h", []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b00, opc, + V128, V128, + V128_lo, VectorIndexH, + asm, ".8h", ".8h", ".8h", ".h", []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + } // Predicates = [HasNEON, HasFullFP16] + + def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, V64, V64, + V128, VectorIndexS, + asm, ".2s", ".2s", ".2s", ".s", []> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, + V128, V128, + V128, VectorIndexS, + asm, ".4s", ".4s", ".4s", ".s", []> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v2i64_indexed : BaseSIMDIndexedTied<1, U, 0, 0b11, opc, + V128, V128, + V128, VectorIndexD, + asm, ".2d", ".2d", ".2d", ".d", []> { + bits<1> idx; + let Inst{11} = idx{0}; + let Inst{21} = 0; + } + + let Predicates = [HasNEON, HasFullFP16] in { + def v1i16_indexed : BaseSIMDIndexedTied<1, U, 1, 0b00, opc, + FPR16Op, 
FPR16Op, V128_lo, VectorIndexH, + asm, ".h", "", "", ".h", []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + } // Predicates = [HasNEON, HasFullFP16] + + def v1i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc, + FPR32Op, FPR32Op, V128, VectorIndexS, + asm, ".s", "", "", ".s", []> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v1i64_indexed : BaseSIMDIndexedTied<1, U, 1, 0b11, opc, + FPR64Op, FPR64Op, V128, VectorIndexD, + asm, ".d", "", "", ".d", []> { + bits<1> idx; + let Inst{11} = idx{0}; + let Inst{21} = 0; + } +} + +multiclass SIMDIndexedHS opc, string asm, + SDPatternOperator OpNode> { + def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, V64, V64, + V128_lo, VectorIndexH, + asm, ".4h", ".4h", ".4h", ".h", + [(set (v4i16 V64:$Rd), + (OpNode (v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc, + V128, V128, + V128_lo, VectorIndexH, + asm, ".8h", ".8h", ".8h", ".h", + [(set (v8i16 V128:$Rd), + (OpNode (v8i16 V128:$Rn), + (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, + V64, V64, + V128, VectorIndexS, + asm, ".2s", ".2s", ".2s", ".s", + [(set (v2i32 V64:$Rd), + (OpNode (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, + V128, V128, + V128, VectorIndexS, + asm, ".4s", ".4s", ".4s", ".s", + [(set (v4i32 V128:$Rd), + (OpNode (v4i32 V128:$Rn), + (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v1i16_indexed : BaseSIMDIndexed<1, U, 1, 0b01, opc, + FPR16Op, FPR16Op, V128_lo, VectorIndexH, + asm, ".h", "", "", ".h", []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc, + FPR32Op, FPR32Op, V128, VectorIndexS, + asm, ".s", "", "", ".s", + [(set (i32 FPR32Op:$Rd), + (OpNode FPR32Op:$Rn, + (i32 (vector_extract (v4i32 V128:$Rm), + VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } +} + +multiclass SIMDVectorIndexedHS opc, string asm, + SDPatternOperator OpNode> { + def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, + V64, V64, + V128_lo, VectorIndexH, + asm, ".4h", ".4h", ".4h", ".h", + [(set (v4i16 V64:$Rd), + (OpNode (v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc, + V128, V128, + V128_lo, VectorIndexH, + asm, ".8h", ".8h", ".8h", ".h", + [(set (v8i16 V128:$Rd), + (OpNode (v8i16 V128:$Rn), + (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, + V64, V64, + V128, VectorIndexS, + asm, ".2s", ".2s", ".2s", ".s", + [(set (v2i32 V64:$Rd), + (OpNode (v2i32 V64:$Rn), + (v2i32 
(AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, + V128, V128, + V128, VectorIndexS, + asm, ".4s", ".4s", ".4s", ".s", + [(set (v4i32 V128:$Rd), + (OpNode (v4i32 V128:$Rn), + (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } +} + +multiclass SIMDVectorIndexedHSTied opc, string asm, + SDPatternOperator OpNode> { + def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, V64, V64, + V128_lo, VectorIndexH, + asm, ".4h", ".4h", ".4h", ".h", + [(set (v4i16 V64:$dst), + (OpNode (v4i16 V64:$Rd),(v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc, + V128, V128, + V128_lo, VectorIndexH, + asm, ".8h", ".8h", ".8h", ".h", + [(set (v8i16 V128:$dst), + (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), + (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, + V64, V64, + V128, VectorIndexS, + asm, ".2s", ".2s", ".2s", ".s", + [(set (v2i32 V64:$dst), + (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, + V128, V128, + V128, VectorIndexS, + asm, ".4s", ".4s", ".4s", ".s", + [(set (v4i32 V128:$dst), + (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), + (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } +} + +multiclass SIMDIndexedLongSD opc, string asm, + SDPatternOperator OpNode> { + def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, + V128, V64, + V128_lo, VectorIndexH, + asm, ".4s", ".4s", ".4h", ".h", + [(set (v4i32 V128:$Rd), + (OpNode (v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc, + V128, V128, + V128_lo, VectorIndexH, + asm#"2", ".4s", ".4s", ".8h", ".h", + [(set (v4i32 V128:$Rd), + (OpNode (extract_high_v8i16 V128:$Rn), + (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx))))]> { + + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, + V128, V64, + V128, VectorIndexS, + asm, ".2d", ".2d", ".2s", ".s", + [(set (v2i64 V128:$Rd), + (OpNode (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, + V128, V128, + V128, VectorIndexS, + asm#"2", ".2d", ".2d", ".4s", ".s", + [(set (v2i64 V128:$Rd), + (OpNode (extract_high_v4i32 V128:$Rn), + (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b01, opc, + FPR32Op, 
FPR16Op, V128_lo, VectorIndexH, + asm, ".h", "", "", ".h", []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v1i64_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc, + FPR64Op, FPR32Op, V128, VectorIndexS, + asm, ".s", "", "", ".s", []> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } +} + +multiclass SIMDIndexedLongSQDMLXSDTied opc, string asm, + SDPatternOperator Accum> { + def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, + V128, V64, + V128_lo, VectorIndexH, + asm, ".4s", ".4s", ".4h", ".h", + [(set (v4i32 V128:$dst), + (Accum (v4i32 V128:$Rd), + (v4i32 (int_aarch64_neon_sqdmull + (v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx))))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + // FIXME: it would be nice to use the scalar (v1i32) instruction here, but an + // intermediate EXTRACT_SUBREG would be untyped. + def : Pat<(i32 (Accum (i32 FPR32Op:$Rd), + (i32 (vector_extract (v4i32 + (int_aarch64_neon_sqdmull (v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx)))), + (i64 0))))), + (EXTRACT_SUBREG + (!cast(NAME # v4i16_indexed) + (SUBREG_TO_REG (i32 0), FPR32Op:$Rd, ssub), V64:$Rn, + V128_lo:$Rm, VectorIndexH:$idx), + ssub)>; + + def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc, + V128, V128, + V128_lo, VectorIndexH, + asm#"2", ".4s", ".4s", ".8h", ".h", + [(set (v4i32 V128:$dst), + (Accum (v4i32 V128:$Rd), + (v4i32 (int_aarch64_neon_sqdmull + (extract_high_v8i16 V128:$Rn), + (extract_high_v8i16 + (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx))))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, + V128, V64, + V128, VectorIndexS, + asm, ".2d", ".2d", ".2s", ".s", + [(set (v2i64 V128:$dst), + (Accum (v2i64 V128:$Rd), + (v2i64 (int_aarch64_neon_sqdmull + (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, + V128, V128, + V128, VectorIndexS, + asm#"2", ".2d", ".2d", ".4s", ".s", + [(set (v2i64 V128:$dst), + (Accum (v2i64 V128:$Rd), + (v2i64 (int_aarch64_neon_sqdmull + (extract_high_v4i32 V128:$Rn), + (extract_high_v4i32 + (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v1i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b01, opc, + FPR32Op, FPR16Op, V128_lo, VectorIndexH, + asm, ".h", "", "", ".h", []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + + def v1i64_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc, + FPR64Op, FPR32Op, V128, VectorIndexS, + asm, ".s", "", "", ".s", + [(set (i64 FPR64Op:$dst), + (Accum (i64 FPR64Op:$Rd), + (i64 (int_aarch64_neon_sqdmulls_scalar + (i32 FPR32Op:$Rn), + (i32 (vector_extract (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } +} + +multiclass SIMDVectorIndexedLongSD opc, string asm, + SDPatternOperator OpNode> { + let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { + def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, + V128, V64, + V128_lo, VectorIndexH, + asm, ".4s", ".4s", ".4h", ".h", + [(set (v4i32 V128:$Rd), + (OpNode (v4i16 
V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc, + V128, V128, + V128_lo, VectorIndexH, + asm#"2", ".4s", ".4s", ".8h", ".h", + [(set (v4i32 V128:$Rd), + (OpNode (extract_high_v8i16 V128:$Rn), + (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx))))]> { + + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, + V128, V64, + V128, VectorIndexS, + asm, ".2d", ".2d", ".2s", ".s", + [(set (v2i64 V128:$Rd), + (OpNode (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, + V128, V128, + V128, VectorIndexS, + asm#"2", ".2d", ".2d", ".4s", ".s", + [(set (v2i64 V128:$Rd), + (OpNode (extract_high_v4i32 V128:$Rn), + (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + } +} + +multiclass SIMDVectorIndexedLongSDTied opc, string asm, + SDPatternOperator OpNode> { + let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { + def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, + V128, V64, + V128_lo, VectorIndexH, + asm, ".4s", ".4s", ".4h", ".h", + [(set (v4i32 V128:$dst), + (OpNode (v4i32 V128:$Rd), (v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc, + V128, V128, + V128_lo, VectorIndexH, + asm#"2", ".4s", ".4s", ".8h", ".h", + [(set (v4i32 V128:$dst), + (OpNode (v4i32 V128:$Rd), + (extract_high_v8i16 V128:$Rn), + (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, + V128, V64, + V128, VectorIndexS, + asm, ".2d", ".2d", ".2s", ".s", + [(set (v2i64 V128:$dst), + (OpNode (v2i64 V128:$Rd), (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, + V128, V128, + V128, VectorIndexS, + asm#"2", ".2d", ".2d", ".4s", ".s", + [(set (v2i64 V128:$dst), + (OpNode (v2i64 V128:$Rd), + (extract_high_v4i32 V128:$Rn), + (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + } +} + +//---------------------------------------------------------------------------- +// AdvSIMD scalar shift by immediate +//---------------------------------------------------------------------------- + +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseSIMDScalarShift opc, bits<7> fixed_imm, + RegisterClass regtype1, RegisterClass regtype2, + Operand immtype, string asm, list pattern> + : I<(outs regtype1:$Rd), (ins regtype2:$Rn, immtype:$imm), + asm, "\t$Rd, $Rn, $imm", "", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<7> imm; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-23} = 
0b111110; + let Inst{22-16} = fixed_imm; + let Inst{15-11} = opc; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseSIMDScalarShiftTied opc, bits<7> fixed_imm, + RegisterClass regtype1, RegisterClass regtype2, + Operand immtype, string asm, list pattern> + : I<(outs regtype1:$dst), (ins regtype1:$Rd, regtype2:$Rn, immtype:$imm), + asm, "\t$Rd, $Rn, $imm", "$Rd = $dst", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<7> imm; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-23} = 0b111110; + let Inst{22-16} = fixed_imm; + let Inst{15-11} = opc; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + + +multiclass SIMDFPScalarRShift opc, string asm> { + let Predicates = [HasNEON, HasFullFP16] in { + def h : BaseSIMDScalarShift { + let Inst{19-16} = imm{3-0}; + } + } // Predicates = [HasNEON, HasFullFP16] + def s : BaseSIMDScalarShift { + let Inst{20-16} = imm{4-0}; + } + def d : BaseSIMDScalarShift { + let Inst{21-16} = imm{5-0}; + } +} + +multiclass SIMDScalarRShiftD opc, string asm, + SDPatternOperator OpNode> { + def d : BaseSIMDScalarShift { + let Inst{21-16} = imm{5-0}; + } + + def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn), (i32 vecshiftR64:$imm))), + (!cast(NAME # "d") FPR64:$Rn, vecshiftR64:$imm)>; +} + +multiclass SIMDScalarRShiftDTied opc, string asm, + SDPatternOperator OpNode = null_frag> { + def d : BaseSIMDScalarShiftTied { + let Inst{21-16} = imm{5-0}; + } + + def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), + (i32 vecshiftR64:$imm))), + (!cast(NAME # "d") FPR64:$Rd, FPR64:$Rn, + vecshiftR64:$imm)>; +} + +multiclass SIMDScalarLShiftD opc, string asm, + SDPatternOperator OpNode> { + def d : BaseSIMDScalarShift { + let Inst{21-16} = imm{5-0}; + } +} + +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +multiclass SIMDScalarLShiftDTied opc, string asm> { + def d : BaseSIMDScalarShiftTied { + let Inst{21-16} = imm{5-0}; + } +} + +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +multiclass SIMDScalarRShiftBHS opc, string asm, + SDPatternOperator OpNode = null_frag> { + def b : BaseSIMDScalarShift { + let Inst{18-16} = imm{2-0}; + } + + def h : BaseSIMDScalarShift { + let Inst{19-16} = imm{3-0}; + } + + def s : BaseSIMDScalarShift { + let Inst{20-16} = imm{4-0}; + } +} + +multiclass SIMDScalarLShiftBHSD opc, string asm, + SDPatternOperator OpNode> { + def b : BaseSIMDScalarShift { + let Inst{18-16} = imm{2-0}; + } + + def h : BaseSIMDScalarShift { + let Inst{19-16} = imm{3-0}; + } + + def s : BaseSIMDScalarShift { + let Inst{20-16} = imm{4-0}; + } + + def d : BaseSIMDScalarShift { + let Inst{21-16} = imm{5-0}; + } + + def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn), (i32 vecshiftL64:$imm))), + (!cast(NAME # "d") FPR64:$Rn, vecshiftL64:$imm)>; +} + +multiclass SIMDScalarRShiftBHSD opc, string asm> { + def b : BaseSIMDScalarShift { + let Inst{18-16} = imm{2-0}; + } + + def h : BaseSIMDScalarShift { + let Inst{19-16} = imm{3-0}; + } + + def s : BaseSIMDScalarShift { + let Inst{20-16} = imm{4-0}; + } + + def d : BaseSIMDScalarShift { + let Inst{21-16} = imm{5-0}; + } +} + +//---------------------------------------------------------------------------- +// AdvSIMD vector x indexed element +//---------------------------------------------------------------------------- + +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseSIMDVectorShift opc, bits<7> fixed_imm, + RegisterOperand dst_reg, RegisterOperand src_reg, + Operand immtype, + string 
asm, string dst_kind, string src_kind, + list pattern> + : I<(outs dst_reg:$Rd), (ins src_reg:$Rn, immtype:$imm), + asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" # + "|" # dst_kind # "\t$Rd, $Rn, $imm}", "", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-23} = 0b011110; + let Inst{22-16} = fixed_imm; + let Inst{15-11} = opc; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseSIMDVectorShiftTied opc, bits<7> fixed_imm, + RegisterOperand vectype1, RegisterOperand vectype2, + Operand immtype, + string asm, string dst_kind, string src_kind, + list pattern> + : I<(outs vectype1:$dst), (ins vectype1:$Rd, vectype2:$Rn, immtype:$imm), + asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" # + "|" # dst_kind # "\t$Rd, $Rn, $imm}", "$Rd = $dst", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-23} = 0b011110; + let Inst{22-16} = fixed_imm; + let Inst{15-11} = opc; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SIMDVectorRShiftSD opc, string asm, + Intrinsic OpNode> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, + V64, V64, vecshiftR16, + asm, ".4h", ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (i32 imm:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, + V128, V128, vecshiftR16, + asm, ".8h", ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (i32 imm:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + } // Predicates = [HasNEON, HasFullFP16] + def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, + V64, V64, vecshiftR32, + asm, ".2s", ".2s", + [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (i32 imm:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, + V128, V128, vecshiftR32, + asm, ".4s", ".4s", + [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (i32 imm:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, + V128, V128, vecshiftR64, + asm, ".2d", ".2d", + [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (i32 imm:$imm)))]> { + bits<6> imm; + let Inst{21-16} = imm; + } +} + +multiclass SIMDVectorRShiftToFP opc, string asm, + Intrinsic OpNode> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, + V64, V64, vecshiftR16, + asm, ".4h", ".4h", + [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (i32 imm:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, + V128, V128, vecshiftR16, + asm, ".8h", ".8h", + [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (i32 imm:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + } // Predicates = [HasNEON, HasFullFP16] + + def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, + V64, V64, vecshiftR32, + asm, ".2s", ".2s", + [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (i32 imm:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, + V128, V128, vecshiftR32, + asm, ".4s", ".4s", + [(set (v4f32 V128:$Rd), (OpNode (v4i32 
V128:$Rn), (i32 imm:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, + V128, V128, vecshiftR64, + asm, ".2d", ".2d", + [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (i32 imm:$imm)))]> { + bits<6> imm; + let Inst{21-16} = imm; + } +} + +multiclass SIMDVectorRShiftNarrowBHS opc, string asm, + SDPatternOperator OpNode> { + def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?}, + V64, V128, vecshiftR16Narrow, + asm, ".8b", ".8h", + [(set (v8i8 V64:$Rd), (OpNode (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v16i8_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,0,1,?,?,?}, + V128, V128, vecshiftR16Narrow, + asm#"2", ".16b", ".8h", []> { + bits<3> imm; + let Inst{18-16} = imm; + let hasSideEffects = 0; + } + + def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, + V64, V128, vecshiftR32Narrow, + asm, ".4h", ".4s", + [(set (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v8i16_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,1,?,?,?,?}, + V128, V128, vecshiftR32Narrow, + asm#"2", ".8h", ".4s", []> { + bits<4> imm; + let Inst{19-16} = imm; + let hasSideEffects = 0; + } + + def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, + V64, V128, vecshiftR64Narrow, + asm, ".2s", ".2d", + [(set (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v4i32_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,1,?,?,?,?,?}, + V128, V128, vecshiftR64Narrow, + asm#"2", ".4s", ".2d", []> { + bits<5> imm; + let Inst{20-16} = imm; + let hasSideEffects = 0; + } + + // TableGen doesn't like patters w/ INSERT_SUBREG on the instructions + // themselves, so put them here instead. + + // Patterns involving what's effectively an insert high and a normal + // intrinsic, represented by CONCAT_VECTORS. 
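+ // E.g. the "2" (second-half) narrowing forms write only the top half of
+ // the destination, which the DAG models as concat_vectors of the existing
+ // low half and the narrowed result; the patterns below insert that low
+ // half into an IMPLICIT_DEF with INSERT_SUBREG before using the tied "2"
+ // instruction.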
+ def : Pat<(concat_vectors (v8i8 V64:$Rd),(OpNode (v8i16 V128:$Rn), + vecshiftR16Narrow:$imm)), + (!cast(NAME # "v16i8_shift") + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), + V128:$Rn, vecshiftR16Narrow:$imm)>; + def : Pat<(concat_vectors (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn), + vecshiftR32Narrow:$imm)), + (!cast(NAME # "v8i16_shift") + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), + V128:$Rn, vecshiftR32Narrow:$imm)>; + def : Pat<(concat_vectors (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn), + vecshiftR64Narrow:$imm)), + (!cast(NAME # "v4i32_shift") + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), + V128:$Rn, vecshiftR64Narrow:$imm)>; +} + +multiclass SIMDVectorLShiftBHSD opc, string asm, + SDPatternOperator OpNode> { + def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?}, + V64, V64, vecshiftL8, + asm, ".8b", ".8b", + [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), + (i32 vecshiftL8:$imm)))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v16i8_shift : BaseSIMDVectorShift<1, U, opc, {0,0,0,1,?,?,?}, + V128, V128, vecshiftL8, + asm, ".16b", ".16b", + [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), + (i32 vecshiftL8:$imm)))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, + V64, V64, vecshiftL16, + asm, ".4h", ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), + (i32 vecshiftL16:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, + V128, V128, vecshiftL16, + asm, ".8h", ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), + (i32 vecshiftL16:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, + V64, V64, vecshiftL32, + asm, ".2s", ".2s", + [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), + (i32 vecshiftL32:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, + V128, V128, vecshiftL32, + asm, ".4s", ".4s", + [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), + (i32 vecshiftL32:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, + V128, V128, vecshiftL64, + asm, ".2d", ".2d", + [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), + (i32 vecshiftL64:$imm)))]> { + bits<6> imm; + let Inst{21-16} = imm; + } +} + +multiclass SIMDVectorRShiftBHSD opc, string asm, + SDPatternOperator OpNode> { + def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?}, + V64, V64, vecshiftR8, + asm, ".8b", ".8b", + [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), + (i32 vecshiftR8:$imm)))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v16i8_shift : BaseSIMDVectorShift<1, U, opc, {0,0,0,1,?,?,?}, + V128, V128, vecshiftR8, + asm, ".16b", ".16b", + [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), + (i32 vecshiftR8:$imm)))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, + V64, V64, vecshiftR16, + asm, ".4h", ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), + (i32 vecshiftR16:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, + V128, V128, vecshiftR16, + asm, ".8h", ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), + (i32 vecshiftR16:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, + 
V64, V64, vecshiftR32, + asm, ".2s", ".2s", + [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), + (i32 vecshiftR32:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, + V128, V128, vecshiftR32, + asm, ".4s", ".4s", + [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), + (i32 vecshiftR32:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, + V128, V128, vecshiftR64, + asm, ".2d", ".2d", + [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), + (i32 vecshiftR64:$imm)))]> { + bits<6> imm; + let Inst{21-16} = imm; + } +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +multiclass SIMDVectorRShiftBHSDTied opc, string asm, + SDPatternOperator OpNode = null_frag> { + def v8i8_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,0,1,?,?,?}, + V64, V64, vecshiftR8, asm, ".8b", ".8b", + [(set (v8i8 V64:$dst), + (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), + (i32 vecshiftR8:$imm)))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v16i8_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,0,1,?,?,?}, + V128, V128, vecshiftR8, asm, ".16b", ".16b", + [(set (v16i8 V128:$dst), + (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), + (i32 vecshiftR8:$imm)))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v4i16_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,1,?,?,?,?}, + V64, V64, vecshiftR16, asm, ".4h", ".4h", + [(set (v4i16 V64:$dst), + (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn), + (i32 vecshiftR16:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v8i16_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,1,?,?,?,?}, + V128, V128, vecshiftR16, asm, ".8h", ".8h", + [(set (v8i16 V128:$dst), + (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), + (i32 vecshiftR16:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v2i32_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,1,?,?,?,?,?}, + V64, V64, vecshiftR32, asm, ".2s", ".2s", + [(set (v2i32 V64:$dst), + (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), + (i32 vecshiftR32:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v4i32_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,1,?,?,?,?,?}, + V128, V128, vecshiftR32, asm, ".4s", ".4s", + [(set (v4i32 V128:$dst), + (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), + (i32 vecshiftR32:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v2i64_shift : BaseSIMDVectorShiftTied<1, U, opc, {1,?,?,?,?,?,?}, + V128, V128, vecshiftR64, + asm, ".2d", ".2d", [(set (v2i64 V128:$dst), + (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn), + (i32 vecshiftR64:$imm)))]> { + bits<6> imm; + let Inst{21-16} = imm; + } +} + +multiclass SIMDVectorLShiftBHSDTied opc, string asm, + SDPatternOperator OpNode = null_frag> { + def v8i8_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,0,1,?,?,?}, + V64, V64, vecshiftL8, + asm, ".8b", ".8b", + [(set (v8i8 V64:$dst), + (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), + (i32 vecshiftL8:$imm)))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v16i8_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,0,1,?,?,?}, + V128, V128, vecshiftL8, + asm, ".16b", ".16b", + [(set (v16i8 V128:$dst), + (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), + (i32 vecshiftL8:$imm)))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v4i16_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,1,?,?,?,?}, + V64, V64, vecshiftL16, + asm, ".4h", ".4h", + [(set (v4i16 V64:$dst), + (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn), + (i32 vecshiftL16:$imm)))]> { + bits<4> imm; + let Inst{19-16} = 
imm; + } + + def v8i16_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,1,?,?,?,?}, + V128, V128, vecshiftL16, + asm, ".8h", ".8h", + [(set (v8i16 V128:$dst), + (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), + (i32 vecshiftL16:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v2i32_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,1,?,?,?,?,?}, + V64, V64, vecshiftL32, + asm, ".2s", ".2s", + [(set (v2i32 V64:$dst), + (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), + (i32 vecshiftL32:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v4i32_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,1,?,?,?,?,?}, + V128, V128, vecshiftL32, + asm, ".4s", ".4s", + [(set (v4i32 V128:$dst), + (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), + (i32 vecshiftL32:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v2i64_shift : BaseSIMDVectorShiftTied<1, U, opc, {1,?,?,?,?,?,?}, + V128, V128, vecshiftL64, + asm, ".2d", ".2d", + [(set (v2i64 V128:$dst), + (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn), + (i32 vecshiftL64:$imm)))]> { + bits<6> imm; + let Inst{21-16} = imm; + } +} + +multiclass SIMDVectorLShiftLongBHSD opc, string asm, + SDPatternOperator OpNode> { + def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?}, + V128, V64, vecshiftL8, asm, ".8h", ".8b", + [(set (v8i16 V128:$Rd), (OpNode (v8i8 V64:$Rn), vecshiftL8:$imm))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v16i8_shift : BaseSIMDVectorShift<1, U, opc, {0,0,0,1,?,?,?}, + V128, V128, vecshiftL8, + asm#"2", ".8h", ".16b", + [(set (v8i16 V128:$Rd), + (OpNode (extract_high_v16i8 V128:$Rn), vecshiftL8:$imm))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, + V128, V64, vecshiftL16, asm, ".4s", ".4h", + [(set (v4i32 V128:$Rd), (OpNode (v4i16 V64:$Rn), vecshiftL16:$imm))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, + V128, V128, vecshiftL16, + asm#"2", ".4s", ".8h", + [(set (v4i32 V128:$Rd), + (OpNode (extract_high_v8i16 V128:$Rn), vecshiftL16:$imm))]> { + + bits<4> imm; + let Inst{19-16} = imm; + } + + def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, + V128, V64, vecshiftL32, asm, ".2d", ".2s", + [(set (v2i64 V128:$Rd), (OpNode (v2i32 V64:$Rn), vecshiftL32:$imm))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, + V128, V128, vecshiftL32, + asm#"2", ".2d", ".4s", + [(set (v2i64 V128:$Rd), + (OpNode (extract_high_v4i32 V128:$Rn), vecshiftL32:$imm))]> { + bits<5> imm; + let Inst{20-16} = imm; + } +} + + +//--- +// Vector load/store +//--- +// SIMD ldX/stX no-index memory references don't allow the optional +// ", #0" constant and handle post-indexing explicitly, so we use +// a more specialized parse method for them. Otherwise, it's the same as +// the general GPR64sp handling. 
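+ // E.g. "ld1 { v0.16b }, [x0]" is accepted, but "ld1 { v0.16b }, [x0, #0]"
+ // is not; post-indexing is written explicitly as "[x0], #16" or
+ // "[x0], x2" and handled by the _POST variants below.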
+ +class BaseSIMDLdSt opcode, bits<2> size, + string asm, dag oops, dag iops, list pattern> + : I { + bits<5> Vt; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29-23} = 0b0011000; + let Inst{22} = L; + let Inst{21-16} = 0b000000; + let Inst{15-12} = opcode; + let Inst{11-10} = size; + let Inst{9-5} = Rn; + let Inst{4-0} = Vt; +} + +class BaseSIMDLdStPost opcode, bits<2> size, + string asm, dag oops, dag iops> + : I { + bits<5> Vt; + bits<5> Rn; + bits<5> Xm; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29-23} = 0b0011001; + let Inst{22} = L; + let Inst{21} = 0; + let Inst{20-16} = Xm; + let Inst{15-12} = opcode; + let Inst{11-10} = size; + let Inst{9-5} = Rn; + let Inst{4-0} = Vt; +} + +// The immediate form of AdvSIMD post-indexed addressing is encoded with +// register post-index addressing from the zero register. +multiclass SIMDLdStAliases { + // E.g. "ld1 { v0.8b, v1.8b }, [x1], #16" + // "ld1\t$Vt, [$Rn], #16" + // may get mapped to + // (LD1Twov8b_POST VecListTwo8b:$Vt, GPR64sp:$Rn, XZR) + def : InstAlias(BaseName # Count # "v" # layout # "_POST") + GPR64sp:$Rn, + !cast("VecList" # Count # layout):$Vt, + XZR), 1>; + + // E.g. "ld1.8b { v0, v1 }, [x1], #16" + // "ld1.8b\t$Vt, [$Rn], #16" + // may get mapped to + // (LD1Twov8b_POST VecListTwo64:$Vt, GPR64sp:$Rn, XZR) + def : InstAlias(BaseName # Count # "v" # layout # "_POST") + GPR64sp:$Rn, + !cast("VecList" # Count # Size):$Vt, + XZR), 0>; + + // E.g. "ld1.8b { v0, v1 }, [x1]" + // "ld1\t$Vt, [$Rn]" + // may get mapped to + // (LD1Twov8b VecListTwo64:$Vt, GPR64sp:$Rn) + def : InstAlias(BaseName # Count # "v" # layout) + !cast("VecList" # Count # Size):$Vt, + GPR64sp:$Rn), 0>; + + // E.g. "ld1.8b { v0, v1 }, [x1], x2" + // "ld1\t$Vt, [$Rn], $Xm" + // may get mapped to + // (LD1Twov8b_POST VecListTwo64:$Vt, GPR64sp:$Rn, GPR64pi8:$Xm) + def : InstAlias(BaseName # Count # "v" # layout # "_POST") + GPR64sp:$Rn, + !cast("VecList" # Count # Size):$Vt, + !cast("GPR64pi" # Offset):$Xm), 0>; +} + +multiclass BaseSIMDLdN opcode> { + let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in { + def v16b: BaseSIMDLdSt<1, 1, opcode, 0b00, asm, + (outs !cast(veclist # "16b"):$Vt), + (ins GPR64sp:$Rn), []>; + def v8h : BaseSIMDLdSt<1, 1, opcode, 0b01, asm, + (outs !cast(veclist # "8h"):$Vt), + (ins GPR64sp:$Rn), []>; + def v4s : BaseSIMDLdSt<1, 1, opcode, 0b10, asm, + (outs !cast(veclist # "4s"):$Vt), + (ins GPR64sp:$Rn), []>; + def v2d : BaseSIMDLdSt<1, 1, opcode, 0b11, asm, + (outs !cast(veclist # "2d"):$Vt), + (ins GPR64sp:$Rn), []>; + def v8b : BaseSIMDLdSt<0, 1, opcode, 0b00, asm, + (outs !cast(veclist # "8b"):$Vt), + (ins GPR64sp:$Rn), []>; + def v4h : BaseSIMDLdSt<0, 1, opcode, 0b01, asm, + (outs !cast(veclist # "4h"):$Vt), + (ins GPR64sp:$Rn), []>; + def v2s : BaseSIMDLdSt<0, 1, opcode, 0b10, asm, + (outs !cast(veclist # "2s"):$Vt), + (ins GPR64sp:$Rn), []>; + + + def v16b_POST: BaseSIMDLdStPost<1, 1, opcode, 0b00, asm, + (outs GPR64sp:$wback, + !cast(veclist # "16b"):$Vt), + (ins GPR64sp:$Rn, + !cast("GPR64pi" # Offset128):$Xm)>; + def v8h_POST : BaseSIMDLdStPost<1, 1, opcode, 0b01, asm, + (outs GPR64sp:$wback, + !cast(veclist # "8h"):$Vt), + (ins GPR64sp:$Rn, + !cast("GPR64pi" # Offset128):$Xm)>; + def v4s_POST : BaseSIMDLdStPost<1, 1, opcode, 0b10, asm, + (outs GPR64sp:$wback, + !cast(veclist # "4s"):$Vt), + (ins GPR64sp:$Rn, + !cast("GPR64pi" # Offset128):$Xm)>; + def v2d_POST : BaseSIMDLdStPost<1, 1, opcode, 0b11, asm, + (outs GPR64sp:$wback, + !cast(veclist # "2d"):$Vt), + (ins GPR64sp:$Rn, + !cast("GPR64pi" # 
Offset128):$Xm)>; + def v8b_POST : BaseSIMDLdStPost<0, 1, opcode, 0b00, asm, + (outs GPR64sp:$wback, + !cast(veclist # "8b"):$Vt), + (ins GPR64sp:$Rn, + !cast("GPR64pi" # Offset64):$Xm)>; + def v4h_POST : BaseSIMDLdStPost<0, 1, opcode, 0b01, asm, + (outs GPR64sp:$wback, + !cast(veclist # "4h"):$Vt), + (ins GPR64sp:$Rn, + !cast("GPR64pi" # Offset64):$Xm)>; + def v2s_POST : BaseSIMDLdStPost<0, 1, opcode, 0b10, asm, + (outs GPR64sp:$wback, + !cast(veclist # "2s"):$Vt), + (ins GPR64sp:$Rn, + !cast("GPR64pi" # Offset64):$Xm)>; + } + + defm : SIMDLdStAliases; + defm : SIMDLdStAliases; + defm : SIMDLdStAliases; + defm : SIMDLdStAliases; + defm : SIMDLdStAliases; + defm : SIMDLdStAliases; + defm : SIMDLdStAliases; +} + +// Only ld1/st1 has a v1d version. +multiclass BaseSIMDStN opcode> { + let hasSideEffects = 0, mayStore = 1, mayLoad = 0 in { + def v16b : BaseSIMDLdSt<1, 0, opcode, 0b00, asm, (outs), + (ins !cast(veclist # "16b"):$Vt, + GPR64sp:$Rn), []>; + def v8h : BaseSIMDLdSt<1, 0, opcode, 0b01, asm, (outs), + (ins !cast(veclist # "8h"):$Vt, + GPR64sp:$Rn), []>; + def v4s : BaseSIMDLdSt<1, 0, opcode, 0b10, asm, (outs), + (ins !cast(veclist # "4s"):$Vt, + GPR64sp:$Rn), []>; + def v2d : BaseSIMDLdSt<1, 0, opcode, 0b11, asm, (outs), + (ins !cast(veclist # "2d"):$Vt, + GPR64sp:$Rn), []>; + def v8b : BaseSIMDLdSt<0, 0, opcode, 0b00, asm, (outs), + (ins !cast(veclist # "8b"):$Vt, + GPR64sp:$Rn), []>; + def v4h : BaseSIMDLdSt<0, 0, opcode, 0b01, asm, (outs), + (ins !cast(veclist # "4h"):$Vt, + GPR64sp:$Rn), []>; + def v2s : BaseSIMDLdSt<0, 0, opcode, 0b10, asm, (outs), + (ins !cast(veclist # "2s"):$Vt, + GPR64sp:$Rn), []>; + + def v16b_POST : BaseSIMDLdStPost<1, 0, opcode, 0b00, asm, + (outs GPR64sp:$wback), + (ins !cast(veclist # "16b"):$Vt, + GPR64sp:$Rn, + !cast("GPR64pi" # Offset128):$Xm)>; + def v8h_POST : BaseSIMDLdStPost<1, 0, opcode, 0b01, asm, + (outs GPR64sp:$wback), + (ins !cast(veclist # "8h"):$Vt, + GPR64sp:$Rn, + !cast("GPR64pi" # Offset128):$Xm)>; + def v4s_POST : BaseSIMDLdStPost<1, 0, opcode, 0b10, asm, + (outs GPR64sp:$wback), + (ins !cast(veclist # "4s"):$Vt, + GPR64sp:$Rn, + !cast("GPR64pi" # Offset128):$Xm)>; + def v2d_POST : BaseSIMDLdStPost<1, 0, opcode, 0b11, asm, + (outs GPR64sp:$wback), + (ins !cast(veclist # "2d"):$Vt, + GPR64sp:$Rn, + !cast("GPR64pi" # Offset128):$Xm)>; + def v8b_POST : BaseSIMDLdStPost<0, 0, opcode, 0b00, asm, + (outs GPR64sp:$wback), + (ins !cast(veclist # "8b"):$Vt, + GPR64sp:$Rn, + !cast("GPR64pi" # Offset64):$Xm)>; + def v4h_POST : BaseSIMDLdStPost<0, 0, opcode, 0b01, asm, + (outs GPR64sp:$wback), + (ins !cast(veclist # "4h"):$Vt, + GPR64sp:$Rn, + !cast("GPR64pi" # Offset64):$Xm)>; + def v2s_POST : BaseSIMDLdStPost<0, 0, opcode, 0b10, asm, + (outs GPR64sp:$wback), + (ins !cast(veclist # "2s"):$Vt, + GPR64sp:$Rn, + !cast("GPR64pi" # Offset64):$Xm)>; + } + + defm : SIMDLdStAliases; + defm : SIMDLdStAliases; + defm : SIMDLdStAliases; + defm : SIMDLdStAliases; + defm : SIMDLdStAliases; + defm : SIMDLdStAliases; + defm : SIMDLdStAliases; +} + +multiclass BaseSIMDLd1 opcode> + : BaseSIMDLdN { + + // LD1 instructions have extra "1d" variants. 
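+ // E.g. "ld1 { v0.1d }, [x0]" is valid, but the ld2/ld3/ld4
+ // multiple-structure forms have no ".1d" layout, so those multiclasses
+ // do not get this extra variant.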
+ let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in { + def v1d : BaseSIMDLdSt<0, 1, opcode, 0b11, asm, + (outs !cast(veclist # "1d"):$Vt), + (ins GPR64sp:$Rn), []>; + + def v1d_POST : BaseSIMDLdStPost<0, 1, opcode, 0b11, asm, + (outs GPR64sp:$wback, + !cast(veclist # "1d"):$Vt), + (ins GPR64sp:$Rn, + !cast("GPR64pi" # Offset64):$Xm)>; + } + + defm : SIMDLdStAliases; +} + +multiclass BaseSIMDSt1 opcode> + : BaseSIMDStN { + + // ST1 instructions have extra "1d" variants. + let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in { + def v1d : BaseSIMDLdSt<0, 0, opcode, 0b11, asm, (outs), + (ins !cast(veclist # "1d"):$Vt, + GPR64sp:$Rn), []>; + + def v1d_POST : BaseSIMDLdStPost<0, 0, opcode, 0b11, asm, + (outs GPR64sp:$wback), + (ins !cast(veclist # "1d"):$Vt, + GPR64sp:$Rn, + !cast("GPR64pi" # Offset64):$Xm)>; + } + + defm : SIMDLdStAliases; +} + +multiclass SIMDLd1Multiple { + defm One : BaseSIMDLd1; + defm Two : BaseSIMDLd1; + defm Three : BaseSIMDLd1; + defm Four : BaseSIMDLd1; +} + +multiclass SIMDSt1Multiple { + defm One : BaseSIMDSt1; + defm Two : BaseSIMDSt1; + defm Three : BaseSIMDSt1; + defm Four : BaseSIMDSt1; +} + +multiclass SIMDLd2Multiple { + defm Two : BaseSIMDLdN; +} + +multiclass SIMDSt2Multiple { + defm Two : BaseSIMDStN; +} + +multiclass SIMDLd3Multiple { + defm Three : BaseSIMDLdN; +} + +multiclass SIMDSt3Multiple { + defm Three : BaseSIMDStN; +} + +multiclass SIMDLd4Multiple { + defm Four : BaseSIMDLdN; +} + +multiclass SIMDSt4Multiple { + defm Four : BaseSIMDStN; +} + +//--- +// AdvSIMD Load/store single-element +//--- + +class BaseSIMDLdStSingle opcode, + string asm, string operands, string cst, + dag oops, dag iops, list pattern> + : I { + bits<5> Vt; + bits<5> Rn; + let Inst{31} = 0; + let Inst{29-24} = 0b001101; + let Inst{22} = L; + let Inst{21} = R; + let Inst{15-13} = opcode; + let Inst{9-5} = Rn; + let Inst{4-0} = Vt; +} + +class BaseSIMDLdStSingleTied opcode, + string asm, string operands, string cst, + dag oops, dag iops, list pattern> + : I { + bits<5> Vt; + bits<5> Rn; + let Inst{31} = 0; + let Inst{29-24} = 0b001101; + let Inst{22} = L; + let Inst{21} = R; + let Inst{15-13} = opcode; + let Inst{9-5} = Rn; + let Inst{4-0} = Vt; +} + + +let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDLdR opcode, bit S, bits<2> size, string asm, + DAGOperand listtype> + : BaseSIMDLdStSingle<1, R, opcode, asm, "\t$Vt, [$Rn]", "", + (outs listtype:$Vt), (ins GPR64sp:$Rn), + []> { + let Inst{30} = Q; + let Inst{23} = 0; + let Inst{20-16} = 0b00000; + let Inst{12} = S; + let Inst{11-10} = size; +} +let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDLdRPost opcode, bit S, bits<2> size, + string asm, DAGOperand listtype, DAGOperand GPR64pi> + : BaseSIMDLdStSingle<1, R, opcode, asm, "\t$Vt, [$Rn], $Xm", + "$Rn = $wback", + (outs GPR64sp:$wback, listtype:$Vt), + (ins GPR64sp:$Rn, GPR64pi:$Xm), []> { + bits<5> Xm; + let Inst{30} = Q; + let Inst{23} = 1; + let Inst{20-16} = Xm; + let Inst{12} = S; + let Inst{11-10} = size; +} + +multiclass SIMDLdrAliases { + // E.g. "ld1r { v0.8b }, [x1], #1" + // "ld1r.8b\t$Vt, [$Rn], #1" + // may get mapped to + // (LD1Rv8b_POST VecListOne8b:$Vt, GPR64sp:$Rn, XZR) + def : InstAlias(BaseName # "v" # layout # "_POST") + GPR64sp:$Rn, + !cast("VecList" # Count # layout):$Vt, + XZR), 1>; + + // E.g. 
"ld1r.8b { v0 }, [x1], #1" + // "ld1r.8b\t$Vt, [$Rn], #1" + // may get mapped to + // (LD1Rv8b_POST VecListOne64:$Vt, GPR64sp:$Rn, XZR) + def : InstAlias(BaseName # "v" # layout # "_POST") + GPR64sp:$Rn, + !cast("VecList" # Count # Size):$Vt, + XZR), 0>; + + // E.g. "ld1r.8b { v0 }, [x1]" + // "ld1r.8b\t$Vt, [$Rn]" + // may get mapped to + // (LD1Rv8b VecListOne64:$Vt, GPR64sp:$Rn) + def : InstAlias(BaseName # "v" # layout) + !cast("VecList" # Count # Size):$Vt, + GPR64sp:$Rn), 0>; + + // E.g. "ld1r.8b { v0 }, [x1], x2" + // "ld1r.8b\t$Vt, [$Rn], $Xm" + // may get mapped to + // (LD1Rv8b_POST VecListOne64:$Vt, GPR64sp:$Rn, GPR64pi1:$Xm) + def : InstAlias(BaseName # "v" # layout # "_POST") + GPR64sp:$Rn, + !cast("VecList" # Count # Size):$Vt, + !cast("GPR64pi" # Offset):$Xm), 0>; +} + +multiclass SIMDLdR opcode, bit S, string asm, string Count, + int Offset1, int Offset2, int Offset4, int Offset8> { + def v8b : BaseSIMDLdR<0, R, opcode, S, 0b00, asm, + !cast("VecList" # Count # "8b")>; + def v16b: BaseSIMDLdR<1, R, opcode, S, 0b00, asm, + !cast("VecList" # Count #"16b")>; + def v4h : BaseSIMDLdR<0, R, opcode, S, 0b01, asm, + !cast("VecList" # Count #"4h")>; + def v8h : BaseSIMDLdR<1, R, opcode, S, 0b01, asm, + !cast("VecList" # Count #"8h")>; + def v2s : BaseSIMDLdR<0, R, opcode, S, 0b10, asm, + !cast("VecList" # Count #"2s")>; + def v4s : BaseSIMDLdR<1, R, opcode, S, 0b10, asm, + !cast("VecList" # Count #"4s")>; + def v1d : BaseSIMDLdR<0, R, opcode, S, 0b11, asm, + !cast("VecList" # Count #"1d")>; + def v2d : BaseSIMDLdR<1, R, opcode, S, 0b11, asm, + !cast("VecList" # Count #"2d")>; + + def v8b_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b00, asm, + !cast("VecList" # Count # "8b"), + !cast("GPR64pi" # Offset1)>; + def v16b_POST: BaseSIMDLdRPost<1, R, opcode, S, 0b00, asm, + !cast("VecList" # Count # "16b"), + !cast("GPR64pi" # Offset1)>; + def v4h_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b01, asm, + !cast("VecList" # Count # "4h"), + !cast("GPR64pi" # Offset2)>; + def v8h_POST : BaseSIMDLdRPost<1, R, opcode, S, 0b01, asm, + !cast("VecList" # Count # "8h"), + !cast("GPR64pi" # Offset2)>; + def v2s_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b10, asm, + !cast("VecList" # Count # "2s"), + !cast("GPR64pi" # Offset4)>; + def v4s_POST : BaseSIMDLdRPost<1, R, opcode, S, 0b10, asm, + !cast("VecList" # Count # "4s"), + !cast("GPR64pi" # Offset4)>; + def v1d_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b11, asm, + !cast("VecList" # Count # "1d"), + !cast("GPR64pi" # Offset8)>; + def v2d_POST : BaseSIMDLdRPost<1, R, opcode, S, 0b11, asm, + !cast("VecList" # Count # "2d"), + !cast("GPR64pi" # Offset8)>; + + defm : SIMDLdrAliases; + defm : SIMDLdrAliases; + defm : SIMDLdrAliases; + defm : SIMDLdrAliases; + defm : SIMDLdrAliases; + defm : SIMDLdrAliases; + defm : SIMDLdrAliases; + defm : SIMDLdrAliases; +} + +class SIMDLdStSingleB opcode, string asm, + dag oops, dag iops, list pattern> + : BaseSIMDLdStSingle { + // idx encoded in Q:S:size fields. + bits<4> idx; + let Inst{30} = idx{3}; + let Inst{23} = 0; + let Inst{20-16} = 0b00000; + let Inst{12} = idx{2}; + let Inst{11-10} = idx{1-0}; +} +class SIMDLdStSingleBTied opcode, string asm, + dag oops, dag iops, list pattern> + : BaseSIMDLdStSingleTied { + // idx encoded in Q:S:size fields. + bits<4> idx; + let Inst{30} = idx{3}; + let Inst{23} = 0; + let Inst{20-16} = 0b00000; + let Inst{12} = idx{2}; + let Inst{11-10} = idx{1-0}; +} +class SIMDLdStSingleBPost opcode, string asm, + dag oops, dag iops> + : BaseSIMDLdStSingle { + // idx encoded in Q:S:size fields. 
+ bits<4> idx; + bits<5> Xm; + let Inst{30} = idx{3}; + let Inst{23} = 1; + let Inst{20-16} = Xm; + let Inst{12} = idx{2}; + let Inst{11-10} = idx{1-0}; +} +class SIMDLdStSingleBTiedPost opcode, string asm, + dag oops, dag iops> + : BaseSIMDLdStSingleTied { + // idx encoded in Q:S:size fields. + bits<4> idx; + bits<5> Xm; + let Inst{30} = idx{3}; + let Inst{23} = 1; + let Inst{20-16} = Xm; + let Inst{12} = idx{2}; + let Inst{11-10} = idx{1-0}; +} + +class SIMDLdStSingleH opcode, bit size, string asm, + dag oops, dag iops, list pattern> + : BaseSIMDLdStSingle { + // idx encoded in Q:S:size<1> fields. + bits<3> idx; + let Inst{30} = idx{2}; + let Inst{23} = 0; + let Inst{20-16} = 0b00000; + let Inst{12} = idx{1}; + let Inst{11} = idx{0}; + let Inst{10} = size; +} +class SIMDLdStSingleHTied opcode, bit size, string asm, + dag oops, dag iops, list pattern> + : BaseSIMDLdStSingleTied { + // idx encoded in Q:S:size<1> fields. + bits<3> idx; + let Inst{30} = idx{2}; + let Inst{23} = 0; + let Inst{20-16} = 0b00000; + let Inst{12} = idx{1}; + let Inst{11} = idx{0}; + let Inst{10} = size; +} + +class SIMDLdStSingleHPost opcode, bit size, string asm, + dag oops, dag iops> + : BaseSIMDLdStSingle { + // idx encoded in Q:S:size<1> fields. + bits<3> idx; + bits<5> Xm; + let Inst{30} = idx{2}; + let Inst{23} = 1; + let Inst{20-16} = Xm; + let Inst{12} = idx{1}; + let Inst{11} = idx{0}; + let Inst{10} = size; +} +class SIMDLdStSingleHTiedPost opcode, bit size, string asm, + dag oops, dag iops> + : BaseSIMDLdStSingleTied { + // idx encoded in Q:S:size<1> fields. + bits<3> idx; + bits<5> Xm; + let Inst{30} = idx{2}; + let Inst{23} = 1; + let Inst{20-16} = Xm; + let Inst{12} = idx{1}; + let Inst{11} = idx{0}; + let Inst{10} = size; +} +class SIMDLdStSingleS opcode, bits<2> size, string asm, + dag oops, dag iops, list pattern> + : BaseSIMDLdStSingle { + // idx encoded in Q:S fields. + bits<2> idx; + let Inst{30} = idx{1}; + let Inst{23} = 0; + let Inst{20-16} = 0b00000; + let Inst{12} = idx{0}; + let Inst{11-10} = size; +} +class SIMDLdStSingleSTied opcode, bits<2> size, string asm, + dag oops, dag iops, list pattern> + : BaseSIMDLdStSingleTied { + // idx encoded in Q:S fields. + bits<2> idx; + let Inst{30} = idx{1}; + let Inst{23} = 0; + let Inst{20-16} = 0b00000; + let Inst{12} = idx{0}; + let Inst{11-10} = size; +} +class SIMDLdStSingleSPost opcode, bits<2> size, + string asm, dag oops, dag iops> + : BaseSIMDLdStSingle { + // idx encoded in Q:S fields. + bits<2> idx; + bits<5> Xm; + let Inst{30} = idx{1}; + let Inst{23} = 1; + let Inst{20-16} = Xm; + let Inst{12} = idx{0}; + let Inst{11-10} = size; +} +class SIMDLdStSingleSTiedPost opcode, bits<2> size, + string asm, dag oops, dag iops> + : BaseSIMDLdStSingleTied { + // idx encoded in Q:S fields. + bits<2> idx; + bits<5> Xm; + let Inst{30} = idx{1}; + let Inst{23} = 1; + let Inst{20-16} = Xm; + let Inst{12} = idx{0}; + let Inst{11-10} = size; +} +class SIMDLdStSingleD opcode, bits<2> size, string asm, + dag oops, dag iops, list pattern> + : BaseSIMDLdStSingle { + // idx encoded in Q field. + bits<1> idx; + let Inst{30} = idx; + let Inst{23} = 0; + let Inst{20-16} = 0b00000; + let Inst{12} = 0; + let Inst{11-10} = size; +} +class SIMDLdStSingleDTied opcode, bits<2> size, string asm, + dag oops, dag iops, list pattern> + : BaseSIMDLdStSingleTied { + // idx encoded in Q field. 
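+ // (A 128-bit register has only two doubleword lanes, so the single
+ // index bit lives in Q; S is forced to 0 and size comes from the class
+ // parameter.)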
+ bits<1> idx; + let Inst{30} = idx; + let Inst{23} = 0; + let Inst{20-16} = 0b00000; + let Inst{12} = 0; + let Inst{11-10} = size; +} +class SIMDLdStSingleDPost opcode, bits<2> size, + string asm, dag oops, dag iops> + : BaseSIMDLdStSingle { + // idx encoded in Q field. + bits<1> idx; + bits<5> Xm; + let Inst{30} = idx; + let Inst{23} = 1; + let Inst{20-16} = Xm; + let Inst{12} = 0; + let Inst{11-10} = size; +} +class SIMDLdStSingleDTiedPost opcode, bits<2> size, + string asm, dag oops, dag iops> + : BaseSIMDLdStSingleTied { + // idx encoded in Q field. + bits<1> idx; + bits<5> Xm; + let Inst{30} = idx; + let Inst{23} = 1; + let Inst{20-16} = Xm; + let Inst{12} = 0; + let Inst{11-10} = size; +} + +let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in +multiclass SIMDLdSingleBTied opcode, string asm, + RegisterOperand listtype, + RegisterOperand GPR64pi> { + def i8 : SIMDLdStSingleBTied<1, R, opcode, asm, + (outs listtype:$dst), + (ins listtype:$Vt, VectorIndexB:$idx, + GPR64sp:$Rn), []>; + + def i8_POST : SIMDLdStSingleBTiedPost<1, R, opcode, asm, + (outs GPR64sp:$wback, listtype:$dst), + (ins listtype:$Vt, VectorIndexB:$idx, + GPR64sp:$Rn, GPR64pi:$Xm)>; +} +let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in +multiclass SIMDLdSingleHTied opcode, bit size, string asm, + RegisterOperand listtype, + RegisterOperand GPR64pi> { + def i16 : SIMDLdStSingleHTied<1, R, opcode, size, asm, + (outs listtype:$dst), + (ins listtype:$Vt, VectorIndexH:$idx, + GPR64sp:$Rn), []>; + + def i16_POST : SIMDLdStSingleHTiedPost<1, R, opcode, size, asm, + (outs GPR64sp:$wback, listtype:$dst), + (ins listtype:$Vt, VectorIndexH:$idx, + GPR64sp:$Rn, GPR64pi:$Xm)>; +} +let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in +multiclass SIMDLdSingleSTied opcode, bits<2> size,string asm, + RegisterOperand listtype, + RegisterOperand GPR64pi> { + def i32 : SIMDLdStSingleSTied<1, R, opcode, size, asm, + (outs listtype:$dst), + (ins listtype:$Vt, VectorIndexS:$idx, + GPR64sp:$Rn), []>; + + def i32_POST : SIMDLdStSingleSTiedPost<1, R, opcode, size, asm, + (outs GPR64sp:$wback, listtype:$dst), + (ins listtype:$Vt, VectorIndexS:$idx, + GPR64sp:$Rn, GPR64pi:$Xm)>; +} +let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in +multiclass SIMDLdSingleDTied opcode, bits<2> size, string asm, + RegisterOperand listtype, RegisterOperand GPR64pi> { + def i64 : SIMDLdStSingleDTied<1, R, opcode, size, asm, + (outs listtype:$dst), + (ins listtype:$Vt, VectorIndexD:$idx, + GPR64sp:$Rn), []>; + + def i64_POST : SIMDLdStSingleDTiedPost<1, R, opcode, size, asm, + (outs GPR64sp:$wback, listtype:$dst), + (ins listtype:$Vt, VectorIndexD:$idx, + GPR64sp:$Rn, GPR64pi:$Xm)>; +} +let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in +multiclass SIMDStSingleB opcode, string asm, + RegisterOperand listtype, RegisterOperand GPR64pi> { + def i8 : SIMDLdStSingleB<0, R, opcode, asm, + (outs), (ins listtype:$Vt, VectorIndexB:$idx, + GPR64sp:$Rn), []>; + + def i8_POST : SIMDLdStSingleBPost<0, R, opcode, asm, + (outs GPR64sp:$wback), + (ins listtype:$Vt, VectorIndexB:$idx, + GPR64sp:$Rn, GPR64pi:$Xm)>; +} +let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in +multiclass SIMDStSingleH opcode, bit size, string asm, + RegisterOperand listtype, RegisterOperand GPR64pi> { + def i16 : SIMDLdStSingleH<0, R, opcode, size, asm, + (outs), (ins listtype:$Vt, VectorIndexH:$idx, + GPR64sp:$Rn), []>; + + def i16_POST : SIMDLdStSingleHPost<0, R, opcode, size, asm, + (outs GPR64sp:$wback), + (ins listtype:$Vt, VectorIndexH:$idx, + GPR64sp:$Rn, GPR64pi:$Xm)>; +} +let mayLoad = 
0, mayStore = 1, hasSideEffects = 0 in +multiclass SIMDStSingleS opcode, bits<2> size,string asm, + RegisterOperand listtype, RegisterOperand GPR64pi> { + def i32 : SIMDLdStSingleS<0, R, opcode, size, asm, + (outs), (ins listtype:$Vt, VectorIndexS:$idx, + GPR64sp:$Rn), []>; + + def i32_POST : SIMDLdStSingleSPost<0, R, opcode, size, asm, + (outs GPR64sp:$wback), + (ins listtype:$Vt, VectorIndexS:$idx, + GPR64sp:$Rn, GPR64pi:$Xm)>; +} +let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in +multiclass SIMDStSingleD opcode, bits<2> size, string asm, + RegisterOperand listtype, RegisterOperand GPR64pi> { + def i64 : SIMDLdStSingleD<0, R, opcode, size, asm, + (outs), (ins listtype:$Vt, VectorIndexD:$idx, + GPR64sp:$Rn), []>; + + def i64_POST : SIMDLdStSingleDPost<0, R, opcode, size, asm, + (outs GPR64sp:$wback), + (ins listtype:$Vt, VectorIndexD:$idx, + GPR64sp:$Rn, GPR64pi:$Xm)>; +} + +multiclass SIMDLdStSingleAliases { + // E.g. "ld1 { v0.8b }[0], [x1], #1" + // "ld1\t$Vt, [$Rn], #1" + // may get mapped to + // (LD1Rv8b_POST VecListOne8b:$Vt, GPR64sp:$Rn, XZR) + def : InstAlias(NAME # Type # "_POST") + GPR64sp:$Rn, + !cast("VecList" # Count # layout):$Vt, + idxtype:$idx, XZR), 1>; + + // E.g. "ld1.8b { v0 }[0], [x1], #1" + // "ld1.8b\t$Vt, [$Rn], #1" + // may get mapped to + // (LD1Rv8b_POST VecListOne64:$Vt, GPR64sp:$Rn, XZR) + def : InstAlias(NAME # Type # "_POST") + GPR64sp:$Rn, + !cast("VecList" # Count # "128"):$Vt, + idxtype:$idx, XZR), 0>; + + // E.g. "ld1.8b { v0 }[0], [x1]" + // "ld1.8b\t$Vt, [$Rn]" + // may get mapped to + // (LD1Rv8b VecListOne64:$Vt, GPR64sp:$Rn) + def : InstAlias(NAME # Type) + !cast("VecList" # Count # "128"):$Vt, + idxtype:$idx, GPR64sp:$Rn), 0>; + + // E.g. "ld1.8b { v0 }[0], [x1], x2" + // "ld1.8b\t$Vt, [$Rn], $Xm" + // may get mapped to + // (LD1Rv8b_POST VecListOne64:$Vt, GPR64sp:$Rn, GPR64pi1:$Xm) + def : InstAlias(NAME # Type # "_POST") + GPR64sp:$Rn, + !cast("VecList" # Count # "128"):$Vt, + idxtype:$idx, + !cast("GPR64pi" # Offset):$Xm), 0>; +} + +multiclass SIMDLdSt1SingleAliases { + defm "" : SIMDLdStSingleAliases; + defm "" : SIMDLdStSingleAliases; + defm "" : SIMDLdStSingleAliases; + defm "" : SIMDLdStSingleAliases; +} + +multiclass SIMDLdSt2SingleAliases { + defm "" : SIMDLdStSingleAliases; + defm "" : SIMDLdStSingleAliases; + defm "" : SIMDLdStSingleAliases; + defm "" : SIMDLdStSingleAliases; +} + +multiclass SIMDLdSt3SingleAliases { + defm "" : SIMDLdStSingleAliases; + defm "" : SIMDLdStSingleAliases; + defm "" : SIMDLdStSingleAliases; + defm "" : SIMDLdStSingleAliases; +} + +multiclass SIMDLdSt4SingleAliases { + defm "" : SIMDLdStSingleAliases; + defm "" : SIMDLdStSingleAliases; + defm "" : SIMDLdStSingleAliases; + defm "" : SIMDLdStSingleAliases; +} +} // end of 'let Predicates = [HasNEON]' + +//---------------------------------------------------------------------------- +// AdvSIMD v8.1 Rounding Double Multiply Add/Subtract +//---------------------------------------------------------------------------- + +let Predicates = [HasNEON, HasRDM] in { + +class BaseSIMDThreeSameVectorTiedR0 size, bits<5> opcode, + RegisterOperand regtype, string asm, + string kind, list pattern> + : BaseSIMDThreeSameVectorTied { +} +multiclass SIMDThreeSameVectorSQRDMLxHTiedHS opc, string asm, + SDPatternOperator Accum> { + def v4i16 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b01, opc, V64, asm, ".4h", + [(set (v4i16 V64:$dst), + (Accum (v4i16 V64:$Rd), + (v4i16 (int_aarch64_neon_sqrdmulh (v4i16 V64:$Rn), + (v4i16 V64:$Rm)))))]>; + def v8i16 : 
BaseSIMDThreeSameVectorTiedR0<1, U, 0b01, opc, V128, asm, ".8h", + [(set (v8i16 V128:$dst), + (Accum (v8i16 V128:$Rd), + (v8i16 (int_aarch64_neon_sqrdmulh (v8i16 V128:$Rn), + (v8i16 V128:$Rm)))))]>; + def v2i32 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b10, opc, V64, asm, ".2s", + [(set (v2i32 V64:$dst), + (Accum (v2i32 V64:$Rd), + (v2i32 (int_aarch64_neon_sqrdmulh (v2i32 V64:$Rn), + (v2i32 V64:$Rm)))))]>; + def v4i32 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b10, opc, V128, asm, ".4s", + [(set (v4i32 V128:$dst), + (Accum (v4i32 V128:$Rd), + (v4i32 (int_aarch64_neon_sqrdmulh (v4i32 V128:$Rn), + (v4i32 V128:$Rm)))))]>; +} + +multiclass SIMDIndexedSQRDMLxHSDTied opc, string asm, + SDPatternOperator Accum> { + def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, + V64, V64, V128_lo, VectorIndexH, + asm, ".4h", ".4h", ".4h", ".h", + [(set (v4i16 V64:$dst), + (Accum (v4i16 V64:$Rd), + (v4i16 (int_aarch64_neon_sqrdmulh + (v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx))))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc, + V128, V128, V128_lo, VectorIndexH, + asm, ".8h", ".8h", ".8h", ".h", + [(set (v8i16 V128:$dst), + (Accum (v8i16 V128:$Rd), + (v8i16 (int_aarch64_neon_sqrdmulh + (v8i16 V128:$Rn), + (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx))))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, + V64, V64, V128, VectorIndexS, + asm, ".2s", ".2s", ".2s", ".s", + [(set (v2i32 V64:$dst), + (Accum (v2i32 V64:$Rd), + (v2i32 (int_aarch64_neon_sqrdmulh + (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + // FIXME: it would be nice to use the scalar (v1i32) instruction here, but + // an intermediate EXTRACT_SUBREG would be untyped. + // FIXME: direct EXTRACT_SUBREG from v2i32 to i32 is illegal, that's why we + // got it lowered here as (i32 vector_extract (v4i32 insert_subvector(..))) + def : Pat<(i32 (Accum (i32 FPR32Op:$Rd), + (i32 (vector_extract + (v4i32 (insert_subvector + (undef), + (v2i32 (int_aarch64_neon_sqrdmulh + (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 + (v4i32 V128:$Rm), + VectorIndexS:$idx)))), + (i32 0))), + (i64 0))))), + (EXTRACT_SUBREG + (v2i32 (!cast(NAME # v2i32_indexed) + (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), + FPR32Op:$Rd, + ssub)), + V64:$Rn, + V128:$Rm, + VectorIndexS:$idx)), + ssub)>; + + def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, + V128, V128, V128, VectorIndexS, + asm, ".4s", ".4s", ".4s", ".s", + [(set (v4i32 V128:$dst), + (Accum (v4i32 V128:$Rd), + (v4i32 (int_aarch64_neon_sqrdmulh + (v4i32 V128:$Rn), + (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + // FIXME: it would be nice to use the scalar (v1i32) instruction here, but + // an intermediate EXTRACT_SUBREG would be untyped. 
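+  // The pattern below therefore widens the i32 accumulator into a vector
+  // register with INSERT_SUBREG, reuses the .4s vector instruction, and
+  // extracts the scalar result again through the ssub sub-register.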
+ def : Pat<(i32 (Accum (i32 FPR32Op:$Rd), + (i32 (vector_extract + (v4i32 (int_aarch64_neon_sqrdmulh + (v4i32 V128:$Rn), + (v4i32 (AArch64duplane32 + (v4i32 V128:$Rm), + VectorIndexS:$idx)))), + (i64 0))))), + (EXTRACT_SUBREG + (v4i32 (!cast(NAME # v4i32_indexed) + (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), + FPR32Op:$Rd, + ssub)), + V128:$Rn, + V128:$Rm, + VectorIndexS:$idx)), + ssub)>; + + def i16_indexed : BaseSIMDIndexedTied<1, U, 1, 0b01, opc, + FPR16Op, FPR16Op, V128_lo, + VectorIndexH, asm, ".h", "", "", ".h", + []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc, + FPR32Op, FPR32Op, V128, VectorIndexS, + asm, ".s", "", "", ".s", + [(set (i32 FPR32Op:$dst), + (Accum (i32 FPR32Op:$Rd), + (i32 (int_aarch64_neon_sqrdmulh + (i32 FPR32Op:$Rn), + (i32 (vector_extract (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } +} +} // let Predicates = [HasNeon, HasRDM] + +//---------------------------------------------------------------------------- +// ARMv8.3 Complex ADD/MLA instructions +//---------------------------------------------------------------------------- + +class ComplexRotationOperand + : AsmOperandClass { + let PredicateMethod = "isComplexRotation<" # Angle # ", " # Remainder # ">"; + let DiagnosticType = "InvalidComplexRotation" # Type; + let Name = "ComplexRotation" # Type; +} +def complexrotateop : Operand { + let ParserMatchClass = ComplexRotationOperand<90, 0, "Even">; + let PrintMethod = "printComplexRotationOp<90, 0>"; +} +def complexrotateopodd : Operand { + let ParserMatchClass = ComplexRotationOperand<180, 90, "Odd">; + let PrintMethod = "printComplexRotationOp<180, 90>"; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDThreeSameVectorComplex size, bits<3> opcode, + RegisterOperand regtype, Operand rottype, + string asm, string kind, list pattern> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, rottype:$rot), asm, + "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $rot" + "|" # kind # "\t$Rd, $Rn, $Rm, $rot}", "", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<1> rot; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size; + let Inst{21} = 0; + let Inst{20-16} = Rm; + let Inst{15-13} = opcode; + // Non-tied version (FCADD) only has one rotation bit + let Inst{12} = rot; + let Inst{11} = 0; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SIMDThreeSameVectorComplexHSD opcode, Operand rottype, + string asm, SDPatternOperator OpNode>{ + let Predicates = [HasV8_3a, HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDThreeSameVectorComplex<0, U, 0b01, opcode, V64, rottype, + asm, ".4h", + [(set (v4f16 V64:$dst), (OpNode (v4f16 V64:$Rd), + (v4f16 V64:$Rn), + (v4f16 V64:$Rm), + (rottype i32:$rot)))]>; + + def v8f16 : BaseSIMDThreeSameVectorComplex<1, U, 0b01, opcode, V128, rottype, + asm, ".8h", + [(set (v8f16 V128:$dst), (OpNode (v8f16 V128:$Rd), + (v8f16 V128:$Rn), + (v8f16 V128:$Rm), + (rottype i32:$rot)))]>; + } + + let Predicates = [HasV8_3a, HasNEON] in { + def v2f32 : BaseSIMDThreeSameVectorComplex<0, U, 0b10, opcode, V64, rottype, + asm, ".2s", + [(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd), + (v2f32 V64:$Rn), + (v2f32 V64:$Rm), + (rottype i32:$rot)))]>; + + def v4f32 : BaseSIMDThreeSameVectorComplex<1, U, 0b10, opcode, V128, rottype, 
+ asm, ".4s", + [(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd), + (v4f32 V128:$Rn), + (v4f32 V128:$Rm), + (rottype i32:$rot)))]>; + + def v2f64 : BaseSIMDThreeSameVectorComplex<1, U, 0b11, opcode, V128, rottype, + asm, ".2d", + [(set (v2f64 V128:$dst), (OpNode (v2f64 V128:$Rd), + (v2f64 V128:$Rn), + (v2f64 V128:$Rm), + (rottype i32:$rot)))]>; + } +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDThreeSameVectorTiedComplex size, + bits<3> opcode, + RegisterOperand regtype, + Operand rottype, string asm, + string kind, list pattern> + : I<(outs regtype:$dst), + (ins regtype:$Rd, regtype:$Rn, regtype:$Rm, rottype:$rot), asm, + "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $rot" + "|" # kind # "\t$Rd, $Rn, $Rm, $rot}", "$Rd = $dst", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<2> rot; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size; + let Inst{21} = 0; + let Inst{20-16} = Rm; + let Inst{15-13} = opcode; + let Inst{12-11} = rot; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SIMDThreeSameVectorTiedComplexHSD opcode, + Operand rottype, string asm, + SDPatternOperator OpNode> { + let Predicates = [HasV8_3a, HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDThreeSameVectorTiedComplex<0, U, 0b01, opcode, V64, + rottype, asm, ".4h", + [(set (v4f16 V64:$dst), (OpNode (v4f16 V64:$Rd), + (v4f16 V64:$Rn), + (v4f16 V64:$Rm), + (rottype i32:$rot)))]>; + + def v8f16 : BaseSIMDThreeSameVectorTiedComplex<1, U, 0b01, opcode, V128, + rottype, asm, ".8h", + [(set (v8f16 V128:$dst), (OpNode (v8f16 V128:$Rd), + (v8f16 V128:$Rn), + (v8f16 V128:$Rm), + (rottype i32:$rot)))]>; + } + + let Predicates = [HasV8_3a, HasNEON] in { + def v2f32 : BaseSIMDThreeSameVectorTiedComplex<0, U, 0b10, opcode, V64, + rottype, asm, ".2s", + [(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd), + (v2f32 V64:$Rn), + (v2f32 V64:$Rm), + (rottype i32:$rot)))]>; + + def v4f32 : BaseSIMDThreeSameVectorTiedComplex<1, U, 0b10, opcode, V128, + rottype, asm, ".4s", + [(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd), + (v4f32 V128:$Rn), + (v4f32 V128:$Rm), + (rottype i32:$rot)))]>; + + def v2f64 : BaseSIMDThreeSameVectorTiedComplex<1, U, 0b11, opcode, V128, + rottype, asm, ".2d", + [(set (v2f64 V128:$dst), (OpNode (v2f64 V128:$Rd), + (v2f64 V128:$Rn), + (v2f64 V128:$Rm), + (rottype i32:$rot)))]>; + } +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDIndexedTiedComplex size, + bit opc1, bit opc2, RegisterOperand dst_reg, + RegisterOperand lhs_reg, + RegisterOperand rhs_reg, Operand vec_idx, + Operand rottype, string asm, string apple_kind, + string dst_kind, string lhs_kind, + string rhs_kind, list pattern> + : I<(outs dst_reg:$dst), + (ins dst_reg:$Rd, lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx, rottype:$rot), + asm, + "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # + "$idx, $rot" # "|" # apple_kind # + "\t$Rd, $Rn, $Rm$idx, $rot}", "$Rd = $dst", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<2> rot; + + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28} = Scalar; + let Inst{27-24} = 0b1111; + let Inst{23-22} = size; + // Bit 21 must be set by the derived class. + let Inst{20-16} = Rm; + let Inst{15} = opc1; + let Inst{14-13} = rot; + let Inst{12} = opc2; + // Bit 11 must be set by the derived class. 
+ let Inst{10} = 0; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +// The complex instructions index by pairs of elements, so the VectorIndexes +// don't match the lane types, and the index bits are different to the other +// classes. +multiclass SIMDIndexedTiedComplexHSD { + let Predicates = [HasV8_3a,HasNEON,HasFullFP16] in { + def v4f16_indexed : BaseSIMDIndexedTiedComplex<0, 1, 0, 0b01, opc1, opc2, V64, + V64, V128, VectorIndexD, rottype, asm, ".4h", ".4h", + ".4h", ".h", []> { + bits<1> idx; + let Inst{11} = 0; + let Inst{21} = idx{0}; + } + + def v8f16_indexed : BaseSIMDIndexedTiedComplex<1, 1, 0, 0b01, opc1, opc2, + V128, V128, V128, VectorIndexS, rottype, asm, ".8h", + ".8h", ".8h", ".h", []> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + } // Predicates = [HasV8_3a,HasNEON,HasFullFP16] + + let Predicates = [HasV8_3a,HasNEON] in { + def v4f32_indexed : BaseSIMDIndexedTiedComplex<1, 1, 0, 0b10, opc1, opc2, + V128, V128, V128, VectorIndexD, rottype, asm, ".4s", + ".4s", ".4s", ".s", []> { + bits<1> idx; + let Inst{11} = idx{0}; + let Inst{21} = 0; + } + } // Predicates = [HasV8_3a,HasNEON] +} + +//---------------------------------------------------------------------------- +// Crypto extensions +//---------------------------------------------------------------------------- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class AESBase opc, string asm, dag outs, dag ins, string cstr, + list pat> + : I, + Sched<[WriteV]>{ + bits<5> Rd; + bits<5> Rn; + let Inst{31-16} = 0b0100111000101000; + let Inst{15-12} = opc; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class AESInst opc, string asm, Intrinsic OpNode> + : AESBase; + +class AESTiedInst opc, string asm, Intrinsic OpNode> + : AESBase; + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class SHA3OpTiedInst opc, string asm, string dst_lhs_kind, + dag oops, dag iops, list pat> + : I, + Sched<[WriteV]>{ + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31-21} = 0b01011110000; + let Inst{20-16} = Rm; + let Inst{15} = 0; + let Inst{14-12} = opc; + let Inst{11-10} = 0b00; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class SHATiedInstQSV opc, string asm, Intrinsic OpNode> + : SHA3OpTiedInst; + +class SHATiedInstVVV opc, string asm, Intrinsic OpNode> + : SHA3OpTiedInst; + +class SHATiedInstQQV opc, string asm, Intrinsic OpNode> + : SHA3OpTiedInst; + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class SHA2OpInst opc, string asm, string kind, + string cstr, dag oops, dag iops, + list pat> + : I, + Sched<[WriteV]>{ + bits<5> Rd; + bits<5> Rn; + let Inst{31-16} = 0b0101111000101000; + let Inst{15-12} = opc; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class SHATiedInstVV opc, string asm, Intrinsic OpNode> + : SHA2OpInst; + +class SHAInstSS opc, string asm, Intrinsic OpNode> + : SHA2OpInst; + +// Armv8.2-A Crypto extensions +class BaseCryptoV82 pattern> + : I , Sched<[WriteV]> { + bits<5> Vd; + bits<5> Vn; + let Inst{31-25} = 0b1100111; + let Inst{9-5} = Vn; + let Inst{4-0} = Vd; +} + +class CryptoRRTiedop0, bits<2>op1, string asm, string asmops> + : BaseCryptoV82<(outs V128:$Vd), (ins V128:$Vn, V128:$Vm), asm, asmops, + "$Vm = $Vd", []> { + let Inst{31-25} = 0b1100111; + let Inst{24-21} = 0b0110; + let Inst{20-15} = 0b000001; + let Inst{14} = op0; + let Inst{13-12} = 0b00; + let Inst{11-10} = op1; +} +class CryptoRRTied_2Dop0, bits<2>op1, string asm> + : CryptoRRTied; +class CryptoRRTied_4Sop0, bits<2>op1, string asm> + : 
CryptoRRTied; + +class CryptoRRR op0, bits<2>op1, dag oops, dag iops, string asm, + string asmops, string cst> + : BaseCryptoV82 { + bits<5> Vm; + let Inst{24-21} = 0b0011; + let Inst{20-16} = Vm; + let Inst{15} = 0b1; + let Inst{14} = op0; + let Inst{13-12} = 0b00; + let Inst{11-10} = op1; +} +class CryptoRRR_2D op0, bits<2>op1, string asm> + : CryptoRRR; +class CryptoRRRTied_2D op0, bits<2>op1, string asm> + : CryptoRRR; +class CryptoRRR_4S op0, bits<2>op1, string asm> + : CryptoRRR; +class CryptoRRRTied_4S op0, bits<2>op1, string asm> + : CryptoRRR; +class CryptoRRRTied op0, bits<2>op1, string asm> + : CryptoRRR; + +class CryptoRRRRop0, string asm, string asmops> + : BaseCryptoV82<(outs V128:$Vd), (ins V128:$Vn, V128:$Vm, V128:$Va), asm, + asmops, "", []> { + bits<5> Vm; + bits<5> Va; + let Inst{24-23} = 0b00; + let Inst{22-21} = op0; + let Inst{20-16} = Vm; + let Inst{15} = 0b0; + let Inst{14-10} = Va; +} +class CryptoRRRR_16Bop0, string asm> + : CryptoRRRR { +} +class CryptoRRRR_4Sop0, string asm> + : CryptoRRRR { +} + +class CryptoRRRi6 + : BaseCryptoV82<(outs V128:$Vd), (ins V128:$Vn, V128:$Vm, uimm6:$imm), asm, + "{\t$Vd.2d, $Vn.2d, $Vm.2d, $imm}", "", []> { + bits<6> imm; + bits<5> Vm; + let Inst{24-21} = 0b0100; + let Inst{20-16} = Vm; + let Inst{15-10} = imm; + let Inst{9-5} = Vn; + let Inst{4-0} = Vd; +} + +class CryptoRRRi2Tiedop0, bits<2>op1, string asm> + : BaseCryptoV82<(outs V128:$Vdst), + (ins V128:$Vd, V128:$Vn, V128:$Vm, VectorIndexS:$imm), + asm, "{\t$Vd.4s, $Vn.4s, $Vm.s$imm}", "$Vd = $Vdst", []> { + bits<2> imm; + bits<5> Vm; + let Inst{24-21} = 0b0010; + let Inst{20-16} = Vm; + let Inst{15} = 0b1; + let Inst{14} = op0; + let Inst{13-12} = imm; + let Inst{11-10} = op1; +} + +//---------------------------------------------------------------------------- +// v8.1 atomic instructions extension: +// * CAS +// * CASP +// * SWP +// * LDOPregister, and aliases STOPregister + +// Instruction encodings: +// +// 31 30|29 24|23|22|21|20 16|15|14 10|9 5|4 0 +// CAS SZ |001000|1 |A |1 |Rs |R |11111 |Rn |Rt +// CASP 0|SZ|001000|0 |A |1 |Rs |R |11111 |Rn |Rt +// SWP SZ |111000|A |R |1 |Rs |1 |OPC|00|Rn |Rt +// LD SZ |111000|A |R |1 |Rs |0 |OPC|00|Rn |Rt +// ST SZ |111000|A |R |1 |Rs |0 |OPC|00|Rn |11111 + +// Instruction syntax: +// +// CAS{}[] , , [] +// CAS{} , , [] +// CASP{} , , , , [] +// CASP{} , , , , [] +// SWP{}[] , , [] +// SWP{} , , [] +// LD{}[] , , [] +// LD{} , , [] +// ST{}[] , [] +// ST{} , [] + +let Predicates = [HasLSE], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in +class BaseCASEncoding pattern> + : I { + bits<2> Sz; + bit NP; + bit Acq; + bit Rel; + bits<5> Rs; + bits<5> Rn; + bits<5> Rt; + let Inst{31-30} = Sz; + let Inst{29-24} = 0b001000; + let Inst{23} = NP; + let Inst{22} = Acq; + let Inst{21} = 0b1; + let Inst{20-16} = Rs; + let Inst{15} = Rel; + let Inst{14-10} = 0b11111; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + let Predicates = [HasLSE]; +} + +class BaseCAS + : BaseCASEncoding<(outs RC:$out),(ins RC:$Rs, RC:$Rt, GPR64sp:$Rn), + "cas" # order # size, "\t$Rs, $Rt, [$Rn]", + "$out = $Rs",[]>, + Sched<[WriteAtomic]> { + let NP = 1; +} + +multiclass CompareAndSwap Acq, bits<1> Rel, string order> { + let Sz = 0b00, Acq = Acq, Rel = Rel in def B : BaseCAS; + let Sz = 0b01, Acq = Acq, Rel = Rel in def H : BaseCAS; + let Sz = 0b10, Acq = Acq, Rel = Rel in def W : BaseCAS; + let Sz = 0b11, Acq = Acq, Rel = Rel in def X : BaseCAS; +} + +class BaseCASP + : BaseCASEncoding<(outs RC:$out),(ins RC:$Rs, RC:$Rt, GPR64sp:$Rn), + "casp" # order # size, "\t$Rs, 
$Rt, [$Rn]", + "$out = $Rs",[]>, + Sched<[WriteAtomic]> { + let NP = 0; +} + +multiclass CompareAndSwapPair Acq, bits<1> Rel, string order> { + let Sz = 0b00, Acq = Acq, Rel = Rel in + def W : BaseCASP; + let Sz = 0b01, Acq = Acq, Rel = Rel in + def X : BaseCASP; +} + +let Predicates = [HasLSE] in +class BaseSWP + : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "swp" # order # size, + "\t$Rs, $Rt, [$Rn]","",[]>, + Sched<[WriteAtomic]> { + bits<2> Sz; + bit Acq; + bit Rel; + bits<5> Rs; + bits<3> opc = 0b000; + bits<5> Rn; + bits<5> Rt; + let Inst{31-30} = Sz; + let Inst{29-24} = 0b111000; + let Inst{23} = Acq; + let Inst{22} = Rel; + let Inst{21} = 0b1; + let Inst{20-16} = Rs; + let Inst{15} = 0b1; + let Inst{14-12} = opc; + let Inst{11-10} = 0b00; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + let Predicates = [HasLSE]; +} + +multiclass Swap Acq, bits<1> Rel, string order> { + let Sz = 0b00, Acq = Acq, Rel = Rel in def B : BaseSWP; + let Sz = 0b01, Acq = Acq, Rel = Rel in def H : BaseSWP; + let Sz = 0b10, Acq = Acq, Rel = Rel in def W : BaseSWP; + let Sz = 0b11, Acq = Acq, Rel = Rel in def X : BaseSWP; +} + +let Predicates = [HasLSE], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in +class BaseLDOPregister + : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "ld" # op # order # size, + "\t$Rs, $Rt, [$Rn]","",[]>, + Sched<[WriteAtomic]> { + bits<2> Sz; + bit Acq; + bit Rel; + bits<5> Rs; + bits<3> opc; + bits<5> Rn; + bits<5> Rt; + let Inst{31-30} = Sz; + let Inst{29-24} = 0b111000; + let Inst{23} = Acq; + let Inst{22} = Rel; + let Inst{21} = 0b1; + let Inst{20-16} = Rs; + let Inst{15} = 0b0; + let Inst{14-12} = opc; + let Inst{11-10} = 0b00; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + let Predicates = [HasLSE]; +} + +multiclass LDOPregister opc, string op, bits<1> Acq, bits<1> Rel, + string order> { + let Sz = 0b00, Acq = Acq, Rel = Rel, opc = opc in + def B : BaseLDOPregister; + let Sz = 0b01, Acq = Acq, Rel = Rel, opc = opc in + def H : BaseLDOPregister; + let Sz = 0b10, Acq = Acq, Rel = Rel, opc = opc in + def W : BaseLDOPregister; + let Sz = 0b11, Acq = Acq, Rel = Rel, opc = opc in + def X : BaseLDOPregister; +} + +// Differing SrcRHS and DstRHS allow you to cover CLR & SUB by giving a more +// complex DAG for DstRHS. 
+let Predicates = [HasLSE] in +multiclass LDOPregister_patterns_ord_dag { + def : Pat<(!cast(op#"_"#size#"_monotonic") GPR64sp:$Rn, SrcRHS), + (!cast(inst # suffix) DstRHS, GPR64sp:$Rn)>; + def : Pat<(!cast(op#"_"#size#"_acquire") GPR64sp:$Rn, SrcRHS), + (!cast(inst # "A" # suffix) DstRHS, GPR64sp:$Rn)>; + def : Pat<(!cast(op#"_"#size#"_release") GPR64sp:$Rn, SrcRHS), + (!cast(inst # "L" # suffix) DstRHS, GPR64sp:$Rn)>; + def : Pat<(!cast(op#"_"#size#"_acq_rel") GPR64sp:$Rn, SrcRHS), + (!cast(inst # "AL" # suffix) DstRHS, GPR64sp:$Rn)>; + def : Pat<(!cast(op#"_"#size#"_seq_cst") GPR64sp:$Rn, SrcRHS), + (!cast(inst # "AL" # suffix) DstRHS, GPR64sp:$Rn)>; +} + +multiclass LDOPregister_patterns_ord { + defm : LDOPregister_patterns_ord_dag; +} + +multiclass LDOPregister_patterns_ord_mod { + defm : LDOPregister_patterns_ord_dag; +} + +multiclass LDOPregister_patterns { + defm : LDOPregister_patterns_ord; + defm : LDOPregister_patterns_ord; + defm : LDOPregister_patterns_ord; + defm : LDOPregister_patterns_ord; +} + +multiclass LDOPregister_patterns_mod { + defm : LDOPregister_patterns_ord_mod(mod#Xrr) XZR, GPR64:$Rm))>; + defm : LDOPregister_patterns_ord_mod(mod#Wrr) WZR, GPR32:$Rm))>; + defm : LDOPregister_patterns_ord_mod(mod#Wrr) WZR, GPR32:$Rm))>; + defm : LDOPregister_patterns_ord_mod(mod#Wrr) WZR, GPR32:$Rm))>; +} + +let Predicates = [HasLSE] in +multiclass CASregister_patterns_ord_dag { + def : Pat<(!cast(op#"_"#size#"_monotonic") GPR64sp:$Rn, OLD, NEW), + (!cast(inst # suffix) OLD, NEW, GPR64sp:$Rn)>; + def : Pat<(!cast(op#"_"#size#"_acquire") GPR64sp:$Rn, OLD, NEW), + (!cast(inst # "A" # suffix) OLD, NEW, GPR64sp:$Rn)>; + def : Pat<(!cast(op#"_"#size#"_release") GPR64sp:$Rn, OLD, NEW), + (!cast(inst # "L" # suffix) OLD, NEW, GPR64sp:$Rn)>; + def : Pat<(!cast(op#"_"#size#"_acq_rel") GPR64sp:$Rn, OLD, NEW), + (!cast(inst # "AL" # suffix) OLD, NEW, GPR64sp:$Rn)>; + def : Pat<(!cast(op#"_"#size#"_seq_cst") GPR64sp:$Rn, OLD, NEW), + (!cast(inst # "AL" # suffix) OLD, NEW, GPR64sp:$Rn)>; +} + +multiclass CASregister_patterns_ord { + defm : CASregister_patterns_ord_dag; +} + +multiclass CASregister_patterns { + defm : CASregister_patterns_ord; + defm : CASregister_patterns_ord; + defm : CASregister_patterns_ord; + defm : CASregister_patterns_ord; +} + +let Predicates = [HasLSE] in +class BaseSTOPregister : + InstAlias; + +multiclass STOPregister { + def : BaseSTOPregister(instr # "LB")>; + def : BaseSTOPregister(instr # "LH")>; + def : BaseSTOPregister(instr # "LW")>; + def : BaseSTOPregister(instr # "LX")>; + def : BaseSTOPregister(instr # "B")>; + def : BaseSTOPregister(instr # "H")>; + def : BaseSTOPregister(instr # "W")>; + def : BaseSTOPregister(instr # "X")>; +} + +//---------------------------------------------------------------------------- +// Allow the size specifier tokens to be upper case, not just lower. 
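+// E.g. a vector operand written as "v0.16B" or "v1.4S" is accepted and
+// treated exactly like "v0.16b" / "v1.4s" by the assembly parser.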
+def : TokenAlias<".4B", ".4b">; // Add dot product +def : TokenAlias<".8B", ".8b">; +def : TokenAlias<".4H", ".4h">; +def : TokenAlias<".2S", ".2s">; +def : TokenAlias<".1D", ".1d">; +def : TokenAlias<".16B", ".16b">; +def : TokenAlias<".8H", ".8h">; +def : TokenAlias<".4S", ".4s">; +def : TokenAlias<".2D", ".2d">; +def : TokenAlias<".1Q", ".1q">; +def : TokenAlias<".2H", ".2h">; +def : TokenAlias<".B", ".b">; +def : TokenAlias<".H", ".h">; +def : TokenAlias<".S", ".s">; +def : TokenAlias<".D", ".d">; +def : TokenAlias<".Q", ".q">; diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64InstrInfo.td b/capstone/suite/synctools/tablegen/AArch64/AArch64InstrInfo.td new file mode 100644 index 000000000..d6b8bb5d8 --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64InstrInfo.td @@ -0,0 +1,6494 @@ +//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// AArch64 Instruction definitions. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ARM Instruction Predicate Definitions. +// +def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">, + AssemblerPredicate<"HasV8_1aOps", "armv8.1a">; +def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">, + AssemblerPredicate<"HasV8_2aOps", "armv8.2a">; +def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">, + AssemblerPredicate<"HasV8_3aOps", "armv8.3a">; +def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">, + AssemblerPredicate<"HasV8_4aOps", "armv8.4a">; +def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">, + AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">; +def HasNEON : Predicate<"Subtarget->hasNEON()">, + AssemblerPredicate<"FeatureNEON", "neon">; +def HasCrypto : Predicate<"Subtarget->hasCrypto()">, + AssemblerPredicate<"FeatureCrypto", "crypto">; +def HasSM4 : Predicate<"Subtarget->hasSM4()">, + AssemblerPredicate<"FeatureSM4", "sm4">; +def HasSHA3 : Predicate<"Subtarget->hasSHA3()">, + AssemblerPredicate<"FeatureSHA3", "sha3">; +def HasSHA2 : Predicate<"Subtarget->hasSHA2()">, + AssemblerPredicate<"FeatureSHA2", "sha2">; +def HasAES : Predicate<"Subtarget->hasAES()">, + AssemblerPredicate<"FeatureAES", "aes">; +def HasDotProd : Predicate<"Subtarget->hasDotProd()">, + AssemblerPredicate<"FeatureDotProd", "dotprod">; +def HasCRC : Predicate<"Subtarget->hasCRC()">, + AssemblerPredicate<"FeatureCRC", "crc">; +def HasLSE : Predicate<"Subtarget->hasLSE()">, + AssemblerPredicate<"FeatureLSE", "lse">; +def HasRAS : Predicate<"Subtarget->hasRAS()">, + AssemblerPredicate<"FeatureRAS", "ras">; +def HasRDM : Predicate<"Subtarget->hasRDM()">, + AssemblerPredicate<"FeatureRDM", "rdm">; +def HasPerfMon : Predicate<"Subtarget->hasPerfMon()">; +def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">, + AssemblerPredicate<"FeatureFullFP16", "fullfp16">; +def HasSPE : Predicate<"Subtarget->hasSPE()">, + AssemblerPredicate<"FeatureSPE", "spe">; +def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">, + AssemblerPredicate<"FeatureFuseAES", + "fuse-aes">; +def HasSVE : Predicate<"Subtarget->hasSVE()">, + AssemblerPredicate<"FeatureSVE", "sve">; +def HasRCPC : Predicate<"Subtarget->hasRCPC()">, + AssemblerPredicate<"FeatureRCPC", "rcpc">; + +def IsLE : 
Predicate<"Subtarget->isLittleEndian()">; +def IsBE : Predicate<"!Subtarget->isLittleEndian()">; +def UseAlternateSExtLoadCVTF32 + : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">; + +def UseNegativeImmediates + : Predicate<"false">, AssemblerPredicate<"!FeatureNoNegativeImmediates", + "NegativeImmediates">; + + +//===----------------------------------------------------------------------===// +// AArch64-specific DAG Nodes. +// + +// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS +def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2, + [SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisInt<0>, SDTCisVT<1, i32>]>; + +// SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS +def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3, + [SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisInt<0>, + SDTCisVT<3, i32>]>; + +// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS +def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, + [SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisInt<0>, + SDTCisVT<1, i32>, + SDTCisVT<4, i32>]>; + +def SDT_AArch64Brcond : SDTypeProfile<0, 3, + [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>]>; +def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>; +def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, + SDTCisVT<2, OtherVT>]>; + + +def SDT_AArch64CSel : SDTypeProfile<1, 4, + [SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisInt<3>, + SDTCisVT<4, i32>]>; +def SDT_AArch64CCMP : SDTypeProfile<1, 5, + [SDTCisVT<0, i32>, + SDTCisInt<1>, + SDTCisSameAs<1, 2>, + SDTCisInt<3>, + SDTCisInt<4>, + SDTCisVT<5, i32>]>; +def SDT_AArch64FCCMP : SDTypeProfile<1, 5, + [SDTCisVT<0, i32>, + SDTCisFP<1>, + SDTCisSameAs<1, 2>, + SDTCisInt<3>, + SDTCisInt<4>, + SDTCisVT<5, i32>]>; +def SDT_AArch64FCmp : SDTypeProfile<0, 2, + [SDTCisFP<0>, + SDTCisSameAs<0, 1>]>; +def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>; +def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>; +def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>]>; +def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>; +def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>; +def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisInt<2>, SDTCisInt<3>]>; +def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; +def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>, SDTCisInt<3>]>; +def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>; + +def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; +def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>; +def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>; +def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>; +def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>, + SDTCisSameAs<0,3>]>; +def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>; +def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>; + +def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>; + +def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, + SDTCisPtrTy<1>]>; + +// Generates the general dynamic sequences, i.e. 
+// adrp x0, :tlsdesc:var +// ldr x1, [x0, #:tlsdesc_lo12:var] +// add x0, x0, #:tlsdesc_lo12:var +// .tlsdesccall var +// blr x1 + +// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here) +// number of operands (the variable) +def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0,1, + [SDTCisPtrTy<0>]>; + +def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4, + [SDTCisVT<0, i64>, SDTCisVT<1, i32>, + SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>, + SDTCisSameAs<1, 4>]>; + + +// Node definitions. +def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>; +def AArch64addlow : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>; +def AArch64LOADgot : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>; +def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START", + SDCallSeqStart<[ SDTCisVT<0, i32>, + SDTCisVT<1, i32> ]>, + [SDNPHasChain, SDNPOutGlue]>; +def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END", + SDCallSeqEnd<[ SDTCisVT<0, i32>, + SDTCisVT<1, i32> ]>, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +def AArch64call : SDNode<"AArch64ISD::CALL", + SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; +def AArch64brcond : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond, + [SDNPHasChain]>; +def AArch64cbz : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz, + [SDNPHasChain]>; +def AArch64cbnz : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz, + [SDNPHasChain]>; +def AArch64tbz : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz, + [SDNPHasChain]>; +def AArch64tbnz : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz, + [SDNPHasChain]>; + + +def AArch64csel : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>; +def AArch64csinv : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>; +def AArch64csneg : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>; +def AArch64csinc : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>; +def AArch64retflag : SDNode<"AArch64ISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def AArch64adc : SDNode<"AArch64ISD::ADC", SDTBinaryArithWithFlagsIn >; +def AArch64sbc : SDNode<"AArch64ISD::SBC", SDTBinaryArithWithFlagsIn>; +def AArch64add_flag : SDNode<"AArch64ISD::ADDS", SDTBinaryArithWithFlagsOut, + [SDNPCommutative]>; +def AArch64sub_flag : SDNode<"AArch64ISD::SUBS", SDTBinaryArithWithFlagsOut>; +def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut, + [SDNPCommutative]>; +def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>; +def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>; + +def AArch64ccmp : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>; +def AArch64ccmn : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>; +def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>; + +def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>; + +def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>; + +def AArch64dup : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>; +def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>; +def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>; +def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>; +def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>; + +def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>; +def AArch64zip2 : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>; +def AArch64uzp1 : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>; +def AArch64uzp2 : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>; +def AArch64trn1 : SDNode<"AArch64ISD::TRN1", 
SDT_AArch64Zip>; +def AArch64trn2 : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>; + +def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>; +def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>; +def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>; +def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>; +def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>; +def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>; +def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>; + +def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>; +def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>; +def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>; +def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>; + +def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>; +def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>; +def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>; +def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>; +def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>; +def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>; +def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>; +def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>; + +def AArch64not: SDNode<"AArch64ISD::NOT", SDT_AArch64unvec>; +def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>; +def AArch64bsl: SDNode<"AArch64ISD::BSL", SDT_AArch64trivec>; + +def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>; +def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>; +def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>; +def AArch64cmhi: SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>; +def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>; + +def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>; +def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>; +def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>; + +def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>; +def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>; +def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>; +def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>; +def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>; +def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS), + (AArch64not (AArch64cmeqz (and node:$LHS, node:$RHS)))>; + +def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>; +def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>; +def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>; +def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>; +def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>; + +def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>; +def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>; + +def AArch64neg : SDNode<"AArch64ISD::NEG", SDT_AArch64unvec>; + +def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH, + [SDNPHasChain, SDNPSideEffect]>; + +def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>; +def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>; + +def AArch64tlsdesc_callseq : 
SDNode<"AArch64ISD::TLSDESC_CALLSEQ", + SDT_AArch64TLSDescCallSeq, + [SDNPInGlue, SDNPOutGlue, SDNPHasChain, + SDNPVariadic]>; + + +def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge", + SDT_AArch64WrapperLarge>; + +def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>; + +def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, + SDTCisSameAs<1, 2>]>; +def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull>; +def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull>; + +def AArch64frecpe : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>; +def AArch64frecps : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>; +def AArch64frsqrte : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>; +def AArch64frsqrts : SDNode<"AArch64ISD::FRSQRTS", SDTFPBinOp>; + +def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>; +def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>; +def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>; +def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>; +def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>; +def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>; + +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// + +// AArch64 Instruction Predicate Definitions. +// We could compute these on a per-module basis but doing so requires accessing +// the Function object through the Subtarget and objections were raised +// to that (see post-commit review comments for r301750). +let RecomputePerFunction = 1 in { + def ForCodeSize : Predicate<"MF->getFunction().optForSize()">; + def NotForCodeSize : Predicate<"!MF->getFunction().optForSize()">; + // Avoid generating STRQro if it is slow, unless we're optimizing for code size. + def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || MF->getFunction().optForSize()">; +} + +include "AArch64InstrFormats.td" +include "SVEInstrFormats.td" + +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Miscellaneous instructions. +//===----------------------------------------------------------------------===// + +let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in { +// We set Sched to empty list because we expect these instructions to simply get +// removed in most cases. +def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), + [(AArch64callseq_start timm:$amt1, timm:$amt2)]>, + Sched<[]>; +def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), + [(AArch64callseq_end timm:$amt1, timm:$amt2)]>, + Sched<[]>; +} // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 + +let isReMaterializable = 1, isCodeGenOnly = 1 in { +// FIXME: The following pseudo instructions are only needed because remat +// cannot handle multiple instructions. When that changes, they can be +// removed, along with the AArch64Wrapper node. + +let AddedComplexity = 10 in +def LOADgot : Pseudo<(outs GPR64:$dst), (ins i64imm:$addr), + [(set GPR64:$dst, (AArch64LOADgot tglobaladdr:$addr))]>, + Sched<[WriteLDAdr]>; + +// The MOVaddr instruction should match only when the add is not folded +// into a load or store address. 
+def MOVaddr + : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), + [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi), + tglobaladdr:$low))]>, + Sched<[WriteAdrAdr]>; +def MOVaddrJT + : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), + [(set GPR64:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi), + tjumptable:$low))]>, + Sched<[WriteAdrAdr]>; +def MOVaddrCP + : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), + [(set GPR64:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi), + tconstpool:$low))]>, + Sched<[WriteAdrAdr]>; +def MOVaddrBA + : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), + [(set GPR64:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi), + tblockaddress:$low))]>, + Sched<[WriteAdrAdr]>; +def MOVaddrTLS + : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), + [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi), + tglobaltlsaddr:$low))]>, + Sched<[WriteAdrAdr]>; +def MOVaddrEXT + : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), + [(set GPR64:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi), + texternalsym:$low))]>, + Sched<[WriteAdrAdr]>; +// Normally AArch64addlow either gets folded into a following ldr/str, +// or together with an adrp into MOVaddr above. For cases with TLS, it +// might appear without either of them, so allow lowering it into a plain +// add. +def ADDlowTLS + : Pseudo<(outs GPR64:$dst), (ins GPR64:$src, i64imm:$low), + [(set GPR64:$dst, (AArch64addlow GPR64:$src, + tglobaltlsaddr:$low))]>, + Sched<[WriteAdr]>; + +} // isReMaterializable, isCodeGenOnly + +def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr), + (LOADgot tglobaltlsaddr:$addr)>; + +def : Pat<(AArch64LOADgot texternalsym:$addr), + (LOADgot texternalsym:$addr)>; + +def : Pat<(AArch64LOADgot tconstpool:$addr), + (LOADgot tconstpool:$addr)>; + +//===----------------------------------------------------------------------===// +// System instructions. +//===----------------------------------------------------------------------===// + +def HINT : HintI<"hint">; +def : InstAlias<"nop", (HINT 0b000)>; +def : InstAlias<"yield",(HINT 0b001)>; +def : InstAlias<"wfe", (HINT 0b010)>; +def : InstAlias<"wfi", (HINT 0b011)>; +def : InstAlias<"sev", (HINT 0b100)>; +def : InstAlias<"sevl", (HINT 0b101)>; +def : InstAlias<"esb", (HINT 0b10000)>, Requires<[HasRAS]>; +def : InstAlias<"csdb", (HINT 20)>; + +// v8.2a Statistical Profiling extension +def : InstAlias<"psb $op", (HINT psbhint_op:$op)>, Requires<[HasSPE]>; + +// As far as LLVM is concerned this writes to the system's exclusive monitors. +let mayLoad = 1, mayStore = 1 in +def CLREX : CRmSystemI; + +// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot +// model patterns with sufficiently fine granularity. +let mayLoad = ?, mayStore = ? 
in { +def DMB : CRmSystemI; + +def DSB : CRmSystemI; + +def ISB : CRmSystemI; + +def TSB : CRmSystemI { + let CRm = 0b0010; + let Inst{12} = 0; + let Predicates = [HasV8_4a]; +} +} + +// ARMv8.2 Dot Product +let Predicates = [HasDotProd] in { +defm SDOT : SIMDThreeSameVectorDot<0, "sdot", int_aarch64_neon_sdot>; +defm UDOT : SIMDThreeSameVectorDot<1, "udot", int_aarch64_neon_udot>; +defm SDOTlane : SIMDThreeSameVectorDotIndex<0, "sdot", int_aarch64_neon_sdot>; +defm UDOTlane : SIMDThreeSameVectorDotIndex<1, "udot", int_aarch64_neon_udot>; +} + +// Armv8.2-A Crypto extensions +let Predicates = [HasSHA3] in { +def SHA512H : CryptoRRRTied<0b0, 0b00, "sha512h">; +def SHA512H2 : CryptoRRRTied<0b0, 0b01, "sha512h2">; +def SHA512SU0 : CryptoRRTied_2D<0b0, 0b00, "sha512su0">; +def SHA512SU1 : CryptoRRRTied_2D<0b0, 0b10, "sha512su1">; +def RAX1 : CryptoRRR_2D<0b0,0b11, "rax1">; +def EOR3 : CryptoRRRR_16B<0b00, "eor3">; +def BCAX : CryptoRRRR_16B<0b01, "bcax">; +def XAR : CryptoRRRi6<"xar">; +} // HasSHA3 + +let Predicates = [HasSM4] in { +def SM3TT1A : CryptoRRRi2Tied<0b0, 0b00, "sm3tt1a">; +def SM3TT1B : CryptoRRRi2Tied<0b0, 0b01, "sm3tt1b">; +def SM3TT2A : CryptoRRRi2Tied<0b0, 0b10, "sm3tt2a">; +def SM3TT2B : CryptoRRRi2Tied<0b0, 0b11, "sm3tt2b">; +def SM3SS1 : CryptoRRRR_4S<0b10, "sm3ss1">; +def SM3PARTW1 : CryptoRRRTied_4S<0b1, 0b00, "sm3partw1">; +def SM3PARTW2 : CryptoRRRTied_4S<0b1, 0b01, "sm3partw2">; +def SM4ENCKEY : CryptoRRR_4S<0b1, 0b10, "sm4ekey">; +def SM4E : CryptoRRTied_4S<0b0, 0b01, "sm4e">; +} // HasSM4 + +let Predicates = [HasRCPC] in { + // v8.3 Release Consistent Processor Consistent support, optional in v8.2. + def LDAPRB : RCPCLoad<0b00, "ldaprb", GPR32>; + def LDAPRH : RCPCLoad<0b01, "ldaprh", GPR32>; + def LDAPRW : RCPCLoad<0b10, "ldapr", GPR32>; + def LDAPRX : RCPCLoad<0b11, "ldapr", GPR64>; +} + +// v8.3a complex add and multiply-accumulate. No predicate here, that is done +// inside the multiclass as the FP16 versions need different predicates. 
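+// The rotation operand comes from the complexrotateop/complexrotateopodd
+// operands in the format definitions: fcmla accepts #0/#90/#180/#270 and
+// fcadd only #90/#270, e.g. "fcmla v0.4s, v1.4s, v2.4s, #90".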
+defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop, + "fcmla", null_frag>; +defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd, + "fcadd", null_frag>; +defm FCMLA : SIMDIndexedTiedComplexHSD<1, 0, 1, complexrotateop, "fcmla", + null_frag>; + +// v8.3a Pointer Authentication +// These instructions inhabit part of the hint space and so can be used for +// armv8 targets +let Uses = [LR], Defs = [LR] in { + def PACIAZ : SystemNoOperands<0b000, "paciaz">; + def PACIBZ : SystemNoOperands<0b010, "pacibz">; + def AUTIAZ : SystemNoOperands<0b100, "autiaz">; + def AUTIBZ : SystemNoOperands<0b110, "autibz">; +} +let Uses = [LR, SP], Defs = [LR] in { + def PACIASP : SystemNoOperands<0b001, "paciasp">; + def PACIBSP : SystemNoOperands<0b011, "pacibsp">; + def AUTIASP : SystemNoOperands<0b101, "autiasp">; + def AUTIBSP : SystemNoOperands<0b111, "autibsp">; +} +let Uses = [X16, X17], Defs = [X17], CRm = 0b0001 in { + def PACIA1716 : SystemNoOperands<0b000, "pacia1716">; + def PACIB1716 : SystemNoOperands<0b010, "pacib1716">; + def AUTIA1716 : SystemNoOperands<0b100, "autia1716">; + def AUTIB1716 : SystemNoOperands<0b110, "autib1716">; +} + +let Uses = [LR], Defs = [LR], CRm = 0b0000 in { + def XPACLRI : SystemNoOperands<0b111, "xpaclri">; +} + +// These pointer authentication isntructions require armv8.3a +let Predicates = [HasV8_3a] in { + multiclass SignAuth prefix, bits<3> prefix_z, string asm> { + def IA : SignAuthOneData; + def IB : SignAuthOneData; + def DA : SignAuthOneData; + def DB : SignAuthOneData; + def IZA : SignAuthZero; + def DZA : SignAuthZero; + def IZB : SignAuthZero; + def DZB : SignAuthZero; + } + + defm PAC : SignAuth<0b000, 0b010, "pac">; + defm AUT : SignAuth<0b001, 0b011, "aut">; + + def XPACI : SignAuthZero<0b100, 0b00, "xpaci">; + def XPACD : SignAuthZero<0b100, 0b01, "xpacd">; + def PACGA : SignAuthTwoOperand<0b1100, "pacga", null_frag>; + + // Combined Instructions + def BRAA : AuthBranchTwoOperands<0, 0, "braa">; + def BRAB : AuthBranchTwoOperands<0, 1, "brab">; + def BLRAA : AuthBranchTwoOperands<1, 0, "blraa">; + def BLRAB : AuthBranchTwoOperands<1, 1, "blrab">; + + def BRAAZ : AuthOneOperand<0b000, 0, "braaz">; + def BRABZ : AuthOneOperand<0b000, 1, "brabz">; + def BLRAAZ : AuthOneOperand<0b001, 0, "blraaz">; + def BLRABZ : AuthOneOperand<0b001, 1, "blrabz">; + + let isReturn = 1, isTerminator = 1, isBarrier = 1 in { + def RETAA : AuthReturn<0b010, 0, "retaa">; + def RETAB : AuthReturn<0b010, 1, "retab">; + def ERETAA : AuthReturn<0b100, 0, "eretaa">; + def ERETAB : AuthReturn<0b100, 1, "eretab">; + } + + defm LDRAA : AuthLoad<0, "ldraa", simm10Scaled>; + defm LDRAB : AuthLoad<1, "ldrab", simm10Scaled>; + + // v8.3a floating point conversion for javascript + let Predicates = [HasV8_3a, HasFPARMv8] in + def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32, + "fjcvtzs", []> { + let Inst{31} = 0; + } + +} // HasV8_3a + +// v8.4 Flag manipulation instructions +let Predicates = [HasV8_4a] in { +def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> { + let Inst{20-5} = 0b0000001000000000; +} +def SETF8 : BaseFlagManipulation<0, 0, (ins GPR32:$Rn), "setf8", "{\t$Rn}">; +def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">; +def RMIF : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif", + "{\t$Rn, $imm, $mask}">; +} // HasV8_4a + +def : InstAlias<"clrex", (CLREX 0xf)>; +def : InstAlias<"isb", (ISB 0xf)>; + +def MRS : MRSI; +def MSR : MSRI; +def MSRpstateImm1 : 
MSRpstateImm0_1; +def MSRpstateImm4 : MSRpstateImm0_15; + +// The thread pointer (on Linux, at least, where this has been implemented) is +// TPIDR_EL0. +def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins), + [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>; + +// The cycle counter PMC register is PMCCNTR_EL0. +let Predicates = [HasPerfMon] in +def : Pat<(readcyclecounter), (MRS 0xdce8)>; + +// FPCR register +def : Pat<(i64 (int_aarch64_get_fpcr)), (MRS 0xda20)>; + +// Generic system instructions +def SYSxt : SystemXtI<0, "sys">; +def SYSLxt : SystemLXtI<1, "sysl">; + +def : InstAlias<"sys $op1, $Cn, $Cm, $op2", + (SYSxt imm0_7:$op1, sys_cr_op:$Cn, + sys_cr_op:$Cm, imm0_7:$op2, XZR)>; + +//===----------------------------------------------------------------------===// +// Move immediate instructions. +//===----------------------------------------------------------------------===// + +defm MOVK : InsertImmediate<0b11, "movk">; +defm MOVN : MoveImmediate<0b00, "movn">; + +let PostEncoderMethod = "fixMOVZ" in +defm MOVZ : MoveImmediate<0b10, "movz">; + +// First group of aliases covers an implicit "lsl #0". +def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, imm0_65535:$imm, 0), 0>; +def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, imm0_65535:$imm, 0), 0>; +def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, imm0_65535:$imm, 0)>; +def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, imm0_65535:$imm, 0)>; +def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, imm0_65535:$imm, 0)>; +def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, imm0_65535:$imm, 0)>; + +// Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax. +def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>; +def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>; +def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>; +def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>; + +def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>; +def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>; +def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>; +def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>; + +def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g3:$sym, 48), 0>; +def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g2:$sym, 32), 0>; +def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g1:$sym, 16), 0>; +def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g0:$sym, 0), 0>; + +def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>; +def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>; + +def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>; +def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>; + +def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g1:$sym, 16), 0>; +def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g0:$sym, 0), 0>; + +// Final group of aliases covers true "mov $Rd, $imm" cases. 
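+// E.g. "mov x0, #0x20000" is accepted via the MOVZ alias (movz x0, #2,
+// lsl #16), and a small negative value such as "mov w0, #-2" via the MOVN
+// alias (movn w0, #1), whenever the immediate fits a single MOVZ/MOVN.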
+multiclass movw_mov_alias { + def _asmoperand : AsmOperandClass { + let Name = basename # width # "_lsl" # shift # "MovAlias"; + let PredicateMethod = "is" # basename # "MovAlias<" # width # ", " + # shift # ">"; + let RenderMethod = "add" # basename # "MovAliasOperands<" # shift # ">"; + } + + def _movimm : Operand { + let ParserMatchClass = !cast(NAME # "_asmoperand"); + } + + def : InstAlias<"mov $Rd, $imm", + (INST GPR:$Rd, !cast(NAME # "_movimm"):$imm, shift)>; +} + +defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 0>; +defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>; + +defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 0>; +defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>; +defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>; +defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>; + +defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 0>; +defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>; + +defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 0>; +defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>; +defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>; +defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>; + +let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1, + isAsCheapAsAMove = 1 in { +// FIXME: The following pseudo instructions are only needed because remat +// cannot handle multiple instructions. When that changes, we can select +// directly to the real instructions and get rid of these pseudos. + +def MOVi32imm + : Pseudo<(outs GPR32:$dst), (ins i32imm:$src), + [(set GPR32:$dst, imm:$src)]>, + Sched<[WriteImm]>; +def MOVi64imm + : Pseudo<(outs GPR64:$dst), (ins i64imm:$src), + [(set GPR64:$dst, imm:$src)]>, + Sched<[WriteImm]>; +} // isReMaterializable, isCodeGenOnly + +// If possible, we want to use MOVi32imm even for 64-bit moves. This gives the +// eventual expansion code fewer bits to worry about getting right. Marshalling +// the types is a little tricky though: +def i64imm_32bit : ImmLeaf(Imm); +}]>; + +def s64imm_32bit : ImmLeaf(Imm); + return Imm64 >= std::numeric_limits::min() && + Imm64 <= std::numeric_limits::max(); +}]>; + +def trunc_imm : SDNodeXFormgetTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32); +}]>; + +def gi_trunc_imm : GICustomOperandRenderer<"renderTruncImm">, + GISDNodeXFormEquiv; + +def : Pat<(i64 i64imm_32bit:$src), + (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>; + +// Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model). +def bitcast_fpimm_to_i32 : SDNodeXFormgetTargetConstant( + N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32); +}]>; + +def bitcast_fpimm_to_i64 : SDNodeXFormgetTargetConstant( + N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64); +}]>; + + +def : Pat<(f32 fpimm:$in), + (COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>; +def : Pat<(f64 fpimm:$in), + (COPY_TO_REGCLASS (MOVi64imm (bitcast_fpimm_to_i64 f64:$in)), FPR64)>; + + +// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK +// sequences. 
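As a usage note for the large-addressing patterns that follow: a full 64-bit value is materialized as one MOVZ plus up to three MOVK instructions, one 16-bit chunk per shift position. The sketch below only demonstrates that the chunk-wise insertion reproduces the original value; it is illustrative, not the expansion code itself.

#include <cassert>
#include <cstdint>

// Chunk-wise rebuild of a 64-bit value, mirroring MOVZ #g0 / MOVK #g1..#g3.
uint64_t materialize64(uint64_t value) {
  uint64_t reg = value & 0xffff;                        // MOVZ Xd, #g0
  for (unsigned shift = 16; shift < 64; shift += 16) {  // MOVK Xd, #gN, lsl #shift
    uint64_t mask = 0xffffULL << shift;
    reg = (reg & ~mask) | (value & mask);
  }
  return reg;
}

int main() {
  assert(materialize64(0x1122334455667788ULL) == 0x1122334455667788ULL);
}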
+def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2, + tglobaladdr:$g1, tglobaladdr:$g0), + (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g0, 0), + tglobaladdr:$g1, 16), + tglobaladdr:$g2, 32), + tglobaladdr:$g3, 48)>; + +def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2, + tblockaddress:$g1, tblockaddress:$g0), + (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g0, 0), + tblockaddress:$g1, 16), + tblockaddress:$g2, 32), + tblockaddress:$g3, 48)>; + +def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2, + tconstpool:$g1, tconstpool:$g0), + (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g0, 0), + tconstpool:$g1, 16), + tconstpool:$g2, 32), + tconstpool:$g3, 48)>; + +def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2, + tjumptable:$g1, tjumptable:$g0), + (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g0, 0), + tjumptable:$g1, 16), + tjumptable:$g2, 32), + tjumptable:$g3, 48)>; + + +//===----------------------------------------------------------------------===// +// Arithmetic instructions. +//===----------------------------------------------------------------------===// + +// Add/subtract with carry. +defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>; +defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>; + +def : InstAlias<"ngc $dst, $src", (SBCWr GPR32:$dst, WZR, GPR32:$src)>; +def : InstAlias<"ngc $dst, $src", (SBCXr GPR64:$dst, XZR, GPR64:$src)>; +def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>; +def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>; + +// Add/subtract +defm ADD : AddSub<0, "add", "sub", add>; +defm SUB : AddSub<1, "sub", "add">; + +def : InstAlias<"mov $dst, $src", + (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>; +def : InstAlias<"mov $dst, $src", + (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>; +def : InstAlias<"mov $dst, $src", + (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>; +def : InstAlias<"mov $dst, $src", + (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>; + +defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">; +defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">; + +// Use SUBS instead of SUB to enable CSE between SUBS and SUB. +def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm), + (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>; +def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm), + (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>; +def : Pat<(sub GPR32:$Rn, GPR32:$Rm), + (SUBSWrr GPR32:$Rn, GPR32:$Rm)>; +def : Pat<(sub GPR64:$Rn, GPR64:$Rm), + (SUBSXrr GPR64:$Rn, GPR64:$Rm)>; +def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm), + (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>; +def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm), + (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>; +let AddedComplexity = 1 in { +def : Pat<(sub GPR32sp:$R2, arith_extended_reg32:$R3), + (SUBSWrx GPR32sp:$R2, arith_extended_reg32:$R3)>; +def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64:$R3), + (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64:$R3)>; +} + +// Because of the immediate format for add/sub-imm instructions, the +// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). +// These patterns capture that transformation. 
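The transformation described in the comment above is needed because the add/sub immediate field is an unsigned 12-bit value (optionally shifted), so a negative addend has to be folded into the opposite opcode. A minimal C++ check of the identity the patterns below rely on; names here are illustrative.

#include <cassert>
#include <cstdint>

// (add x, -imm) and (sub x, imm) agree in two's-complement arithmetic.
uint32_t sub_wri(uint32_t x, uint32_t imm12) { return x - imm12; }  // what SUBWri computes

int main() {
  uint32_t x = 100;
  assert(x + static_cast<uint32_t>(-1) == sub_wri(x, 1));  // add x, -1  ==  sub x, 1
}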
+let AddedComplexity = 1 in { +def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm), + (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; +def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm), + (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; +def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm), + (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; +def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm), + (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; +} + +// Because of the immediate format for add/sub-imm instructions, the +// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). +// These patterns capture that transformation. +let AddedComplexity = 1 in { +def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), + (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; +def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), + (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; +def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), + (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; +def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), + (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; +} + +def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>; +def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>; +def : InstAlias<"neg $dst, $src$shift", + (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>; +def : InstAlias<"neg $dst, $src$shift", + (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>; + +def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>; +def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>; +def : InstAlias<"negs $dst, $src$shift", + (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>; +def : InstAlias<"negs $dst, $src$shift", + (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>; + + +// Unsigned/Signed divide +defm UDIV : Div<0, "udiv", udiv>; +defm SDIV : Div<1, "sdiv", sdiv>; + +def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr GPR32:$Rn, GPR32:$Rm)>; +def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr GPR64:$Rn, GPR64:$Rm)>; +def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr GPR32:$Rn, GPR32:$Rm)>; +def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr GPR64:$Rn, GPR64:$Rm)>; + +// Variable shift +defm ASRV : Shift<0b10, "asr", sra>; +defm LSLV : Shift<0b00, "lsl", shl>; +defm LSRV : Shift<0b01, "lsr", srl>; +defm RORV : Shift<0b11, "ror", rotr>; + +def : ShiftAlias<"asrv", ASRVWr, GPR32>; +def : ShiftAlias<"asrv", ASRVXr, GPR64>; +def : ShiftAlias<"lslv", LSLVWr, GPR32>; +def : ShiftAlias<"lslv", LSLVXr, GPR64>; +def : ShiftAlias<"lsrv", LSRVWr, GPR32>; +def : ShiftAlias<"lsrv", LSRVXr, GPR64>; +def : ShiftAlias<"rorv", RORVWr, GPR32>; +def : ShiftAlias<"rorv", RORVXr, GPR64>; + +// Multiply-add +let AddedComplexity = 5 in { +defm MADD : MulAccum<0, "madd", add>; +defm MSUB : MulAccum<1, "msub", sub>; + +def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)), + (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; +def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)), + (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; + +def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))), + (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; +def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))), + (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; +def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)), + (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; +def : Pat<(i64 (mul (ineg GPR64:$Rn), 
GPR64:$Rm)), + (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; +} // AddedComplexity = 5 + +let AddedComplexity = 5 in { +def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>; +def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>; +def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>; +def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>; + +def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))), + (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; +def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))), + (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; + +def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))), + (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; +def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))), + (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; + +def : Pat<(i64 (mul (sext GPR32:$Rn), (s64imm_32bit:$C))), + (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; +def : Pat<(i64 (mul (zext GPR32:$Rn), (i64imm_32bit:$C))), + (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; +def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C))), + (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), + (MOVi32imm (trunc_imm imm:$C)), XZR)>; + +def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))), + (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; +def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))), + (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; +def : Pat<(i64 (ineg (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)))), + (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), + (MOVi32imm (trunc_imm imm:$C)), XZR)>; + +def : Pat<(i64 (add (mul (sext GPR32:$Rn), (s64imm_32bit:$C)), GPR64:$Ra)), + (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; +def : Pat<(i64 (add (mul (zext GPR32:$Rn), (i64imm_32bit:$C)), GPR64:$Ra)), + (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; +def : Pat<(i64 (add (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)), + GPR64:$Ra)), + (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), + (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; + +def : Pat<(i64 (sub GPR64:$Ra, (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))), + (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; +def : Pat<(i64 (sub GPR64:$Ra, (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))), + (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; +def : Pat<(i64 (sub GPR64:$Ra, (mul (sext_inreg GPR64:$Rn, i32), + (s64imm_32bit:$C)))), + (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), + (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; +} // AddedComplexity = 5 + +def : MulAccumWAlias<"mul", MADDWrrr>; +def : MulAccumXAlias<"mul", MADDXrrr>; +def : MulAccumWAlias<"mneg", MSUBWrrr>; +def : MulAccumXAlias<"mneg", MSUBXrrr>; +def : WideMulAccumAlias<"smull", SMADDLrrr>; +def : WideMulAccumAlias<"smnegl", SMSUBLrrr>; +def : WideMulAccumAlias<"umull", UMADDLrrr>; +def : WideMulAccumAlias<"umnegl", UMSUBLrrr>; + +// Multiply-high +def SMULHrr : MulHi<0b010, "smulh", mulhs>; +def UMULHrr : MulHi<0b110, "umulh", mulhu>; + +// CRC32 +def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">; +def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">; +def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">; +def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">; + +def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">; +def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, 
"crc32ch">; +def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">; +def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">; + +// v8.1 atomic CAS +defm CAS : CompareAndSwap<0, 0, "">; +defm CASA : CompareAndSwap<1, 0, "a">; +defm CASL : CompareAndSwap<0, 1, "l">; +defm CASAL : CompareAndSwap<1, 1, "al">; + +// v8.1 atomic CASP +defm CASP : CompareAndSwapPair<0, 0, "">; +defm CASPA : CompareAndSwapPair<1, 0, "a">; +defm CASPL : CompareAndSwapPair<0, 1, "l">; +defm CASPAL : CompareAndSwapPair<1, 1, "al">; + +// v8.1 atomic SWP +defm SWP : Swap<0, 0, "">; +defm SWPA : Swap<1, 0, "a">; +defm SWPL : Swap<0, 1, "l">; +defm SWPAL : Swap<1, 1, "al">; + +// v8.1 atomic LD(register). Performs load and then ST(register) +defm LDADD : LDOPregister<0b000, "add", 0, 0, "">; +defm LDADDA : LDOPregister<0b000, "add", 1, 0, "a">; +defm LDADDL : LDOPregister<0b000, "add", 0, 1, "l">; +defm LDADDAL : LDOPregister<0b000, "add", 1, 1, "al">; + +defm LDCLR : LDOPregister<0b001, "clr", 0, 0, "">; +defm LDCLRA : LDOPregister<0b001, "clr", 1, 0, "a">; +defm LDCLRL : LDOPregister<0b001, "clr", 0, 1, "l">; +defm LDCLRAL : LDOPregister<0b001, "clr", 1, 1, "al">; + +defm LDEOR : LDOPregister<0b010, "eor", 0, 0, "">; +defm LDEORA : LDOPregister<0b010, "eor", 1, 0, "a">; +defm LDEORL : LDOPregister<0b010, "eor", 0, 1, "l">; +defm LDEORAL : LDOPregister<0b010, "eor", 1, 1, "al">; + +defm LDSET : LDOPregister<0b011, "set", 0, 0, "">; +defm LDSETA : LDOPregister<0b011, "set", 1, 0, "a">; +defm LDSETL : LDOPregister<0b011, "set", 0, 1, "l">; +defm LDSETAL : LDOPregister<0b011, "set", 1, 1, "al">; + +defm LDSMAX : LDOPregister<0b100, "smax", 0, 0, "">; +defm LDSMAXA : LDOPregister<0b100, "smax", 1, 0, "a">; +defm LDSMAXL : LDOPregister<0b100, "smax", 0, 1, "l">; +defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">; + +defm LDSMIN : LDOPregister<0b101, "smin", 0, 0, "">; +defm LDSMINA : LDOPregister<0b101, "smin", 1, 0, "a">; +defm LDSMINL : LDOPregister<0b101, "smin", 0, 1, "l">; +defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">; + +defm LDUMAX : LDOPregister<0b110, "umax", 0, 0, "">; +defm LDUMAXA : LDOPregister<0b110, "umax", 1, 0, "a">; +defm LDUMAXL : LDOPregister<0b110, "umax", 0, 1, "l">; +defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">; + +defm LDUMIN : LDOPregister<0b111, "umin", 0, 0, "">; +defm LDUMINA : LDOPregister<0b111, "umin", 1, 0, "a">; +defm LDUMINL : LDOPregister<0b111, "umin", 0, 1, "l">; +defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">; + +// v8.1 atomic ST(register) as aliases to "LD(register) when Rt=xZR" +defm : STOPregister<"stadd","LDADD">; // STADDx +defm : STOPregister<"stclr","LDCLR">; // STCLRx +defm : STOPregister<"steor","LDEOR">; // STEORx +defm : STOPregister<"stset","LDSET">; // STSETx +defm : STOPregister<"stsmax","LDSMAX">;// STSMAXx +defm : STOPregister<"stsmin","LDSMIN">;// STSMINx +defm : STOPregister<"stumax","LDUMAX">;// STUMAXx +defm : STOPregister<"stumin","LDUMIN">;// STUMINx + +//===----------------------------------------------------------------------===// +// Logical instructions. +//===----------------------------------------------------------------------===// + +// (immediate) +defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">; +defm AND : LogicalImm<0b00, "and", and, "bic">; +defm EOR : LogicalImm<0b10, "eor", xor, "eon">; +defm ORR : LogicalImm<0b01, "orr", or, "orn">; + +// FIXME: these aliases *are* canonical sometimes (when movz can't be +// used). 
Actually, it seems to be working right now, but putting logical_immXX +// here is a bit dodgy on the AsmParser side too. +def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR, + logical_imm32:$imm), 0>; +def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR, + logical_imm64:$imm), 0>; + + +// (register) +defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>; +defm BICS : LogicalRegS<0b11, 1, "bics", + BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>; +defm AND : LogicalReg<0b00, 0, "and", and>; +defm BIC : LogicalReg<0b00, 1, "bic", + BinOpFrag<(and node:$LHS, (not node:$RHS))>>; +defm EON : LogicalReg<0b10, 1, "eon", + BinOpFrag<(not (xor node:$LHS, node:$RHS))>>; +defm EOR : LogicalReg<0b10, 0, "eor", xor>; +defm ORN : LogicalReg<0b01, 1, "orn", + BinOpFrag<(or node:$LHS, (not node:$RHS))>>; +defm ORR : LogicalReg<0b01, 0, "orr", or>; + +def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>; +def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>; + +def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>; +def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>; + +def : InstAlias<"mvn $Wd, $Wm$sh", + (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>; +def : InstAlias<"mvn $Xd, $Xm$sh", + (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>; + +def : InstAlias<"tst $src1, $src2", + (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>; +def : InstAlias<"tst $src1, $src2", + (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>; + +def : InstAlias<"tst $src1, $src2", + (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>; +def : InstAlias<"tst $src1, $src2", + (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>; + +def : InstAlias<"tst $src1, $src2$sh", + (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>; +def : InstAlias<"tst $src1, $src2$sh", + (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>; + + +def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>; +def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>; + + +//===----------------------------------------------------------------------===// +// One operand data processing instructions. +//===----------------------------------------------------------------------===// + +defm CLS : OneOperandData<0b101, "cls">; +defm CLZ : OneOperandData<0b100, "clz", ctlz>; +defm RBIT : OneOperandData<0b000, "rbit", bitreverse>; + +def REV16Wr : OneWRegData<0b001, "rev16", + UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>; +def REV16Xr : OneXRegData<0b001, "rev16", null_frag>; + +def : Pat<(cttz GPR32:$Rn), + (CLZWr (RBITWr GPR32:$Rn))>; +def : Pat<(cttz GPR64:$Rn), + (CLZXr (RBITXr GPR64:$Rn))>; +def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)), + (i32 1))), + (CLSWr GPR32:$Rn)>; +def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)), + (i64 1))), + (CLSXr GPR64:$Rn)>; + +// Unlike the other one operand instructions, the instructions with the "rev" +// mnemonic do *not* just different in the size bit, but actually use different +// opcode bits for the different sizes. +def REVWr : OneWRegData<0b010, "rev", bswap>; +def REVXr : OneXRegData<0b011, "rev", bswap>; +def REV32Xr : OneXRegData<0b010, "rev32", + UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>; + +def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>; + +// The bswap commutes with the rotr so we want a pattern for both possible +// orders. 
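The identities behind the REV16/cttz selections above and the commuted bswap/rotr patterns below can be checked with plain integer code. A quick illustrative verification; the helper names are not from the backend.

#include <cassert>
#include <cstdint>

uint32_t rotr32(uint32_t x, unsigned r) { return (x >> r) | (x << (32 - r)); }
uint32_t rbit32(uint32_t x) {            // bit-reverse, like RBIT
  uint32_t r = 0;
  for (int i = 0; i < 32; ++i) r |= ((x >> i) & 1u) << (31 - i);
  return r;
}

int main() {
  uint32_t x = 0x11223344;
  // bswap and rotr-by-16 commute, hence a pattern for both operand orders.
  assert(rotr32(__builtin_bswap32(x), 16) == __builtin_bswap32(rotr32(x, 16)));
  // cttz is selected as CLZ(RBIT(x)): reversing bits turns trailing zeros into leading zeros.
  assert(__builtin_ctz(x) == __builtin_clz(rbit32(x)));
}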
+def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>; +def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>; + +//===----------------------------------------------------------------------===// +// Bitfield immediate extraction instruction. +//===----------------------------------------------------------------------===// +let hasSideEffects = 0 in +defm EXTR : ExtractImm<"extr">; +def : InstAlias<"ror $dst, $src, $shift", + (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>; +def : InstAlias<"ror $dst, $src, $shift", + (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>; + +def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)), + (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>; +def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)), + (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>; + +//===----------------------------------------------------------------------===// +// Other bitfield immediate instructions. +//===----------------------------------------------------------------------===// +let hasSideEffects = 0 in { +defm BFM : BitfieldImmWith2RegArgs<0b01, "bfm">; +defm SBFM : BitfieldImm<0b00, "sbfm">; +defm UBFM : BitfieldImm<0b10, "ubfm">; +} + +def i32shift_a : Operand, SDNodeXFormgetZExtValue()) & 0x1f; + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); +}]>; + +def i32shift_b : Operand, SDNodeXFormgetZExtValue(); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); +}]>; + +// min(7, 31 - shift_amt) +def i32shift_sext_i8 : Operand, SDNodeXFormgetZExtValue(); + enc = enc > 7 ? 7 : enc; + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); +}]>; + +// min(15, 31 - shift_amt) +def i32shift_sext_i16 : Operand, SDNodeXFormgetZExtValue(); + enc = enc > 15 ? 15 : enc; + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); +}]>; + +def i64shift_a : Operand, SDNodeXFormgetZExtValue()) & 0x3f; + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); +}]>; + +def i64shift_b : Operand, SDNodeXFormgetZExtValue(); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); +}]>; + +// min(7, 63 - shift_amt) +def i64shift_sext_i8 : Operand, SDNodeXFormgetZExtValue(); + enc = enc > 7 ? 7 : enc; + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); +}]>; + +// min(15, 63 - shift_amt) +def i64shift_sext_i16 : Operand, SDNodeXFormgetZExtValue(); + enc = enc > 15 ? 15 : enc; + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); +}]>; + +// min(31, 63 - shift_amt) +def i64shift_sext_i32 : Operand, SDNodeXFormgetZExtValue(); + enc = enc > 31 ? 
31 : enc; + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); +}]>; + +def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)), + (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), + (i64 (i32shift_b imm0_31:$imm)))>; +def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)), + (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), + (i64 (i64shift_b imm0_63:$imm)))>; + +let AddedComplexity = 10 in { +def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)), + (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>; +def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)), + (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>; +} + +def : InstAlias<"asr $dst, $src, $shift", + (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>; +def : InstAlias<"asr $dst, $src, $shift", + (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>; +def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>; +def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>; +def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>; +def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; +def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; + +def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)), + (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>; +def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)), + (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>; + +def : InstAlias<"lsr $dst, $src, $shift", + (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>; +def : InstAlias<"lsr $dst, $src, $shift", + (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>; +def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>; +def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>; +def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>; +def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; +def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; + +//===----------------------------------------------------------------------===// +// Conditional comparison instructions. +//===----------------------------------------------------------------------===// +defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>; +defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>; + +//===----------------------------------------------------------------------===// +// Conditional select instructions. 
+//===----------------------------------------------------------------------===// +defm CSEL : CondSelect<0, 0b00, "csel">; + +def inc : PatFrag<(ops node:$in), (add node:$in, 1)>; +defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>; +defm CSINV : CondSelectOp<1, 0b00, "csinv", not>; +defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>; + +def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), + (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; +def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), + (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; +def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), + (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; +def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), + (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; +def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), + (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; +def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), + (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; + +def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV), + (CSINCWr WZR, WZR, (i32 imm:$cc))>; +def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV), + (CSINCXr XZR, XZR, (i32 imm:$cc))>; +def : Pat<(AArch64csel GPR32:$tval, (i32 1), (i32 imm:$cc), NZCV), + (CSINCWr GPR32:$tval, WZR, (i32 imm:$cc))>; +def : Pat<(AArch64csel GPR64:$tval, (i64 1), (i32 imm:$cc), NZCV), + (CSINCXr GPR64:$tval, XZR, (i32 imm:$cc))>; +def : Pat<(AArch64csel (i32 1), GPR32:$fval, (i32 imm:$cc), NZCV), + (CSINCWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>; +def : Pat<(AArch64csel (i64 1), GPR64:$fval, (i32 imm:$cc), NZCV), + (CSINCXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>; +def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV), + (CSINVWr WZR, WZR, (i32 imm:$cc))>; +def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV), + (CSINVXr XZR, XZR, (i32 imm:$cc))>; +def : Pat<(AArch64csel GPR32:$tval, (i32 -1), (i32 imm:$cc), NZCV), + (CSINVWr GPR32:$tval, WZR, (i32 imm:$cc))>; +def : Pat<(AArch64csel GPR64:$tval, (i64 -1), (i32 imm:$cc), NZCV), + (CSINVXr GPR64:$tval, XZR, (i32 imm:$cc))>; +def : Pat<(AArch64csel (i32 -1), GPR32:$fval, (i32 imm:$cc), NZCV), + (CSINVWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>; +def : Pat<(AArch64csel (i64 -1), GPR64:$fval, (i32 imm:$cc), NZCV), + (CSINVXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>; + +// The inverse of the condition code from the alias instruction is what is used +// in the aliased instruction. The parser all ready inverts the condition code +// for these aliases. 
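To make the inversion mentioned above concrete: CSINC selects Rn when the condition holds and Rm+1 otherwise, so the cset alias below must be emitted with the inverted condition. An illustrative scalar model, not the backend's code:

#include <cassert>

// CSINC Rd, Rn, Rm, cond:  Rd = cond ? Rn : Rm + 1
unsigned csinc(unsigned rn, unsigned rm, bool cond) { return cond ? rn : rm + 1; }

// cset Rd, cc  ==  csinc Rd, wzr, wzr, invert(cc)
unsigned cset(bool cc) { return csinc(/*wzr*/ 0, /*wzr*/ 0, /*inverted cond*/ !cc); }

int main() {
  assert(cset(true) == 1 && cset(false) == 0);
}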
+def : InstAlias<"cset $dst, $cc", + (CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>; +def : InstAlias<"cset $dst, $cc", + (CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>; + +def : InstAlias<"csetm $dst, $cc", + (CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>; +def : InstAlias<"csetm $dst, $cc", + (CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>; + +def : InstAlias<"cinc $dst, $src, $cc", + (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>; +def : InstAlias<"cinc $dst, $src, $cc", + (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>; + +def : InstAlias<"cinv $dst, $src, $cc", + (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>; +def : InstAlias<"cinv $dst, $src, $cc", + (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>; + +def : InstAlias<"cneg $dst, $src, $cc", + (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>; +def : InstAlias<"cneg $dst, $src, $cc", + (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>; + +//===----------------------------------------------------------------------===// +// PC-relative instructions. +//===----------------------------------------------------------------------===// +let isReMaterializable = 1 in { +let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in { +def ADR : ADRI<0, "adr", adrlabel, []>; +} // hasSideEffects = 0 + +def ADRP : ADRI<1, "adrp", adrplabel, + [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>; +} // isReMaterializable = 1 + +// page address of a constant pool entry, block address +def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>; +def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>; +def : Pat<(AArch64adrp texternalsym:$sym), (ADRP texternalsym:$sym)>; + +//===----------------------------------------------------------------------===// +// Unconditional branch (register) instructions. +//===----------------------------------------------------------------------===// + +let isReturn = 1, isTerminator = 1, isBarrier = 1 in { +def RET : BranchReg<0b0010, "ret", []>; +def DRPS : SpecialReturn<0b0101, "drps">; +def ERET : SpecialReturn<0b0100, "eret">; +} // isReturn = 1, isTerminator = 1, isBarrier = 1 + +// Default to the LR register. +def : InstAlias<"ret", (RET LR)>; + +let isCall = 1, Defs = [LR], Uses = [SP] in { +def BLR : BranchReg<0b0001, "blr", [(AArch64call GPR64:$Rn)]>; +} // isCall + +let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { +def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>; +} // isBranch, isTerminator, isBarrier, isIndirectBranch + +// Create a separate pseudo-instruction for codegen to use so that we don't +// flag lr as used in every function. It'll be restored before the RET by the +// epilogue if it's legitimately used. +def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retflag)]>, + Sched<[WriteBrReg]> { + let isTerminator = 1; + let isBarrier = 1; + let isReturn = 1; +} + +// This is a directive-like pseudo-instruction. The purpose is to insert an +// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction +// (which in the usual case is a BLR). +let hasSideEffects = 1 in +def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> { + let AsmString = ".tlsdesccall $sym"; +} + +// FIXME: maybe the scratch register used shouldn't be fixed to X1? +// FIXME: can "hasSideEffects be dropped? 
+let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1, + isCodeGenOnly = 1 in +def TLSDESC_CALLSEQ + : Pseudo<(outs), (ins i64imm:$sym), + [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>, + Sched<[WriteI, WriteLD, WriteI, WriteBrReg]>; +def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym), + (TLSDESC_CALLSEQ texternalsym:$sym)>; + +//===----------------------------------------------------------------------===// +// Conditional branch (immediate) instruction. +//===----------------------------------------------------------------------===// +def Bcc : BranchCond; + +//===----------------------------------------------------------------------===// +// Compare-and-branch instructions. +//===----------------------------------------------------------------------===// +defm CBZ : CmpBranch<0, "cbz", AArch64cbz>; +defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>; + +//===----------------------------------------------------------------------===// +// Test-bit-and-branch instructions. +//===----------------------------------------------------------------------===// +defm TBZ : TestBranch<0, "tbz", AArch64tbz>; +defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>; + +//===----------------------------------------------------------------------===// +// Unconditional branch (immediate) instructions. +//===----------------------------------------------------------------------===// +let isBranch = 1, isTerminator = 1, isBarrier = 1 in { +def B : BranchImm<0, "b", [(br bb:$addr)]>; +} // isBranch, isTerminator, isBarrier + +let isCall = 1, Defs = [LR], Uses = [SP] in { +def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>; +} // isCall +def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>; + +//===----------------------------------------------------------------------===// +// Exception generation instructions. +//===----------------------------------------------------------------------===// +let isTrap = 1 in { +def BRK : ExceptionGeneration<0b001, 0b00, "brk">; +} +def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">; +def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">; +def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">; +def HLT : ExceptionGeneration<0b010, 0b00, "hlt">; +def HVC : ExceptionGeneration<0b000, 0b10, "hvc">; +def SMC : ExceptionGeneration<0b000, 0b11, "smc">; +def SVC : ExceptionGeneration<0b000, 0b01, "svc">; + +// DCPSn defaults to an immediate operand of zero if unspecified. +def : InstAlias<"dcps1", (DCPS1 0)>; +def : InstAlias<"dcps2", (DCPS2 0)>; +def : InstAlias<"dcps3", (DCPS3 0)>; + +//===----------------------------------------------------------------------===// +// Load instructions. 
+//===----------------------------------------------------------------------===// + +// Pair (indexed, offset) +defm LDPW : LoadPairOffset<0b00, 0, GPR32z, simm7s4, "ldp">; +defm LDPX : LoadPairOffset<0b10, 0, GPR64z, simm7s8, "ldp">; +defm LDPS : LoadPairOffset<0b00, 1, FPR32Op, simm7s4, "ldp">; +defm LDPD : LoadPairOffset<0b01, 1, FPR64Op, simm7s8, "ldp">; +defm LDPQ : LoadPairOffset<0b10, 1, FPR128Op, simm7s16, "ldp">; + +defm LDPSW : LoadPairOffset<0b01, 0, GPR64z, simm7s4, "ldpsw">; + +// Pair (pre-indexed) +def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32z, simm7s4, "ldp">; +def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64z, simm7s8, "ldp">; +def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32Op, simm7s4, "ldp">; +def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64Op, simm7s8, "ldp">; +def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128Op, simm7s16, "ldp">; + +def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">; + +// Pair (post-indexed) +def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32z, simm7s4, "ldp">; +def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64z, simm7s8, "ldp">; +def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32Op, simm7s4, "ldp">; +def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64Op, simm7s8, "ldp">; +def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128Op, simm7s16, "ldp">; + +def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">; + + +// Pair (no allocate) +defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32z, simm7s4, "ldnp">; +defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64z, simm7s8, "ldnp">; +defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32Op, simm7s4, "ldnp">; +defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64Op, simm7s8, "ldnp">; +defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128Op, simm7s16, "ldnp">; + +//--- +// (register offset) +//--- + +// Integer +defm LDRBB : Load8RO<0b00, 0, 0b01, GPR32, "ldrb", i32, zextloadi8>; +defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>; +defm LDRW : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>; +defm LDRX : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>; + +// Floating-point +defm LDRB : Load8RO<0b00, 1, 0b01, FPR8Op, "ldr", untyped, load>; +defm LDRH : Load16RO<0b01, 1, 0b01, FPR16Op, "ldr", f16, load>; +defm LDRS : Load32RO<0b10, 1, 0b01, FPR32Op, "ldr", f32, load>; +defm LDRD : Load64RO<0b11, 1, 0b01, FPR64Op, "ldr", f64, load>; +defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128Op, "ldr", f128, load>; + +// Load sign-extended half-word +defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>; +defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>; + +// Load sign-extended byte +defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>; +defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>; + +// Load sign-extended word +defm LDRSW : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>; + +// Pre-fetch. +defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">; + +// For regular load, we do not have any alignment requirement. +// Thus, it is safe to directly map the vector loads with interesting +// addressing modes. +// FIXME: We could do the same for bitconvert to floating point vectors. 
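The "interesting addressing modes" referred to above are the register-offset forms matched next: base plus an optionally extended index register shifted by the access size. A sketch of the address arithmetic, with illustrative helper names:

#include <cstdint>

uint64_t ro_addr_uxtw(uint64_t base, uint32_t wm, unsigned scale) {
  return base + (static_cast<uint64_t>(wm) << scale);                        // [Xn, Wm, uxtw #scale]
}
uint64_t ro_addr_sxtw(uint64_t base, int32_t wm, unsigned scale) {
  return base + (static_cast<uint64_t>(static_cast<int64_t>(wm)) << scale);  // [Xn, Wm, sxtw #scale]
}
uint64_t ro_addr_lsl(uint64_t base, uint64_t xm, unsigned scale) {
  return base + (xm << scale);                                               // [Xn, Xm, lsl #scale]
}

int main() {
  return ro_addr_sxtw(0x1000, -1, 3) == 0xff8 ? 0 : 1;  // 8-byte load one element below base
}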
+multiclass ScalToVecROLoadPat { + def : Pat<(VecTy (scalar_to_vector (ScalTy + (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))), + (INSERT_SUBREG (VecTy (IMPLICIT_DEF)), + (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset), + sub)>; + + def : Pat<(VecTy (scalar_to_vector (ScalTy + (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))), + (INSERT_SUBREG (VecTy (IMPLICIT_DEF)), + (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset), + sub)>; +} + +let AddedComplexity = 10 in { +defm : ScalToVecROLoadPat; +defm : ScalToVecROLoadPat; + +defm : ScalToVecROLoadPat; +defm : ScalToVecROLoadPat; + +defm : ScalToVecROLoadPat; +defm : ScalToVecROLoadPat; + +defm : ScalToVecROLoadPat; +defm : ScalToVecROLoadPat; + +defm : ScalToVecROLoadPat; +defm : ScalToVecROLoadPat; + +defm : ScalToVecROLoadPat; + +defm : ScalToVecROLoadPat; + + +def : Pat <(v1i64 (scalar_to_vector (i64 + (load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend64:$extend))))), + (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>; + +def : Pat <(v1i64 (scalar_to_vector (i64 + (load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend64:$extend))))), + (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>; +} + +// Match all load 64 bits width whose type is compatible with FPR64 +multiclass VecROLoadPat { + + def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), + (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; + + def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), + (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; +} + +let AddedComplexity = 10 in { +let Predicates = [IsLE] in { + // We must do vector loads with LD1 in big-endian. + defm : VecROLoadPat; + defm : VecROLoadPat; + defm : VecROLoadPat; + defm : VecROLoadPat; + defm : VecROLoadPat; +} + +defm : VecROLoadPat; +defm : VecROLoadPat; + +// Match all load 128 bits width whose type is compatible with FPR128 +let Predicates = [IsLE] in { + // We must do vector loads with LD1 in big-endian. 
+ defm : VecROLoadPat; + defm : VecROLoadPat; + defm : VecROLoadPat; + defm : VecROLoadPat; + defm : VecROLoadPat; + defm : VecROLoadPat; + defm : VecROLoadPat; +} +} // AddedComplexity = 10 + +// zextload -> i64 +multiclass ExtLoadTo64ROPat { + def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), + (SUBREG_TO_REG (i64 0), + (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend), + sub_32)>; + + def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), + (SUBREG_TO_REG (i64 0), + (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend), + sub_32)>; +} + +let AddedComplexity = 10 in { + defm : ExtLoadTo64ROPat; + defm : ExtLoadTo64ROPat; + defm : ExtLoadTo64ROPat; + + // zextloadi1 -> zextloadi8 + defm : ExtLoadTo64ROPat; + + // extload -> zextload + defm : ExtLoadTo64ROPat; + defm : ExtLoadTo64ROPat; + defm : ExtLoadTo64ROPat; + + // extloadi1 -> zextloadi8 + defm : ExtLoadTo64ROPat; +} + + +// zextload -> i64 +multiclass ExtLoadTo32ROPat { + def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), + (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; + + def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), + (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; + +} + +let AddedComplexity = 10 in { + // extload -> zextload + defm : ExtLoadTo32ROPat; + defm : ExtLoadTo32ROPat; + defm : ExtLoadTo32ROPat; + + // zextloadi1 -> zextloadi8 + defm : ExtLoadTo32ROPat; +} + +//--- +// (unsigned immediate) +//--- +defm LDRX : LoadUI<0b11, 0, 0b01, GPR64z, uimm12s8, "ldr", + [(set GPR64z:$Rt, + (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>; +defm LDRW : LoadUI<0b10, 0, 0b01, GPR32z, uimm12s4, "ldr", + [(set GPR32z:$Rt, + (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>; +defm LDRB : LoadUI<0b00, 1, 0b01, FPR8Op, uimm12s1, "ldr", + [(set FPR8Op:$Rt, + (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>; +defm LDRH : LoadUI<0b01, 1, 0b01, FPR16Op, uimm12s2, "ldr", + [(set (f16 FPR16Op:$Rt), + (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>; +defm LDRS : LoadUI<0b10, 1, 0b01, FPR32Op, uimm12s4, "ldr", + [(set (f32 FPR32Op:$Rt), + (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>; +defm LDRD : LoadUI<0b11, 1, 0b01, FPR64Op, uimm12s8, "ldr", + [(set (f64 FPR64Op:$Rt), + (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>; +defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128Op, uimm12s16, "ldr", + [(set (f128 FPR128Op:$Rt), + (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>; + +// For regular load, we do not have any alignment requirement. +// Thus, it is safe to directly map the vector loads with interesting +// addressing modes. +// FIXME: We could do the same for bitconvert to floating point vectors. 
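A note on the SUBREG_TO_REG wrapping used by the extending-load patterns earlier in this block: writing a W register already zeroes bits [63:32], so an i64 zero-extending load needs no extra instruction. Illustrative scalar equivalent only:

#include <cassert>
#include <cstdint>

uint64_t zextload_i8_to_i64(const uint8_t *p) {
  uint32_t w = *p;                   // LDRB writes Wt; the upper half of Xt becomes zero
  return static_cast<uint64_t>(w);   // SUBREG_TO_REG: just a register-class change
}

int main() {
  uint8_t b = 0xff;
  assert(zextload_i8_to_i64(&b) == 0xff);
}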
+def : Pat <(v8i8 (scalar_to_vector (i32 + (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), + (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), + (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; +def : Pat <(v16i8 (scalar_to_vector (i32 + (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; +def : Pat <(v4i16 (scalar_to_vector (i32 + (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), + (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), + (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; +def : Pat <(v8i16 (scalar_to_vector (i32 + (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), + (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), + (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; +def : Pat <(v2i32 (scalar_to_vector (i32 + (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), + (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), + (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>; +def : Pat <(v4i32 (scalar_to_vector (i32 + (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), + (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>; +def : Pat <(v1i64 (scalar_to_vector (i64 + (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; +def : Pat <(v2i64 (scalar_to_vector (i64 + (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))), + (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>; + +// Match all load 64 bits width whose type is compatible with FPR64 +let Predicates = [IsLE] in { + // We must use LD1 to perform vector loads in big-endian. + def : Pat<(v2f32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; + def : Pat<(v8i8 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; + def : Pat<(v4i16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; + def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; + def : Pat<(v4f16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; +} +def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; +def : Pat<(v1i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; + +// Match all load 128 bits width whose type is compatible with FPR128 +let Predicates = [IsLE] in { + // We must use LD1 to perform vector loads in big-endian. 
+ def : Pat<(v4f32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), + (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(v2f64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), + (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(v16i8 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), + (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(v8i16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), + (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(v4i32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), + (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), + (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(v8f16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), + (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; +} +def : Pat<(f128 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), + (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; + +defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh", + [(set GPR32:$Rt, + (zextloadi16 (am_indexed16 GPR64sp:$Rn, + uimm12s2:$offset)))]>; +defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb", + [(set GPR32:$Rt, + (zextloadi8 (am_indexed8 GPR64sp:$Rn, + uimm12s1:$offset)))]>; +// zextload -> i64 +def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), + (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; +def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), + (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>; + +// zextloadi1 -> zextloadi8 +def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), + (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; +def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), + (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; + +// extload -> zextload +def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), + (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>; +def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), + (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; +def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), + (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; +def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))), + (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>; +def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), + (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>; +def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), + (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; +def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), + (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; + +// load sign-extended half-word +defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh", + [(set GPR32:$Rt, + (sextloadi16 (am_indexed16 GPR64sp:$Rn, + uimm12s2:$offset)))]>; +defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh", + [(set GPR64:$Rt, + (sextloadi16 (am_indexed16 GPR64sp:$Rn, + uimm12s2:$offset)))]>; + +// load sign-extended byte +defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb", + [(set GPR32:$Rt, + (sextloadi8 (am_indexed8 GPR64sp:$Rn, + uimm12s1:$offset)))]>; +defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb", + [(set GPR64:$Rt, + (sextloadi8 (am_indexed8 GPR64sp:$Rn, + uimm12s1:$offset)))]>; + +// load sign-extended word 
+defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw", + [(set GPR64:$Rt, + (sextloadi32 (am_indexed32 GPR64sp:$Rn, + uimm12s4:$offset)))]>; + +// load zero-extended word +def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))), + (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>; + +// Pre-fetch. +def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm", + [(AArch64Prefetch imm:$Rt, + (am_indexed64 GPR64sp:$Rn, + uimm12s8:$offset))]>; + +def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>; + +//--- +// (literal) +def LDRWl : LoadLiteral<0b00, 0, GPR32z, "ldr">; +def LDRXl : LoadLiteral<0b01, 0, GPR64z, "ldr">; +def LDRSl : LoadLiteral<0b00, 1, FPR32Op, "ldr">; +def LDRDl : LoadLiteral<0b01, 1, FPR64Op, "ldr">; +def LDRQl : LoadLiteral<0b10, 1, FPR128Op, "ldr">; + +// load sign-extended word +def LDRSWl : LoadLiteral<0b10, 0, GPR64z, "ldrsw">; + +// prefetch +def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>; +// [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>; + +//--- +// (unscaled immediate) +defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64z, "ldur", + [(set GPR64z:$Rt, + (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>; +defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32z, "ldur", + [(set GPR32z:$Rt, + (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>; +defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8Op, "ldur", + [(set FPR8Op:$Rt, + (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>; +defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16Op, "ldur", + [(set FPR16Op:$Rt, + (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; +defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32Op, "ldur", + [(set (f32 FPR32Op:$Rt), + (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>; +defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64Op, "ldur", + [(set (f64 FPR64Op:$Rt), + (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>; +defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128Op, "ldur", + [(set (f128 FPR128Op:$Rt), + (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset)))]>; + +defm LDURHH + : LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh", + [(set GPR32:$Rt, + (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; +defm LDURBB + : LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb", + [(set GPR32:$Rt, + (zextloadi8 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; + +// Match all load 64 bits width whose type is compatible with FPR64 +let Predicates = [IsLE] in { + def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), + (LDURDi GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), + (LDURDi GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), + (LDURDi GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), + (LDURDi GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(v4f16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), + (LDURDi GPR64sp:$Rn, simm9:$offset)>; +} +def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), + (LDURDi GPR64sp:$Rn, simm9:$offset)>; +def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), + (LDURDi GPR64sp:$Rn, simm9:$offset)>; + +// Match all load 128 bits width whose type is compatible with FPR128 +let Predicates = [IsLE] in { + def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), + (LDURQi GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), + (LDURQi GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(v4f32 (load 
(am_unscaled128 GPR64sp:$Rn, simm9:$offset))), + (LDURQi GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), + (LDURQi GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), + (LDURQi GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), + (LDURQi GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(v8f16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), + (LDURQi GPR64sp:$Rn, simm9:$offset)>; +} + +// anyext -> zext +def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), + (LDURHHi GPR64sp:$Rn, simm9:$offset)>; +def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), + (LDURBBi GPR64sp:$Rn, simm9:$offset)>; +def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), + (LDURBBi GPR64sp:$Rn, simm9:$offset)>; +def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))), + (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>; +def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), + (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>; +def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), + (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; +def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), + (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; +// unscaled zext +def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), + (LDURHHi GPR64sp:$Rn, simm9:$offset)>; +def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), + (LDURBBi GPR64sp:$Rn, simm9:$offset)>; +def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), + (LDURBBi GPR64sp:$Rn, simm9:$offset)>; +def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))), + (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>; +def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), + (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>; +def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), + (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; +def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), + (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; + + +//--- +// LDR mnemonics fall back to LDUR for negative or unaligned offsets. + +// Define new assembler match classes as we want to only match these when +// the don't otherwise match the scaled addressing mode for LDR/STR. Don't +// associate a DiagnosticType either, as we want the diagnostic for the +// canonical form (the scaled operand) to take precedence. 
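Concretely, the fallback described above is about offset encodings: the scaled LDR form takes an unsigned 12-bit multiple of the access size, while LDUR takes a signed 9-bit byte offset, so negative or misaligned offsets only fit the unscaled form. A small illustrative check; the helper names are not from the assembler.

#include <cassert>
#include <cstdint>

bool fits_scaled(int64_t off, unsigned size) {    // LDR  Rt, [Rn, #off]
  return off >= 0 && off % size == 0 && off / size <= 4095;
}
bool fits_unscaled(int64_t off) {                 // LDUR Rt, [Rn, #off]
  return off >= -256 && off <= 255;
}

int main() {
  assert(!fits_scaled(-8, 8) && fits_unscaled(-8));  // negative offset: LDUR only
  assert(!fits_scaled(3, 8) && fits_unscaled(3));    // unaligned offset: LDUR only
}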
+class SImm9OffsetOperand : AsmOperandClass { + let Name = "SImm9OffsetFB" # Width; + let PredicateMethod = "isSImm9OffsetFB<" # Width # ">"; + let RenderMethod = "addImmOperands"; +} + +def SImm9OffsetFB8Operand : SImm9OffsetOperand<8>; +def SImm9OffsetFB16Operand : SImm9OffsetOperand<16>; +def SImm9OffsetFB32Operand : SImm9OffsetOperand<32>; +def SImm9OffsetFB64Operand : SImm9OffsetOperand<64>; +def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>; + +def simm9_offset_fb8 : Operand { + let ParserMatchClass = SImm9OffsetFB8Operand; +} +def simm9_offset_fb16 : Operand { + let ParserMatchClass = SImm9OffsetFB16Operand; +} +def simm9_offset_fb32 : Operand { + let ParserMatchClass = SImm9OffsetFB32Operand; +} +def simm9_offset_fb64 : Operand { + let ParserMatchClass = SImm9OffsetFB64Operand; +} +def simm9_offset_fb128 : Operand { + let ParserMatchClass = SImm9OffsetFB128Operand; +} + +def : InstAlias<"ldr $Rt, [$Rn, $offset]", + (LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; +def : InstAlias<"ldr $Rt, [$Rn, $offset]", + (LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; +def : InstAlias<"ldr $Rt, [$Rn, $offset]", + (LDURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; +def : InstAlias<"ldr $Rt, [$Rn, $offset]", + (LDURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; +def : InstAlias<"ldr $Rt, [$Rn, $offset]", + (LDURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; +def : InstAlias<"ldr $Rt, [$Rn, $offset]", + (LDURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; +def : InstAlias<"ldr $Rt, [$Rn, $offset]", + (LDURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>; + +// zextload -> i64 +def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), + (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; +def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), + (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>; + +// load sign-extended half-word +defm LDURSHW + : LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh", + [(set GPR32:$Rt, + (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; +defm LDURSHX + : LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh", + [(set GPR64:$Rt, + (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; + +// load sign-extended byte +defm LDURSBW + : LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb", + [(set GPR32:$Rt, + (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>; +defm LDURSBX + : LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb", + [(set GPR64:$Rt, + (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>; + +// load sign-extended word +defm LDURSW + : LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw", + [(set GPR64:$Rt, + (sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>; + +// zero and sign extending aliases from generic LDR* mnemonics to LDUR*. 
+def : InstAlias<"ldrb $Rt, [$Rn, $offset]", + (LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; +def : InstAlias<"ldrh $Rt, [$Rn, $offset]", + (LDURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; +def : InstAlias<"ldrsb $Rt, [$Rn, $offset]", + (LDURSBWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; +def : InstAlias<"ldrsb $Rt, [$Rn, $offset]", + (LDURSBXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; +def : InstAlias<"ldrsh $Rt, [$Rn, $offset]", + (LDURSHWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; +def : InstAlias<"ldrsh $Rt, [$Rn, $offset]", + (LDURSHXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; +def : InstAlias<"ldrsw $Rt, [$Rn, $offset]", + (LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; + +// Pre-fetch. +defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum", + [(AArch64Prefetch imm:$Rt, + (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; + +//--- +// (unscaled immediate, unprivileged) +defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">; +defm LDTRW : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">; + +defm LDTRH : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">; +defm LDTRB : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">; + +// load sign-extended half-word +defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">; +defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">; + +// load sign-extended byte +defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">; +defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">; + +// load sign-extended word +defm LDTRSW : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">; + +//--- +// (immediate pre-indexed) +def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32z, "ldr">; +def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64z, "ldr">; +def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8Op, "ldr">; +def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16Op, "ldr">; +def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32Op, "ldr">; +def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64Op, "ldr">; +def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128Op, "ldr">; + +// load sign-extended half-word +def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32z, "ldrsh">; +def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64z, "ldrsh">; + +// load sign-extended byte +def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32z, "ldrsb">; +def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64z, "ldrsb">; + +// load zero-extended byte +def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32z, "ldrb">; +def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32z, "ldrh">; + +// load sign-extended word +def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64z, "ldrsw">; + +//--- +// (immediate post-indexed) +def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32z, "ldr">; +def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64z, "ldr">; +def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8Op, "ldr">; +def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16Op, "ldr">; +def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32Op, "ldr">; +def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64Op, "ldr">; +def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128Op, "ldr">; + +// load sign-extended half-word +def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32z, "ldrsh">; +def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64z, "ldrsh">; + +// load sign-extended byte +def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32z, "ldrsb">; +def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64z, "ldrsb">; + +// load zero-extended byte +def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32z, "ldrb">; +def LDRHHpost : LoadPostIdx<0b01, 0, 
0b01, GPR32z, "ldrh">;
+
+// load sign-extended word
+def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;
+
+//===----------------------------------------------------------------------===//
+// Store instructions.
+//===----------------------------------------------------------------------===//
+
+// Pair (indexed, offset)
+// FIXME: Use dedicated range-checked addressing mode operand here.
+defm STPW : StorePairOffset<0b00, 0, GPR32z, simm7s4, "stp">;
+defm STPX : StorePairOffset<0b10, 0, GPR64z, simm7s8, "stp">;
+defm STPS : StorePairOffset<0b00, 1, FPR32Op, simm7s4, "stp">;
+defm STPD : StorePairOffset<0b01, 1, FPR64Op, simm7s8, "stp">;
+defm STPQ : StorePairOffset<0b10, 1, FPR128Op, simm7s16, "stp">;
+
+// Pair (pre-indexed)
+def STPWpre : StorePairPreIdx<0b00, 0, GPR32z, simm7s4, "stp">;
+def STPXpre : StorePairPreIdx<0b10, 0, GPR64z, simm7s8, "stp">;
+def STPSpre : StorePairPreIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
+def STPDpre : StorePairPreIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
+def STPQpre : StorePairPreIdx<0b10, 1, FPR128Op, simm7s16, "stp">;
+
+// Pair (post-indexed)
+def STPWpost : StorePairPostIdx<0b00, 0, GPR32z, simm7s4, "stp">;
+def STPXpost : StorePairPostIdx<0b10, 0, GPR64z, simm7s8, "stp">;
+def STPSpost : StorePairPostIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
+def STPDpost : StorePairPostIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
+def STPQpost : StorePairPostIdx<0b10, 1, FPR128Op, simm7s16, "stp">;
+
+// Pair (no allocate)
+defm STNPW : StorePairNoAlloc<0b00, 0, GPR32z, simm7s4, "stnp">;
+defm STNPX : StorePairNoAlloc<0b10, 0, GPR64z, simm7s8, "stnp">;
+defm STNPS : StorePairNoAlloc<0b00, 1, FPR32Op, simm7s4, "stnp">;
+defm STNPD : StorePairNoAlloc<0b01, 1, FPR64Op, simm7s8, "stnp">;
+defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128Op, simm7s16, "stnp">;
+
+//---
+// (Register offset)
+
+// Integer
+defm STRBB : Store8RO< 0b00, 0, 0b00, GPR32, "strb", i32, truncstorei8>;
+defm STRHH : Store16RO<0b01, 0, 0b00, GPR32, "strh", i32, truncstorei16>;
+defm STRW : Store32RO<0b10, 0, 0b00, GPR32, "str", i32, store>;
+defm STRX : Store64RO<0b11, 0, 0b00, GPR64, "str", i64, store>;
+
+
+// Floating-point
+defm STRB : Store8RO< 0b00, 1, 0b00, FPR8Op, "str", untyped, store>;
+defm STRH : Store16RO<0b01, 1, 0b00, FPR16Op, "str", f16, store>;
+defm STRS : Store32RO<0b10, 1, 0b00, FPR32Op, "str", f32, store>;
+defm STRD : Store64RO<0b11, 1, 0b00, FPR64Op, "str", f64, store>;
+defm STRQ : Store128RO<0b00, 1, 0b10, FPR128Op, "str", f128, store>;
+
+let Predicates = [UseSTRQro], AddedComplexity = 10 in {
+  def : Pat<(store (f128 FPR128:$Rt),
+                   (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
+                                   ro_Wextend128:$extend)),
+            (STRQroW FPR128:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend)>;
+  def : Pat<(store (f128 FPR128:$Rt),
+                   (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
+                                   ro_Xextend128:$extend)),
+            (STRQroX FPR128:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Wextend128:$extend)>;
+}
+
+multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop,
+                                 Instruction STRW, Instruction STRX> {
+
+  def : Pat<(storeop GPR64:$Rt,
+                     (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
+            (STRW (EXTRACT_SUBREG GPR64:$Rt, sub_32),
+                  GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
+
+  def : Pat<(storeop GPR64:$Rt,
+                     (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
+            (STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32),
+                  GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
+}
+
+let AddedComplexity = 10 in {
+  // truncstore i64
+  defm : TruncStoreFrom64ROPat<ro8,  truncstorei8,  STRBBroW, STRBBroX>;
+  defm : TruncStoreFrom64ROPat<ro16, truncstorei16, STRHHroW, STRHHroX>;
+  defm : TruncStoreFrom64ROPat<ro32, truncstorei32, STRWroW,  STRWroX>;
+}
+
+multiclass VecROStorePat<ROAddrMode ro, ValueType VecTy, RegisterClass FPR,
+                         Instruction STRW, Instruction STRX> {
+  def : Pat<(store (VecTy FPR:$Rt),
+            (ro.Wpat GPR64sp:$Rn,
GPR32:$Rm, ro.Wext:$extend)), + (STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; + + def : Pat<(store (VecTy FPR:$Rt), + (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)), + (STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; +} + +let AddedComplexity = 10 in { +// Match all store 64 bits width whose type is compatible with FPR64 +let Predicates = [IsLE] in { + // We must use ST1 to store vectors in big-endian. + defm : VecROStorePat; + defm : VecROStorePat; + defm : VecROStorePat; + defm : VecROStorePat; + defm : VecROStorePat; +} + +defm : VecROStorePat; +defm : VecROStorePat; + +// Match all store 128 bits width whose type is compatible with FPR128 +let Predicates = [IsLE, UseSTRQro] in { + // We must use ST1 to store vectors in big-endian. + defm : VecROStorePat; + defm : VecROStorePat; + defm : VecROStorePat; + defm : VecROStorePat; + defm : VecROStorePat; + defm : VecROStorePat; + defm : VecROStorePat; +} +} // AddedComplexity = 10 + +// Match stores from lane 0 to the appropriate subreg's store. +multiclass VecROStoreLane0Pat { + + def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)), + (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)), + (STRW (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx), + GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; + + def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)), + (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)), + (STRX (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx), + GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; +} + +let AddedComplexity = 19 in { + defm : VecROStoreLane0Pat; + defm : VecROStoreLane0Pat; + defm : VecROStoreLane0Pat; + defm : VecROStoreLane0Pat; + defm : VecROStoreLane0Pat; + defm : VecROStoreLane0Pat; +} + +//--- +// (unsigned immediate) +defm STRX : StoreUIz<0b11, 0, 0b00, GPR64z, uimm12s8, "str", + [(store GPR64z:$Rt, + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>; +defm STRW : StoreUIz<0b10, 0, 0b00, GPR32z, uimm12s4, "str", + [(store GPR32z:$Rt, + (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>; +defm STRB : StoreUI<0b00, 1, 0b00, FPR8Op, uimm12s1, "str", + [(store FPR8Op:$Rt, + (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>; +defm STRH : StoreUI<0b01, 1, 0b00, FPR16Op, uimm12s2, "str", + [(store (f16 FPR16Op:$Rt), + (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>; +defm STRS : StoreUI<0b10, 1, 0b00, FPR32Op, uimm12s4, "str", + [(store (f32 FPR32Op:$Rt), + (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>; +defm STRD : StoreUI<0b11, 1, 0b00, FPR64Op, uimm12s8, "str", + [(store (f64 FPR64Op:$Rt), + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>; +defm STRQ : StoreUI<0b00, 1, 0b10, FPR128Op, uimm12s16, "str", []>; + +defm STRHH : StoreUIz<0b01, 0, 0b00, GPR32z, uimm12s2, "strh", + [(truncstorei16 GPR32z:$Rt, + (am_indexed16 GPR64sp:$Rn, + uimm12s2:$offset))]>; +defm STRBB : StoreUIz<0b00, 0, 0b00, GPR32z, uimm12s1, "strb", + [(truncstorei8 GPR32z:$Rt, + (am_indexed8 GPR64sp:$Rn, + uimm12s1:$offset))]>; + +let AddedComplexity = 10 in { + +// Match all store 64 bits width whose type is compatible with FPR64 +def : Pat<(store (v1i64 FPR64:$Rt), + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), + (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; +def : Pat<(store (v1f64 FPR64:$Rt), + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), + (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; + +let Predicates = [IsLE] in { + // We must use ST1 to store vectors in big-endian. 
+ def : Pat<(store (v2f32 FPR64:$Rt), + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), + (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; + def : Pat<(store (v8i8 FPR64:$Rt), + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), + (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; + def : Pat<(store (v4i16 FPR64:$Rt), + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), + (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; + def : Pat<(store (v2i32 FPR64:$Rt), + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), + (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; + def : Pat<(store (v4f16 FPR64:$Rt), + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), + (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; +} + +// Match all store 128 bits width whose type is compatible with FPR128 +def : Pat<(store (f128 FPR128:$Rt), + (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), + (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; + +let Predicates = [IsLE] in { + // We must use ST1 to store vectors in big-endian. + def : Pat<(store (v4f32 FPR128:$Rt), + (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), + (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(store (v2f64 FPR128:$Rt), + (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), + (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(store (v16i8 FPR128:$Rt), + (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), + (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(store (v8i16 FPR128:$Rt), + (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), + (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(store (v4i32 FPR128:$Rt), + (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), + (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(store (v2i64 FPR128:$Rt), + (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), + (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(store (v8f16 FPR128:$Rt), + (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), + (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; +} + +// truncstore i64 +def : Pat<(truncstorei32 GPR64:$Rt, + (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)), + (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s4:$offset)>; +def : Pat<(truncstorei16 GPR64:$Rt, + (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)), + (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s2:$offset)>; +def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)), + (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s1:$offset)>; + +} // AddedComplexity = 10 + +// Match stores from lane 0 to the appropriate subreg's store. 
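+// A store of element 0 of a 128-bit vector does not need a lane store (ST1):
+// the element can be extracted as the corresponding sub-register and written
+// with the scalar STR of the matching width, which is what the multiclass
+// below expresses.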
+multiclass VecStoreLane0Pat { + def : Pat<(storeop (STy (vector_extract (VTy VecListOne128:$Vt), 0)), + (UIAddrMode GPR64sp:$Rn, IndexType:$offset)), + (STR (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx), + GPR64sp:$Rn, IndexType:$offset)>; +} + +let AddedComplexity = 19 in { + defm : VecStoreLane0Pat; + defm : VecStoreLane0Pat; + defm : VecStoreLane0Pat; + defm : VecStoreLane0Pat; + defm : VecStoreLane0Pat; + defm : VecStoreLane0Pat; +} + +//--- +// (unscaled immediate) +defm STURX : StoreUnscaled<0b11, 0, 0b00, GPR64z, "stur", + [(store GPR64z:$Rt, + (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; +defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32z, "stur", + [(store GPR32z:$Rt, + (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>; +defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8Op, "stur", + [(store FPR8Op:$Rt, + (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>; +defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16Op, "stur", + [(store (f16 FPR16Op:$Rt), + (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>; +defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32Op, "stur", + [(store (f32 FPR32Op:$Rt), + (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>; +defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64Op, "stur", + [(store (f64 FPR64Op:$Rt), + (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; +defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128Op, "stur", + [(store (f128 FPR128Op:$Rt), + (am_unscaled128 GPR64sp:$Rn, simm9:$offset))]>; +defm STURHH : StoreUnscaled<0b01, 0, 0b00, GPR32z, "sturh", + [(truncstorei16 GPR32z:$Rt, + (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>; +defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32z, "sturb", + [(truncstorei8 GPR32z:$Rt, + (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>; + +// Armv8.4 LDAPR & STLR with Immediate Offset instruction +let Predicates = [HasV8_4a] in { +defm STLURB : BaseStoreUnscaleV84<"stlurb", 0b00, 0b00, GPR32>; +defm STLURH : BaseStoreUnscaleV84<"stlurh", 0b01, 0b00, GPR32>; +defm STLURW : BaseStoreUnscaleV84<"stlur", 0b10, 0b00, GPR32>; +defm STLURX : BaseStoreUnscaleV84<"stlur", 0b11, 0b00, GPR64>; +defm LDAPURB : BaseLoadUnscaleV84<"ldapurb", 0b00, 0b01, GPR32>; +defm LDAPURSBW : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b11, GPR32>; +defm LDAPURSBX : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b10, GPR64>; +defm LDAPURH : BaseLoadUnscaleV84<"ldapurh", 0b01, 0b01, GPR32>; +defm LDAPURSHW : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b11, GPR32>; +defm LDAPURSHX : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b10, GPR64>; +defm LDAPUR : BaseLoadUnscaleV84<"ldapur", 0b10, 0b01, GPR32>; +defm LDAPURSW : BaseLoadUnscaleV84<"ldapursw", 0b10, 0b10, GPR64>; +defm LDAPURX : BaseLoadUnscaleV84<"ldapur", 0b11, 0b01, GPR64>; +} + +// Match all store 64 bits width whose type is compatible with FPR64 +def : Pat<(store (v1f64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), + (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; +def : Pat<(store (v1i64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), + (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; + +let AddedComplexity = 10 in { + +let Predicates = [IsLE] in { + // We must use ST1 to store vectors in big-endian. 
+ def : Pat<(store (v2f32 FPR64:$Rt), + (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), + (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v8i8 FPR64:$Rt), + (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), + (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v4i16 FPR64:$Rt), + (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), + (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v2i32 FPR64:$Rt), + (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), + (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v4f16 FPR64:$Rt), + (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), + (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; +} + +// Match all store 128 bits width whose type is compatible with FPR128 +def : Pat<(store (f128 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), + (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; + +let Predicates = [IsLE] in { + // We must use ST1 to store vectors in big-endian. + def : Pat<(store (v4f32 FPR128:$Rt), + (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), + (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v2f64 FPR128:$Rt), + (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), + (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v16i8 FPR128:$Rt), + (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), + (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v8i16 FPR128:$Rt), + (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), + (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v4i32 FPR128:$Rt), + (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), + (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v2i64 FPR128:$Rt), + (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), + (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v2f64 FPR128:$Rt), + (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), + (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v8f16 FPR128:$Rt), + (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), + (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; +} + +} // AddedComplexity = 10 + +// unscaled i64 truncating stores +def : Pat<(truncstorei32 GPR64:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset)), + (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>; +def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)), + (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>; +def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)), + (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>; + +// Match stores from lane 0 to the appropriate subreg's store. +multiclass VecStoreULane0Pat { + defm : VecStoreLane0Pat; +} + +let AddedComplexity = 19 in { + defm : VecStoreULane0Pat; + defm : VecStoreULane0Pat; + defm : VecStoreULane0Pat; + defm : VecStoreULane0Pat; + defm : VecStoreULane0Pat; + defm : VecStoreULane0Pat; +} + +//--- +// STR mnemonics fall back to STUR for negative or unaligned offsets. 
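+// As with the LDR aliases above, e.g. "str x0, [x1, #-8]" cannot be encoded
+// with a scaled unsigned offset, so the fallback operands let it assemble to
+// the STUR form via the aliases below.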
+def : InstAlias<"str $Rt, [$Rn, $offset]", + (STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; +def : InstAlias<"str $Rt, [$Rn, $offset]", + (STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; +def : InstAlias<"str $Rt, [$Rn, $offset]", + (STURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; +def : InstAlias<"str $Rt, [$Rn, $offset]", + (STURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; +def : InstAlias<"str $Rt, [$Rn, $offset]", + (STURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; +def : InstAlias<"str $Rt, [$Rn, $offset]", + (STURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; +def : InstAlias<"str $Rt, [$Rn, $offset]", + (STURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>; + +def : InstAlias<"strb $Rt, [$Rn, $offset]", + (STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; +def : InstAlias<"strh $Rt, [$Rn, $offset]", + (STURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; + +//--- +// (unscaled immediate, unprivileged) +defm STTRW : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">; +defm STTRX : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">; + +defm STTRH : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">; +defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">; + +//--- +// (immediate pre-indexed) +def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32z, "str", pre_store, i32>; +def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64z, "str", pre_store, i64>; +def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op, "str", pre_store, untyped>; +def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16Op, "str", pre_store, f16>; +def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32Op, "str", pre_store, f32>; +def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64Op, "str", pre_store, f64>; +def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128Op, "str", pre_store, f128>; + +def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32z, "strb", pre_truncsti8, i32>; +def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32z, "strh", pre_truncsti16, i32>; + +// truncstore i64 +def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off), + (STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, + simm9:$off)>; +def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off), + (STRHHpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, + simm9:$off)>; +def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off), + (STRBBpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, + simm9:$off)>; + +def : Pat<(pre_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + +def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpre FPR128:$Rt, 
GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; + +//--- +// (immediate post-indexed) +def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32z, "str", post_store, i32>; +def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64z, "str", post_store, i64>; +def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op, "str", post_store, untyped>; +def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16Op, "str", post_store, f16>; +def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32Op, "str", post_store, f32>; +def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64Op, "str", post_store, f64>; +def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128Op, "str", post_store, f128>; + +def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32z, "strb", post_truncsti8, i32>; +def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32z, "strh", post_truncsti16, i32>; + +// truncstore i64 +def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off), + (STRWpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, + simm9:$off)>; +def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off), + (STRHHpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, + simm9:$off)>; +def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off), + (STRBBpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, + simm9:$off)>; + +def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + +def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, 
GPR64sp:$addr, simm9:$off)>; + +//===----------------------------------------------------------------------===// +// Load/store exclusive instructions. +//===----------------------------------------------------------------------===// + +def LDARW : LoadAcquire <0b10, 1, 1, 0, 1, GPR32, "ldar">; +def LDARX : LoadAcquire <0b11, 1, 1, 0, 1, GPR64, "ldar">; +def LDARB : LoadAcquire <0b00, 1, 1, 0, 1, GPR32, "ldarb">; +def LDARH : LoadAcquire <0b01, 1, 1, 0, 1, GPR32, "ldarh">; + +def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">; +def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">; +def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">; +def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">; + +def LDXRW : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">; +def LDXRX : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">; +def LDXRB : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">; +def LDXRH : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">; + +def STLRW : StoreRelease <0b10, 1, 0, 0, 1, GPR32, "stlr">; +def STLRX : StoreRelease <0b11, 1, 0, 0, 1, GPR64, "stlr">; +def STLRB : StoreRelease <0b00, 1, 0, 0, 1, GPR32, "stlrb">; +def STLRH : StoreRelease <0b01, 1, 0, 0, 1, GPR32, "stlrh">; + +def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">; +def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">; +def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">; +def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">; + +def STXRW : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">; +def STXRX : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">; +def STXRB : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">; +def STXRH : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">; + +def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">; +def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">; + +def LDXPW : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">; +def LDXPX : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">; + +def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">; +def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">; + +def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">; +def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">; + +let Predicates = [HasV8_1a] in { + // v8.1a "Limited Order Region" extension load-acquire instructions + def LDLARW : LoadAcquire <0b10, 1, 1, 0, 0, GPR32, "ldlar">; + def LDLARX : LoadAcquire <0b11, 1, 1, 0, 0, GPR64, "ldlar">; + def LDLARB : LoadAcquire <0b00, 1, 1, 0, 0, GPR32, "ldlarb">; + def LDLARH : LoadAcquire <0b01, 1, 1, 0, 0, GPR32, "ldlarh">; + + // v8.1a "Limited Order Region" extension store-release instructions + def STLLRW : StoreRelease <0b10, 1, 0, 0, 0, GPR32, "stllr">; + def STLLRX : StoreRelease <0b11, 1, 0, 0, 0, GPR64, "stllr">; + def STLLRB : StoreRelease <0b00, 1, 0, 0, 0, GPR32, "stllrb">; + def STLLRH : StoreRelease <0b01, 1, 0, 0, 0, GPR32, "stllrh">; +} + +//===----------------------------------------------------------------------===// +// Scaled floating point to integer conversion instructions. 
+//===----------------------------------------------------------------------===//
+
+defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>;
+defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>;
+defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>;
+defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>;
+defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>;
+defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>;
+defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>;
+defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>;
+defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", fp_to_sint>;
+defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", fp_to_uint>;
+defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", fp_to_sint>;
+defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", fp_to_uint>;
+
+multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
+  def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>;
+  def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # UXHr) $Rn)>;
+  def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # UWSr) $Rn)>;
+  def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # UXSr) $Rn)>;
+  def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>;
+  def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>;
+
+  def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
+            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
+  def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
+            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
+  def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
+            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
+  def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
+            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
+  def : Pat<(i32 (round (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
+            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
+  def : Pat<(i64 (round (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
+            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
+}
+
+defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">;
+defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">;
+
+multiclass FPToIntegerPats<SDNode to_int, SDNode round, string INST> {
+  def : Pat<(i32 (to_int (round f32:$Rn))),
+            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+  def : Pat<(i64 (to_int (round f32:$Rn))),
+            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+  def : Pat<(i32 (to_int (round f64:$Rn))),
+            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+  def : Pat<(i64 (to_int (round f64:$Rn))),
+            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+}
+
+defm : FPToIntegerPats<fp_to_sint, fceil,  "FCVTPS">;
+defm : FPToIntegerPats<fp_to_uint, fceil,  "FCVTPU">;
+defm : FPToIntegerPats<fp_to_sint, ffloor, "FCVTMS">;
+defm : FPToIntegerPats<fp_to_uint, ffloor, "FCVTMU">;
+defm : FPToIntegerPats<fp_to_sint, ftrunc, "FCVTZS">;
+defm : FPToIntegerPats<fp_to_uint, ftrunc, "FCVTZU">;
+defm : FPToIntegerPats<fp_to_sint, fround, "FCVTAS">;
+defm : FPToIntegerPats<fp_to_uint, fround, "FCVTAU">;
+
+//===----------------------------------------------------------------------===//
+// Scaled integer to floating point conversion instructions.
+//===----------------------------------------------------------------------===//
+
+defm SCVTF : IntegerToFP<0, "scvtf", sint_to_fp>;
+defm UCVTF : IntegerToFP<1, "ucvtf", uint_to_fp>;
+
+//===----------------------------------------------------------------------===//
+// Unscaled integer to floating point conversion instruction.
+//===----------------------------------------------------------------------===// + +defm FMOV : UnscaledConversion<"fmov">; + +// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable +let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in { +def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>, + Sched<[WriteF]>, Requires<[HasFullFP16]>; +def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>, + Sched<[WriteF]>; +def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>, + Sched<[WriteF]>; +} +// Similarly add aliases +def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>, + Requires<[HasFullFP16]>; +def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>; +def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>; + +//===----------------------------------------------------------------------===// +// Floating point conversion instruction. +//===----------------------------------------------------------------------===// + +defm FCVT : FPConversion<"fcvt">; + +//===----------------------------------------------------------------------===// +// Floating point single operand instructions. +//===----------------------------------------------------------------------===// + +defm FABS : SingleOperandFPData<0b0001, "fabs", fabs>; +defm FMOV : SingleOperandFPData<0b0000, "fmov">; +defm FNEG : SingleOperandFPData<0b0010, "fneg", fneg>; +defm FRINTA : SingleOperandFPData<0b1100, "frinta", fround>; +defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>; +defm FRINTM : SingleOperandFPData<0b1010, "frintm", ffloor>; +defm FRINTN : SingleOperandFPData<0b1000, "frintn", int_aarch64_neon_frintn>; +defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>; + +def : Pat<(v1f64 (int_aarch64_neon_frintn (v1f64 FPR64:$Rn))), + (FRINTNDr FPR64:$Rn)>; + +defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>; +defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>; + +let SchedRW = [WriteFDiv] in { +defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", fsqrt>; +} + +//===----------------------------------------------------------------------===// +// Floating point two operand instructions. +//===----------------------------------------------------------------------===// + +defm FADD : TwoOperandFPData<0b0010, "fadd", fadd>; +let SchedRW = [WriteFDiv] in { +defm FDIV : TwoOperandFPData<0b0001, "fdiv", fdiv>; +} +defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", fmaxnum>; +defm FMAX : TwoOperandFPData<0b0100, "fmax", fmaxnan>; +defm FMINNM : TwoOperandFPData<0b0111, "fminnm", fminnum>; +defm FMIN : TwoOperandFPData<0b0101, "fmin", fminnan>; +let SchedRW = [WriteFMul] in { +defm FMUL : TwoOperandFPData<0b0000, "fmul", fmul>; +defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>; +} +defm FSUB : TwoOperandFPData<0b0011, "fsub", fsub>; + +def : Pat<(v1f64 (fmaxnan (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), + (FMAXDrr FPR64:$Rn, FPR64:$Rm)>; +def : Pat<(v1f64 (fminnan (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), + (FMINDrr FPR64:$Rn, FPR64:$Rm)>; +def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), + (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>; +def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), + (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>; + +//===----------------------------------------------------------------------===// +// Floating point three operand instructions. 
+//===----------------------------------------------------------------------===// + +defm FMADD : ThreeOperandFPData<0, 0, "fmadd", fma>; +defm FMSUB : ThreeOperandFPData<0, 1, "fmsub", + TriOpFrag<(fma node:$LHS, (fneg node:$MHS), node:$RHS)> >; +defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd", + TriOpFrag<(fneg (fma node:$LHS, node:$MHS, node:$RHS))> >; +defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub", + TriOpFrag<(fma node:$LHS, node:$MHS, (fneg node:$RHS))> >; + +// The following def pats catch the case where the LHS of an FMA is negated. +// The TriOpFrag above catches the case where the middle operand is negated. + +// N.b. FMSUB etc have the accumulator at the *end* of (outs), unlike +// the NEON variant. +def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)), + (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; + +def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)), + (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; + +// We handled -(a + b*c) for FNMADD above, now it's time for "(-a) + (-b)*c" and +// "(-a) + b*(-c)". +def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))), + (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; + +def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))), + (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; + +def : Pat<(f32 (fma FPR32:$Rn, (fneg FPR32:$Rm), (fneg FPR32:$Ra))), + (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; + +def : Pat<(f64 (fma FPR64:$Rn, (fneg FPR64:$Rm), (fneg FPR64:$Ra))), + (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; + +//===----------------------------------------------------------------------===// +// Floating point comparison instructions. +//===----------------------------------------------------------------------===// + +defm FCMPE : FPComparison<1, "fcmpe">; +defm FCMP : FPComparison<0, "fcmp", AArch64fcmp>; + +//===----------------------------------------------------------------------===// +// Floating point conditional comparison instructions. +//===----------------------------------------------------------------------===// + +defm FCCMPE : FPCondComparison<1, "fccmpe">; +defm FCCMP : FPCondComparison<0, "fccmp", AArch64fccmp>; + +//===----------------------------------------------------------------------===// +// Floating point conditional select instruction. +//===----------------------------------------------------------------------===// + +defm FCSEL : FPCondSelect<"fcsel">; + +// CSEL instructions providing f128 types need to be handled by a +// pseudo-instruction since the eventual code will need to introduce basic +// blocks and control flow. +def F128CSEL : Pseudo<(outs FPR128:$Rd), + (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond), + [(set (f128 FPR128:$Rd), + (AArch64csel FPR128:$Rn, FPR128:$Rm, + (i32 imm:$cond), NZCV))]> { + let Uses = [NZCV]; + let usesCustomInserter = 1; + let hasNoSchedulingInfo = 1; +} + + +//===----------------------------------------------------------------------===// +// Floating point immediate move. +//===----------------------------------------------------------------------===// + +let isReMaterializable = 1 in { +defm FMOV : FPMoveImmediate<"fmov">; +} + +//===----------------------------------------------------------------------===// +// Advanced SIMD two vector instructions. +//===----------------------------------------------------------------------===// + +defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl", + int_aarch64_neon_uabd>; +// Match UABDL in log2-shuffle patterns. 
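+// The xor/add/ashr patterns below are the expanded form of abs(a - b), so
+// UABDL is still selected when the abs node has already been lowered that way.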
+def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)), + (zext (v8i8 V64:$opB))))), + (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>; +def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))), + (v8i16 (add (sub (zext (v8i8 V64:$opA)), + (zext (v8i8 V64:$opB))), + (AArch64vashr v8i16:$src, (i32 15))))), + (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>; +def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 V128:$opA)), + (zext (extract_high_v16i8 V128:$opB))))), + (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>; +def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))), + (v8i16 (add (sub (zext (extract_high_v16i8 V128:$opA)), + (zext (extract_high_v16i8 V128:$opB))), + (AArch64vashr v8i16:$src, (i32 15))))), + (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>; +def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)), + (zext (v4i16 V64:$opB))))), + (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>; +def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 V128:$opA)), + (zext (extract_high_v8i16 V128:$opB))))), + (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>; +def : Pat<(abs (v2i64 (sub (zext (v2i32 V64:$opA)), + (zext (v2i32 V64:$opB))))), + (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>; +def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 V128:$opA)), + (zext (extract_high_v4i32 V128:$opB))))), + (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>; + +defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", abs>; +defm CLS : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>; +defm CLZ : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>; +defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>; +defm CMGE : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>; +defm CMGT : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>; +defm CMLE : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>; +defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>; +defm CNT : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>; +defm FABS : SIMDTwoVectorFP<0, 1, 0b01111, "fabs", fabs>; + +defm FCMEQ : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>; +defm FCMGE : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>; +defm FCMGT : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>; +defm FCMLE : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>; +defm FCMLT : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>; +defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>; +defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>; +defm FCVTL : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">; +def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))), + (FCVTLv4i16 V64:$Rn)>; +def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn), + (i64 4)))), + (FCVTLv8i16 V128:$Rn)>; +def : Pat<(v2f64 (fpextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>; +def : Pat<(v2f64 (fpextend (v2f32 (extract_subvector (v4f32 V128:$Rn), + (i64 2))))), + (FCVTLv4i32 V128:$Rn)>; + +def : Pat<(v4f32 (fpextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>; +def : Pat<(v4f32 (fpextend (v4f16 (extract_subvector (v8f16 V128:$Rn), + (i64 4))))), + (FCVTLv8i16 V128:$Rn)>; + +defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>; +defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>; +defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>; +defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>; +defm FCVTN : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">; +def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 
V128:$Rn))), + (FCVTNv4i16 V128:$Rn)>; +def : Pat<(concat_vectors V64:$Rd, + (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))), + (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; +def : Pat<(v2f32 (fpround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>; +def : Pat<(v4f16 (fpround (v4f32 V128:$Rn))), (FCVTNv4i16 V128:$Rn)>; +def : Pat<(concat_vectors V64:$Rd, (v2f32 (fpround (v2f64 V128:$Rn)))), + (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; +defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>; +defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>; +defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn", + int_aarch64_neon_fcvtxn>; +defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", fp_to_sint>; +defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", fp_to_uint>; + +def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>; +def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>; +def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>; +def : Pat<(v4i32 (int_aarch64_neon_fcvtzs v4f32:$Rn)), (FCVTZSv4f32 $Rn)>; +def : Pat<(v2i64 (int_aarch64_neon_fcvtzs v2f64:$Rn)), (FCVTZSv2f64 $Rn)>; + +def : Pat<(v4i16 (int_aarch64_neon_fcvtzu v4f16:$Rn)), (FCVTZUv4f16 $Rn)>; +def : Pat<(v8i16 (int_aarch64_neon_fcvtzu v8f16:$Rn)), (FCVTZUv8f16 $Rn)>; +def : Pat<(v2i32 (int_aarch64_neon_fcvtzu v2f32:$Rn)), (FCVTZUv2f32 $Rn)>; +def : Pat<(v4i32 (int_aarch64_neon_fcvtzu v4f32:$Rn)), (FCVTZUv4f32 $Rn)>; +def : Pat<(v2i64 (int_aarch64_neon_fcvtzu v2f64:$Rn)), (FCVTZUv2f64 $Rn)>; + +defm FNEG : SIMDTwoVectorFP<1, 1, 0b01111, "fneg", fneg>; +defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>; +defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", fround>; +defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>; +defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", ffloor>; +defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", int_aarch64_neon_frintn>; +defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>; +defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>; +defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>; +defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>; +defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", fsqrt>; +defm NEG : SIMDTwoVectorBHSD<1, 0b01011, "neg", + UnOpFrag<(sub immAllZerosV, node:$LHS)> >; +defm NOT : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>; +// Aliases for MVN -> NOT. 
+def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}", + (NOTv8i8 V64:$Vd, V64:$Vn)>; +def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}", + (NOTv16i8 V128:$Vd, V128:$Vn)>; + +def : Pat<(AArch64neg (v8i8 V64:$Rn)), (NEGv8i8 V64:$Rn)>; +def : Pat<(AArch64neg (v16i8 V128:$Rn)), (NEGv16i8 V128:$Rn)>; +def : Pat<(AArch64neg (v4i16 V64:$Rn)), (NEGv4i16 V64:$Rn)>; +def : Pat<(AArch64neg (v8i16 V128:$Rn)), (NEGv8i16 V128:$Rn)>; +def : Pat<(AArch64neg (v2i32 V64:$Rn)), (NEGv2i32 V64:$Rn)>; +def : Pat<(AArch64neg (v4i32 V128:$Rn)), (NEGv4i32 V128:$Rn)>; +def : Pat<(AArch64neg (v2i64 V128:$Rn)), (NEGv2i64 V128:$Rn)>; + +def : Pat<(AArch64not (v8i8 V64:$Rn)), (NOTv8i8 V64:$Rn)>; +def : Pat<(AArch64not (v16i8 V128:$Rn)), (NOTv16i8 V128:$Rn)>; +def : Pat<(AArch64not (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>; +def : Pat<(AArch64not (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>; +def : Pat<(AArch64not (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>; +def : Pat<(AArch64not (v1i64 V64:$Rn)), (NOTv8i8 V64:$Rn)>; +def : Pat<(AArch64not (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>; +def : Pat<(AArch64not (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>; + +def : Pat<(vnot (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>; +def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>; +def : Pat<(vnot (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>; +def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>; +def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>; + +defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", int_aarch64_neon_rbit>; +defm REV16 : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>; +defm REV32 : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>; +defm REV64 : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>; +defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp", + BinOpFrag<(add node:$LHS, (int_aarch64_neon_saddlp node:$RHS))> >; +defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", int_aarch64_neon_saddlp>; +defm SCVTF : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", sint_to_fp>; +defm SHLL : SIMDVectorLShiftLongBySizeBHS; +defm SQABS : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>; +defm SQNEG : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>; +defm SQXTN : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>; +defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>; +defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>; +defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp", + BinOpFrag<(add node:$LHS, (int_aarch64_neon_uaddlp node:$RHS))> >; +defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", + int_aarch64_neon_uaddlp>; +defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", uint_to_fp>; +defm UQXTN : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>; +defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>; +defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>; +defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>; +defm XTN : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>; + +def : Pat<(v4f16 (AArch64rev32 V64:$Rn)), (REV32v4i16 V64:$Rn)>; +def : Pat<(v4f16 (AArch64rev64 V64:$Rn)), (REV64v4i16 V64:$Rn)>; +def : Pat<(v8f16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>; +def : Pat<(v8f16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>; +def : Pat<(v2f32 (AArch64rev64 V64:$Rn)), (REV64v2i32 V64:$Rn)>; +def : Pat<(v4f32 (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>; + +// Patterns for vector long shift (by element width). 
These need to match all
+// three of zext, sext and anyext so it's easier to pull the patterns out of the
+// definition.
+multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> {
+  def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)),
+            (SHLLv8i8 V64:$Rn)>;
+  def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 V128:$Rn))), (i32 8)),
+            (SHLLv16i8 V128:$Rn)>;
+  def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)),
+            (SHLLv4i16 V64:$Rn)>;
+  def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 V128:$Rn))), (i32 16)),
+            (SHLLv8i16 V128:$Rn)>;
+  def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)),
+            (SHLLv2i32 V64:$Rn)>;
+  def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 V128:$Rn))), (i32 32)),
+            (SHLLv4i32 V128:$Rn)>;
+}
+
+defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>;
+defm : SIMDVectorLShiftLongBySizeBHSPats<zext>;
+defm : SIMDVectorLShiftLongBySizeBHSPats<sext>;
+
+//===----------------------------------------------------------------------===//
+// Advanced SIMD three vector instructions.
+//===----------------------------------------------------------------------===//
+
+defm ADD : SIMDThreeSameVector<0, 0b10000, "add", add>;
+defm ADDP : SIMDThreeSameVector<0, 0b10111, "addp", int_aarch64_neon_addp>;
+defm CMEQ : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>;
+defm CMGE : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>;
+defm CMGT : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>;
+defm CMHI : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>;
+defm CMHS : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>;
+defm CMTST : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>;
+defm FABD : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>;
+let Predicates = [HasNEON] in {
+foreach VT = [ v2f32, v4f32, v2f64 ] in
+def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
+}
+let Predicates = [HasNEON, HasFullFP16] in {
+foreach VT = [ v4f16, v8f16 ] in
+def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
+}
+defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>;
+defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>;
+defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_addp>;
+defm FADD : SIMDThreeSameVectorFP<0,0,0b010,"fadd", fadd>;
+defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
+defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
+defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
+defm FDIV : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", fdiv>;
+defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
+defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", fmaxnum>;
+defm FMAXP : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>;
+defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", fmaxnan>;
+defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>;
+defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", fminnum>;
+defm FMINP : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>;
+defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", fminnan>;
+
+// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
+// instruction expects the addend first, while the fma intrinsic puts it last.
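+// i.e. the accumulator ends up in the tied destination register of FMLA/FMLS,
+// with the two multiplicands following it.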
+defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla", + TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >; +defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls", + TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; + +// The following def pats catch the case where the LHS of an FMA is negated. +// The TriOpFrag above catches the case where the middle operand is negated. +def : Pat<(v2f32 (fma (fneg V64:$Rn), V64:$Rm, V64:$Rd)), + (FMLSv2f32 V64:$Rd, V64:$Rn, V64:$Rm)>; + +def : Pat<(v4f32 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)), + (FMLSv4f32 V128:$Rd, V128:$Rn, V128:$Rm)>; + +def : Pat<(v2f64 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)), + (FMLSv2f64 V128:$Rd, V128:$Rn, V128:$Rm)>; + +defm FMULX : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>; +defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", fmul>; +defm FRECPS : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>; +defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>; +defm FSUB : SIMDThreeSameVectorFP<0,1,0b010,"fsub", fsub>; +defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", + TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))> >; +defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", + TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))> >; +defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>; +defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>; +defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba", + TriOpFrag<(add node:$LHS, (int_aarch64_neon_sabd node:$MHS, node:$RHS))> >; +defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_aarch64_neon_sabd>; +defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_aarch64_neon_shadd>; +defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>; +defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>; +defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>; +defm SMINP : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>; +defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", smin>; +defm SQADD : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>; +defm SQDMULH : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>; +defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>; +defm SQRSHL : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>; +defm SQSHL : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>; +defm SQSUB : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>; +defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd",int_aarch64_neon_srhadd>; +defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>; +defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>; +defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>; +defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba", + TriOpFrag<(add node:$LHS, (int_aarch64_neon_uabd node:$MHS, node:$RHS))> >; +defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_aarch64_neon_uabd>; +defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_aarch64_neon_uhadd>; +defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>; +defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>; +defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>; +defm UMINP : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>; +defm UMIN : SIMDThreeSameVectorBHS<1,0b01101,"umin", 
umin>; +defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>; +defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>; +defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>; +defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>; +defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_aarch64_neon_urhadd>; +defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>; +defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>; +defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah", + int_aarch64_neon_sqadd>; +defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh", + int_aarch64_neon_sqsub>; + +defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>; +defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic", + BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >; +defm BIF : SIMDLogicalThreeVector<1, 0b11, "bif">; +defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", AArch64bit>; +defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl", + TriOpFrag<(or (and node:$LHS, node:$MHS), (and (vnot node:$LHS), node:$RHS))>>; +defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>; +defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn", + BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >; +defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>; + + +def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm), + (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; +def : Pat<(AArch64bsl (v4i16 V64:$Rd), V64:$Rn, V64:$Rm), + (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; +def : Pat<(AArch64bsl (v2i32 V64:$Rd), V64:$Rn, V64:$Rm), + (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; +def : Pat<(AArch64bsl (v1i64 V64:$Rd), V64:$Rn, V64:$Rm), + (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; + +def : Pat<(AArch64bsl (v16i8 V128:$Rd), V128:$Rn, V128:$Rm), + (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; +def : Pat<(AArch64bsl (v8i16 V128:$Rd), V128:$Rn, V128:$Rm), + (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; +def : Pat<(AArch64bsl (v4i32 V128:$Rd), V128:$Rn, V128:$Rm), + (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; +def : Pat<(AArch64bsl (v2i64 V128:$Rd), V128:$Rn, V128:$Rm), + (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; + +def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}", + (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>; +def : InstAlias<"mov{\t$dst.8h, $src.8h|.8h\t$dst, $src}", + (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>; +def : InstAlias<"mov{\t$dst.4s, $src.4s|.4s\t$dst, $src}", + (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>; +def : InstAlias<"mov{\t$dst.2d, $src.2d|.2d\t$dst, $src}", + (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>; + +def : InstAlias<"mov{\t$dst.8b, $src.8b|.8b\t$dst, $src}", + (ORRv8i8 V64:$dst, V64:$src, V64:$src), 1>; +def : InstAlias<"mov{\t$dst.4h, $src.4h|.4h\t$dst, $src}", + (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>; +def : InstAlias<"mov{\t$dst.2s, $src.2s|.2s\t$dst, $src}", + (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>; +def : InstAlias<"mov{\t$dst.1d, $src.1d|.1d\t$dst, $src}", + (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>; + +def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" # + "|cmls.8b\t$dst, $src1, $src2}", + (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" # + "|cmls.16b\t$dst, $src1, $src2}", + (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" # + "|cmls.4h\t$dst, $src1, $src2}", + (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{cmls\t$dst.8h, 
$src1.8h, $src2.8h" # + "|cmls.8h\t$dst, $src1, $src2}", + (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" # + "|cmls.2s\t$dst, $src1, $src2}", + (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" # + "|cmls.4s\t$dst, $src1, $src2}", + (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" # + "|cmls.2d\t$dst, $src1, $src2}", + (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; + +def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" # + "|cmlo.8b\t$dst, $src1, $src2}", + (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" # + "|cmlo.16b\t$dst, $src1, $src2}", + (CMHIv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" # + "|cmlo.4h\t$dst, $src1, $src2}", + (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" # + "|cmlo.8h\t$dst, $src1, $src2}", + (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" # + "|cmlo.2s\t$dst, $src1, $src2}", + (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" # + "|cmlo.4s\t$dst, $src1, $src2}", + (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" # + "|cmlo.2d\t$dst, $src1, $src2}", + (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; + +def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" # + "|cmle.8b\t$dst, $src1, $src2}", + (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" # + "|cmle.16b\t$dst, $src1, $src2}", + (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" # + "|cmle.4h\t$dst, $src1, $src2}", + (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" # + "|cmle.8h\t$dst, $src1, $src2}", + (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" # + "|cmle.2s\t$dst, $src1, $src2}", + (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" # + "|cmle.4s\t$dst, $src1, $src2}", + (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" # + "|cmle.2d\t$dst, $src1, $src2}", + (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; + +def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" # + "|cmlt.8b\t$dst, $src1, $src2}", + (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" # + "|cmlt.16b\t$dst, $src1, $src2}", + (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" # + "|cmlt.4h\t$dst, $src1, $src2}", + (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" # + "|cmlt.8h\t$dst, $src1, $src2}", + (CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" # + "|cmlt.2s\t$dst, $src1, $src2}", + (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" # + "|cmlt.4s\t$dst, $src1, $src2}", + (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" # + "|cmlt.2d\t$dst, $src1, $src2}", + (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; + 
+let Predicates = [HasNEON, HasFullFP16] in { +def : InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" # + "|fcmle.4h\t$dst, $src1, $src2}", + (FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{fcmle\t$dst.8h, $src1.8h, $src2.8h" # + "|fcmle.8h\t$dst, $src1, $src2}", + (FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; +} +def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" # + "|fcmle.2s\t$dst, $src1, $src2}", + (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" # + "|fcmle.4s\t$dst, $src1, $src2}", + (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" # + "|fcmle.2d\t$dst, $src1, $src2}", + (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; + +let Predicates = [HasNEON, HasFullFP16] in { +def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" # + "|fcmlt.4h\t$dst, $src1, $src2}", + (FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" # + "|fcmlt.8h\t$dst, $src1, $src2}", + (FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; +} +def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" # + "|fcmlt.2s\t$dst, $src1, $src2}", + (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" # + "|fcmlt.4s\t$dst, $src1, $src2}", + (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" # + "|fcmlt.2d\t$dst, $src1, $src2}", + (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; + +let Predicates = [HasNEON, HasFullFP16] in { +def : InstAlias<"{facle\t$dst.4h, $src1.4h, $src2.4h" # + "|facle.4h\t$dst, $src1, $src2}", + (FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{facle\t$dst.8h, $src1.8h, $src2.8h" # + "|facle.8h\t$dst, $src1, $src2}", + (FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; +} +def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" # + "|facle.2s\t$dst, $src1, $src2}", + (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" # + "|facle.4s\t$dst, $src1, $src2}", + (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" # + "|facle.2d\t$dst, $src1, $src2}", + (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; + +let Predicates = [HasNEON, HasFullFP16] in { +def : InstAlias<"{faclt\t$dst.4h, $src1.4h, $src2.4h" # + "|faclt.4h\t$dst, $src1, $src2}", + (FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{faclt\t$dst.8h, $src1.8h, $src2.8h" # + "|faclt.8h\t$dst, $src1, $src2}", + (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; +} +def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" # + "|faclt.2s\t$dst, $src1, $src2}", + (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" # + "|faclt.4s\t$dst, $src1, $src2}", + (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" # + "|faclt.2d\t$dst, $src1, $src2}", + (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; + +//===----------------------------------------------------------------------===// +// Advanced SIMD three scalar instructions. 
+//===----------------------------------------------------------------------===// + +defm ADD : SIMDThreeScalarD<0, 0b10000, "add", add>; +defm CMEQ : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>; +defm CMGE : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>; +defm CMGT : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>; +defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>; +defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>; +defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>; +defm FABD : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>; +def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), + (FABD64 FPR64:$Rn, FPR64:$Rm)>; +let Predicates = [HasFullFP16] in { +def : Pat<(fabs (fsub f16:$Rn, f16:$Rm)), (FABD16 f16:$Rn, f16:$Rm)>; +} +def : Pat<(fabs (fsub f32:$Rn, f32:$Rm)), (FABD32 f32:$Rn, f32:$Rm)>; +def : Pat<(fabs (fsub f64:$Rn, f64:$Rm)), (FABD64 f64:$Rn, f64:$Rm)>; +defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge", + int_aarch64_neon_facge>; +defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt", + int_aarch64_neon_facgt>; +defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>; +defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>; +defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; +defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx>; +defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps>; +defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts>; +defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>; +defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>; +defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>; +defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>; +defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>; +defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>; +defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", int_aarch64_neon_srshl>; +defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_aarch64_neon_sshl>; +defm SUB : SIMDThreeScalarD< 1, 0b10000, "sub", sub>; +defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>; +defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>; +defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>; +defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>; +defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>; +defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>; +let Predicates = [HasRDM] in { + defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">; + defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">; + def : Pat<(i32 (int_aarch64_neon_sqadd + (i32 FPR32:$Rd), + (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn), + (i32 FPR32:$Rm))))), + (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>; + def : Pat<(i32 (int_aarch64_neon_sqsub + (i32 FPR32:$Rd), + (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn), + (i32 FPR32:$Rm))))), + (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>; +} + +def : InstAlias<"cmls $dst, $src1, $src2", + (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; +def : InstAlias<"cmle $dst, $src1, $src2", + (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; +def : InstAlias<"cmlo $dst, 
$src1, $src2", + (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; +def : InstAlias<"cmlt $dst, $src1, $src2", + (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; +def : InstAlias<"fcmle $dst, $src1, $src2", + (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; +def : InstAlias<"fcmle $dst, $src1, $src2", + (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; +def : InstAlias<"fcmlt $dst, $src1, $src2", + (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; +def : InstAlias<"fcmlt $dst, $src1, $src2", + (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; +def : InstAlias<"facle $dst, $src1, $src2", + (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; +def : InstAlias<"facle $dst, $src1, $src2", + (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; +def : InstAlias<"faclt $dst, $src1, $src2", + (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; +def : InstAlias<"faclt $dst, $src1, $src2", + (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; + +//===----------------------------------------------------------------------===// +// Advanced SIMD three scalar instructions (mixed operands). +//===----------------------------------------------------------------------===// +defm SQDMULL : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull", + int_aarch64_neon_sqdmulls_scalar>; +defm SQDMLAL : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">; +defm SQDMLSL : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">; + +def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd), + (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), + (i32 FPR32:$Rm))))), + (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>; +def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd), + (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), + (i32 FPR32:$Rm))))), + (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>; + +//===----------------------------------------------------------------------===// +// Advanced SIMD two scalar instructions. 
+//===----------------------------------------------------------------------===// + +defm ABS : SIMDTwoScalarD< 0, 0b01011, "abs", abs>; +defm CMEQ : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", AArch64cmeqz>; +defm CMGE : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>; +defm CMGT : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>; +defm CMLE : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>; +defm CMLT : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>; +defm FCMEQ : SIMDFPCmpTwoScalar<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>; +defm FCMGE : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>; +defm FCMGT : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>; +defm FCMLE : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>; +defm FCMLT : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>; +defm FCVTAS : SIMDFPTwoScalar< 0, 0, 0b11100, "fcvtas">; +defm FCVTAU : SIMDFPTwoScalar< 1, 0, 0b11100, "fcvtau">; +defm FCVTMS : SIMDFPTwoScalar< 0, 0, 0b11011, "fcvtms">; +defm FCVTMU : SIMDFPTwoScalar< 1, 0, 0b11011, "fcvtmu">; +defm FCVTNS : SIMDFPTwoScalar< 0, 0, 0b11010, "fcvtns">; +defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu">; +defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps">; +defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">; +def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">; +defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">; +defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">; +defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe">; +defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx">; +defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte">; +defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg", + UnOpFrag<(sub immAllZerosV, node:$LHS)> >; +defm SCVTF : SIMDFPTwoScalarCVT< 0, 0, 0b11101, "scvtf", AArch64sitof>; +defm SQABS : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>; +defm SQNEG : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>; +defm SQXTN : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>; +defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>; +defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd", + int_aarch64_neon_suqadd>; +defm UCVTF : SIMDFPTwoScalarCVT< 1, 0, 0b11101, "ucvtf", AArch64uitof>; +defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>; +defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd", + int_aarch64_neon_usqadd>; + +def : Pat<(AArch64neg (v1i64 V64:$Rn)), (NEGv1i64 V64:$Rn)>; + +def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))), + (FCVTASv1i64 FPR64:$Rn)>; +def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))), + (FCVTAUv1i64 FPR64:$Rn)>; +def : Pat<(v1i64 (int_aarch64_neon_fcvtms (v1f64 FPR64:$Rn))), + (FCVTMSv1i64 FPR64:$Rn)>; +def : Pat<(v1i64 (int_aarch64_neon_fcvtmu (v1f64 FPR64:$Rn))), + (FCVTMUv1i64 FPR64:$Rn)>; +def : Pat<(v1i64 (int_aarch64_neon_fcvtns (v1f64 FPR64:$Rn))), + (FCVTNSv1i64 FPR64:$Rn)>; +def : Pat<(v1i64 (int_aarch64_neon_fcvtnu (v1f64 FPR64:$Rn))), + (FCVTNUv1i64 FPR64:$Rn)>; +def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))), + (FCVTPSv1i64 FPR64:$Rn)>; +def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))), + (FCVTPUv1i64 FPR64:$Rn)>; + +def : Pat<(f16 (int_aarch64_neon_frecpe (f16 FPR16:$Rn))), + (FRECPEv1f16 FPR16:$Rn)>; +def : Pat<(f32 (int_aarch64_neon_frecpe (f32 FPR32:$Rn))), + (FRECPEv1i32 FPR32:$Rn)>; +def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))), + (FRECPEv1i64 
FPR64:$Rn)>; +def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))), + (FRECPEv1i64 FPR64:$Rn)>; + +def : Pat<(f32 (AArch64frecpe (f32 FPR32:$Rn))), + (FRECPEv1i32 FPR32:$Rn)>; +def : Pat<(v2f32 (AArch64frecpe (v2f32 V64:$Rn))), + (FRECPEv2f32 V64:$Rn)>; +def : Pat<(v4f32 (AArch64frecpe (v4f32 FPR128:$Rn))), + (FRECPEv4f32 FPR128:$Rn)>; +def : Pat<(f64 (AArch64frecpe (f64 FPR64:$Rn))), + (FRECPEv1i64 FPR64:$Rn)>; +def : Pat<(v1f64 (AArch64frecpe (v1f64 FPR64:$Rn))), + (FRECPEv1i64 FPR64:$Rn)>; +def : Pat<(v2f64 (AArch64frecpe (v2f64 FPR128:$Rn))), + (FRECPEv2f64 FPR128:$Rn)>; + +def : Pat<(f32 (AArch64frecps (f32 FPR32:$Rn), (f32 FPR32:$Rm))), + (FRECPS32 FPR32:$Rn, FPR32:$Rm)>; +def : Pat<(v2f32 (AArch64frecps (v2f32 V64:$Rn), (v2f32 V64:$Rm))), + (FRECPSv2f32 V64:$Rn, V64:$Rm)>; +def : Pat<(v4f32 (AArch64frecps (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))), + (FRECPSv4f32 FPR128:$Rn, FPR128:$Rm)>; +def : Pat<(f64 (AArch64frecps (f64 FPR64:$Rn), (f64 FPR64:$Rm))), + (FRECPS64 FPR64:$Rn, FPR64:$Rm)>; +def : Pat<(v2f64 (AArch64frecps (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))), + (FRECPSv2f64 FPR128:$Rn, FPR128:$Rm)>; + +def : Pat<(f16 (int_aarch64_neon_frecpx (f16 FPR16:$Rn))), + (FRECPXv1f16 FPR16:$Rn)>; +def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))), + (FRECPXv1i32 FPR32:$Rn)>; +def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))), + (FRECPXv1i64 FPR64:$Rn)>; + +def : Pat<(f16 (int_aarch64_neon_frsqrte (f16 FPR16:$Rn))), + (FRSQRTEv1f16 FPR16:$Rn)>; +def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))), + (FRSQRTEv1i32 FPR32:$Rn)>; +def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))), + (FRSQRTEv1i64 FPR64:$Rn)>; +def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))), + (FRSQRTEv1i64 FPR64:$Rn)>; + +def : Pat<(f32 (AArch64frsqrte (f32 FPR32:$Rn))), + (FRSQRTEv1i32 FPR32:$Rn)>; +def : Pat<(v2f32 (AArch64frsqrte (v2f32 V64:$Rn))), + (FRSQRTEv2f32 V64:$Rn)>; +def : Pat<(v4f32 (AArch64frsqrte (v4f32 FPR128:$Rn))), + (FRSQRTEv4f32 FPR128:$Rn)>; +def : Pat<(f64 (AArch64frsqrte (f64 FPR64:$Rn))), + (FRSQRTEv1i64 FPR64:$Rn)>; +def : Pat<(v1f64 (AArch64frsqrte (v1f64 FPR64:$Rn))), + (FRSQRTEv1i64 FPR64:$Rn)>; +def : Pat<(v2f64 (AArch64frsqrte (v2f64 FPR128:$Rn))), + (FRSQRTEv2f64 FPR128:$Rn)>; + +def : Pat<(f32 (AArch64frsqrts (f32 FPR32:$Rn), (f32 FPR32:$Rm))), + (FRSQRTS32 FPR32:$Rn, FPR32:$Rm)>; +def : Pat<(v2f32 (AArch64frsqrts (v2f32 V64:$Rn), (v2f32 V64:$Rm))), + (FRSQRTSv2f32 V64:$Rn, V64:$Rm)>; +def : Pat<(v4f32 (AArch64frsqrts (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))), + (FRSQRTSv4f32 FPR128:$Rn, FPR128:$Rm)>; +def : Pat<(f64 (AArch64frsqrts (f64 FPR64:$Rn), (f64 FPR64:$Rm))), + (FRSQRTS64 FPR64:$Rn, FPR64:$Rm)>; +def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))), + (FRSQRTSv2f64 FPR128:$Rn, FPR128:$Rm)>; + +// If an integer is about to be converted to a floating point value, +// just load it on the floating point unit. +// Here are the patterns for 8 and 16-bits to float. +// 8-bits -> float. 
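+// As a rough illustration (variable names here are not from the source): for
+// C code like "float f = (float)*(uint8_t *)p;" the byte is loaded directly
+// into an FP/SIMD register (LDRB writing the B subregister) and converted in
+// place with UCVTF, avoiding a GPR load plus a cross-register-file move.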
+multiclass UIntToFPROLoadPat { + def : Pat<(DstTy (uint_to_fp (SrcTy + (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, + ro.Wext:$extend))))), + (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)), + (LDRW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend), + sub))>; + + def : Pat<(DstTy (uint_to_fp (SrcTy + (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, + ro.Wext:$extend))))), + (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)), + (LDRX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend), + sub))>; +} + +defm : UIntToFPROLoadPat; +def : Pat <(f32 (uint_to_fp (i32 + (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), + (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), + (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>; +def : Pat <(f32 (uint_to_fp (i32 + (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))), + (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), + (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>; +// 16-bits -> float. +defm : UIntToFPROLoadPat; +def : Pat <(f32 (uint_to_fp (i32 + (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), + (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), + (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>; +def : Pat <(f32 (uint_to_fp (i32 + (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))), + (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), + (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>; +// 32-bits are handled in target specific dag combine: +// performIntToFpCombine. +// 64-bits integer to 32-bits floating point, not possible with +// UCVTF on floating point registers (both source and destination +// must have the same size). + +// Here are the patterns for 8, 16, 32, and 64-bits to double. +// 8-bits -> double. +defm : UIntToFPROLoadPat; +def : Pat <(f64 (uint_to_fp (i32 + (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), + (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>; +def : Pat <(f64 (uint_to_fp (i32 + (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))), + (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>; +// 16-bits -> double. +defm : UIntToFPROLoadPat; +def : Pat <(f64 (uint_to_fp (i32 + (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), + (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>; +def : Pat <(f64 (uint_to_fp (i32 + (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))), + (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>; +// 32-bits -> double. +defm : UIntToFPROLoadPat; +def : Pat <(f64 (uint_to_fp (i32 + (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), + (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>; +def : Pat <(f64 (uint_to_fp (i32 + (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))), + (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>; +// 64-bits -> double are handled in target specific dag combine: +// performIntToFpCombine. + +//===----------------------------------------------------------------------===// +// Advanced SIMD three different-sized vector instructions. 
+//===----------------------------------------------------------------------===// + +defm ADDHN : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>; +defm SUBHN : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>; +defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>; +defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>; +defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_aarch64_neon_pmull>; +defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal", + int_aarch64_neon_sabd>; +defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl", + int_aarch64_neon_sabd>; +defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl", + BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>; +defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw", + BinOpFrag<(add node:$LHS, (sext node:$RHS))>>; +defm SMLAL : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal", + TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; +defm SMLSL : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl", + TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; +defm SMULL : SIMDLongThreeVectorBHS<0, 0b1100, "smull", int_aarch64_neon_smull>; +defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal", + int_aarch64_neon_sqadd>; +defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl", + int_aarch64_neon_sqsub>; +defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull", + int_aarch64_neon_sqdmull>; +defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl", + BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>; +defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw", + BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>; +defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal", + int_aarch64_neon_uabd>; +defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl", + BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>; +defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw", + BinOpFrag<(add node:$LHS, (zext node:$RHS))>>; +defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal", + TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; +defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl", + TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; +defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", int_aarch64_neon_umull>; +defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl", + BinOpFrag<(sub (zext node:$LHS), (zext node:$RHS))>>; +defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw", + BinOpFrag<(sub node:$LHS, (zext node:$RHS))>>; + +// Additional patterns for SMULL and UMULL +multiclass Neon_mul_widen_patterns { + def : Pat<(v8i16 (opnode (v8i8 V64:$Rn), (v8i8 V64:$Rm))), + (INST8B V64:$Rn, V64:$Rm)>; + def : Pat<(v4i32 (opnode (v4i16 V64:$Rn), (v4i16 V64:$Rm))), + (INST4H V64:$Rn, V64:$Rm)>; + def : Pat<(v2i64 (opnode (v2i32 V64:$Rn), (v2i32 V64:$Rm))), + (INST2S V64:$Rn, V64:$Rm)>; +} + +defm : Neon_mul_widen_patterns; +defm : Neon_mul_widen_patterns; + +// Patterns for smull2/umull2. 
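+// "High" means the operands are the upper 64-bit halves of the 128-bit
+// sources (the extract_high_* fragments below), which is what the *2 forms
+// of these instructions consume.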
+multiclass Neon_mul_high_patterns { + def : Pat<(v8i16 (opnode (extract_high_v16i8 V128:$Rn), + (extract_high_v16i8 V128:$Rm))), + (INST8B V128:$Rn, V128:$Rm)>; + def : Pat<(v4i32 (opnode (extract_high_v8i16 V128:$Rn), + (extract_high_v8i16 V128:$Rm))), + (INST4H V128:$Rn, V128:$Rm)>; + def : Pat<(v2i64 (opnode (extract_high_v4i32 V128:$Rn), + (extract_high_v4i32 V128:$Rm))), + (INST2S V128:$Rn, V128:$Rm)>; +} + +defm : Neon_mul_high_patterns; +defm : Neon_mul_high_patterns; + +// Additional patterns for SMLAL/SMLSL and UMLAL/UMLSL +multiclass Neon_mulacc_widen_patterns { + def : Pat<(v8i16 (opnode (v8i16 V128:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm))), + (INST8B V128:$Rd, V64:$Rn, V64:$Rm)>; + def : Pat<(v4i32 (opnode (v4i32 V128:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm))), + (INST4H V128:$Rd, V64:$Rn, V64:$Rm)>; + def : Pat<(v2i64 (opnode (v2i64 V128:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm))), + (INST2S V128:$Rd, V64:$Rn, V64:$Rm)>; +} + +defm : Neon_mulacc_widen_patterns< + TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>, + SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>; +defm : Neon_mulacc_widen_patterns< + TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>, + UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>; +defm : Neon_mulacc_widen_patterns< + TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>, + SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>; +defm : Neon_mulacc_widen_patterns< + TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>, + UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>; + +// Patterns for 64-bit pmull +def : Pat<(int_aarch64_neon_pmull64 V64:$Rn, V64:$Rm), + (PMULLv1i64 V64:$Rn, V64:$Rm)>; +def : Pat<(int_aarch64_neon_pmull64 (extractelt (v2i64 V128:$Rn), (i64 1)), + (extractelt (v2i64 V128:$Rm), (i64 1))), + (PMULLv2i64 V128:$Rn, V128:$Rm)>; + +// CodeGen patterns for addhn and subhn instructions, which can actually be +// written in LLVM IR without too much difficulty. 
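+// As a rough scalar sketch of what the ADDHN patterns below match (types and
+// names here are illustrative only): for the 16-bit-to-8-bit case each result
+// lane is the high half of the wrapped wider sum,
+//   d[i] = (uint8_t)((uint16_t)(a[i] + b[i]) >> 8);
+// SUBHN is the same with a subtraction, and the wider element sizes use the
+// shift amounts 16 and 32 seen in the patterns.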
+ +// ADDHN +def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))), + (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>; +def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm), + (i32 16))))), + (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>; +def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm), + (i32 32))))), + (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>; +def : Pat<(concat_vectors (v8i8 V64:$Rd), + (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), + (i32 8))))), + (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), + V128:$Rn, V128:$Rm)>; +def : Pat<(concat_vectors (v4i16 V64:$Rd), + (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm), + (i32 16))))), + (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), + V128:$Rn, V128:$Rm)>; +def : Pat<(concat_vectors (v2i32 V64:$Rd), + (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm), + (i32 32))))), + (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), + V128:$Rn, V128:$Rm)>; + +// SUBHN +def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))), + (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>; +def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm), + (i32 16))))), + (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>; +def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm), + (i32 32))))), + (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>; +def : Pat<(concat_vectors (v8i8 V64:$Rd), + (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), + (i32 8))))), + (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), + V128:$Rn, V128:$Rm)>; +def : Pat<(concat_vectors (v4i16 V64:$Rd), + (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm), + (i32 16))))), + (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), + V128:$Rn, V128:$Rm)>; +def : Pat<(concat_vectors (v2i32 V64:$Rd), + (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm), + (i32 32))))), + (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), + V128:$Rn, V128:$Rm)>; + +//---------------------------------------------------------------------------- +// AdvSIMD bitwise extract from vector instruction. +//---------------------------------------------------------------------------- + +defm EXT : SIMDBitwiseExtract<"ext">; + +def : Pat<(v4i16 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), + (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; +def : Pat<(v8i16 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), + (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; +def : Pat<(v2i32 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), + (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; +def : Pat<(v2f32 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), + (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; +def : Pat<(v4i32 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), + (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; +def : Pat<(v4f32 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), + (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; +def : Pat<(v2i64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), + (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; +def : Pat<(v2f64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), + (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; +def : Pat<(v4f16 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), + (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; +def : Pat<(v8f16 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), + (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; + +// We use EXT to handle extract_subvector to copy the upper 64-bits of a +// 128-bit vector. 
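+// EXT of a register with itself and a byte offset of 8 rotates the upper half
+// into the lower half, so extracting dsub afterwards yields the original top
+// 64 bits as a 64-bit vector.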
+def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 8))), + (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; +def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 4))), + (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; +def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 2))), + (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; +def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 1))), + (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; +def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 4))), + (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; +def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 2))), + (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; +def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 1))), + (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; + + +//---------------------------------------------------------------------------- +// AdvSIMD zip vector +//---------------------------------------------------------------------------- + +defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>; +defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>; +defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>; +defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>; +defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>; +defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>; + +//---------------------------------------------------------------------------- +// AdvSIMD TBL/TBX instructions +//---------------------------------------------------------------------------- + +defm TBL : SIMDTableLookup< 0, "tbl">; +defm TBX : SIMDTableLookupTied<1, "tbx">; + +def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))), + (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>; +def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))), + (TBLv16i8One V128:$Ri, V128:$Rn)>; + +def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd), + (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))), + (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>; +def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd), + (v16i8 V128:$Ri), (v16i8 V128:$Rn))), + (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>; + + +//---------------------------------------------------------------------------- +// AdvSIMD scalar CPY instruction +//---------------------------------------------------------------------------- + +defm CPY : SIMDScalarCPY<"cpy">; + +//---------------------------------------------------------------------------- +// AdvSIMD scalar pairwise instructions +//---------------------------------------------------------------------------- + +defm ADDP : SIMDPairwiseScalarD<0, 0b11011, "addp">; +defm FADDP : SIMDFPPairwiseScalar<0, 0b01101, "faddp">; +defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">; +defm FMAXP : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">; +defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">; +defm FMINP : SIMDFPPairwiseScalar<1, 0b01111, "fminp">; +def : Pat<(v2i64 (AArch64saddv V128:$Rn)), + (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>; +def : Pat<(v2i64 (AArch64uaddv V128:$Rn)), + (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>; +def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))), + (FADDPv2i32p V64:$Rn)>; +def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))), + (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>; +def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))), + (FADDPv2i64p V128:$Rn)>; +def : Pat<(f32 (int_aarch64_neon_fmaxnmv (v2f32 
V64:$Rn))), + (FMAXNMPv2i32p V64:$Rn)>; +def : Pat<(f64 (int_aarch64_neon_fmaxnmv (v2f64 V128:$Rn))), + (FMAXNMPv2i64p V128:$Rn)>; +def : Pat<(f32 (int_aarch64_neon_fmaxv (v2f32 V64:$Rn))), + (FMAXPv2i32p V64:$Rn)>; +def : Pat<(f64 (int_aarch64_neon_fmaxv (v2f64 V128:$Rn))), + (FMAXPv2i64p V128:$Rn)>; +def : Pat<(f32 (int_aarch64_neon_fminnmv (v2f32 V64:$Rn))), + (FMINNMPv2i32p V64:$Rn)>; +def : Pat<(f64 (int_aarch64_neon_fminnmv (v2f64 V128:$Rn))), + (FMINNMPv2i64p V128:$Rn)>; +def : Pat<(f32 (int_aarch64_neon_fminv (v2f32 V64:$Rn))), + (FMINPv2i32p V64:$Rn)>; +def : Pat<(f64 (int_aarch64_neon_fminv (v2f64 V128:$Rn))), + (FMINPv2i64p V128:$Rn)>; + +//---------------------------------------------------------------------------- +// AdvSIMD INS/DUP instructions +//---------------------------------------------------------------------------- + +def DUPv8i8gpr : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>; +def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>; +def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>; +def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>; +def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>; +def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>; +def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>; + +def DUPv2i64lane : SIMDDup64FromElement; +def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>; +def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>; +def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>; +def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>; +def DUPv8i8lane : SIMDDup8FromElement <0, ".8b", v8i8, V64>; +def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>; + +def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))), + (v2f32 (DUPv2i32lane + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub), + (i64 0)))>; +def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))), + (v4f32 (DUPv4i32lane + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub), + (i64 0)))>; +def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))), + (v2f64 (DUPv2i64lane + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub), + (i64 0)))>; +def : Pat<(v4f16 (AArch64dup (f16 FPR16:$Rn))), + (v4f16 (DUPv4i16lane + (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub), + (i64 0)))>; +def : Pat<(v8f16 (AArch64dup (f16 FPR16:$Rn))), + (v8f16 (DUPv8i16lane + (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub), + (i64 0)))>; + +def : Pat<(v4f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)), + (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>; +def : Pat<(v8f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)), + (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>; + +def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), + (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>; +def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), + (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>; +def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)), + (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>; + +// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane +// instruction even if the types don't match: we just have to remap the lane +// carefully. N.b. this trick only applies to truncations. 
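+// The remapping is a simple scale: element i of the wider type starts at
+// element 2*i, 4*i or 8*i of the narrower type, which is what the
+// VecIndex_x2/_x4/_x8 transforms below compute.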
+def VecIndex_x2 : SDNodeXFormgetTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64); +}]>; +def VecIndex_x4 : SDNodeXFormgetTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64); +}]>; +def VecIndex_x8 : SDNodeXFormgetTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64); +}]>; + +multiclass DUPWithTruncPats { + def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src128VT V128:$Rn), + imm:$idx)))), + (DUP V128:$Rn, (IdxXFORM imm:$idx))>; + + def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src64VT V64:$Rn), + imm:$idx)))), + (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>; +} + +defm : DUPWithTruncPats; +defm : DUPWithTruncPats; +defm : DUPWithTruncPats; + +defm : DUPWithTruncPats; +defm : DUPWithTruncPats; +defm : DUPWithTruncPats; + +multiclass DUPWithTrunci64Pats { + def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v2i64 V128:$Rn), + imm:$idx))))), + (DUP V128:$Rn, (IdxXFORM imm:$idx))>; + + def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v1i64 V64:$Rn), + imm:$idx))))), + (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>; +} + +defm : DUPWithTrunci64Pats; +defm : DUPWithTrunci64Pats; +defm : DUPWithTrunci64Pats; + +defm : DUPWithTrunci64Pats; +defm : DUPWithTrunci64Pats; +defm : DUPWithTrunci64Pats; + +// SMOV and UMOV definitions, with some extra patterns for convenience +defm SMOV : SMov; +defm UMOV : UMov; + +def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8), + (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>; +def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8), + (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>; +def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), + (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>; +def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), + (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>; +def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), + (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>; +def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))), + (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>; + +def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn), + VectorIndexB:$idx)))), i8), + (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>; +def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn), + VectorIndexH:$idx)))), i16), + (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>; + +// Extracting i8 or i16 elements will have the zero-extend transformed to +// an 'and' mask by type legalization since neither i8 nor i16 are legal types +// for AArch64. Match these patterns here since UMOV already zeroes out the high +// bits of the destination register. 
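+// For example, a zero-extending extract such as
+//   (i32 (zext (i8 (extractelement v16i8:$v, $n))))
+// is legalized into an extract followed by "and $x, 0xff"; the patterns below
+// fold the mask away because UMOV Wd, Vn.B[n] already clears bits [31:8].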
+def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), + (i32 0xff)), + (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>; +def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx), + (i32 0xffff)), + (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>; + +defm INS : SIMDIns; + +def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)), + (SUBREG_TO_REG (i32 0), + (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; +def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)), + (SUBREG_TO_REG (i32 0), + (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; + +def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)), + (SUBREG_TO_REG (i32 0), + (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; +def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)), + (SUBREG_TO_REG (i32 0), + (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; + +def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))), + (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; +def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))), + (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; + +def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))), + (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), + (i32 FPR32:$Rn), ssub))>; +def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))), + (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), + (i32 FPR32:$Rn), ssub))>; + +def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))), + (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), + (i64 FPR64:$Rn), dsub))>; + +def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))), + (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; +def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))), + (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; + +def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>; +def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))), + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>; + +def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))), + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>; + +def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn), + (f16 FPR16:$Rm), (i64 VectorIndexS:$imm))), + (EXTRACT_SUBREG + (INSvi16lane + (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)), + VectorIndexS:$imm, + (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)), + (i64 0)), + dsub)>; + +def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn), + (f16 FPR16:$Rm), (i64 VectorIndexH:$imm))), + (INSvi16lane + V128:$Rn, VectorIndexH:$imm, + (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)), + (i64 0))>; + +def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn), + (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))), + (EXTRACT_SUBREG + (INSvi32lane + (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)), + VectorIndexS:$imm, + (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)), + (i64 0)), + dsub)>; +def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn), + (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))), + (INSvi32lane + V128:$Rn, VectorIndexS:$imm, + (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)), + (i64 0))>; +def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn), + (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))), + (INSvi64lane + V128:$Rn, VectorIndexD:$imm, + (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)), + (i64 0))>; + +// Copy an element at a constant index in one vector into a constant indexed +// element of another. 
+// FIXME refactor to a shared class/dev parameterized on vector type, vector +// index type and INS extension +def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane + (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs), + VectorIndexB:$idx2)), + (v16i8 (INSvi8lane + V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2) + )>; +def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane + (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs), + VectorIndexH:$idx2)), + (v8i16 (INSvi16lane + V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2) + )>; +def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane + (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs), + VectorIndexS:$idx2)), + (v4i32 (INSvi32lane + V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2) + )>; +def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane + (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs), + VectorIndexD:$idx2)), + (v2i64 (INSvi64lane + V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2) + )>; + +multiclass Neon_INS_elt_pattern { + def : Pat<(VT128 (vector_insert V128:$src, + (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)), + imm:$Immd)), + (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>; + + def : Pat<(VT128 (vector_insert V128:$src, + (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)), + imm:$Immd)), + (INS V128:$src, imm:$Immd, + (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>; + + def : Pat<(VT64 (vector_insert V64:$src, + (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)), + imm:$Immd)), + (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), + imm:$Immd, V128:$Rn, imm:$Immn), + dsub)>; + + def : Pat<(VT64 (vector_insert V64:$src, + (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)), + imm:$Immd)), + (EXTRACT_SUBREG + (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd, + (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn), + dsub)>; +} + +defm : Neon_INS_elt_pattern; +defm : Neon_INS_elt_pattern; +defm : Neon_INS_elt_pattern; + + +// Floating point vector extractions are codegen'd as either a sequence of +// subregister extractions, or a MOV (aka CPY here, alias for DUP) if +// the lane number is anything other than zero. +def : Pat<(vector_extract (v2f64 V128:$Rn), 0), + (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>; +def : Pat<(vector_extract (v4f32 V128:$Rn), 0), + (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>; +def : Pat<(vector_extract (v8f16 V128:$Rn), 0), + (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>; + +def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx), + (f64 (CPYi64 V128:$Rn, VectorIndexD:$idx))>; +def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx), + (f32 (CPYi32 V128:$Rn, VectorIndexS:$idx))>; +def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx), + (f16 (CPYi16 V128:$Rn, VectorIndexH:$idx))>; + +// All concat_vectors operations are canonicalised to act on i64 vectors for +// AArch64. In the general case we need an instruction, which had just as well be +// INS. 
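+// Concretely, a concat of two 64-bit halves becomes: place the low half into
+// dsub of an undefined 128-bit register with INSERT_SUBREG, then insert the
+// high half into element 1 with INSvi64lane, as ConcatPat does below.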
+class ConcatPat + : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)), + (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1, + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>; + +def : ConcatPat; +def : ConcatPat; +def : ConcatPat; +def : ConcatPat; +def : ConcatPat; +def : ConcatPat; +def : ConcatPat; + +// If the high lanes are undef, though, we can just ignore them: +class ConcatUndefPat + : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)), + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>; + +def : ConcatUndefPat; +def : ConcatUndefPat; +def : ConcatUndefPat; +def : ConcatUndefPat; +def : ConcatUndefPat; +def : ConcatUndefPat; + +//---------------------------------------------------------------------------- +// AdvSIMD across lanes instructions +//---------------------------------------------------------------------------- + +defm ADDV : SIMDAcrossLanesBHS<0, 0b11011, "addv">; +defm SMAXV : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">; +defm SMINV : SIMDAcrossLanesBHS<0, 0b11010, "sminv">; +defm UMAXV : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">; +defm UMINV : SIMDAcrossLanesBHS<1, 0b11010, "uminv">; +defm SADDLV : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">; +defm UADDLV : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">; +defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>; +defm FMAXV : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>; +defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>; +defm FMINV : SIMDFPAcrossLanes<0b01111, 1, "fminv", int_aarch64_neon_fminv>; + +// Patterns for across-vector intrinsics, that have a node equivalent, that +// returns a vector (with only the low lane defined) instead of a scalar. +// In effect, opNode is the same as (scalar_to_vector (IntNode)). +multiclass SIMDAcrossLanesIntrinsic { +// If a lane instruction caught the vector_extract around opNode, we can +// directly match the latter to the instruction. +def : Pat<(v8i8 (opNode V64:$Rn)), + (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), + (!cast(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>; +def : Pat<(v16i8 (opNode V128:$Rn)), + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>; +def : Pat<(v4i16 (opNode V64:$Rn)), + (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), + (!cast(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>; +def : Pat<(v8i16 (opNode V128:$Rn)), + (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), + (!cast(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>; +def : Pat<(v4i32 (opNode V128:$Rn)), + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), + (!cast(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>; + + +// If none did, fallback to the explicit patterns, consuming the vector_extract. 
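+// These fall-back patterns match the whole (vector_extract (opNode ...), 0)
+// dag and lower it to the across-lanes instruction followed by a subregister
+// copy that reads lane 0 back out as a 32-bit scalar.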
+def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)), + (i32 0)), (i64 0))), + (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), + (!cast(!strconcat(baseOpc, "v8i8v")) V64:$Rn), + bsub), ssub)>; +def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))), + (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast(!strconcat(baseOpc, "v16i8v")) V128:$Rn), + bsub), ssub)>; +def : Pat<(i32 (vector_extract (insert_subvector undef, + (v4i16 (opNode V64:$Rn)), (i32 0)), (i64 0))), + (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), + (!cast(!strconcat(baseOpc, "v4i16v")) V64:$Rn), + hsub), ssub)>; +def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))), + (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), + (!cast(!strconcat(baseOpc, "v8i16v")) V128:$Rn), + hsub), ssub)>; +def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))), + (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), + (!cast(!strconcat(baseOpc, "v4i32v")) V128:$Rn), + ssub), ssub)>; + +} + +multiclass SIMDAcrossLanesSignedIntrinsic + : SIMDAcrossLanesIntrinsic { +// If there is a sign extension after this intrinsic, consume it as smov already +// performed it +def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef, + (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), i8)), + (i32 (SMOVvi8to32 + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), + (i64 0)))>; +def : Pat<(i32 (sext_inreg (i32 (vector_extract + (opNode (v16i8 V128:$Rn)), (i64 0))), i8)), + (i32 (SMOVvi8to32 + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), + (i64 0)))>; +def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef, + (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), i16)), + (i32 (SMOVvi16to32 + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), + (i64 0)))>; +def : Pat<(i32 (sext_inreg (i32 (vector_extract + (opNode (v8i16 V128:$Rn)), (i64 0))), i16)), + (i32 (SMOVvi16to32 + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), + (i64 0)))>; +} + +multiclass SIMDAcrossLanesUnsignedIntrinsic + : SIMDAcrossLanesIntrinsic { +// If there is a masking operation keeping only what has been actually +// generated, consume it. 
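+// i.e. an "and" that keeps at least the low 8 bits (maski8_or_more) after a
+// byte-element reduction, or at least the low 16 bits (maski16_or_more) after
+// a halfword-element reduction, is redundant because the instruction only
+// produces that many significant bits, so the patterns below drop it.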
+def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef, + (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), maski8_or_more)), + (i32 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), + ssub))>; +def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))), + maski8_or_more)), + (i32 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), + ssub))>; +def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef, + (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), maski16_or_more)), + (i32 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), + ssub))>; +def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))), + maski16_or_more)), + (i32 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), + ssub))>; +} + +defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", AArch64saddv>; +// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm +def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))), + (ADDPv2i32 V64:$Rn, V64:$Rn)>; + +defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>; +// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm +def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))), + (ADDPv2i32 V64:$Rn, V64:$Rn)>; + +defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>; +def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))), + (SMAXPv2i32 V64:$Rn, V64:$Rn)>; + +defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>; +def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))), + (SMINPv2i32 V64:$Rn, V64:$Rn)>; + +defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>; +def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))), + (UMAXPv2i32 V64:$Rn, V64:$Rn)>; + +defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>; +def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))), + (UMINPv2i32 V64:$Rn, V64:$Rn)>; + +multiclass SIMDAcrossLanesSignedLongIntrinsic { + def : Pat<(i32 (intOp (v8i8 V64:$Rn))), + (i32 (SMOVvi16to32 + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub), + (i64 0)))>; +def : Pat<(i32 (intOp (v16i8 V128:$Rn))), + (i32 (SMOVvi16to32 + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub), + (i64 0)))>; + +def : Pat<(i32 (intOp (v4i16 V64:$Rn))), + (i32 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub), + ssub))>; +def : Pat<(i32 (intOp (v8i16 V128:$Rn))), + (i32 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub), + ssub))>; + +def : Pat<(i64 (intOp (v4i32 V128:$Rn))), + (i64 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub), + dsub))>; +} + +multiclass SIMDAcrossLanesUnsignedLongIntrinsic { + def : Pat<(i32 (intOp (v8i8 V64:$Rn))), + (i32 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub), + ssub))>; +def : Pat<(i32 (intOp (v16i8 V128:$Rn))), + (i32 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub), + ssub))>; + +def : Pat<(i32 (intOp (v4i16 V64:$Rn))), + (i32 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast(!strconcat(baseOpc, "v4i16v")) V64:$Rn), 
ssub), + ssub))>; +def : Pat<(i32 (intOp (v8i16 V128:$Rn))), + (i32 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub), + ssub))>; + +def : Pat<(i64 (intOp (v4i32 V128:$Rn))), + (i64 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub), + dsub))>; +} + +defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>; +defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>; + +// The vaddlv_s32 intrinsic gets mapped to SADDLP. +def : Pat<(i64 (int_aarch64_neon_saddlv (v2i32 V64:$Rn))), + (i64 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (SADDLPv2i32_v1i64 V64:$Rn), dsub), + dsub))>; +// The vaddlv_u32 intrinsic gets mapped to UADDLP. +def : Pat<(i64 (int_aarch64_neon_uaddlv (v2i32 V64:$Rn))), + (i64 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (UADDLPv2i32_v1i64 V64:$Rn), dsub), + dsub))>; + +//------------------------------------------------------------------------------ +// AdvSIMD modified immediate instructions +//------------------------------------------------------------------------------ + +// AdvSIMD BIC +defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>; +// AdvSIMD ORR +defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>; + +def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>; +def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>; +def : InstAlias<"bic $Vd.2s, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>; +def : InstAlias<"bic $Vd.4s, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>; + +def : InstAlias<"bic.4h $Vd, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>; +def : InstAlias<"bic.8h $Vd, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>; +def : InstAlias<"bic.2s $Vd, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>; +def : InstAlias<"bic.4s $Vd, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>; + +def : InstAlias<"orr $Vd.4h, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>; +def : InstAlias<"orr $Vd.8h, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>; +def : InstAlias<"orr $Vd.2s, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>; +def : InstAlias<"orr $Vd.4s, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>; + +def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>; +def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>; +def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>; +def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>; + +// AdvSIMD FMOV +def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8, + "fmov", ".2d", + [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; +def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64, fpimm8, + "fmov", ".2s", + [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>; +def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8, + "fmov", ".4s", + [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; +let Predicates = [HasNEON, HasFullFP16] in { +def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64, fpimm8, + "fmov", ".4h", + [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>; +def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8, + "fmov", ".8h", + [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; +} // Predicates = [HasNEON, HasFullFP16] + +// AdvSIMD MOVI + +// EDIT byte mask: scalar 
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in +def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi", + [(set FPR64:$Rd, simdimmtype10:$imm8)]>; +// The movi_edit node has the immediate value already encoded, so we use +// a plain imm0_255 here. +def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)), + (MOVID imm0_255:$shift)>; + +def : Pat<(v1i64 immAllZerosV), (MOVID (i32 0))>; +def : Pat<(v2i32 immAllZerosV), (MOVID (i32 0))>; +def : Pat<(v4i16 immAllZerosV), (MOVID (i32 0))>; +def : Pat<(v8i8 immAllZerosV), (MOVID (i32 0))>; + +def : Pat<(v1i64 immAllOnesV), (MOVID (i32 255))>; +def : Pat<(v2i32 immAllOnesV), (MOVID (i32 255))>; +def : Pat<(v4i16 immAllOnesV), (MOVID (i32 255))>; +def : Pat<(v8i8 immAllOnesV), (MOVID (i32 255))>; + +// EDIT byte mask: 2d + +// The movi_edit node has the immediate value already encoded, so we use +// a plain imm0_255 in the pattern +let isReMaterializable = 1, isAsCheapAsAMove = 1 in +def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128, + simdimmtype10, + "movi", ".2d", + [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>; + +def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>; +def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>; +def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>; +def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>; + +def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>; +def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>; +def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>; +def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>; + +// EDIT per word & halfword: 2s, 4h, 4s, & 8h +let isReMaterializable = 1, isAsCheapAsAMove = 1 in +defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">; + +def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"movi $Vd.4s, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; + +def : InstAlias<"movi.4h $Vd, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; + +def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), + (MOVIv2i32 imm0_255:$imm8, imm:$shift)>; +def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), + (MOVIv4i32 imm0_255:$imm8, imm:$shift)>; +def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), + (MOVIv4i16 imm0_255:$imm8, imm:$shift)>; +def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), + (MOVIv8i16 imm0_255:$imm8, imm:$shift)>; + +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { +// EDIT per word: 2s & 4s with MSL shifter +def MOVIv2s_msl : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s", + [(set (v2i32 V64:$Rd), + (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; +def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s", + [(set (v4i32 V128:$Rd), + (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; + +// Per byte: 8b & 16b +def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64, imm0_255, + "movi", ".8b", + [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>; + +def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255, + "movi", 
".16b", + [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>; +} + +// AdvSIMD MVNI + +// EDIT per word & halfword: 2s, 4h, 4s, & 8h +let isReMaterializable = 1, isAsCheapAsAMove = 1 in +defm MVNI : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">; + +def : InstAlias<"mvni $Vd.4h, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"mvni $Vd.8h, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"mvni $Vd.2s, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"mvni $Vd.4s, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; + +def : InstAlias<"mvni.4h $Vd, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; + +def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), + (MVNIv2i32 imm0_255:$imm8, imm:$shift)>; +def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), + (MVNIv4i32 imm0_255:$imm8, imm:$shift)>; +def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), + (MVNIv4i16 imm0_255:$imm8, imm:$shift)>; +def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), + (MVNIv8i16 imm0_255:$imm8, imm:$shift)>; + +// EDIT per word: 2s & 4s with MSL shifter +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { +def MVNIv2s_msl : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s", + [(set (v2i32 V64:$Rd), + (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; +def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s", + [(set (v4i32 V128:$Rd), + (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; +} + +//---------------------------------------------------------------------------- +// AdvSIMD indexed element +//---------------------------------------------------------------------------- + +let hasSideEffects = 0 in { + defm FMLA : SIMDFPIndexedTied<0, 0b0001, "fmla">; + defm FMLS : SIMDFPIndexedTied<0, 0b0101, "fmls">; +} + +// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the +// instruction expects the addend first, while the intrinsic expects it last. + +// On the other hand, there are quite a few valid combinatorial options due to +// the commutativity of multiplication and the fact that (-x) * y = x * (-y). +defm : SIMDFPIndexedTiedPatterns<"FMLA", + TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)>>; +defm : SIMDFPIndexedTiedPatterns<"FMLA", + TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)>>; + +defm : SIMDFPIndexedTiedPatterns<"FMLS", + TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; +defm : SIMDFPIndexedTiedPatterns<"FMLS", + TriOpFrag<(fma node:$RHS, (fneg node:$MHS), node:$LHS)> >; +defm : SIMDFPIndexedTiedPatterns<"FMLS", + TriOpFrag<(fma (fneg node:$RHS), node:$MHS, node:$LHS)> >; +defm : SIMDFPIndexedTiedPatterns<"FMLS", + TriOpFrag<(fma (fneg node:$MHS), node:$RHS, node:$LHS)> >; + +multiclass FMLSIndexedAfterNegPatterns { + // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit + // and DUP scalar. 
+ def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), + (AArch64duplane32 (v4f32 (fneg V128:$Rm)), + VectorIndexS:$idx))), + (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>; + def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), + (v2f32 (AArch64duplane32 + (v4f32 (insert_subvector undef, + (v2f32 (fneg V64:$Rm)), + (i32 0))), + VectorIndexS:$idx)))), + (FMLSv2i32_indexed V64:$Rd, V64:$Rn, + (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), + VectorIndexS:$idx)>; + def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), + (AArch64dup (f32 (fneg FPR32Op:$Rm))))), + (FMLSv2i32_indexed V64:$Rd, V64:$Rn, + (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; + + // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit + // and DUP scalar. + def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), + (AArch64duplane32 (v4f32 (fneg V128:$Rm)), + VectorIndexS:$idx))), + (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm, + VectorIndexS:$idx)>; + def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), + (v4f32 (AArch64duplane32 + (v4f32 (insert_subvector undef, + (v2f32 (fneg V64:$Rm)), + (i32 0))), + VectorIndexS:$idx)))), + (FMLSv4i32_indexed V128:$Rd, V128:$Rn, + (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), + VectorIndexS:$idx)>; + def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), + (AArch64dup (f32 (fneg FPR32Op:$Rm))))), + (FMLSv4i32_indexed V128:$Rd, V128:$Rn, + (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; + + // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar + // (DUPLANE from 64-bit would be trivial). + def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), + (AArch64duplane64 (v2f64 (fneg V128:$Rm)), + VectorIndexD:$idx))), + (FMLSv2i64_indexed + V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; + def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), + (AArch64dup (f64 (fneg FPR64Op:$Rm))))), + (FMLSv2i64_indexed V128:$Rd, V128:$Rn, + (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>; + + // 2 variants for 32-bit scalar version: extract from .2s or from .4s + def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), + (vector_extract (v4f32 (fneg V128:$Rm)), + VectorIndexS:$idx))), + (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn, + V128:$Rm, VectorIndexS:$idx)>; + def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), + (vector_extract (v4f32 (insert_subvector undef, + (v2f32 (fneg V64:$Rm)), + (i32 0))), + VectorIndexS:$idx))), + (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn, + (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>; + + // 1 variant for 64-bit scalar version: extract from .1d or from .2d + def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn), + (vector_extract (v2f64 (fneg V128:$Rm)), + VectorIndexS:$idx))), + (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn, + V128:$Rm, VectorIndexS:$idx)>; +} + +defm : FMLSIndexedAfterNegPatterns< + TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >; +defm : FMLSIndexedAfterNegPatterns< + TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)> >; + +defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>; +defm FMUL : SIMDFPIndexed<0, 0b1001, "fmul", fmul>; + +def : Pat<(v2f32 (fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))), + (FMULv2i32_indexed V64:$Rn, + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), + (i64 0))>; +def : Pat<(v4f32 (fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))), + (FMULv4i32_indexed V128:$Rn, + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), + (i64 0))>; +def : Pat<(v2f64 (fmul V128:$Rn, (AArch64dup 
(f64 FPR64:$Rm)))), + (FMULv2i64_indexed V128:$Rn, + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub), + (i64 0))>; + +defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>; +defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>; +defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla", + TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))>>; +defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls", + TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))>>; +defm MUL : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>; +defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal", + TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; +defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl", + TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; +defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", + int_aarch64_neon_smull>; +defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal", + int_aarch64_neon_sqadd>; +defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", + int_aarch64_neon_sqsub>; +defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah", + int_aarch64_neon_sqadd>; +defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh", + int_aarch64_neon_sqsub>; +defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>; +defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal", + TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; +defm UMLSL : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl", + TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; +defm UMULL : SIMDVectorIndexedLongSD<1, 0b1010, "umull", + int_aarch64_neon_umull>; + +// A scalar sqdmull with the second operand being a vector lane can be +// handled directly with the indexed instruction encoding. +def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), + (vector_extract (v4i32 V128:$Vm), + VectorIndexS:$idx)), + (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>; + +//---------------------------------------------------------------------------- +// AdvSIMD scalar shift instructions +//---------------------------------------------------------------------------- +defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">; +defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">; +defm SCVTF : SIMDFPScalarRShift<0, 0b11100, "scvtf">; +defm UCVTF : SIMDFPScalarRShift<1, 0b11100, "ucvtf">; +// Codegen patterns for the above. We don't put these directly on the +// instructions because TableGen's type inference can't handle the truth. +// Having the same base pattern for fp <--> int totally freaks it out. 
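+// Editor's note (illustrative, not part of the upstream source): each pattern
+// below simply forwards to the scalar fixed-point convert instruction, e.g. an
+// int_aarch64_neon_vcvtfp2fxs of an f32 with a shift of 16 is expected to
+// select FCVTZSs and assemble as "fcvtzs s0, s0, #16".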
+def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm),
+          (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>;
+def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm),
+          (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>;
+def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)),
+          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
+def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)),
+          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
+def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn),
+                                              vecshiftR64:$imm)),
+          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
+def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn),
+                                              vecshiftR64:$imm)),
+          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
+def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm),
+          (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
+def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
+          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
+def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn),
+                                              vecshiftR64:$imm)),
+          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
+def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
+          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
+def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn),
+                                              vecshiftR64:$imm)),
+          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
+def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
+          (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
+
+// Patterns for FP16 intrinsics - these require a reg copy to/from FPR16 since
+// i16 is not a legal type.
+
+def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), vecshiftR16:$imm)),
+          (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
+def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
+          (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
+def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp
+            (and FPR32:$Rn, (i32 65535)),
+            vecshiftR16:$imm)),
+          (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
+def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)),
+          (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
+def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
+          (UCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>;
+def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR32:$imm)),
+          (i32 (INSERT_SUBREG
+            (i32 (IMPLICIT_DEF)),
+            (FCVTZSh FPR16:$Rn, vecshiftR32:$imm),
+            hsub))>;
+def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)),
+          (i64 (INSERT_SUBREG
+            (i64 (IMPLICIT_DEF)),
+            (FCVTZSh FPR16:$Rn, vecshiftR64:$imm),
+            hsub))>;
+def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR32:$imm)),
+          (i32 (INSERT_SUBREG
+            (i32 (IMPLICIT_DEF)),
+            (FCVTZUh FPR16:$Rn, vecshiftR32:$imm),
+            hsub))>;
+def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR64:$imm)),
+          (i64 (INSERT_SUBREG
+            (i64 (IMPLICIT_DEF)),
+            (FCVTZUh FPR16:$Rn, vecshiftR64:$imm),
+            hsub))>;
+
+defm SHL      : SIMDScalarLShiftD<   0, 0b01010, "shl", AArch64vshl>;
+defm SLI      : SIMDScalarLShiftDTied<1, 0b01010, "sli">;
+defm SQRSHRN  : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn",
+                                     int_aarch64_neon_sqrshrn>;
+defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun",
+                                     int_aarch64_neon_sqrshrun>;
+defm SQSHLU   : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
+defm SQSHL    : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
+defm SQSHRN   : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn",
int_aarch64_neon_sqshrn>; +defm SQSHRUN : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun", + int_aarch64_neon_sqshrun>; +defm SRI : SIMDScalarRShiftDTied< 1, 0b01000, "sri">; +defm SRSHR : SIMDScalarRShiftD< 0, 0b00100, "srshr", AArch64srshri>; +defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra", + TriOpFrag<(add node:$LHS, + (AArch64srshri node:$MHS, node:$RHS))>>; +defm SSHR : SIMDScalarRShiftD< 0, 0b00000, "sshr", AArch64vashr>; +defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra", + TriOpFrag<(add node:$LHS, + (AArch64vashr node:$MHS, node:$RHS))>>; +defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn", + int_aarch64_neon_uqrshrn>; +defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>; +defm UQSHRN : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn", + int_aarch64_neon_uqshrn>; +defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", AArch64urshri>; +defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra", + TriOpFrag<(add node:$LHS, + (AArch64urshri node:$MHS, node:$RHS))>>; +defm USHR : SIMDScalarRShiftD< 1, 0b00000, "ushr", AArch64vlshr>; +defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra", + TriOpFrag<(add node:$LHS, + (AArch64vlshr node:$MHS, node:$RHS))>>; + +//---------------------------------------------------------------------------- +// AdvSIMD vector shift instructions +//---------------------------------------------------------------------------- +defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>; +defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>; +defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf", + int_aarch64_neon_vcvtfxs2fp>; +defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", + int_aarch64_neon_rshrn>; +defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>; +defm SHRN : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn", + BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>; +defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", int_aarch64_neon_vsli>; +def : Pat<(v1i64 (int_aarch64_neon_vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), + (i32 vecshiftL64:$imm))), + (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>; +defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn", + int_aarch64_neon_sqrshrn>; +defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun", + int_aarch64_neon_sqrshrun>; +defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>; +defm SQSHL : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>; +defm SQSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn", + int_aarch64_neon_sqshrn>; +defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun", + int_aarch64_neon_sqshrun>; +defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", int_aarch64_neon_vsri>; +def : Pat<(v1i64 (int_aarch64_neon_vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), + (i32 vecshiftR64:$imm))), + (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>; +defm SRSHR : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>; +defm SRSRA : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra", + TriOpFrag<(add node:$LHS, + (AArch64srshri node:$MHS, node:$RHS))> >; +defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll", + BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>; + +defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>; +defm SSRA : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra", + TriOpFrag<(add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>; +defm UCVTF : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf", + int_aarch64_neon_vcvtfxu2fp>; +defm UQRSHRN : 
SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
+                                         int_aarch64_neon_uqrshrn>;
+defm UQSHL   : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
+defm UQSHRN  : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn",
+                                         int_aarch64_neon_uqshrn>;
+defm URSHR   : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>;
+defm URSRA   : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra",
+                TriOpFrag<(add node:$LHS,
+                               (AArch64urshri node:$MHS, node:$RHS))> >;
+defm USHLL   : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll",
+                BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>;
+defm USHR    : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;
+defm USRA    : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
+                TriOpFrag<(add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >;
+
+// SHRN patterns for when a logical right shift was used instead of arithmetic
+// (the immediate guarantees no sign bits actually end up in the result so it
+// doesn't matter).
+def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))),
+          (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>;
+def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))),
+          (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>;
+def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))),
+          (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>;
+
+def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd),
+                                 (trunc (AArch64vlshr (v8i16 V128:$Rn),
+                                                      vecshiftR16Narrow:$imm)))),
+          (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
+                           V128:$Rn, vecshiftR16Narrow:$imm)>;
+def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd),
+                                 (trunc (AArch64vlshr (v4i32 V128:$Rn),
+                                                      vecshiftR32Narrow:$imm)))),
+          (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
+                           V128:$Rn, vecshiftR32Narrow:$imm)>;
+def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd),
+                                 (trunc (AArch64vlshr (v2i64 V128:$Rn),
+                                                      vecshiftR64Narrow:$imm)))),
+          (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
+                           V128:$Rn, vecshiftR64Narrow:$imm)>;
+
+// Vector sign and zero extensions are implemented with SSHLL and USHLL.
+// Anyexts are implemented as zexts.
+def : Pat<(v8i16 (sext   (v8i8 V64:$Rn))),  (SSHLLv8i8_shift  V64:$Rn, (i32 0))>;
+def : Pat<(v8i16 (zext   (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
+def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
+def : Pat<(v4i32 (sext   (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>;
+def : Pat<(v4i32 (zext   (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
+def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
+def : Pat<(v2i64 (sext   (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>;
+def : Pat<(v2i64 (zext   (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
+def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
+// Also match an extend from the upper half of a 128 bit source register.
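+// Editor's note (illustrative, not part of the upstream source): an
+// extract_subvector at index 8/4/2 selects the high half of the 128-bit
+// source, so these map onto the second-half forms, e.g. the v8i16 anyext of
+// the upper bytes is expected to assemble as "sshll2/ushll2 v0.8h, v1.16b, #0".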
+def : Pat<(v8i16 (anyext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), + (USHLLv16i8_shift V128:$Rn, (i32 0))>; +def : Pat<(v8i16 (zext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), + (USHLLv16i8_shift V128:$Rn, (i32 0))>; +def : Pat<(v8i16 (sext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), + (SSHLLv16i8_shift V128:$Rn, (i32 0))>; +def : Pat<(v4i32 (anyext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), + (USHLLv8i16_shift V128:$Rn, (i32 0))>; +def : Pat<(v4i32 (zext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), + (USHLLv8i16_shift V128:$Rn, (i32 0))>; +def : Pat<(v4i32 (sext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), + (SSHLLv8i16_shift V128:$Rn, (i32 0))>; +def : Pat<(v2i64 (anyext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), + (USHLLv4i32_shift V128:$Rn, (i32 0))>; +def : Pat<(v2i64 (zext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), + (USHLLv4i32_shift V128:$Rn, (i32 0))>; +def : Pat<(v2i64 (sext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), + (SSHLLv4i32_shift V128:$Rn, (i32 0))>; + +// Vector shift sxtl aliases +def : InstAlias<"sxtl.8h $dst, $src1", + (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>; +def : InstAlias<"sxtl $dst.8h, $src1.8b", + (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>; +def : InstAlias<"sxtl.4s $dst, $src1", + (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>; +def : InstAlias<"sxtl $dst.4s, $src1.4h", + (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>; +def : InstAlias<"sxtl.2d $dst, $src1", + (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>; +def : InstAlias<"sxtl $dst.2d, $src1.2s", + (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>; + +// Vector shift sxtl2 aliases +def : InstAlias<"sxtl2.8h $dst, $src1", + (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>; +def : InstAlias<"sxtl2 $dst.8h, $src1.16b", + (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>; +def : InstAlias<"sxtl2.4s $dst, $src1", + (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>; +def : InstAlias<"sxtl2 $dst.4s, $src1.8h", + (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>; +def : InstAlias<"sxtl2.2d $dst, $src1", + (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>; +def : InstAlias<"sxtl2 $dst.2d, $src1.4s", + (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>; + +// Vector shift uxtl aliases +def : InstAlias<"uxtl.8h $dst, $src1", + (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>; +def : InstAlias<"uxtl $dst.8h, $src1.8b", + (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>; +def : InstAlias<"uxtl.4s $dst, $src1", + (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>; +def : InstAlias<"uxtl $dst.4s, $src1.4h", + (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>; +def : InstAlias<"uxtl.2d $dst, $src1", + (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>; +def : InstAlias<"uxtl $dst.2d, $src1.2s", + (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>; + +// Vector shift uxtl2 aliases +def : InstAlias<"uxtl2.8h $dst, $src1", + (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>; +def : InstAlias<"uxtl2 $dst.8h, $src1.16b", + (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>; +def : InstAlias<"uxtl2.4s $dst, $src1", + (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>; +def : InstAlias<"uxtl2 $dst.4s, $src1.8h", + (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>; +def : InstAlias<"uxtl2.2d $dst, $src1", + (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>; +def : InstAlias<"uxtl2 $dst.2d, $src1.4s", + (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>; + +// If an integer is about to be converted to a floating point value, +// just load it on the floating point unit. +// These patterns are more complex because floating point loads do not +// support sign extension. 
+// The sign extension has to be explicitly added and is only supported for
+// one step: byte-to-half, half-to-word, word-to-doubleword.
+// SCVTF GPR -> FPR is 9 cycles.
+// SCVTF FPR -> FPR is 4 cycles.
+// (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles.
+// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR
+// and still be faster.
+// However, this is not good for code size.
+// 8-bits -> float. 2 sizes step-up.
+class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
+  : Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))),
+        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
+                            (SSHLLv4i16_shift
+                              (f64
+                                (EXTRACT_SUBREG
+                                  (SSHLLv8i8_shift
+                                    (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+                                        INST,
+                                        bsub),
+                                    0),
+                                  dsub)),
+                              0),
+                            ssub)))>,
+    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;
+
+def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
+                          (LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
+def : SExtLoadi8CVTf32Pat<(ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext),
+                          (LDRBroX GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext)>;
+def : SExtLoadi8CVTf32Pat<(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset),
+                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset)>;
+def : SExtLoadi8CVTf32Pat<(am_unscaled8 GPR64sp:$Rn, simm9:$offset),
+                          (LDURBi GPR64sp:$Rn, simm9:$offset)>;
+
+// 16-bits -> float. 1 size step-up.
+class SExtLoadi16CVTf32Pat<dag addrmode, dag INST>
+  : Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))),
+        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
+                            (SSHLLv4i16_shift
+                                (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+                                  INST,
+                                  hsub),
+                                0),
+                            ssub)))>, Requires<[NotForCodeSize]>;
+
+def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
+                           (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
+def : SExtLoadi16CVTf32Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
+                           (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
+def : SExtLoadi16CVTf32Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
+                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
+def : SExtLoadi16CVTf32Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
+                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
+
+// 32-bits to 32-bits are handled in target specific dag combine:
+// performIntToFpCombine.
+// 64-bit integer to 32-bit floating point is not possible with
+// SCVTF on floating point registers (both source and destination
+// must have the same size).
+
+// Here are the patterns for 8, 16, 32, and 64-bits to double.
+// 8-bits -> double. 3 size step-up: give up.
+// 16-bits -> double. 2 size step.
+class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
+  : Pat <(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))),
+         (SCVTFv1i64 (f64 (EXTRACT_SUBREG
+                             (SSHLLv2i32_shift
+                                (f64
+                                  (EXTRACT_SUBREG
+                                    (SSHLLv4i16_shift
+                                      (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+                                        INST,
+                                        hsub),
+                                     0),
+                                   dsub)),
+                               0),
+                             dsub)))>,
+    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;
+
+def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
+                           (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
+def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
+                           (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
+def : SExtLoadi16CVTf64Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
+                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
+def : SExtLoadi16CVTf64Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
+                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
+// 32-bits -> double. 1 size step-up.
+class SExtLoadi32CVTf64Pat + : Pat <(f64 (sint_to_fp (i32 (load addrmode)))), + (SCVTFv1i64 (f64 (EXTRACT_SUBREG + (SSHLLv2i32_shift + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + INST, + ssub), + 0), + dsub)))>, Requires<[NotForCodeSize]>; + +def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext), + (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>; +def : SExtLoadi32CVTf64Pat<(ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext), + (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext)>; +def : SExtLoadi32CVTf64Pat<(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset), + (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>; +def : SExtLoadi32CVTf64Pat<(am_unscaled32 GPR64sp:$Rn, simm9:$offset), + (LDURSi GPR64sp:$Rn, simm9:$offset)>; + +// 64-bits -> double are handled in target specific dag combine: +// performIntToFpCombine. + + +//---------------------------------------------------------------------------- +// AdvSIMD Load-Store Structure +//---------------------------------------------------------------------------- +defm LD1 : SIMDLd1Multiple<"ld1">; +defm LD2 : SIMDLd2Multiple<"ld2">; +defm LD3 : SIMDLd3Multiple<"ld3">; +defm LD4 : SIMDLd4Multiple<"ld4">; + +defm ST1 : SIMDSt1Multiple<"st1">; +defm ST2 : SIMDSt2Multiple<"st2">; +defm ST3 : SIMDSt3Multiple<"st3">; +defm ST4 : SIMDSt4Multiple<"st4">; + +class Ld1Pat + : Pat<(ty (load GPR64sp:$Rn)), (INST GPR64sp:$Rn)>; + +def : Ld1Pat; +def : Ld1Pat; +def : Ld1Pat; +def : Ld1Pat; +def : Ld1Pat; +def : Ld1Pat; +def : Ld1Pat; +def : Ld1Pat; + +class St1Pat + : Pat<(store ty:$Vt, GPR64sp:$Rn), + (INST ty:$Vt, GPR64sp:$Rn)>; + +def : St1Pat; +def : St1Pat; +def : St1Pat; +def : St1Pat; +def : St1Pat; +def : St1Pat; +def : St1Pat; +def : St1Pat; + +//--- +// Single-element +//--- + +defm LD1R : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>; +defm LD2R : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>; +defm LD3R : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>; +defm LD4R : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>; +let mayLoad = 1, hasSideEffects = 0 in { +defm LD1 : SIMDLdSingleBTied<0, 0b000, "ld1", VecListOneb, GPR64pi1>; +defm LD1 : SIMDLdSingleHTied<0, 0b010, 0, "ld1", VecListOneh, GPR64pi2>; +defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes, GPR64pi4>; +defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned, GPR64pi8>; +defm LD2 : SIMDLdSingleBTied<1, 0b000, "ld2", VecListTwob, GPR64pi2>; +defm LD2 : SIMDLdSingleHTied<1, 0b010, 0, "ld2", VecListTwoh, GPR64pi4>; +defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos, GPR64pi8>; +defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod, GPR64pi16>; +defm LD3 : SIMDLdSingleBTied<0, 0b001, "ld3", VecListThreeb, GPR64pi3>; +defm LD3 : SIMDLdSingleHTied<0, 0b011, 0, "ld3", VecListThreeh, GPR64pi6>; +defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>; +defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>; +defm LD4 : SIMDLdSingleBTied<1, 0b001, "ld4", VecListFourb, GPR64pi4>; +defm LD4 : SIMDLdSingleHTied<1, 0b011, 0, "ld4", VecListFourh, GPR64pi8>; +defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours, GPR64pi16>; +defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd, GPR64pi32>; +} + +def : Pat<(v8i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))), + (LD1Rv8b GPR64sp:$Rn)>; +def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))), + (LD1Rv16b GPR64sp:$Rn)>; +def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))), + (LD1Rv4h GPR64sp:$Rn)>; +def : Pat<(v8i16 
(AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))), + (LD1Rv8h GPR64sp:$Rn)>; +def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), + (LD1Rv2s GPR64sp:$Rn)>; +def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), + (LD1Rv4s GPR64sp:$Rn)>; +def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), + (LD1Rv2d GPR64sp:$Rn)>; +def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), + (LD1Rv1d GPR64sp:$Rn)>; +// Grab the floating point version too +def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))), + (LD1Rv2s GPR64sp:$Rn)>; +def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))), + (LD1Rv4s GPR64sp:$Rn)>; +def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))), + (LD1Rv2d GPR64sp:$Rn)>; +def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))), + (LD1Rv1d GPR64sp:$Rn)>; +def : Pat<(v4f16 (AArch64dup (f16 (load GPR64sp:$Rn)))), + (LD1Rv4h GPR64sp:$Rn)>; +def : Pat<(v8f16 (AArch64dup (f16 (load GPR64sp:$Rn)))), + (LD1Rv8h GPR64sp:$Rn)>; + +class Ld1Lane128Pat + : Pat<(vector_insert (VTy VecListOne128:$Rd), + (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), + (LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>; + +def : Ld1Lane128Pat; +def : Ld1Lane128Pat; +def : Ld1Lane128Pat; +def : Ld1Lane128Pat; +def : Ld1Lane128Pat; +def : Ld1Lane128Pat; +def : Ld1Lane128Pat; + +class Ld1Lane64Pat + : Pat<(vector_insert (VTy VecListOne64:$Rd), + (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), + (EXTRACT_SUBREG + (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub), + VecIndex:$idx, GPR64sp:$Rn), + dsub)>; + +def : Ld1Lane64Pat; +def : Ld1Lane64Pat; +def : Ld1Lane64Pat; +def : Ld1Lane64Pat; +def : Ld1Lane64Pat; + + +defm LD1 : SIMDLdSt1SingleAliases<"ld1">; +defm LD2 : SIMDLdSt2SingleAliases<"ld2">; +defm LD3 : SIMDLdSt3SingleAliases<"ld3">; +defm LD4 : SIMDLdSt4SingleAliases<"ld4">; + +// Stores +defm ST1 : SIMDStSingleB<0, 0b000, "st1", VecListOneb, GPR64pi1>; +defm ST1 : SIMDStSingleH<0, 0b010, 0, "st1", VecListOneh, GPR64pi2>; +defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>; +defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>; + +let AddedComplexity = 19 in +class St1Lane128Pat + : Pat<(scalar_store + (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), + GPR64sp:$Rn), + (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn)>; + +def : St1Lane128Pat; +def : St1Lane128Pat; +def : St1Lane128Pat; +def : St1Lane128Pat; +def : St1Lane128Pat; +def : St1Lane128Pat; +def : St1Lane128Pat; + +let AddedComplexity = 19 in +class St1Lane64Pat + : Pat<(scalar_store + (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), + GPR64sp:$Rn), + (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), + VecIndex:$idx, GPR64sp:$Rn)>; + +def : St1Lane64Pat; +def : St1Lane64Pat; +def : St1Lane64Pat; +def : St1Lane64Pat; +def : St1Lane64Pat; + +multiclass St1LanePost64Pat { + def : Pat<(scalar_store + (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), + GPR64sp:$Rn, offset), + (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), + VecIndex:$idx, GPR64sp:$Rn, XZR)>; + + def : Pat<(scalar_store + (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), + GPR64sp:$Rn, GPR64:$Rm), + (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), + VecIndex:$idx, GPR64sp:$Rn, $Rm)>; +} + +defm : St1LanePost64Pat; +defm : St1LanePost64Pat; +defm : St1LanePost64Pat; +defm : St1LanePost64Pat; +defm : St1LanePost64Pat; +defm : St1LanePost64Pat; +defm : St1LanePost64Pat; + +multiclass St1LanePost128Pat { + def : Pat<(scalar_store + (STy (vector_extract (VTy 
VecListOne128:$Vt), VecIndex:$idx)), + GPR64sp:$Rn, offset), + (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, XZR)>; + + def : Pat<(scalar_store + (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), + GPR64sp:$Rn, GPR64:$Rm), + (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, $Rm)>; +} + +defm : St1LanePost128Pat; +defm : St1LanePost128Pat; +defm : St1LanePost128Pat; +defm : St1LanePost128Pat; +defm : St1LanePost128Pat; +defm : St1LanePost128Pat; +defm : St1LanePost128Pat; + +let mayStore = 1, hasSideEffects = 0 in { +defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, GPR64pi2>; +defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, GPR64pi4>; +defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos, GPR64pi8>; +defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod, GPR64pi16>; +defm ST3 : SIMDStSingleB<0, 0b001, "st3", VecListThreeb, GPR64pi3>; +defm ST3 : SIMDStSingleH<0, 0b011, 0, "st3", VecListThreeh, GPR64pi6>; +defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>; +defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>; +defm ST4 : SIMDStSingleB<1, 0b001, "st4", VecListFourb, GPR64pi4>; +defm ST4 : SIMDStSingleH<1, 0b011, 0, "st4", VecListFourh, GPR64pi8>; +defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours, GPR64pi16>; +defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd, GPR64pi32>; +} + +defm ST1 : SIMDLdSt1SingleAliases<"st1">; +defm ST2 : SIMDLdSt2SingleAliases<"st2">; +defm ST3 : SIMDLdSt3SingleAliases<"st3">; +defm ST4 : SIMDLdSt4SingleAliases<"st4">; + +//---------------------------------------------------------------------------- +// Crypto extensions +//---------------------------------------------------------------------------- + +let Predicates = [HasAES] in { +def AESErr : AESTiedInst<0b0100, "aese", int_aarch64_crypto_aese>; +def AESDrr : AESTiedInst<0b0101, "aesd", int_aarch64_crypto_aesd>; +def AESMCrr : AESInst< 0b0110, "aesmc", int_aarch64_crypto_aesmc>; +def AESIMCrr : AESInst< 0b0111, "aesimc", int_aarch64_crypto_aesimc>; +} + +// Pseudo instructions for AESMCrr/AESIMCrr with a register constraint required +// for AES fusion on some CPUs. +let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in { +def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">, + Sched<[WriteV]>; +def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">, + Sched<[WriteV]>; +} + +// Only use constrained versions of AES(I)MC instructions if they are paired with +// AESE/AESD. 
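+// Editor's note (illustrative, not part of the upstream source): the tied
+// pseudos constrain the AESMC/AESIMC destination to equal its source, so an
+// "aese v0.16b, v1.16b" followed by "aesmc v0.16b, v0.16b" stays back-to-back
+// on one register and the pair can be fused on cores that implement AES
+// fusion (the HasFuseAES predicate).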
+def : Pat<(v16i8 (int_aarch64_crypto_aesmc
+            (v16i8 (int_aarch64_crypto_aese (v16i8 V128:$src1),
+                                            (v16i8 V128:$src2))))),
+          (v16i8 (AESMCrrTied (v16i8 (AESErr (v16i8 V128:$src1),
+                                             (v16i8 V128:$src2)))))>,
+          Requires<[HasFuseAES]>;
+
+def : Pat<(v16i8 (int_aarch64_crypto_aesimc
+            (v16i8 (int_aarch64_crypto_aesd (v16i8 V128:$src1),
+                                            (v16i8 V128:$src2))))),
+          (v16i8 (AESIMCrrTied (v16i8 (AESDrr (v16i8 V128:$src1),
+                                              (v16i8 V128:$src2)))))>,
+          Requires<[HasFuseAES]>;
+
+let Predicates = [HasSHA2] in {
+def SHA1Crrr     : SHATiedInstQSV<0b000, "sha1c",   int_aarch64_crypto_sha1c>;
+def SHA1Prrr     : SHATiedInstQSV<0b001, "sha1p",   int_aarch64_crypto_sha1p>;
+def SHA1Mrrr     : SHATiedInstQSV<0b010, "sha1m",   int_aarch64_crypto_sha1m>;
+def SHA1SU0rrr   : SHATiedInstVVV<0b011, "sha1su0", int_aarch64_crypto_sha1su0>;
+def SHA256Hrrr   : SHATiedInstQQV<0b100, "sha256h", int_aarch64_crypto_sha256h>;
+def SHA256H2rrr  : SHATiedInstQQV<0b101, "sha256h2",int_aarch64_crypto_sha256h2>;
+def SHA256SU1rrr :SHATiedInstVVV<0b110, "sha256su1",int_aarch64_crypto_sha256su1>;
+
+def SHA1Hrr     : SHAInstSS<    0b0000, "sha1h",    int_aarch64_crypto_sha1h>;
+def SHA1SU1rr   : SHATiedInstVV<0b0001, "sha1su1",  int_aarch64_crypto_sha1su1>;
+def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_aarch64_crypto_sha256su0>;
+}
+
+//----------------------------------------------------------------------------
+// Compiler-pseudos
+//----------------------------------------------------------------------------
+// FIXME: Like for X86, these should go in their own separate .td file.
+
+def def32 : PatLeaf<(i32 GPR32:$src), [{
+  return isDef32(*N);
+}]>;
+
+// In the case of a 32-bit def that is known to implicitly zero-extend,
+// we can use a SUBREG_TO_REG.
+def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>;
+
+// For an anyext, we don't care what the high bits are, so we can perform an
+// INSERT_SUBREG into an IMPLICIT_DEF.
+def : Pat<(i64 (anyext GPR32:$src)),
+          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>;
+
+// When we need to explicitly zero-extend, we use a 32-bit MOV instruction and
+// then assert the extension has happened.
+def : Pat<(i64 (zext GPR32:$src)),
+          (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
+
+// To sign extend, we use a signed bitfield move instruction (SBFM) on the
+// containing super-reg.
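+// Editor's note (illustrative, not part of the upstream source): the sext
+// pattern below becomes an SBFM with immr=0 and imms=31, which is expected to
+// print as the alias "sxtw x0, w0".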
+def : Pat<(i64 (sext GPR32:$src)), + (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>; +def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>; +def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>; +def : Pat<(i64 (sext_inreg GPR64:$src, i8)), (SBFMXri GPR64:$src, 0, 7)>; +def : Pat<(i64 (sext_inreg GPR64:$src, i1)), (SBFMXri GPR64:$src, 0, 0)>; +def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>; +def : Pat<(i32 (sext_inreg GPR32:$src, i8)), (SBFMWri GPR32:$src, 0, 7)>; +def : Pat<(i32 (sext_inreg GPR32:$src, i1)), (SBFMWri GPR32:$src, 0, 0)>; + +def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)), + (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), + (i64 (i32shift_sext_i8 imm0_31:$imm)))>; +def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)), + (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), + (i64 (i64shift_sext_i8 imm0_63:$imm)))>; + +def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)), + (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), + (i64 (i32shift_sext_i16 imm0_31:$imm)))>; +def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)), + (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), + (i64 (i64shift_sext_i16 imm0_63:$imm)))>; + +def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)), + (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), + (i64 (i64shift_a imm0_63:$imm)), + (i64 (i64shift_sext_i32 imm0_63:$imm)))>; + +// sra patterns have an AddedComplexity of 10, so make sure we have a higher +// AddedComplexity for the following patterns since we want to match sext + sra +// patterns before we attempt to match a single sra node. +let AddedComplexity = 20 in { +// We support all sext + sra combinations which preserve at least one bit of the +// original value which is to be sign extended. E.g. we support shifts up to +// bitwidth-1 bits. +def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)), + (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>; +def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)), + (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>; + +def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)), + (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>; +def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)), + (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>; + +def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)), + (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), + (i64 imm0_31:$imm), 31)>; +} // AddedComplexity = 20 + +// To truncate, we can simply extract from a subregister. +def : Pat<(i32 (trunc GPR64sp:$src)), + (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>; + +// __builtin_trap() uses the BRK instruction on AArch64. +def : Pat<(trap), (BRK 1)>; + +// Conversions within AdvSIMD types in the same register size are free. +// But because we need a consistent lane ordering, in big endian many +// conversions require one or more REV instructions. +// +// Consider a simple memory load followed by a bitconvert then a store. +// v0 = load v2i32 +// v1 = BITCAST v2i32 v0 to v4i16 +// store v4i16 v2 +// +// In big endian mode every memory access has an implicit byte swap. LDR and +// STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that +// is, they treat the vector as a sequence of elements to be byte-swapped. +// The two pairs of instructions are fundamentally incompatible. We've decided +// to use LD1/ST1 only to simplify compiler implementation. 
+// +// LD1/ST1 perform the equivalent of a sequence of LDR/STR + REV. This makes +// the original code sequence: +// v0 = load v2i32 +// v1 = REV v2i32 (implicit) +// v2 = BITCAST v2i32 v1 to v4i16 +// v3 = REV v4i16 v2 (implicit) +// store v4i16 v3 +// +// But this is now broken - the value stored is different to the value loaded +// due to lane reordering. To fix this, on every BITCAST we must perform two +// other REVs: +// v0 = load v2i32 +// v1 = REV v2i32 (implicit) +// v2 = REV v2i32 +// v3 = BITCAST v2i32 v2 to v4i16 +// v4 = REV v4i16 +// v5 = REV v4i16 v4 (implicit) +// store v4i16 v5 +// +// This means an extra two instructions, but actually in most cases the two REV +// instructions can be combined into one. For example: +// (REV64_2s (REV64_4h X)) === (REV32_4h X) +// +// There is also no 128-bit REV instruction. This must be synthesized with an +// EXT instruction. +// +// Most bitconverts require some sort of conversion. The only exceptions are: +// a) Identity conversions - vNfX <-> vNiX +// b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX +// + +// Natural vector casts (64 bit) +def : Pat<(v8i8 (AArch64NvCast (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v4i16 (AArch64NvCast (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4f16 (AArch64NvCast (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>; +def : Pat<(v2i32 (AArch64NvCast (v2i32 FPR64:$src))), (v2i32 FPR64:$src)>; +def : Pat<(v2f32 (AArch64NvCast (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>; +def : Pat<(v1i64 (AArch64NvCast (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>; + +def : Pat<(v8i8 (AArch64NvCast (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v4i16 (AArch64NvCast (v4i16 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4f16 (AArch64NvCast (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>; +def : Pat<(v2i32 (AArch64NvCast (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>; +def : Pat<(v1i64 (AArch64NvCast (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>; + +def : Pat<(v8i8 (AArch64NvCast (v8i8 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v4i16 (AArch64NvCast (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4f16 (AArch64NvCast (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>; +def : Pat<(v2i32 (AArch64NvCast (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>; +def : Pat<(v2f32 (AArch64NvCast (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>; +def : Pat<(v1i64 (AArch64NvCast (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>; + +def : Pat<(v8i8 (AArch64NvCast (f64 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v4i16 (AArch64NvCast (f64 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4f16 (AArch64NvCast (f64 FPR64:$src))), (v4f16 FPR64:$src)>; +def : Pat<(v2i32 (AArch64NvCast (f64 FPR64:$src))), (v2i32 FPR64:$src)>; +def : Pat<(v2f32 (AArch64NvCast (f64 FPR64:$src))), (v2f32 FPR64:$src)>; +def : Pat<(v1i64 (AArch64NvCast (f64 FPR64:$src))), (v1i64 FPR64:$src)>; +def : Pat<(v1f64 (AArch64NvCast (f64 FPR64:$src))), (v1f64 FPR64:$src)>; + +def : Pat<(v8i8 (AArch64NvCast (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v4i16 (AArch64NvCast (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v2i32 (AArch64NvCast (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>; +def : Pat<(v2f32 (AArch64NvCast (v2f32 FPR64:$src))), (v2f32 FPR64:$src)>; +def : Pat<(v1i64 (AArch64NvCast (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>; + +// Natural vector casts (128 bit) +def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v8i16 (AArch64NvCast (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8f16 (AArch64NvCast (v4i32 FPR128:$src))), 
(v8f16 FPR128:$src)>; +def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v2f64 (AArch64NvCast (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>; + +def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8f16 (AArch64NvCast (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v4f32 (AArch64NvCast (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v2f64 (AArch64NvCast (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>; + +def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8f16 (AArch64NvCast (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v4f32 (AArch64NvCast (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v2f64 (AArch64NvCast (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>; + +def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8f16 (AArch64NvCast (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v4i32 (AArch64NvCast (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v2i64 (AArch64NvCast (v2i64 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v4f32 (AArch64NvCast (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v2f64 (AArch64NvCast (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>; + +def : Pat<(v16i8 (AArch64NvCast (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v8i16 (AArch64NvCast (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v4i32 (AArch64NvCast (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v4f32 (AArch64NvCast (v4f32 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v2i64 (AArch64NvCast (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v8f16 (AArch64NvCast (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v2f64 (AArch64NvCast (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>; + +def : Pat<(v16i8 (AArch64NvCast (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v8i16 (AArch64NvCast (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v4i32 (AArch64NvCast (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v2i64 (AArch64NvCast (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v2f64 (AArch64NvCast (v2f64 FPR128:$src))), (v2f64 FPR128:$src)>; +def : Pat<(v8f16 (AArch64NvCast (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v4f32 (AArch64NvCast (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; +def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; +def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; +def : Pat<(v4f16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; +def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; + +def : Pat<(i64 (bitconvert (v8i8 
V64:$Vn))), + (COPY_TO_REGCLASS V64:$Vn, GPR64)>; +def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))), + (COPY_TO_REGCLASS V64:$Vn, GPR64)>; +def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))), + (COPY_TO_REGCLASS V64:$Vn, GPR64)>; +def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))), + (COPY_TO_REGCLASS V64:$Vn, GPR64)>; +def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))), + (COPY_TO_REGCLASS V64:$Vn, GPR64)>; +def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))), + (COPY_TO_REGCLASS V64:$Vn, GPR64)>; +} +let Predicates = [IsBE] in { +def : Pat<(v8i8 (bitconvert GPR64:$Xn)), + (REV64v8i8 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; +def : Pat<(v4i16 (bitconvert GPR64:$Xn)), + (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; +def : Pat<(v2i32 (bitconvert GPR64:$Xn)), + (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; +def : Pat<(v4f16 (bitconvert GPR64:$Xn)), + (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; +def : Pat<(v2f32 (bitconvert GPR64:$Xn)), + (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; + +def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))), + (REV64v8i8 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; +def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))), + (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; +def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))), + (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; +def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))), + (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; +def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))), + (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; +} +def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; +def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; +def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))), + (COPY_TO_REGCLASS V64:$Vn, GPR64)>; +def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)), + (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; +def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)), + (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; +def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>; + +def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))), + (COPY_TO_REGCLASS GPR32:$Xn, FPR32)>; +def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))), + (COPY_TO_REGCLASS FPR32:$Xn, GPR32)>; +def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))), + (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; +def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))), + (COPY_TO_REGCLASS FPR64:$Xn, GPR64)>; +def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))), + (COPY_TO_REGCLASS V64:$Vn, GPR64)>; + +let Predicates = [IsLE] in { +def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>; +def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>; +def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>; +def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), (v1i64 FPR64:$src)>; +def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), + (v1i64 (REV64v2i32 FPR64:$src))>; +def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), + (v1i64 (REV64v4i16 FPR64:$src))>; +def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), + (v1i64 (REV64v8i8 FPR64:$src))>; +def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), + (v1i64 (REV64v4i16 FPR64:$src))>; +def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), + (v1i64 (REV64v2i32 FPR64:$src))>; +} +def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>; +def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>; +def : Pat<(v2i32 
(bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>; +def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>; +def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>; +def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>; +def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), (v2i32 FPR64:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), + (v2i32 (REV64v2i32 FPR64:$src))>; +def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), + (v2i32 (REV32v4i16 FPR64:$src))>; +def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), + (v2i32 (REV32v8i8 FPR64:$src))>; +def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), + (v2i32 (REV64v2i32 FPR64:$src))>; +def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), + (v2i32 (REV64v2i32 FPR64:$src))>; +def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), + (v2i32 (REV32v4i16 FPR64:$src))>; +} +def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), + (v4i16 (REV64v4i16 FPR64:$src))>; +def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), + (v4i16 (REV32v4i16 FPR64:$src))>; +def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), + (v4i16 (REV16v8i8 FPR64:$src))>; +def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), + (v4i16 (REV64v4i16 FPR64:$src))>; +def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), + (v4i16 (REV32v4i16 FPR64:$src))>; +def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), + (v4i16 (REV64v4i16 FPR64:$src))>; +} +def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 FPR64:$src)>; +def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>; +def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>; +def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), (v4f16 FPR64:$src)>; +def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), (v4f16 FPR64:$src)>; +def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 FPR64:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), + (v4f16 (REV64v4i16 FPR64:$src))>; +def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), + (v4f16 (REV32v4i16 FPR64:$src))>; +def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))), + (v4f16 (REV16v8i8 FPR64:$src))>; +def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), + (v4f16 (REV64v4i16 FPR64:$src))>; +def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), + (v4f16 (REV32v4i16 FPR64:$src))>; +def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), + (v4f16 (REV64v4i16 FPR64:$src))>; +} +def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), (v8i8 
FPR64:$src)>; +def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))), (v8i8 FPR64:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), + (v8i8 (REV64v8i8 FPR64:$src))>; +def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), + (v8i8 (REV32v8i8 FPR64:$src))>; +def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), + (v8i8 (REV16v8i8 FPR64:$src))>; +def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), + (v8i8 (REV64v8i8 FPR64:$src))>; +def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), + (v8i8 (REV32v8i8 FPR64:$src))>; +def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), + (v8i8 (REV64v8i8 FPR64:$src))>; +def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))), + (v8i8 (REV16v8i8 FPR64:$src))>; +} + +let Predicates = [IsLE] in { +def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), (f64 FPR64:$src)>; +def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), (f64 FPR64:$src)>; +def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), (f64 FPR64:$src)>; +def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), (f64 FPR64:$src)>; +def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))), (f64 FPR64:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), + (f64 (REV64v2i32 FPR64:$src))>; +def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), + (f64 (REV64v4i16 FPR64:$src))>; +def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), + (f64 (REV64v2i32 FPR64:$src))>; +def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), + (f64 (REV64v8i8 FPR64:$src))>; +def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))), + (f64 (REV64v4i16 FPR64:$src))>; +} +def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>; +def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>; +def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>; +def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), (v1f64 FPR64:$src)>; +def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>; +def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), (v1f64 FPR64:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), + (v1f64 (REV64v2i32 FPR64:$src))>; +def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), + (v1f64 (REV64v4i16 FPR64:$src))>; +def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), + (v1f64 (REV64v8i8 FPR64:$src))>; +def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), + (v1f64 (REV64v2i32 FPR64:$src))>; +def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), + (v1f64 (REV64v4i16 FPR64:$src))>; +} +def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>; +def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>; +def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>; +def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>; +def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>; +def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>; +def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), (v2f32 FPR64:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), + (v2f32 (REV64v2i32 FPR64:$src))>; +def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), + (v2f32 (REV32v4i16 FPR64:$src))>; +def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), + (v2f32 (REV32v8i8 FPR64:$src))>; +def : Pat<(v2f32 
(bitconvert (v1f64 FPR64:$src))), + (v2f32 (REV64v2i32 FPR64:$src))>; +def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), + (v2f32 (REV64v2i32 FPR64:$src))>; +def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), + (v2f32 (REV32v4i16 FPR64:$src))>; +} +def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>; +def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>; +def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>; +def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>; +def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>; +def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), (f128 FPR128:$src)>; +def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), + (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>; +def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), + (f128 (EXTv16i8 (REV64v4i32 FPR128:$src), + (REV64v4i32 FPR128:$src), (i32 8)))>; +def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), + (f128 (EXTv16i8 (REV64v8i16 FPR128:$src), + (REV64v8i16 FPR128:$src), (i32 8)))>; +def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), + (f128 (EXTv16i8 (REV64v8i16 FPR128:$src), + (REV64v8i16 FPR128:$src), (i32 8)))>; +def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), + (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>; +def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), + (f128 (EXTv16i8 (REV64v4i32 FPR128:$src), + (REV64v4i32 FPR128:$src), (i32 8)))>; +def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), + (f128 (EXTv16i8 (REV64v16i8 FPR128:$src), + (REV64v16i8 FPR128:$src), (i32 8)))>; +} + +let Predicates = [IsLE] in { +def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>; +def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>; +def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>; +def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 FPR128:$src)>; +def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>; +def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), + (v2f64 (EXTv16i8 FPR128:$src, + FPR128:$src, (i32 8)))>; +def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), + (v2f64 (REV64v4i32 FPR128:$src))>; +def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), + (v2f64 (REV64v8i16 FPR128:$src))>; +def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), + (v2f64 (REV64v8i16 FPR128:$src))>; +def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), + (v2f64 (REV64v16i8 FPR128:$src))>; +def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), + (v2f64 (REV64v4i32 FPR128:$src))>; +} +def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), + (v4f32 (EXTv16i8 (REV64v4i32 
FPR128:$src), + (REV64v4i32 FPR128:$src), (i32 8)))>; +def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), + (v4f32 (REV32v8i16 FPR128:$src))>; +def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), + (v4f32 (REV32v8i16 FPR128:$src))>; +def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), + (v4f32 (REV32v16i8 FPR128:$src))>; +def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), + (v4f32 (REV64v4i32 FPR128:$src))>; +def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), + (v4f32 (REV64v4i32 FPR128:$src))>; +} +def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), (v2i64 FPR128:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), + (v2i64 (EXTv16i8 FPR128:$src, + FPR128:$src, (i32 8)))>; +def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), + (v2i64 (REV64v4i32 FPR128:$src))>; +def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), + (v2i64 (REV64v8i16 FPR128:$src))>; +def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), + (v2i64 (REV64v16i8 FPR128:$src))>; +def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), + (v2i64 (REV64v4i32 FPR128:$src))>; +def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), + (v2i64 (REV64v8i16 FPR128:$src))>; +} +def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), (v4i32 FPR128:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), + (v4i32 (EXTv16i8 (REV64v4i32 FPR128:$src), + (REV64v4i32 FPR128:$src), + (i32 8)))>; +def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), + (v4i32 (REV64v4i32 FPR128:$src))>; +def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), + (v4i32 (REV32v8i16 FPR128:$src))>; +def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), + (v4i32 (REV32v16i8 FPR128:$src))>; +def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), + (v4i32 (REV64v4i32 FPR128:$src))>; +def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), + (v4i32 (REV32v8i16 FPR128:$src))>; +} +def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), + (v8i16 (EXTv16i8 (REV64v8i16 FPR128:$src), + (REV64v8i16 FPR128:$src), 
+ (i32 8)))>; +def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), + (v8i16 (REV64v8i16 FPR128:$src))>; +def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), + (v8i16 (REV32v8i16 FPR128:$src))>; +def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), + (v8i16 (REV16v16i8 FPR128:$src))>; +def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), + (v8i16 (REV64v8i16 FPR128:$src))>; +def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), + (v8i16 (REV32v8i16 FPR128:$src))>; +} +def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))), + (v8f16 (EXTv16i8 (REV64v8i16 FPR128:$src), + (REV64v8i16 FPR128:$src), + (i32 8)))>; +def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), + (v8f16 (REV64v8i16 FPR128:$src))>; +def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), + (v8f16 (REV32v8i16 FPR128:$src))>; +def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), + (v8f16 (REV16v16i8 FPR128:$src))>; +def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), + (v8f16 (REV64v8i16 FPR128:$src))>; +def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), + (v8f16 (REV32v8i16 FPR128:$src))>; +} +def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 FPR128:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), + (v16i8 (EXTv16i8 (REV64v16i8 FPR128:$src), + (REV64v16i8 FPR128:$src), + (i32 8)))>; +def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), + (v16i8 (REV64v16i8 FPR128:$src))>; +def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), + (v16i8 (REV32v16i8 FPR128:$src))>; +def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), + (v16i8 (REV16v16i8 FPR128:$src))>; +def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), + (v16i8 (REV64v16i8 FPR128:$src))>; +def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), + (v16i8 (REV32v16i8 FPR128:$src))>; +def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), + (v16i8 (REV16v16i8 FPR128:$src))>; +} + +def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 0))), + (EXTRACT_SUBREG V128:$Rn, dsub)>; +def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 0))), + (EXTRACT_SUBREG V128:$Rn, dsub)>; +def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 0))), + (EXTRACT_SUBREG V128:$Rn, dsub)>; +def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 0))), + (EXTRACT_SUBREG V128:$Rn, dsub)>; +def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 0))), + (EXTRACT_SUBREG V128:$Rn, dsub)>; +def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 0))), + (EXTRACT_SUBREG V128:$Rn, 
dsub)>; +def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 0))), + (EXTRACT_SUBREG V128:$Rn, dsub)>; + +def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))), + (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; +def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))), + (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; +def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))), + (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; +def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))), + (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; + +// A 64-bit subvector insert to the first 128-bit vector position +// is a subregister copy that needs no instruction. +multiclass InsertSubvectorUndef { + def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (Ty 0)), + (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; + def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (Ty 0)), + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; + def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (Ty 0)), + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>; + def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (Ty 0)), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>; + def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (Ty 0)), + (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>; + def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (Ty 0)), + (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>; + def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (Ty 0)), + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>; +} + +defm : InsertSubvectorUndef; +defm : InsertSubvectorUndef; + +// Use pair-wise add instructions when summing up the lanes for v2f64, v2i64 +// or v2f32. +def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)), + (vector_extract (v2i64 FPR128:$Rn), (i64 1)))), + (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>; +def : Pat<(f64 (fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)), + (vector_extract (v2f64 FPR128:$Rn), (i64 1)))), + (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>; + // vector_extract on 64-bit vectors gets promoted to a 128 bit vector, + // so we match on v4f32 here, not v2f32. This will also catch adding + // the low two lanes of a true v4f32 vector. +def : Pat<(fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)), + (vector_extract (v4f32 FPR128:$Rn), (i64 1))), + (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>; + +// Scalar 64-bit shifts in FPR64 registers. +def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), + (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; +def : Pat<(i64 (int_aarch64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), + (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>; +def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), + (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; +def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), + (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; + +// Patterns for nontemporal/no-allocate stores. +// We have to resort to tricks to turn a single-input store into a store pair, +// because there is no single-input nontemporal store, only STNP. 
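// As a rough illustration of the trick (register names below are invented
// for exposition, they are not operands of the patterns that follow): a
// 128-bit nontemporal store of Qt becomes a store-pair of its two 64-bit
// halves,
//
//   stnp dLO, dHI, [Xn, #off]   // dLO = bits 63:0 of Qt, dHI = bits 127:64
//
// which is what NTStore128Pat below assembles from EXTRACT_SUBREG (low
// half), CPYi64 lane 1 (high half) and STNPDi.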
+let Predicates = [IsLE] in { +let AddedComplexity = 15 in { +class NTStore128Pat : + Pat<(nontemporalstore (VT FPR128:$Rt), + (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)), + (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub), + (CPYi64 FPR128:$Rt, (i64 1)), + GPR64sp:$Rn, simm7s8:$offset)>; + +def : NTStore128Pat; +def : NTStore128Pat; +def : NTStore128Pat; +def : NTStore128Pat; + +class NTStore64Pat : + Pat<(nontemporalstore (VT FPR64:$Rt), + (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)), + (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub), + (CPYi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)), + GPR64sp:$Rn, simm7s4:$offset)>; + +// FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64? +def : NTStore64Pat; +def : NTStore64Pat; +def : NTStore64Pat; +def : NTStore64Pat; +def : NTStore64Pat; + +def : Pat<(nontemporalstore GPR64:$Rt, + (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)), + (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), + (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 32, 63), sub_32), + GPR64sp:$Rn, simm7s4:$offset)>; +} // AddedComplexity=10 +} // Predicates = [IsLE] + +// Tail call return handling. These are all compiler pseudo-instructions, +// so no encoding information or anything like that. +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in { + def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff), []>, + Sched<[WriteBrReg]>; + def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>, + Sched<[WriteBrReg]>; +} + +def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)), + (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>; +def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)), + (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>; +def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)), + (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>; + +include "AArch64InstrAtomics.td" +include "AArch64SVEInstrInfo.td" diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64RegisterBanks.td b/capstone/suite/synctools/tablegen/AArch64/AArch64RegisterBanks.td new file mode 100644 index 000000000..eee584708 --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64RegisterBanks.td @@ -0,0 +1,20 @@ +//=- AArch64RegisterBank.td - Describe the AArch64 Banks -----*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +/// General Purpose Registers: W, X. +def GPRRegBank : RegisterBank<"GPR", [GPR64all]>; + +/// Floating Point/Vector Registers: B, H, S, D, Q. +def FPRRegBank : RegisterBank<"FPR", [QQQQ]>; + +/// Conditional register: NZCV. +def CCRegBank : RegisterBank<"CC", [CCR]>; diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64RegisterInfo.td b/capstone/suite/synctools/tablegen/AArch64/AArch64RegisterInfo.td new file mode 100644 index 000000000..bbf401b47 --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64RegisterInfo.td @@ -0,0 +1,1113 @@ +//=- AArch64RegisterInfo.td - Describe the AArch64 Registers -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + + +class AArch64Reg enc, string n, list subregs = [], + list altNames = []> + : Register { + let HWEncoding = enc; + let Namespace = "AArch64"; + let SubRegs = subregs; +} + +let Namespace = "AArch64" in { + def sub_32 : SubRegIndex<32>; + + def bsub : SubRegIndex<8>; + def hsub : SubRegIndex<16>; + def ssub : SubRegIndex<32>; + def dsub : SubRegIndex<32>; + def sube32 : SubRegIndex<32>; + def subo32 : SubRegIndex<32>; + def qhisub : SubRegIndex<64>; + def qsub : SubRegIndex<64>; + def sube64 : SubRegIndex<64>; + def subo64 : SubRegIndex<64>; + // SVE + def zsub : SubRegIndex<128>; + // Note: zsub_hi should never be used directly because it represents + // the scalable part of the SVE vector and cannot be manipulated as a + // subvector in the same way the lower 128bits can. + def zsub_hi : SubRegIndex<128>; + // Note: Code depends on these having consecutive numbers + def dsub0 : SubRegIndex<64>; + def dsub1 : SubRegIndex<64>; + def dsub2 : SubRegIndex<64>; + def dsub3 : SubRegIndex<64>; + // Note: Code depends on these having consecutive numbers + def qsub0 : SubRegIndex<128>; + def qsub1 : SubRegIndex<128>; + def qsub2 : SubRegIndex<128>; + def qsub3 : SubRegIndex<128>; +} + +let Namespace = "AArch64" in { + def vreg : RegAltNameIndex; + def vlist1 : RegAltNameIndex; +} + +//===----------------------------------------------------------------------===// +// Registers +//===----------------------------------------------------------------------===// +def W0 : AArch64Reg<0, "w0" >, DwarfRegNum<[0]>; +def W1 : AArch64Reg<1, "w1" >, DwarfRegNum<[1]>; +def W2 : AArch64Reg<2, "w2" >, DwarfRegNum<[2]>; +def W3 : AArch64Reg<3, "w3" >, DwarfRegNum<[3]>; +def W4 : AArch64Reg<4, "w4" >, DwarfRegNum<[4]>; +def W5 : AArch64Reg<5, "w5" >, DwarfRegNum<[5]>; +def W6 : AArch64Reg<6, "w6" >, DwarfRegNum<[6]>; +def W7 : AArch64Reg<7, "w7" >, DwarfRegNum<[7]>; +def W8 : AArch64Reg<8, "w8" >, DwarfRegNum<[8]>; +def W9 : AArch64Reg<9, "w9" >, DwarfRegNum<[9]>; +def W10 : AArch64Reg<10, "w10">, DwarfRegNum<[10]>; +def W11 : AArch64Reg<11, "w11">, DwarfRegNum<[11]>; +def W12 : AArch64Reg<12, "w12">, DwarfRegNum<[12]>; +def W13 : AArch64Reg<13, "w13">, DwarfRegNum<[13]>; +def W14 : AArch64Reg<14, "w14">, DwarfRegNum<[14]>; +def W15 : AArch64Reg<15, "w15">, DwarfRegNum<[15]>; +def W16 : AArch64Reg<16, "w16">, DwarfRegNum<[16]>; +def W17 : AArch64Reg<17, "w17">, DwarfRegNum<[17]>; +def W18 : AArch64Reg<18, "w18">, DwarfRegNum<[18]>; +def W19 : AArch64Reg<19, "w19">, DwarfRegNum<[19]>; +def W20 : AArch64Reg<20, "w20">, DwarfRegNum<[20]>; +def W21 : AArch64Reg<21, "w21">, DwarfRegNum<[21]>; +def W22 : AArch64Reg<22, "w22">, DwarfRegNum<[22]>; +def W23 : AArch64Reg<23, "w23">, DwarfRegNum<[23]>; +def W24 : AArch64Reg<24, "w24">, DwarfRegNum<[24]>; +def W25 : AArch64Reg<25, "w25">, DwarfRegNum<[25]>; +def W26 : AArch64Reg<26, "w26">, DwarfRegNum<[26]>; +def W27 : AArch64Reg<27, "w27">, DwarfRegNum<[27]>; +def W28 : AArch64Reg<28, "w28">, DwarfRegNum<[28]>; +def W29 : AArch64Reg<29, "w29">, DwarfRegNum<[29]>; +def W30 : AArch64Reg<30, "w30">, DwarfRegNum<[30]>; +def WSP : AArch64Reg<31, "wsp">, DwarfRegNum<[31]>; +def WZR : AArch64Reg<31, "wzr">, DwarfRegAlias; + +let SubRegIndices = [sub_32] in { +def X0 : AArch64Reg<0, "x0", [W0]>, DwarfRegAlias; +def X1 : AArch64Reg<1, "x1", [W1]>, DwarfRegAlias; +def X2 : AArch64Reg<2, "x2", [W2]>, 
DwarfRegAlias; +def X3 : AArch64Reg<3, "x3", [W3]>, DwarfRegAlias; +def X4 : AArch64Reg<4, "x4", [W4]>, DwarfRegAlias; +def X5 : AArch64Reg<5, "x5", [W5]>, DwarfRegAlias; +def X6 : AArch64Reg<6, "x6", [W6]>, DwarfRegAlias; +def X7 : AArch64Reg<7, "x7", [W7]>, DwarfRegAlias; +def X8 : AArch64Reg<8, "x8", [W8]>, DwarfRegAlias; +def X9 : AArch64Reg<9, "x9", [W9]>, DwarfRegAlias; +def X10 : AArch64Reg<10, "x10", [W10]>, DwarfRegAlias; +def X11 : AArch64Reg<11, "x11", [W11]>, DwarfRegAlias; +def X12 : AArch64Reg<12, "x12", [W12]>, DwarfRegAlias; +def X13 : AArch64Reg<13, "x13", [W13]>, DwarfRegAlias; +def X14 : AArch64Reg<14, "x14", [W14]>, DwarfRegAlias; +def X15 : AArch64Reg<15, "x15", [W15]>, DwarfRegAlias; +def X16 : AArch64Reg<16, "x16", [W16]>, DwarfRegAlias; +def X17 : AArch64Reg<17, "x17", [W17]>, DwarfRegAlias; +def X18 : AArch64Reg<18, "x18", [W18]>, DwarfRegAlias; +def X19 : AArch64Reg<19, "x19", [W19]>, DwarfRegAlias; +def X20 : AArch64Reg<20, "x20", [W20]>, DwarfRegAlias; +def X21 : AArch64Reg<21, "x21", [W21]>, DwarfRegAlias; +def X22 : AArch64Reg<22, "x22", [W22]>, DwarfRegAlias; +def X23 : AArch64Reg<23, "x23", [W23]>, DwarfRegAlias; +def X24 : AArch64Reg<24, "x24", [W24]>, DwarfRegAlias; +def X25 : AArch64Reg<25, "x25", [W25]>, DwarfRegAlias; +def X26 : AArch64Reg<26, "x26", [W26]>, DwarfRegAlias; +def X27 : AArch64Reg<27, "x27", [W27]>, DwarfRegAlias; +def X28 : AArch64Reg<28, "x28", [W28]>, DwarfRegAlias; +def FP : AArch64Reg<29, "x29", [W29]>, DwarfRegAlias; +def LR : AArch64Reg<30, "x30", [W30]>, DwarfRegAlias; +def SP : AArch64Reg<31, "sp", [WSP]>, DwarfRegAlias; +def XZR : AArch64Reg<31, "xzr", [WZR]>, DwarfRegAlias; +} + +// Condition code register. +def NZCV : AArch64Reg<0, "nzcv">; + +// First fault status register +def FFR : AArch64Reg<0, "ffr">, DwarfRegNum<[47]>; + +// GPR register classes with the intersections of GPR32/GPR32sp and +// GPR64/GPR64sp for use by the coalescer. +def GPR32common : RegisterClass<"AArch64", [i32], 32, (sequence "W%u", 0, 30)> { + let AltOrders = [(rotl GPR32common, 8)]; + let AltOrderSelect = [{ return 1; }]; +} +def GPR64common : RegisterClass<"AArch64", [i64], 64, + (add (sequence "X%u", 0, 28), FP, LR)> { + let AltOrders = [(rotl GPR64common, 8)]; + let AltOrderSelect = [{ return 1; }]; +} +// GPR register classes which exclude SP/WSP. +def GPR32 : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WZR)> { + let AltOrders = [(rotl GPR32, 8)]; + let AltOrderSelect = [{ return 1; }]; +} +def GPR64 : RegisterClass<"AArch64", [i64], 64, (add GPR64common, XZR)> { + let AltOrders = [(rotl GPR64, 8)]; + let AltOrderSelect = [{ return 1; }]; +} + +// GPR register classes which include SP/WSP. +def GPR32sp : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WSP)> { + let AltOrders = [(rotl GPR32sp, 8)]; + let AltOrderSelect = [{ return 1; }]; +} +def GPR64sp : RegisterClass<"AArch64", [i64], 64, (add GPR64common, SP)> { + let AltOrders = [(rotl GPR64sp, 8)]; + let AltOrderSelect = [{ return 1; }]; +} + +def GPR32sponly : RegisterClass<"AArch64", [i32], 32, (add WSP)>; +def GPR64sponly : RegisterClass<"AArch64", [i64], 64, (add SP)>; + +def GPR64spPlus0Operand : AsmOperandClass { + let Name = "GPR64sp0"; + let RenderMethod = "addRegOperands"; + let PredicateMethod = "isGPR64"; + let ParserMethod = "tryParseGPR64sp0Operand"; +} + +def GPR64sp0 : RegisterOperand { + let ParserMatchClass = GPR64spPlus0Operand; +} + +// GPR32/GPR64 but with zero-register substitution enabled. +// TODO: Roll this out to GPR32/GPR64/GPR32all/GPR64all. 
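// GIZeroRegister presumably lets GlobalISel render an immediate zero through
// the zero register (WZR/XZR) for these operands; that reading is an
// assumption, the mechanism is not described anywhere in this file.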
+def GPR32z : RegisterOperand<GPR32> {
+  let GIZeroRegister = WZR;
+}
+def GPR64z : RegisterOperand<GPR64> {
+  let GIZeroRegister = XZR;
+}
+
+// GPR register classes which include WZR/XZR AND SP/WSP. This is not a
+// constraint used by any instructions, it is used as a common super-class.
+def GPR32all : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WZR, WSP)>;
+def GPR64all : RegisterClass<"AArch64", [i64], 64, (add GPR64common, XZR, SP)>;
+
+// For tail calls, we can't use callee-saved registers, as they are restored
+// to the saved value before the tail call, which would clobber a call address.
+// This is for indirect tail calls to store the address of the destination.
+def tcGPR64 : RegisterClass<"AArch64", [i64], 64, (sub GPR64common, X19, X20, X21,
+                                                    X22, X23, X24, X25, X26,
+                                                    X27, X28, FP, LR)>;
+
+// GPR register classes for post increment amount of vector load/store that
+// has alternate printing when Rm=31 and prints a constant immediate value
+// equal to the total number of bytes transferred.
+
+// FIXME: TableGen *should* be able to do these itself now. There appears to be
+// a bug in counting how many operands a Post-indexed MCInst should have which
+// means the aliases don't trigger.
+def GPR64pi1  : RegisterOperand<GPR64, "printPostIncOperand<1>">;
+def GPR64pi2  : RegisterOperand<GPR64, "printPostIncOperand<2>">;
+def GPR64pi3  : RegisterOperand<GPR64, "printPostIncOperand<3>">;
+def GPR64pi4  : RegisterOperand<GPR64, "printPostIncOperand<4>">;
+def GPR64pi6  : RegisterOperand<GPR64, "printPostIncOperand<6>">;
+def GPR64pi8  : RegisterOperand<GPR64, "printPostIncOperand<8>">;
+def GPR64pi12 : RegisterOperand<GPR64, "printPostIncOperand<12>">;
+def GPR64pi16 : RegisterOperand<GPR64, "printPostIncOperand<16>">;
+def GPR64pi24 : RegisterOperand<GPR64, "printPostIncOperand<24>">;
+def GPR64pi32 : RegisterOperand<GPR64, "printPostIncOperand<32>">;
+def GPR64pi48 : RegisterOperand<GPR64, "printPostIncOperand<48>">;
+def GPR64pi64 : RegisterOperand<GPR64, "printPostIncOperand<64>">;
+
+// Condition code regclass.
+def CCR : RegisterClass<"AArch64", [i32], 32, (add NZCV)> {
+  let CopyCost = -1; // Don't allow copying of status registers.
+
+  // CCR is not allocatable.
+ let isAllocatable = 0; +} + +//===----------------------------------------------------------------------===// +// Floating Point Scalar Registers +//===----------------------------------------------------------------------===// + +def B0 : AArch64Reg<0, "b0">, DwarfRegNum<[64]>; +def B1 : AArch64Reg<1, "b1">, DwarfRegNum<[65]>; +def B2 : AArch64Reg<2, "b2">, DwarfRegNum<[66]>; +def B3 : AArch64Reg<3, "b3">, DwarfRegNum<[67]>; +def B4 : AArch64Reg<4, "b4">, DwarfRegNum<[68]>; +def B5 : AArch64Reg<5, "b5">, DwarfRegNum<[69]>; +def B6 : AArch64Reg<6, "b6">, DwarfRegNum<[70]>; +def B7 : AArch64Reg<7, "b7">, DwarfRegNum<[71]>; +def B8 : AArch64Reg<8, "b8">, DwarfRegNum<[72]>; +def B9 : AArch64Reg<9, "b9">, DwarfRegNum<[73]>; +def B10 : AArch64Reg<10, "b10">, DwarfRegNum<[74]>; +def B11 : AArch64Reg<11, "b11">, DwarfRegNum<[75]>; +def B12 : AArch64Reg<12, "b12">, DwarfRegNum<[76]>; +def B13 : AArch64Reg<13, "b13">, DwarfRegNum<[77]>; +def B14 : AArch64Reg<14, "b14">, DwarfRegNum<[78]>; +def B15 : AArch64Reg<15, "b15">, DwarfRegNum<[79]>; +def B16 : AArch64Reg<16, "b16">, DwarfRegNum<[80]>; +def B17 : AArch64Reg<17, "b17">, DwarfRegNum<[81]>; +def B18 : AArch64Reg<18, "b18">, DwarfRegNum<[82]>; +def B19 : AArch64Reg<19, "b19">, DwarfRegNum<[83]>; +def B20 : AArch64Reg<20, "b20">, DwarfRegNum<[84]>; +def B21 : AArch64Reg<21, "b21">, DwarfRegNum<[85]>; +def B22 : AArch64Reg<22, "b22">, DwarfRegNum<[86]>; +def B23 : AArch64Reg<23, "b23">, DwarfRegNum<[87]>; +def B24 : AArch64Reg<24, "b24">, DwarfRegNum<[88]>; +def B25 : AArch64Reg<25, "b25">, DwarfRegNum<[89]>; +def B26 : AArch64Reg<26, "b26">, DwarfRegNum<[90]>; +def B27 : AArch64Reg<27, "b27">, DwarfRegNum<[91]>; +def B28 : AArch64Reg<28, "b28">, DwarfRegNum<[92]>; +def B29 : AArch64Reg<29, "b29">, DwarfRegNum<[93]>; +def B30 : AArch64Reg<30, "b30">, DwarfRegNum<[94]>; +def B31 : AArch64Reg<31, "b31">, DwarfRegNum<[95]>; + +let SubRegIndices = [bsub] in { +def H0 : AArch64Reg<0, "h0", [B0]>, DwarfRegAlias; +def H1 : AArch64Reg<1, "h1", [B1]>, DwarfRegAlias; +def H2 : AArch64Reg<2, "h2", [B2]>, DwarfRegAlias; +def H3 : AArch64Reg<3, "h3", [B3]>, DwarfRegAlias; +def H4 : AArch64Reg<4, "h4", [B4]>, DwarfRegAlias; +def H5 : AArch64Reg<5, "h5", [B5]>, DwarfRegAlias; +def H6 : AArch64Reg<6, "h6", [B6]>, DwarfRegAlias; +def H7 : AArch64Reg<7, "h7", [B7]>, DwarfRegAlias; +def H8 : AArch64Reg<8, "h8", [B8]>, DwarfRegAlias; +def H9 : AArch64Reg<9, "h9", [B9]>, DwarfRegAlias; +def H10 : AArch64Reg<10, "h10", [B10]>, DwarfRegAlias; +def H11 : AArch64Reg<11, "h11", [B11]>, DwarfRegAlias; +def H12 : AArch64Reg<12, "h12", [B12]>, DwarfRegAlias; +def H13 : AArch64Reg<13, "h13", [B13]>, DwarfRegAlias; +def H14 : AArch64Reg<14, "h14", [B14]>, DwarfRegAlias; +def H15 : AArch64Reg<15, "h15", [B15]>, DwarfRegAlias; +def H16 : AArch64Reg<16, "h16", [B16]>, DwarfRegAlias; +def H17 : AArch64Reg<17, "h17", [B17]>, DwarfRegAlias; +def H18 : AArch64Reg<18, "h18", [B18]>, DwarfRegAlias; +def H19 : AArch64Reg<19, "h19", [B19]>, DwarfRegAlias; +def H20 : AArch64Reg<20, "h20", [B20]>, DwarfRegAlias; +def H21 : AArch64Reg<21, "h21", [B21]>, DwarfRegAlias; +def H22 : AArch64Reg<22, "h22", [B22]>, DwarfRegAlias; +def H23 : AArch64Reg<23, "h23", [B23]>, DwarfRegAlias; +def H24 : AArch64Reg<24, "h24", [B24]>, DwarfRegAlias; +def H25 : AArch64Reg<25, "h25", [B25]>, DwarfRegAlias; +def H26 : AArch64Reg<26, "h26", [B26]>, DwarfRegAlias; +def H27 : AArch64Reg<27, "h27", [B27]>, DwarfRegAlias; +def H28 : AArch64Reg<28, "h28", [B28]>, DwarfRegAlias; +def H29 : AArch64Reg<29, "h29", 
[B29]>, DwarfRegAlias; +def H30 : AArch64Reg<30, "h30", [B30]>, DwarfRegAlias; +def H31 : AArch64Reg<31, "h31", [B31]>, DwarfRegAlias; +} + +let SubRegIndices = [hsub] in { +def S0 : AArch64Reg<0, "s0", [H0]>, DwarfRegAlias; +def S1 : AArch64Reg<1, "s1", [H1]>, DwarfRegAlias; +def S2 : AArch64Reg<2, "s2", [H2]>, DwarfRegAlias; +def S3 : AArch64Reg<3, "s3", [H3]>, DwarfRegAlias; +def S4 : AArch64Reg<4, "s4", [H4]>, DwarfRegAlias; +def S5 : AArch64Reg<5, "s5", [H5]>, DwarfRegAlias; +def S6 : AArch64Reg<6, "s6", [H6]>, DwarfRegAlias; +def S7 : AArch64Reg<7, "s7", [H7]>, DwarfRegAlias; +def S8 : AArch64Reg<8, "s8", [H8]>, DwarfRegAlias; +def S9 : AArch64Reg<9, "s9", [H9]>, DwarfRegAlias; +def S10 : AArch64Reg<10, "s10", [H10]>, DwarfRegAlias; +def S11 : AArch64Reg<11, "s11", [H11]>, DwarfRegAlias; +def S12 : AArch64Reg<12, "s12", [H12]>, DwarfRegAlias; +def S13 : AArch64Reg<13, "s13", [H13]>, DwarfRegAlias; +def S14 : AArch64Reg<14, "s14", [H14]>, DwarfRegAlias; +def S15 : AArch64Reg<15, "s15", [H15]>, DwarfRegAlias; +def S16 : AArch64Reg<16, "s16", [H16]>, DwarfRegAlias; +def S17 : AArch64Reg<17, "s17", [H17]>, DwarfRegAlias; +def S18 : AArch64Reg<18, "s18", [H18]>, DwarfRegAlias; +def S19 : AArch64Reg<19, "s19", [H19]>, DwarfRegAlias; +def S20 : AArch64Reg<20, "s20", [H20]>, DwarfRegAlias; +def S21 : AArch64Reg<21, "s21", [H21]>, DwarfRegAlias; +def S22 : AArch64Reg<22, "s22", [H22]>, DwarfRegAlias; +def S23 : AArch64Reg<23, "s23", [H23]>, DwarfRegAlias; +def S24 : AArch64Reg<24, "s24", [H24]>, DwarfRegAlias; +def S25 : AArch64Reg<25, "s25", [H25]>, DwarfRegAlias; +def S26 : AArch64Reg<26, "s26", [H26]>, DwarfRegAlias; +def S27 : AArch64Reg<27, "s27", [H27]>, DwarfRegAlias; +def S28 : AArch64Reg<28, "s28", [H28]>, DwarfRegAlias; +def S29 : AArch64Reg<29, "s29", [H29]>, DwarfRegAlias; +def S30 : AArch64Reg<30, "s30", [H30]>, DwarfRegAlias; +def S31 : AArch64Reg<31, "s31", [H31]>, DwarfRegAlias; +} + +let SubRegIndices = [ssub], RegAltNameIndices = [vreg, vlist1] in { +def D0 : AArch64Reg<0, "d0", [S0], ["v0", ""]>, DwarfRegAlias; +def D1 : AArch64Reg<1, "d1", [S1], ["v1", ""]>, DwarfRegAlias; +def D2 : AArch64Reg<2, "d2", [S2], ["v2", ""]>, DwarfRegAlias; +def D3 : AArch64Reg<3, "d3", [S3], ["v3", ""]>, DwarfRegAlias; +def D4 : AArch64Reg<4, "d4", [S4], ["v4", ""]>, DwarfRegAlias; +def D5 : AArch64Reg<5, "d5", [S5], ["v5", ""]>, DwarfRegAlias; +def D6 : AArch64Reg<6, "d6", [S6], ["v6", ""]>, DwarfRegAlias; +def D7 : AArch64Reg<7, "d7", [S7], ["v7", ""]>, DwarfRegAlias; +def D8 : AArch64Reg<8, "d8", [S8], ["v8", ""]>, DwarfRegAlias; +def D9 : AArch64Reg<9, "d9", [S9], ["v9", ""]>, DwarfRegAlias; +def D10 : AArch64Reg<10, "d10", [S10], ["v10", ""]>, DwarfRegAlias; +def D11 : AArch64Reg<11, "d11", [S11], ["v11", ""]>, DwarfRegAlias; +def D12 : AArch64Reg<12, "d12", [S12], ["v12", ""]>, DwarfRegAlias; +def D13 : AArch64Reg<13, "d13", [S13], ["v13", ""]>, DwarfRegAlias; +def D14 : AArch64Reg<14, "d14", [S14], ["v14", ""]>, DwarfRegAlias; +def D15 : AArch64Reg<15, "d15", [S15], ["v15", ""]>, DwarfRegAlias; +def D16 : AArch64Reg<16, "d16", [S16], ["v16", ""]>, DwarfRegAlias; +def D17 : AArch64Reg<17, "d17", [S17], ["v17", ""]>, DwarfRegAlias; +def D18 : AArch64Reg<18, "d18", [S18], ["v18", ""]>, DwarfRegAlias; +def D19 : AArch64Reg<19, "d19", [S19], ["v19", ""]>, DwarfRegAlias; +def D20 : AArch64Reg<20, "d20", [S20], ["v20", ""]>, DwarfRegAlias; +def D21 : AArch64Reg<21, "d21", [S21], ["v21", ""]>, DwarfRegAlias; +def D22 : AArch64Reg<22, "d22", [S22], ["v22", ""]>, DwarfRegAlias; +def D23 : 
AArch64Reg<23, "d23", [S23], ["v23", ""]>, DwarfRegAlias; +def D24 : AArch64Reg<24, "d24", [S24], ["v24", ""]>, DwarfRegAlias; +def D25 : AArch64Reg<25, "d25", [S25], ["v25", ""]>, DwarfRegAlias; +def D26 : AArch64Reg<26, "d26", [S26], ["v26", ""]>, DwarfRegAlias; +def D27 : AArch64Reg<27, "d27", [S27], ["v27", ""]>, DwarfRegAlias; +def D28 : AArch64Reg<28, "d28", [S28], ["v28", ""]>, DwarfRegAlias; +def D29 : AArch64Reg<29, "d29", [S29], ["v29", ""]>, DwarfRegAlias; +def D30 : AArch64Reg<30, "d30", [S30], ["v30", ""]>, DwarfRegAlias; +def D31 : AArch64Reg<31, "d31", [S31], ["v31", ""]>, DwarfRegAlias; +} + +let SubRegIndices = [dsub], RegAltNameIndices = [vreg, vlist1] in { +def Q0 : AArch64Reg<0, "q0", [D0], ["v0", ""]>, DwarfRegAlias; +def Q1 : AArch64Reg<1, "q1", [D1], ["v1", ""]>, DwarfRegAlias; +def Q2 : AArch64Reg<2, "q2", [D2], ["v2", ""]>, DwarfRegAlias; +def Q3 : AArch64Reg<3, "q3", [D3], ["v3", ""]>, DwarfRegAlias; +def Q4 : AArch64Reg<4, "q4", [D4], ["v4", ""]>, DwarfRegAlias; +def Q5 : AArch64Reg<5, "q5", [D5], ["v5", ""]>, DwarfRegAlias; +def Q6 : AArch64Reg<6, "q6", [D6], ["v6", ""]>, DwarfRegAlias; +def Q7 : AArch64Reg<7, "q7", [D7], ["v7", ""]>, DwarfRegAlias; +def Q8 : AArch64Reg<8, "q8", [D8], ["v8", ""]>, DwarfRegAlias; +def Q9 : AArch64Reg<9, "q9", [D9], ["v9", ""]>, DwarfRegAlias; +def Q10 : AArch64Reg<10, "q10", [D10], ["v10", ""]>, DwarfRegAlias; +def Q11 : AArch64Reg<11, "q11", [D11], ["v11", ""]>, DwarfRegAlias; +def Q12 : AArch64Reg<12, "q12", [D12], ["v12", ""]>, DwarfRegAlias; +def Q13 : AArch64Reg<13, "q13", [D13], ["v13", ""]>, DwarfRegAlias; +def Q14 : AArch64Reg<14, "q14", [D14], ["v14", ""]>, DwarfRegAlias; +def Q15 : AArch64Reg<15, "q15", [D15], ["v15", ""]>, DwarfRegAlias; +def Q16 : AArch64Reg<16, "q16", [D16], ["v16", ""]>, DwarfRegAlias; +def Q17 : AArch64Reg<17, "q17", [D17], ["v17", ""]>, DwarfRegAlias; +def Q18 : AArch64Reg<18, "q18", [D18], ["v18", ""]>, DwarfRegAlias; +def Q19 : AArch64Reg<19, "q19", [D19], ["v19", ""]>, DwarfRegAlias; +def Q20 : AArch64Reg<20, "q20", [D20], ["v20", ""]>, DwarfRegAlias; +def Q21 : AArch64Reg<21, "q21", [D21], ["v21", ""]>, DwarfRegAlias; +def Q22 : AArch64Reg<22, "q22", [D22], ["v22", ""]>, DwarfRegAlias; +def Q23 : AArch64Reg<23, "q23", [D23], ["v23", ""]>, DwarfRegAlias; +def Q24 : AArch64Reg<24, "q24", [D24], ["v24", ""]>, DwarfRegAlias; +def Q25 : AArch64Reg<25, "q25", [D25], ["v25", ""]>, DwarfRegAlias; +def Q26 : AArch64Reg<26, "q26", [D26], ["v26", ""]>, DwarfRegAlias; +def Q27 : AArch64Reg<27, "q27", [D27], ["v27", ""]>, DwarfRegAlias; +def Q28 : AArch64Reg<28, "q28", [D28], ["v28", ""]>, DwarfRegAlias; +def Q29 : AArch64Reg<29, "q29", [D29], ["v29", ""]>, DwarfRegAlias; +def Q30 : AArch64Reg<30, "q30", [D30], ["v30", ""]>, DwarfRegAlias; +def Q31 : AArch64Reg<31, "q31", [D31], ["v31", ""]>, DwarfRegAlias; +} + +def FPR8 : RegisterClass<"AArch64", [untyped], 8, (sequence "B%u", 0, 31)> { + let Size = 8; +} +def FPR16 : RegisterClass<"AArch64", [f16], 16, (sequence "H%u", 0, 31)> { + let Size = 16; +} +def FPR32 : RegisterClass<"AArch64", [f32, i32], 32,(sequence "S%u", 0, 31)>; +def FPR64 : RegisterClass<"AArch64", [f64, i64, v2f32, v1f64, v8i8, v4i16, v2i32, + v1i64, v4f16], + 64, (sequence "D%u", 0, 31)>; +// We don't (yet) have an f128 legal type, so don't use that here. We +// normalize 128-bit vectors to v2f64 for arg passing and such, so use +// that here. 
+def FPR128 : RegisterClass<"AArch64", + [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, f128, + v8f16], + 128, (sequence "Q%u", 0, 31)>; + +// The lower 16 vector registers. Some instructions can only take registers +// in this range. +def FPR128_lo : RegisterClass<"AArch64", + [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16], + 128, (trunc FPR128, 16)>; + +// Pairs, triples, and quads of 64-bit vector registers. +def DSeqPairs : RegisterTuples<[dsub0, dsub1], [(rotl FPR64, 0), (rotl FPR64, 1)]>; +def DSeqTriples : RegisterTuples<[dsub0, dsub1, dsub2], + [(rotl FPR64, 0), (rotl FPR64, 1), + (rotl FPR64, 2)]>; +def DSeqQuads : RegisterTuples<[dsub0, dsub1, dsub2, dsub3], + [(rotl FPR64, 0), (rotl FPR64, 1), + (rotl FPR64, 2), (rotl FPR64, 3)]>; +def DD : RegisterClass<"AArch64", [untyped], 64, (add DSeqPairs)> { + let Size = 128; +} +def DDD : RegisterClass<"AArch64", [untyped], 64, (add DSeqTriples)> { + let Size = 192; +} +def DDDD : RegisterClass<"AArch64", [untyped], 64, (add DSeqQuads)> { + let Size = 256; +} + +// Pairs, triples, and quads of 128-bit vector registers. +def QSeqPairs : RegisterTuples<[qsub0, qsub1], [(rotl FPR128, 0), (rotl FPR128, 1)]>; +def QSeqTriples : RegisterTuples<[qsub0, qsub1, qsub2], + [(rotl FPR128, 0), (rotl FPR128, 1), + (rotl FPR128, 2)]>; +def QSeqQuads : RegisterTuples<[qsub0, qsub1, qsub2, qsub3], + [(rotl FPR128, 0), (rotl FPR128, 1), + (rotl FPR128, 2), (rotl FPR128, 3)]>; +def QQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqPairs)> { + let Size = 256; +} +def QQQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqTriples)> { + let Size = 384; +} +def QQQQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqQuads)> { + let Size = 512; +} + + +// Vector operand versions of the FP registers. Alternate name printing and +// assmebler matching. +def VectorReg64AsmOperand : AsmOperandClass { + let Name = "VectorReg64"; + let PredicateMethod = "isNeonVectorReg"; +} +def VectorReg128AsmOperand : AsmOperandClass { + let Name = "VectorReg128"; + let PredicateMethod = "isNeonVectorReg"; +} + +def V64 : RegisterOperand { + let ParserMatchClass = VectorReg64AsmOperand; +} + +def V128 : RegisterOperand { + let ParserMatchClass = VectorReg128AsmOperand; +} + +def VectorRegLoAsmOperand : AsmOperandClass { + let Name = "VectorRegLo"; + let PredicateMethod = "isNeonVectorRegLo"; +} +def V128_lo : RegisterOperand { + let ParserMatchClass = VectorRegLoAsmOperand; +} + +class TypedVecListAsmOperand + : AsmOperandClass { + let Name = "TypedVectorList" # count # "_" # lanes # eltsize; + + let PredicateMethod + = "isTypedVectorList"; + let RenderMethod = "addVectorListOperands<" # vecty # ", " # count # ">"; +} + +class TypedVecListRegOperand + : RegisterOperand">; + +multiclass VectorList { + // With implicit types (probably on instruction instead). E.g. { v0, v1 } + def _64AsmOperand : AsmOperandClass { + let Name = NAME # "64"; + let PredicateMethod = "isImplicitlyTypedVectorList"; + let RenderMethod = "addVectorListOperands"; + } + + def "64" : RegisterOperand { + let ParserMatchClass = !cast(NAME # "_64AsmOperand"); + } + + def _128AsmOperand : AsmOperandClass { + let Name = NAME # "128"; + let PredicateMethod = "isImplicitlyTypedVectorList"; + let RenderMethod = "addVectorListOperands"; + } + + def "128" : RegisterOperand { + let ParserMatchClass = !cast(NAME # "_128AsmOperand"); + } + + // 64-bit register lists with explicit type. 
+ + // { v0.8b, v1.8b } + def _8bAsmOperand : TypedVecListAsmOperand; + def "8b" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_8bAsmOperand"); + } + + // { v0.4h, v1.4h } + def _4hAsmOperand : TypedVecListAsmOperand; + def "4h" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_4hAsmOperand"); + } + + // { v0.2s, v1.2s } + def _2sAsmOperand : TypedVecListAsmOperand; + def "2s" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_2sAsmOperand"); + } + + // { v0.1d, v1.1d } + def _1dAsmOperand : TypedVecListAsmOperand; + def "1d" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_1dAsmOperand"); + } + + // 128-bit register lists with explicit type + + // { v0.16b, v1.16b } + def _16bAsmOperand : TypedVecListAsmOperand; + def "16b" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_16bAsmOperand"); + } + + // { v0.8h, v1.8h } + def _8hAsmOperand : TypedVecListAsmOperand; + def "8h" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_8hAsmOperand"); + } + + // { v0.4s, v1.4s } + def _4sAsmOperand : TypedVecListAsmOperand; + def "4s" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_4sAsmOperand"); + } + + // { v0.2d, v1.2d } + def _2dAsmOperand : TypedVecListAsmOperand; + def "2d" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_2dAsmOperand"); + } + + // { v0.b, v1.b } + def _bAsmOperand : TypedVecListAsmOperand; + def "b" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_bAsmOperand"); + } + + // { v0.h, v1.h } + def _hAsmOperand : TypedVecListAsmOperand; + def "h" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_hAsmOperand"); + } + + // { v0.s, v1.s } + def _sAsmOperand : TypedVecListAsmOperand; + def "s" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_sAsmOperand"); + } + + // { v0.d, v1.d } + def _dAsmOperand : TypedVecListAsmOperand; + def "d" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_dAsmOperand"); + } + + +} + +defm VecListOne : VectorList<1, FPR64, FPR128>; +defm VecListTwo : VectorList<2, DD, QQ>; +defm VecListThree : VectorList<3, DDD, QQQ>; +defm VecListFour : VectorList<4, DDDD, QQQQ>; + +class FPRAsmOperand : AsmOperandClass { + let Name = "FPRAsmOperand" # RC; + let PredicateMethod = "isGPR64"; + let RenderMethod = "addRegOperands"; +} + +// Register operand versions of the scalar FP registers. 
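// For reference, the intended shape of the following definitions (the
// RegisterOperand template argument is assumed from upstream LLVM, as it is
// not legible in this copy of the file) is roughly:
//
//   def FPR64Op : RegisterOperand<FPR64> {
//     let ParserMatchClass = FPRAsmOperand<"FPR64">;
//   }
//
// with FPR8Op, FPR16Op, FPR32Op and FPR128Op following the same pattern over
// their respective register classes.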
+def FPR8Op : RegisterOperand { + let ParserMatchClass = FPRAsmOperand<"FPR8">; +} + +def FPR16Op : RegisterOperand { + let ParserMatchClass = FPRAsmOperand<"FPR16">; +} + +def FPR32Op : RegisterOperand { + let ParserMatchClass = FPRAsmOperand<"FPR32">; +} + +def FPR64Op : RegisterOperand { + let ParserMatchClass = FPRAsmOperand<"FPR64">; +} + +def FPR128Op : RegisterOperand { + let ParserMatchClass = FPRAsmOperand<"FPR128">; +} + +//===----------------------------------------------------------------------===// +// ARMv8.1a atomic CASP register operands + + +def WSeqPairs : RegisterTuples<[sube32, subo32], + [(rotl GPR32, 0), (rotl GPR32, 1)]>; +def XSeqPairs : RegisterTuples<[sube64, subo64], + [(rotl GPR64, 0), (rotl GPR64, 1)]>; + +def WSeqPairsClass : RegisterClass<"AArch64", [untyped], 32, + (add WSeqPairs)>{ + let Size = 64; +} +def XSeqPairsClass : RegisterClass<"AArch64", [untyped], 64, + (add XSeqPairs)>{ + let Size = 128; +} + + +let RenderMethod = "addRegOperands", ParserMethod="tryParseGPRSeqPair" in { + def WSeqPairsAsmOperandClass : AsmOperandClass { let Name = "WSeqPair"; } + def XSeqPairsAsmOperandClass : AsmOperandClass { let Name = "XSeqPair"; } +} + +def WSeqPairClassOperand : + RegisterOperand"> { + let ParserMatchClass = WSeqPairsAsmOperandClass; +} +def XSeqPairClassOperand : + RegisterOperand"> { + let ParserMatchClass = XSeqPairsAsmOperandClass; +} + + +//===----- END: v8.1a atomic CASP register operands -----------------------===// + +// SVE predicate registers +def P0 : AArch64Reg<0, "p0">, DwarfRegNum<[48]>; +def P1 : AArch64Reg<1, "p1">, DwarfRegNum<[49]>; +def P2 : AArch64Reg<2, "p2">, DwarfRegNum<[50]>; +def P3 : AArch64Reg<3, "p3">, DwarfRegNum<[51]>; +def P4 : AArch64Reg<4, "p4">, DwarfRegNum<[52]>; +def P5 : AArch64Reg<5, "p5">, DwarfRegNum<[53]>; +def P6 : AArch64Reg<6, "p6">, DwarfRegNum<[54]>; +def P7 : AArch64Reg<7, "p7">, DwarfRegNum<[55]>; +def P8 : AArch64Reg<8, "p8">, DwarfRegNum<[56]>; +def P9 : AArch64Reg<9, "p9">, DwarfRegNum<[57]>; +def P10 : AArch64Reg<10, "p10">, DwarfRegNum<[58]>; +def P11 : AArch64Reg<11, "p11">, DwarfRegNum<[59]>; +def P12 : AArch64Reg<12, "p12">, DwarfRegNum<[60]>; +def P13 : AArch64Reg<13, "p13">, DwarfRegNum<[61]>; +def P14 : AArch64Reg<14, "p14">, DwarfRegNum<[62]>; +def P15 : AArch64Reg<15, "p15">, DwarfRegNum<[63]>; + +// The part of SVE registers that don't overlap Neon registers. +// These are only used as part of clobber lists. 
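// In other words, each scalable Z register is modelled as its fixed 128-bit
// Q subregister plus one of the Z*_HI pseudo-registers defined below (see
// the [Qn, Zn_HI] sub-register lists further down), so naming a Z register
// in a clobber list also marks its non-Neon upper part as clobbered.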
+def Z0_HI : AArch64Reg<0, "z0_hi">; +def Z1_HI : AArch64Reg<1, "z1_hi">; +def Z2_HI : AArch64Reg<2, "z2_hi">; +def Z3_HI : AArch64Reg<3, "z3_hi">; +def Z4_HI : AArch64Reg<4, "z4_hi">; +def Z5_HI : AArch64Reg<5, "z5_hi">; +def Z6_HI : AArch64Reg<6, "z6_hi">; +def Z7_HI : AArch64Reg<7, "z7_hi">; +def Z8_HI : AArch64Reg<8, "z8_hi">; +def Z9_HI : AArch64Reg<9, "z9_hi">; +def Z10_HI : AArch64Reg<10, "z10_hi">; +def Z11_HI : AArch64Reg<11, "z11_hi">; +def Z12_HI : AArch64Reg<12, "z12_hi">; +def Z13_HI : AArch64Reg<13, "z13_hi">; +def Z14_HI : AArch64Reg<14, "z14_hi">; +def Z15_HI : AArch64Reg<15, "z15_hi">; +def Z16_HI : AArch64Reg<16, "z16_hi">; +def Z17_HI : AArch64Reg<17, "z17_hi">; +def Z18_HI : AArch64Reg<18, "z18_hi">; +def Z19_HI : AArch64Reg<19, "z19_hi">; +def Z20_HI : AArch64Reg<20, "z20_hi">; +def Z21_HI : AArch64Reg<21, "z21_hi">; +def Z22_HI : AArch64Reg<22, "z22_hi">; +def Z23_HI : AArch64Reg<23, "z23_hi">; +def Z24_HI : AArch64Reg<24, "z24_hi">; +def Z25_HI : AArch64Reg<25, "z25_hi">; +def Z26_HI : AArch64Reg<26, "z26_hi">; +def Z27_HI : AArch64Reg<27, "z27_hi">; +def Z28_HI : AArch64Reg<28, "z28_hi">; +def Z29_HI : AArch64Reg<29, "z29_hi">; +def Z30_HI : AArch64Reg<30, "z30_hi">; +def Z31_HI : AArch64Reg<31, "z31_hi">; + +// SVE variable-size vector registers +let SubRegIndices = [zsub,zsub_hi] in { +def Z0 : AArch64Reg<0, "z0", [Q0, Z0_HI]>, DwarfRegNum<[96]>; +def Z1 : AArch64Reg<1, "z1", [Q1, Z1_HI]>, DwarfRegNum<[97]>; +def Z2 : AArch64Reg<2, "z2", [Q2, Z2_HI]>, DwarfRegNum<[98]>; +def Z3 : AArch64Reg<3, "z3", [Q3, Z3_HI]>, DwarfRegNum<[99]>; +def Z4 : AArch64Reg<4, "z4", [Q4, Z4_HI]>, DwarfRegNum<[100]>; +def Z5 : AArch64Reg<5, "z5", [Q5, Z5_HI]>, DwarfRegNum<[101]>; +def Z6 : AArch64Reg<6, "z6", [Q6, Z6_HI]>, DwarfRegNum<[102]>; +def Z7 : AArch64Reg<7, "z7", [Q7, Z7_HI]>, DwarfRegNum<[103]>; +def Z8 : AArch64Reg<8, "z8", [Q8, Z8_HI]>, DwarfRegNum<[104]>; +def Z9 : AArch64Reg<9, "z9", [Q9, Z9_HI]>, DwarfRegNum<[105]>; +def Z10 : AArch64Reg<10, "z10", [Q10, Z10_HI]>, DwarfRegNum<[106]>; +def Z11 : AArch64Reg<11, "z11", [Q11, Z11_HI]>, DwarfRegNum<[107]>; +def Z12 : AArch64Reg<12, "z12", [Q12, Z12_HI]>, DwarfRegNum<[108]>; +def Z13 : AArch64Reg<13, "z13", [Q13, Z13_HI]>, DwarfRegNum<[109]>; +def Z14 : AArch64Reg<14, "z14", [Q14, Z14_HI]>, DwarfRegNum<[110]>; +def Z15 : AArch64Reg<15, "z15", [Q15, Z15_HI]>, DwarfRegNum<[111]>; +def Z16 : AArch64Reg<16, "z16", [Q16, Z16_HI]>, DwarfRegNum<[112]>; +def Z17 : AArch64Reg<17, "z17", [Q17, Z17_HI]>, DwarfRegNum<[113]>; +def Z18 : AArch64Reg<18, "z18", [Q18, Z18_HI]>, DwarfRegNum<[114]>; +def Z19 : AArch64Reg<19, "z19", [Q19, Z19_HI]>, DwarfRegNum<[115]>; +def Z20 : AArch64Reg<20, "z20", [Q20, Z20_HI]>, DwarfRegNum<[116]>; +def Z21 : AArch64Reg<21, "z21", [Q21, Z21_HI]>, DwarfRegNum<[117]>; +def Z22 : AArch64Reg<22, "z22", [Q22, Z22_HI]>, DwarfRegNum<[118]>; +def Z23 : AArch64Reg<23, "z23", [Q23, Z23_HI]>, DwarfRegNum<[119]>; +def Z24 : AArch64Reg<24, "z24", [Q24, Z24_HI]>, DwarfRegNum<[120]>; +def Z25 : AArch64Reg<25, "z25", [Q25, Z25_HI]>, DwarfRegNum<[121]>; +def Z26 : AArch64Reg<26, "z26", [Q26, Z26_HI]>, DwarfRegNum<[122]>; +def Z27 : AArch64Reg<27, "z27", [Q27, Z27_HI]>, DwarfRegNum<[123]>; +def Z28 : AArch64Reg<28, "z28", [Q28, Z28_HI]>, DwarfRegNum<[124]>; +def Z29 : AArch64Reg<29, "z29", [Q29, Z29_HI]>, DwarfRegNum<[125]>; +def Z30 : AArch64Reg<30, "z30", [Q30, Z30_HI]>, DwarfRegNum<[126]>; +def Z31 : AArch64Reg<31, "z31", [Q31, Z31_HI]>, DwarfRegNum<[127]>; +} + +// Enum descibing the element size for destructive +// 
operations. +class ElementSizeEnum val> { + bits<3> Value = val; +} + +def ElementSizeNone : ElementSizeEnum<0>; +def ElementSizeB : ElementSizeEnum<1>; +def ElementSizeH : ElementSizeEnum<2>; +def ElementSizeS : ElementSizeEnum<3>; +def ElementSizeD : ElementSizeEnum<4>; +def ElementSizeQ : ElementSizeEnum<5>; // Unused + +class SVERegOp : RegisterOperand { + ElementSizeEnum ElementSize; + + let ElementSize = Size; + let PrintMethod = !if(!eq(Suffix, ""), + "printSVERegOp<>", + "printSVERegOp<'" # Suffix # "'>"); + let ParserMatchClass = C; +} + +class PPRRegOp : SVERegOp {} +class ZPRRegOp : SVERegOp {} + +//****************************************************************************** + +// SVE predicate register classes. +class PPRClass : RegisterClass< + "AArch64", + [ nxv16i1, nxv8i1, nxv4i1, nxv2i1 ], 16, + (sequence "P%u", 0, lastreg)> { + let Size = 16; +} + +def PPR : PPRClass<15>; +def PPR_3b : PPRClass<7>; // Restricted 3 bit SVE predicate register class. + +class PPRAsmOperand : AsmOperandClass { + let Name = "SVE" # name # "Reg"; + let PredicateMethod = "isSVEPredicateVectorRegOfWidth<" + # Width # ", " # "AArch64::" # RegClass # "RegClassID>"; + let DiagnosticType = "InvalidSVE" # name # "Reg"; + let RenderMethod = "addRegOperands"; + let ParserMethod = "tryParseSVEPredicateVector"; +} + +def PPRAsmOpAny : PPRAsmOperand<"PredicateAny", "PPR", 0>; +def PPRAsmOp8 : PPRAsmOperand<"PredicateB", "PPR", 8>; +def PPRAsmOp16 : PPRAsmOperand<"PredicateH", "PPR", 16>; +def PPRAsmOp32 : PPRAsmOperand<"PredicateS", "PPR", 32>; +def PPRAsmOp64 : PPRAsmOperand<"PredicateD", "PPR", 64>; + +def PPRAny : PPRRegOp<"", PPRAsmOpAny, ElementSizeNone, PPR>; +def PPR8 : PPRRegOp<"b", PPRAsmOp8, ElementSizeB, PPR>; +def PPR16 : PPRRegOp<"h", PPRAsmOp16, ElementSizeH, PPR>; +def PPR32 : PPRRegOp<"s", PPRAsmOp32, ElementSizeS, PPR>; +def PPR64 : PPRRegOp<"d", PPRAsmOp64, ElementSizeD, PPR>; + +def PPRAsmOp3bAny : PPRAsmOperand<"Predicate3bAny", "PPR_3b", 0>; +def PPRAsmOp3b8 : PPRAsmOperand<"Predicate3bB", "PPR_3b", 8>; +def PPRAsmOp3b16 : PPRAsmOperand<"Predicate3bH", "PPR_3b", 16>; +def PPRAsmOp3b32 : PPRAsmOperand<"Predicate3bS", "PPR_3b", 32>; +def PPRAsmOp3b64 : PPRAsmOperand<"Predicate3bD", "PPR_3b", 64>; + +def PPR3bAny : PPRRegOp<"", PPRAsmOp3bAny, ElementSizeNone, PPR_3b>; +def PPR3b8 : PPRRegOp<"b", PPRAsmOp3b8, ElementSizeB, PPR_3b>; +def PPR3b16 : PPRRegOp<"h", PPRAsmOp3b16, ElementSizeH, PPR_3b>; +def PPR3b32 : PPRRegOp<"s", PPRAsmOp3b32, ElementSizeS, PPR_3b>; +def PPR3b64 : PPRRegOp<"d", PPRAsmOp3b64, ElementSizeD, PPR_3b>; + +//****************************************************************************** + +// SVE vector register class +def ZPR : RegisterClass<"AArch64", + [nxv16i8, nxv8i16, nxv4i32, nxv2i64, + nxv2f16, nxv4f16, nxv8f16, + nxv1f32, nxv2f32, nxv4f32, + nxv1f64, nxv2f64], + 128, (sequence "Z%u", 0, 31)> { + let Size = 128; +} + +// SVE restricted 4 bit scalable vector register class +def ZPR_4b : RegisterClass<"AArch64", + [nxv16i8, nxv8i16, nxv4i32, nxv2i64, + nxv2f16, nxv4f16, nxv8f16, + nxv1f32, nxv2f32, nxv4f32, + nxv1f64, nxv2f64], + 128, (sequence "Z%u", 0, 15)> { + let Size = 128; +} + +// SVE restricted 3 bit scalable vector register class +def ZPR_3b : RegisterClass<"AArch64", + [nxv16i8, nxv8i16, nxv4i32, nxv2i64, + nxv2f16, nxv4f16, nxv8f16, + nxv1f32, nxv2f32, nxv4f32, + nxv1f64, nxv2f64], + 128, (sequence "Z%u", 0, 7)> { + let Size = 128; +} + +class ZPRAsmOperand + : AsmOperandClass { + let Name = "SVE" # name # "Reg"; + let PredicateMethod = 
"isSVEDataVectorRegOfWidth<" + # Width # ", AArch64::ZPR" + # RegClassSuffix # "RegClassID>"; + let RenderMethod = "addRegOperands"; + let DiagnosticType = "InvalidZPR" # RegClassSuffix # Width; + let ParserMethod = "tryParseSVEDataVector"; +} + +def ZPRAsmOpAny : ZPRAsmOperand<"VectorAny", 0>; +def ZPRAsmOp8 : ZPRAsmOperand<"VectorB", 8>; +def ZPRAsmOp16 : ZPRAsmOperand<"VectorH", 16>; +def ZPRAsmOp32 : ZPRAsmOperand<"VectorS", 32>; +def ZPRAsmOp64 : ZPRAsmOperand<"VectorD", 64>; +def ZPRAsmOp128 : ZPRAsmOperand<"VectorQ", 128>; + +def ZPRAny : ZPRRegOp<"", ZPRAsmOpAny, ElementSizeNone, ZPR>; +def ZPR8 : ZPRRegOp<"b", ZPRAsmOp8, ElementSizeB, ZPR>; +def ZPR16 : ZPRRegOp<"h", ZPRAsmOp16, ElementSizeH, ZPR>; +def ZPR32 : ZPRRegOp<"s", ZPRAsmOp32, ElementSizeS, ZPR>; +def ZPR64 : ZPRRegOp<"d", ZPRAsmOp64, ElementSizeD, ZPR>; +def ZPR128 : ZPRRegOp<"q", ZPRAsmOp128, ElementSizeQ, ZPR>; + +def ZPRAsmOp3b8 : ZPRAsmOperand<"Vector3bB", 8, "_3b">; +def ZPRAsmOp3b16 : ZPRAsmOperand<"Vector3bH", 16, "_3b">; +def ZPRAsmOp3b32 : ZPRAsmOperand<"Vector3bS", 32, "_3b">; + +def ZPR3b8 : ZPRRegOp<"b", ZPRAsmOp3b8, ElementSizeB, ZPR_3b>; +def ZPR3b16 : ZPRRegOp<"h", ZPRAsmOp3b16, ElementSizeH, ZPR_3b>; +def ZPR3b32 : ZPRRegOp<"s", ZPRAsmOp3b32, ElementSizeS, ZPR_3b>; + +def ZPRAsmOp4b16 : ZPRAsmOperand<"Vector4bH", 16, "_4b">; +def ZPRAsmOp4b32 : ZPRAsmOperand<"Vector4bS", 32, "_4b">; +def ZPRAsmOp4b64 : ZPRAsmOperand<"Vector4bD", 64, "_4b">; + +def ZPR4b16 : ZPRRegOp<"h", ZPRAsmOp4b16, ElementSizeH, ZPR_4b>; +def ZPR4b32 : ZPRRegOp<"s", ZPRAsmOp4b32, ElementSizeS, ZPR_4b>; +def ZPR4b64 : ZPRRegOp<"d", ZPRAsmOp4b64, ElementSizeD, ZPR_4b>; + +class FPRasZPR : AsmOperandClass{ + let Name = "FPR" # Width # "asZPR"; + let PredicateMethod = "isFPRasZPR"; + let RenderMethod = "addFPRasZPRRegOperands<" # Width # ">"; +} + +class FPRasZPROperand : RegisterOperand { + let ParserMatchClass = FPRasZPR; + let PrintMethod = "printZPRasFPR<" # Width # ">"; +} + +def FPR8asZPR : FPRasZPROperand<8>; +def FPR16asZPR : FPRasZPROperand<16>; +def FPR32asZPR : FPRasZPROperand<32>; +def FPR64asZPR : FPRasZPROperand<64>; +def FPR128asZPR : FPRasZPROperand<128>; + +let Namespace = "AArch64" in { + def zsub0 : SubRegIndex<128, -1>; + def zsub1 : SubRegIndex<128, -1>; + def zsub2 : SubRegIndex<128, -1>; + def zsub3 : SubRegIndex<128, -1>; +} + +// Pairs, triples, and quads of SVE vector registers. 
+
+class FPRasZPR<int Width> : AsmOperandClass{
+  let Name = "FPR" # Width # "asZPR";
+  let PredicateMethod = "isFPRasZPR<AArch64::FPR" # Width # "RegClassID>";
+  let RenderMethod = "addFPRasZPRRegOperands<" # Width # ">";
+}
+
+class FPRasZPROperand<int Width> : RegisterOperand<ZPR> {
+  let ParserMatchClass = FPRasZPR<Width>;
+  let PrintMethod = "printZPRasFPR<" # Width # ">";
+}
+
+def FPR8asZPR   : FPRasZPROperand<8>;
+def FPR16asZPR  : FPRasZPROperand<16>;
+def FPR32asZPR  : FPRasZPROperand<32>;
+def FPR64asZPR  : FPRasZPROperand<64>;
+def FPR128asZPR : FPRasZPROperand<128>;
+
+let Namespace = "AArch64" in {
+  def zsub0 : SubRegIndex<128, -1>;
+  def zsub1 : SubRegIndex<128, -1>;
+  def zsub2 : SubRegIndex<128, -1>;
+  def zsub3 : SubRegIndex<128, -1>;
+}
+
+// Pairs, triples, and quads of SVE vector registers.
+def ZSeqPairs   : RegisterTuples<[zsub0, zsub1], [(rotl ZPR, 0), (rotl ZPR, 1)]>;
+def ZSeqTriples : RegisterTuples<[zsub0, zsub1, zsub2], [(rotl ZPR, 0), (rotl ZPR, 1), (rotl ZPR, 2)]>;
+def ZSeqQuads   : RegisterTuples<[zsub0, zsub1, zsub2, zsub3], [(rotl ZPR, 0), (rotl ZPR, 1), (rotl ZPR, 2), (rotl ZPR, 3)]>;
+
+def ZPR2 : RegisterClass<"AArch64", [untyped], 128, (add ZSeqPairs)> {
+  let Size = 256;
+}
+def ZPR3 : RegisterClass<"AArch64", [untyped], 128, (add ZSeqTriples)> {
+  let Size = 384;
+}
+def ZPR4 : RegisterClass<"AArch64", [untyped], 128, (add ZSeqQuads)> {
+  let Size = 512;
+}
+
+class ZPRVectorList<int ElementWidth, int NumRegs> : AsmOperandClass {
+  let Name = "SVEVectorList" # NumRegs # ElementWidth;
+  let ParserMethod = "tryParseVectorList";
+  let PredicateMethod =
+      "isTypedVectorList";
+  let RenderMethod = "addVectorListOperands";
+}
+
+def Z_b : RegisterOperand<ZPR, "printTypedVectorList<0,'b'>"> {
+  let ParserMatchClass = ZPRVectorList<8, 1>;
+}
+
+def Z_h : RegisterOperand<ZPR, "printTypedVectorList<0,'h'>"> {
+  let ParserMatchClass = ZPRVectorList<16, 1>;
+}
+
+def Z_s : RegisterOperand<ZPR, "printTypedVectorList<0,'s'>"> {
+  let ParserMatchClass = ZPRVectorList<32, 1>;
+}
+
+def Z_d : RegisterOperand<ZPR, "printTypedVectorList<0,'d'>"> {
+  let ParserMatchClass = ZPRVectorList<64, 1>;
+}
+
+def ZZ_b : RegisterOperand<ZPR2, "printTypedVectorList<0,'b'>"> {
+  let ParserMatchClass = ZPRVectorList<8, 2>;
+}
+
+def ZZ_h : RegisterOperand<ZPR2, "printTypedVectorList<0,'h'>"> {
+  let ParserMatchClass = ZPRVectorList<16, 2>;
+}
+
+def ZZ_s : RegisterOperand<ZPR2, "printTypedVectorList<0,'s'>"> {
+  let ParserMatchClass = ZPRVectorList<32, 2>;
+}
+
+def ZZ_d : RegisterOperand<ZPR2, "printTypedVectorList<0,'d'>"> {
+  let ParserMatchClass = ZPRVectorList<64, 2>;
+}
+
+def ZZZ_b : RegisterOperand<ZPR3, "printTypedVectorList<0,'b'>"> {
+  let ParserMatchClass = ZPRVectorList<8, 3>;
+}
+
+def ZZZ_h : RegisterOperand<ZPR3, "printTypedVectorList<0,'h'>"> {
+  let ParserMatchClass = ZPRVectorList<16, 3>;
+}
+
+def ZZZ_s : RegisterOperand<ZPR3, "printTypedVectorList<0,'s'>"> {
+  let ParserMatchClass = ZPRVectorList<32, 3>;
+}
+
+def ZZZ_d : RegisterOperand<ZPR3, "printTypedVectorList<0,'d'>"> {
+  let ParserMatchClass = ZPRVectorList<64, 3>;
+}
+
+def ZZZZ_b : RegisterOperand<ZPR4, "printTypedVectorList<0,'b'>"> {
+  let ParserMatchClass = ZPRVectorList<8, 4>;
+}
+
+def ZZZZ_h : RegisterOperand<ZPR4, "printTypedVectorList<0,'h'>"> {
+  let ParserMatchClass = ZPRVectorList<16, 4>;
+}
+
+def ZZZZ_s : RegisterOperand<ZPR4, "printTypedVectorList<0,'s'>"> {
+  let ParserMatchClass = ZPRVectorList<32, 4>;
+}
+
+def ZZZZ_d : RegisterOperand<ZPR4, "printTypedVectorList<0,'d'>"> {
+  let ParserMatchClass = ZPRVectorList<64, 4>;
+}
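Each of the vector-list operands above pairs one of the ZPR/ZPR2/ZPR3/ZPR4 tuple classes with an element width, which is what lets the assembler match lists such as "{ z0.s, z1.s, z2.s }" for the ld3w/st3w style instructions defined further down in AArch64SVEInstrInfo.td. A standalone sketch of that pairing (hypothetical Toy* names, illustration only, not part of this patch):

    // Illustration only -- not part of the imported LLVM source.
    class ToyVectorList<int ElementWidth, int NumRegs> {
      string Name = "SVEVectorList" # NumRegs # ElementWidth;
    }

    def ToyList1_s : ToyVectorList<32, 1>;  // matches { z0.s }                   (e.g. ld1w)
    def ToyList2_s : ToyVectorList<32, 2>;  // matches { z0.s, z1.s }             (e.g. ld2w)
    def ToyList3_s : ToyVectorList<32, 3>;  // matches { z0.s, z1.s, z2.s }       (e.g. ld3w)
    def ToyList4_s : ToyVectorList<32, 4>;  // matches { z0.s, z1.s, z2.s, z3.s } (e.g. ld4w)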
+
+class ZPRExtendAsmOperand<string ShiftExtend, int RegWidth, int Scale,
+                          bit ScaleAlwaysSame = 0b0> : AsmOperandClass {
+  let Name = "ZPRExtend" # ShiftExtend # RegWidth # Scale
+                         # !if(ScaleAlwaysSame, "Only", "");
+
+  let PredicateMethod = "isSVEDataVectorRegWithShiftExtend<"
+                        # RegWidth # ", AArch64::ZPRRegClassID, "
+                        # "AArch64_AM::" # ShiftExtend # ", "
+                        # Scale # ", "
+                        # !if(ScaleAlwaysSame, "true", "false")
+                        # ">";
+  let DiagnosticType = "InvalidZPR" # RegWidth # ShiftExtend # Scale;
+  let RenderMethod = "addRegOperands";
+  let ParserMethod = "tryParseSVEDataVector";
+}
+
+class ZPRExtendRegisterOperand<bit SignExtend, bit IsLSL, string Repr,
+                               int RegWidth, int Scale, string Suffix = "">
+    : RegisterOperand<ZPR> {
+  let ParserMatchClass =
+      !cast<AsmOperandClass>("ZPR" # RegWidth # "AsmOpndExt" # Repr # Scale # Suffix);
+  let PrintMethod = "printRegWithShiftExtend<"
+                    # !if(SignExtend, "true", "false") # ", "
+                    # Scale # ", "
+                    # !if(IsLSL, "'x'", "'w'") # ", "
+                    # !if(!eq(RegWidth, 32), "'s'", "'d'") # ">";
+}
+
+foreach RegWidth = [32, 64] in {
+  // UXTW(8|16|32|64)
+  def ZPR#RegWidth#AsmOpndExtUXTW8Only : ZPRExtendAsmOperand<"UXTW", RegWidth, 8, 0b1>;
+  def ZPR#RegWidth#AsmOpndExtUXTW8     : ZPRExtendAsmOperand<"UXTW", RegWidth, 8>;
+  def ZPR#RegWidth#AsmOpndExtUXTW16    : ZPRExtendAsmOperand<"UXTW", RegWidth, 16>;
+  def ZPR#RegWidth#AsmOpndExtUXTW32    : ZPRExtendAsmOperand<"UXTW", RegWidth, 32>;
+  def ZPR#RegWidth#AsmOpndExtUXTW64    : ZPRExtendAsmOperand<"UXTW", RegWidth, 64>;
+
+  def ZPR#RegWidth#ExtUXTW8Only : ZPRExtendRegisterOperand<0b0, 0b0, "UXTW", RegWidth, 8, "Only">;
+  def ZPR#RegWidth#ExtUXTW8     : ZPRExtendRegisterOperand<0b0, 0b0, "UXTW", RegWidth, 8>;
+  def ZPR#RegWidth#ExtUXTW16    : ZPRExtendRegisterOperand<0b0, 0b0, "UXTW", RegWidth, 16>;
+  def ZPR#RegWidth#ExtUXTW32    : ZPRExtendRegisterOperand<0b0, 0b0, "UXTW", RegWidth, 32>;
+  def ZPR#RegWidth#ExtUXTW64    : ZPRExtendRegisterOperand<0b0, 0b0, "UXTW", RegWidth, 64>;
+
+  // SXTW(8|16|32|64)
+  def ZPR#RegWidth#AsmOpndExtSXTW8Only : ZPRExtendAsmOperand<"SXTW", RegWidth, 8, 0b1>;
+  def ZPR#RegWidth#AsmOpndExtSXTW8     : ZPRExtendAsmOperand<"SXTW", RegWidth, 8>;
+  def ZPR#RegWidth#AsmOpndExtSXTW16    : ZPRExtendAsmOperand<"SXTW", RegWidth, 16>;
+  def ZPR#RegWidth#AsmOpndExtSXTW32    : ZPRExtendAsmOperand<"SXTW", RegWidth, 32>;
+  def ZPR#RegWidth#AsmOpndExtSXTW64    : ZPRExtendAsmOperand<"SXTW", RegWidth, 64>;
+
+  def ZPR#RegWidth#ExtSXTW8Only : ZPRExtendRegisterOperand<0b1, 0b0, "SXTW", RegWidth, 8, "Only">;
+  def ZPR#RegWidth#ExtSXTW8     : ZPRExtendRegisterOperand<0b1, 0b0, "SXTW", RegWidth, 8>;
+  def ZPR#RegWidth#ExtSXTW16    : ZPRExtendRegisterOperand<0b1, 0b0, "SXTW", RegWidth, 16>;
+  def ZPR#RegWidth#ExtSXTW32    : ZPRExtendRegisterOperand<0b1, 0b0, "SXTW", RegWidth, 32>;
+  def ZPR#RegWidth#ExtSXTW64    : ZPRExtendRegisterOperand<0b1, 0b0, "SXTW", RegWidth, 64>;
+
+  // LSL(8|16|32|64)
+  def ZPR#RegWidth#AsmOpndExtLSL8  : ZPRExtendAsmOperand<"LSL", RegWidth, 8>;
+  def ZPR#RegWidth#AsmOpndExtLSL16 : ZPRExtendAsmOperand<"LSL", RegWidth, 16>;
+  def ZPR#RegWidth#AsmOpndExtLSL32 : ZPRExtendAsmOperand<"LSL", RegWidth, 32>;
+  def ZPR#RegWidth#AsmOpndExtLSL64 : ZPRExtendAsmOperand<"LSL", RegWidth, 64>;
+  def ZPR#RegWidth#ExtLSL8  : ZPRExtendRegisterOperand<0b0, 0b1, "LSL", RegWidth, 8>;
+  def ZPR#RegWidth#ExtLSL16 : ZPRExtendRegisterOperand<0b0, 0b1, "LSL", RegWidth, 16>;
+  def ZPR#RegWidth#ExtLSL32 : ZPRExtendRegisterOperand<0b0, 0b1, "LSL", RegWidth, 32>;
+  def ZPR#RegWidth#ExtLSL64 : ZPRExtendRegisterOperand<0b0, 0b1, "LSL", RegWidth, 64>;
+}
+
+class GPR64ShiftExtendAsmOperand <string AsmOperandName, int Scale, string RegClass> : AsmOperandClass {
+  let Name = AsmOperandName # Scale;
+  let PredicateMethod = "isGPR64WithShiftExtend<AArch64::" # RegClass # "RegClassID, " # Scale # ">";
+  let DiagnosticType = "Invalid" # AsmOperandName # Scale;
+  let RenderMethod = "addRegOperands";
+  let ParserMethod = "tryParseGPROperand";
+}
+
+class GPR64ExtendRegisterOperand<string Name, int Scale, RegisterClass RegClass> : RegisterOperand<RegClass>{
+  let ParserMatchClass = !cast<AsmOperandClass>(Name);
+  let PrintMethod = "printRegWithShiftExtend";
+}
+
+foreach Scale = [8, 16, 32, 64] in {
+  def GPR64shiftedAsmOpnd # Scale : GPR64ShiftExtendAsmOperand<"GPR64shifted", Scale, "GPR64">;
+  def GPR64shifted # Scale : GPR64ExtendRegisterOperand<"GPR64shiftedAsmOpnd" # Scale, Scale, GPR64>;
+
+  def GPR64NoXZRshiftedAsmOpnd # Scale : GPR64ShiftExtendAsmOperand<"GPR64NoXZRshifted", Scale, "GPR64common">;
+  def GPR64NoXZRshifted # Scale : GPR64ExtendRegisterOperand<"GPR64NoXZRshiftedAsmOpnd" # Scale, Scale, GPR64common>;
+}
diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SVEInstrInfo.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SVEInstrInfo.td
new file mode 100644
index 000000000..0fde68011
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SVEInstrInfo.td
@@ -0,0 +1,1024 @@
+//=- AArch64SVEInstrInfo.td - AArch64 SVE Instructions -*- tablegen -*-----=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// AArch64 Scalable Vector Extension (SVE) Instruction definitions. +// +//===----------------------------------------------------------------------===// + +let Predicates = [HasSVE] in { + + def RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr">; + def RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">; + def RDFFR_P : sve_int_rdffr_unpred<"rdffr">; + def SETFFR : sve_int_setffr<"setffr">; + def WRFFR : sve_int_wrffr<"wrffr">; + + defm ADD_ZZZ : sve_int_bin_cons_arit_0<0b000, "add">; + defm SUB_ZZZ : sve_int_bin_cons_arit_0<0b001, "sub">; + defm SQADD_ZZZ : sve_int_bin_cons_arit_0<0b100, "sqadd">; + defm UQADD_ZZZ : sve_int_bin_cons_arit_0<0b101, "uqadd">; + defm SQSUB_ZZZ : sve_int_bin_cons_arit_0<0b110, "sqsub">; + defm UQSUB_ZZZ : sve_int_bin_cons_arit_0<0b111, "uqsub">; + + def AND_ZZZ : sve_int_bin_cons_log<0b00, "and">; + def ORR_ZZZ : sve_int_bin_cons_log<0b01, "orr">; + def EOR_ZZZ : sve_int_bin_cons_log<0b10, "eor">; + def BIC_ZZZ : sve_int_bin_cons_log<0b11, "bic">; + + defm ADD_ZPmZ : sve_int_bin_pred_arit_0<0b000, "add">; + defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub">; + defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr">; + + defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr">; + defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor">; + defm AND_ZPmZ : sve_int_bin_pred_log<0b010, "and">; + defm BIC_ZPmZ : sve_int_bin_pred_log<0b011, "bic">; + + defm ADD_ZI : sve_int_arith_imm0<0b000, "add">; + defm SUB_ZI : sve_int_arith_imm0<0b001, "sub">; + defm SUBR_ZI : sve_int_arith_imm0<0b011, "subr">; + defm SQADD_ZI : sve_int_arith_imm0<0b100, "sqadd">; + defm UQADD_ZI : sve_int_arith_imm0<0b101, "uqadd">; + defm SQSUB_ZI : sve_int_arith_imm0<0b110, "sqsub">; + defm UQSUB_ZI : sve_int_arith_imm0<0b111, "uqsub">; + + defm MAD_ZPmZZ : sve_int_mladdsub_vvv_pred<0b0, "mad">; + defm MSB_ZPmZZ : sve_int_mladdsub_vvv_pred<0b1, "msb">; + defm MLA_ZPmZZ : sve_int_mlas_vvv_pred<0b0, "mla">; + defm MLS_ZPmZZ : sve_int_mlas_vvv_pred<0b1, "mls">; + + // SVE predicated integer reductions. 
+ defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv">; + defm UADDV_VPZ : sve_int_reduce_0_uaddv<0b001, "uaddv">; + defm SMAXV_VPZ : sve_int_reduce_1<0b000, "smaxv">; + defm UMAXV_VPZ : sve_int_reduce_1<0b001, "umaxv">; + defm SMINV_VPZ : sve_int_reduce_1<0b010, "sminv">; + defm UMINV_VPZ : sve_int_reduce_1<0b011, "uminv">; + defm ORV_VPZ : sve_int_reduce_2<0b000, "orv">; + defm EORV_VPZ : sve_int_reduce_2<0b001, "eorv">; + defm ANDV_VPZ : sve_int_reduce_2<0b010, "andv">; + + defm ORR_ZI : sve_int_log_imm<0b00, "orr", "orn">; + defm EOR_ZI : sve_int_log_imm<0b01, "eor", "eon">; + defm AND_ZI : sve_int_log_imm<0b10, "and", "bic">; + + defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", simm8>; + defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", simm8>; + defm UMAX_ZI : sve_int_arith_imm1<0b01, "umax", imm0_255>; + defm UMIN_ZI : sve_int_arith_imm1<0b11, "umin", imm0_255>; + + defm MUL_ZI : sve_int_arith_imm2<"mul">; + defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul">; + defm SMULH_ZPmZ : sve_int_bin_pred_arit_2<0b010, "smulh">; + defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh">; + + defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv">; + defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv">; + defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr">; + defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr">; + + defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot">; + defm UDOT_ZZZ : sve_intx_dot<0b1, "udot">; + + defm SDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b0, "sdot">; + defm UDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b1, "udot">; + + defm SXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b000, "sxtb">; + defm UXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b001, "uxtb">; + defm SXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b010, "sxth">; + defm UXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b011, "uxth">; + defm SXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b100, "sxtw">; + defm UXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b101, "uxtw">; + defm ABS_ZPmZ : sve_int_un_pred_arit_0< 0b110, "abs">; + defm NEG_ZPmZ : sve_int_un_pred_arit_0< 0b111, "neg">; + + defm CLS_ZPmZ : sve_int_un_pred_arit_1< 0b000, "cls">; + defm CLZ_ZPmZ : sve_int_un_pred_arit_1< 0b001, "clz">; + defm CNT_ZPmZ : sve_int_un_pred_arit_1< 0b010, "cnt">; + defm CNOT_ZPmZ : sve_int_un_pred_arit_1< 0b011, "cnot">; + defm NOT_ZPmZ : sve_int_un_pred_arit_1< 0b110, "not">; + defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs">; + defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg">; + + defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax">; + defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax">; + defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin">; + defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin">; + defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd">; + defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd">; + + defm FRECPE_ZZ : sve_fp_2op_u_zd<0b110, "frecpe">; + defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte">; + + defm FADD_ZPmI : sve_fp_2op_i_p_zds<0b000, "fadd", sve_fpimm_half_one>; + defm FSUB_ZPmI : sve_fp_2op_i_p_zds<0b001, "fsub", sve_fpimm_half_one>; + defm FMUL_ZPmI : sve_fp_2op_i_p_zds<0b010, "fmul", sve_fpimm_half_two>; + defm FSUBR_ZPmI : sve_fp_2op_i_p_zds<0b011, "fsubr", sve_fpimm_half_one>; + defm FMAXNM_ZPmI : sve_fp_2op_i_p_zds<0b100, "fmaxnm", sve_fpimm_zero_one>; + defm FMINNM_ZPmI : sve_fp_2op_i_p_zds<0b101, "fminnm", sve_fpimm_zero_one>; + defm FMAX_ZPmI : sve_fp_2op_i_p_zds<0b110, "fmax", sve_fpimm_zero_one>; + defm FMIN_ZPmI : sve_fp_2op_i_p_zds<0b111, "fmin", sve_fpimm_zero_one>; + + defm 
FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd">; + defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub">; + defm FMUL_ZPmZ : sve_fp_2op_p_zds<0b0010, "fmul">; + defm FSUBR_ZPmZ : sve_fp_2op_p_zds<0b0011, "fsubr">; + defm FMAXNM_ZPmZ : sve_fp_2op_p_zds<0b0100, "fmaxnm">; + defm FMINNM_ZPmZ : sve_fp_2op_p_zds<0b0101, "fminnm">; + defm FMAX_ZPmZ : sve_fp_2op_p_zds<0b0110, "fmax">; + defm FMIN_ZPmZ : sve_fp_2op_p_zds<0b0111, "fmin">; + defm FABD_ZPmZ : sve_fp_2op_p_zds<0b1000, "fabd">; + defm FSCALE_ZPmZ : sve_fp_2op_p_zds<0b1001, "fscale">; + defm FMULX_ZPmZ : sve_fp_2op_p_zds<0b1010, "fmulx">; + defm FDIVR_ZPmZ : sve_fp_2op_p_zds<0b1100, "fdivr">; + defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv">; + + defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd">; + defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub">; + defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul">; + defm FTSMUL_ZZZ : sve_fp_3op_u_zd<0b011, "ftsmul">; + defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps">; + defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts">; + + defm FTSSEL_ZZZ : sve_int_bin_cons_misc_0_b<"ftssel">; + + defm FCADD_ZPmZ : sve_fp_fcadd<"fcadd">; + defm FCMLA_ZPmZZ : sve_fp_fcmla<"fcmla">; + + defm FMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b00, "fmla">; + defm FMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b01, "fmls">; + defm FNMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b10, "fnmla">; + defm FNMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b11, "fnmls">; + + defm FMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b00, "fmad">; + defm FMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b01, "fmsb">; + defm FNMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b10, "fnmad">; + defm FNMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b11, "fnmsb">; + + defm FTMAD_ZZI : sve_fp_ftmad<"ftmad">; + + defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b0, "fmla">; + defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b1, "fmls">; + + defm FCMLA_ZZZI : sve_fp_fcmla_by_indexed_elem<"fcmla">; + defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul">; + + // SVE floating point reductions. 
+ defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda">; + defm FADDV_VPZ : sve_fp_fast_red<0b000, "faddv">; + defm FMAXNMV_VPZ : sve_fp_fast_red<0b100, "fmaxnmv">; + defm FMINNMV_VPZ : sve_fp_fast_red<0b101, "fminnmv">; + defm FMAXV_VPZ : sve_fp_fast_red<0b110, "fmaxv">; + defm FMINV_VPZ : sve_fp_fast_red<0b111, "fminv">; + + // Splat immediate (unpredicated) + defm DUP_ZI : sve_int_dup_imm<"dup">; + defm FDUP_ZI : sve_int_dup_fpimm<"fdup">; + defm DUPM_ZI : sve_int_dup_mask_imm<"dupm">; + + // Splat immediate (predicated) + defm CPY_ZPmI : sve_int_dup_imm_pred_merge<"cpy">; + defm CPY_ZPzI : sve_int_dup_imm_pred_zero<"cpy">; + defm FCPY_ZPmI : sve_int_dup_fpimm_pred<"fcpy">; + + // Splat scalar register (unpredicated, GPR or vector + element index) + defm DUP_ZR : sve_int_perm_dup_r<"dup">; + defm DUP_ZZI : sve_int_perm_dup_i<"dup">; + + // Splat scalar register (predicated) + defm CPY_ZPmR : sve_int_perm_cpy_r<"cpy">; + defm CPY_ZPmV : sve_int_perm_cpy_v<"cpy">; + + // Select elements from either vector (predicated) + defm SEL_ZPZZ : sve_int_sel_vvv<"sel">; + + defm SPLICE_ZPZ : sve_int_perm_splice<"splice">; + defm COMPACT_ZPZ : sve_int_perm_compact<"compact">; + defm INSR_ZR : sve_int_perm_insrs<"insr">; + defm INSR_ZV : sve_int_perm_insrv<"insr">; + def EXT_ZZI : sve_int_perm_extract_i<"ext">; + + defm RBIT_ZPmZ : sve_int_perm_rev_rbit<"rbit">; + defm REVB_ZPmZ : sve_int_perm_rev_revb<"revb">; + defm REVH_ZPmZ : sve_int_perm_rev_revh<"revh">; + defm REVW_ZPmZ : sve_int_perm_rev_revw<"revw">; + + defm REV_PP : sve_int_perm_reverse_p<"rev">; + defm REV_ZZ : sve_int_perm_reverse_z<"rev">; + + defm SUNPKLO_ZZ : sve_int_perm_unpk<0b00, "sunpklo">; + defm SUNPKHI_ZZ : sve_int_perm_unpk<0b01, "sunpkhi">; + defm UUNPKLO_ZZ : sve_int_perm_unpk<0b10, "uunpklo">; + defm UUNPKHI_ZZ : sve_int_perm_unpk<0b11, "uunpkhi">; + + def PUNPKLO_PP : sve_int_perm_punpk<0b0, "punpklo">; + def PUNPKHI_PP : sve_int_perm_punpk<0b1, "punpkhi">; + + defm MOVPRFX_ZPzZ : sve_int_movprfx_pred_zero<0b000, "movprfx">; + defm MOVPRFX_ZPmZ : sve_int_movprfx_pred_merge<0b001, "movprfx">; + def MOVPRFX_ZZ : sve_int_bin_cons_misc_0_c<0b00000001, "movprfx", ZPRAny>; + def FEXPA_ZZ_H : sve_int_bin_cons_misc_0_c<0b01000000, "fexpa", ZPR16>; + def FEXPA_ZZ_S : sve_int_bin_cons_misc_0_c<0b10000000, "fexpa", ZPR32>; + def FEXPA_ZZ_D : sve_int_bin_cons_misc_0_c<0b11000000, "fexpa", ZPR64>; + + def BRKPA_PPzPP : sve_int_brkp<0b00, "brkpa">; + def BRKPAS_PPzPP : sve_int_brkp<0b10, "brkpas">; + def BRKPB_PPzPP : sve_int_brkp<0b01, "brkpb">; + def BRKPBS_PPzPP : sve_int_brkp<0b11, "brkpbs">; + + def BRKN_PPzP : sve_int_brkn<0b0, "brkn">; + def BRKNS_PPzP : sve_int_brkn<0b1, "brkns">; + + defm BRKA_PPzP : sve_int_break_z<0b000, "brka">; + defm BRKA_PPmP : sve_int_break_m<0b001, "brka">; + defm BRKAS_PPzP : sve_int_break_z<0b010, "brkas">; + defm BRKB_PPzP : sve_int_break_z<0b100, "brkb">; + defm BRKB_PPmP : sve_int_break_m<0b101, "brkb">; + defm BRKBS_PPzP : sve_int_break_z<0b110, "brkbs">; + + def PTEST_PP : sve_int_ptest<0b010000, "ptest">; + def PFALSE : sve_int_pfalse<0b000000, "pfalse">; + defm PFIRST : sve_int_pfirst<0b00000, "pfirst">; + defm PNEXT : sve_int_pnext<0b00110, "pnext">; + + def AND_PPzPP : sve_int_pred_log<0b0000, "and">; + def BIC_PPzPP : sve_int_pred_log<0b0001, "bic">; + def EOR_PPzPP : sve_int_pred_log<0b0010, "eor">; + def SEL_PPPP : sve_int_pred_log<0b0011, "sel">; + def ANDS_PPzPP : sve_int_pred_log<0b0100, "ands">; + def BICS_PPzPP : sve_int_pred_log<0b0101, "bics">; + def EORS_PPzPP : sve_int_pred_log<0b0110, 
"eors">; + def ORR_PPzPP : sve_int_pred_log<0b1000, "orr">; + def ORN_PPzPP : sve_int_pred_log<0b1001, "orn">; + def NOR_PPzPP : sve_int_pred_log<0b1010, "nor">; + def NAND_PPzPP : sve_int_pred_log<0b1011, "nand">; + def ORRS_PPzPP : sve_int_pred_log<0b1100, "orrs">; + def ORNS_PPzPP : sve_int_pred_log<0b1101, "orns">; + def NORS_PPzPP : sve_int_pred_log<0b1110, "nors">; + def NANDS_PPzPP : sve_int_pred_log<0b1111, "nands">; + + defm CLASTA_RPZ : sve_int_perm_clast_rz<0, "clasta">; + defm CLASTB_RPZ : sve_int_perm_clast_rz<1, "clastb">; + defm CLASTA_VPZ : sve_int_perm_clast_vz<0, "clasta">; + defm CLASTB_VPZ : sve_int_perm_clast_vz<1, "clastb">; + defm CLASTA_ZPZ : sve_int_perm_clast_zz<0, "clasta">; + defm CLASTB_ZPZ : sve_int_perm_clast_zz<1, "clastb">; + + defm LASTA_RPZ : sve_int_perm_last_r<0, "lasta">; + defm LASTB_RPZ : sve_int_perm_last_r<1, "lastb">; + defm LASTA_VPZ : sve_int_perm_last_v<0, "lasta">; + defm LASTB_VPZ : sve_int_perm_last_v<1, "lastb">; + + // continuous load with reg+immediate + defm LD1B_IMM : sve_mem_cld_si<0b0000, "ld1b", Z_b, ZPR8>; + defm LD1B_H_IMM : sve_mem_cld_si<0b0001, "ld1b", Z_h, ZPR16>; + defm LD1B_S_IMM : sve_mem_cld_si<0b0010, "ld1b", Z_s, ZPR32>; + defm LD1B_D_IMM : sve_mem_cld_si<0b0011, "ld1b", Z_d, ZPR64>; + defm LD1SW_D_IMM : sve_mem_cld_si<0b0100, "ld1sw", Z_d, ZPR64>; + defm LD1H_IMM : sve_mem_cld_si<0b0101, "ld1h", Z_h, ZPR16>; + defm LD1H_S_IMM : sve_mem_cld_si<0b0110, "ld1h", Z_s, ZPR32>; + defm LD1H_D_IMM : sve_mem_cld_si<0b0111, "ld1h", Z_d, ZPR64>; + defm LD1SH_D_IMM : sve_mem_cld_si<0b1000, "ld1sh", Z_d, ZPR64>; + defm LD1SH_S_IMM : sve_mem_cld_si<0b1001, "ld1sh", Z_s, ZPR32>; + defm LD1W_IMM : sve_mem_cld_si<0b1010, "ld1w", Z_s, ZPR32>; + defm LD1W_D_IMM : sve_mem_cld_si<0b1011, "ld1w", Z_d, ZPR64>; + defm LD1SB_D_IMM : sve_mem_cld_si<0b1100, "ld1sb", Z_d, ZPR64>; + defm LD1SB_S_IMM : sve_mem_cld_si<0b1101, "ld1sb", Z_s, ZPR32>; + defm LD1SB_H_IMM : sve_mem_cld_si<0b1110, "ld1sb", Z_h, ZPR16>; + defm LD1D_IMM : sve_mem_cld_si<0b1111, "ld1d", Z_d, ZPR64>; + + // LD1R loads (splat scalar to vector) + defm LD1RB_IMM : sve_mem_ld_dup<0b00, 0b00, "ld1rb", Z_b, ZPR8, uimm6s1>; + defm LD1RB_H_IMM : sve_mem_ld_dup<0b00, 0b01, "ld1rb", Z_h, ZPR16, uimm6s1>; + defm LD1RB_S_IMM : sve_mem_ld_dup<0b00, 0b10, "ld1rb", Z_s, ZPR32, uimm6s1>; + defm LD1RB_D_IMM : sve_mem_ld_dup<0b00, 0b11, "ld1rb", Z_d, ZPR64, uimm6s1>; + defm LD1RSW_IMM : sve_mem_ld_dup<0b01, 0b00, "ld1rsw", Z_d, ZPR64, uimm6s4>; + defm LD1RH_IMM : sve_mem_ld_dup<0b01, 0b01, "ld1rh", Z_h, ZPR16, uimm6s2>; + defm LD1RH_S_IMM : sve_mem_ld_dup<0b01, 0b10, "ld1rh", Z_s, ZPR32, uimm6s2>; + defm LD1RH_D_IMM : sve_mem_ld_dup<0b01, 0b11, "ld1rh", Z_d, ZPR64, uimm6s2>; + defm LD1RSH_D_IMM : sve_mem_ld_dup<0b10, 0b00, "ld1rsh", Z_d, ZPR64, uimm6s2>; + defm LD1RSH_S_IMM : sve_mem_ld_dup<0b10, 0b01, "ld1rsh", Z_s, ZPR32, uimm6s2>; + defm LD1RW_IMM : sve_mem_ld_dup<0b10, 0b10, "ld1rw", Z_s, ZPR32, uimm6s4>; + defm LD1RW_D_IMM : sve_mem_ld_dup<0b10, 0b11, "ld1rw", Z_d, ZPR64, uimm6s4>; + defm LD1RSB_D_IMM : sve_mem_ld_dup<0b11, 0b00, "ld1rsb", Z_d, ZPR64, uimm6s1>; + defm LD1RSB_S_IMM : sve_mem_ld_dup<0b11, 0b01, "ld1rsb", Z_s, ZPR32, uimm6s1>; + defm LD1RSB_H_IMM : sve_mem_ld_dup<0b11, 0b10, "ld1rsb", Z_h, ZPR16, uimm6s1>; + defm LD1RD_IMM : sve_mem_ld_dup<0b11, 0b11, "ld1rd", Z_d, ZPR64, uimm6s8>; + + // LD1RQ loads (load quadword-vector and splat to scalable vector) + defm LD1RQ_B_IMM : sve_mem_ldqr_si<0b00, "ld1rqb", Z_b, ZPR8>; + defm LD1RQ_H_IMM : sve_mem_ldqr_si<0b01, "ld1rqh", Z_h, 
ZPR16>; + defm LD1RQ_W_IMM : sve_mem_ldqr_si<0b10, "ld1rqw", Z_s, ZPR32>; + defm LD1RQ_D_IMM : sve_mem_ldqr_si<0b11, "ld1rqd", Z_d, ZPR64>; + defm LD1RQ_B : sve_mem_ldqr_ss<0b00, "ld1rqb", Z_b, ZPR8, GPR64NoXZRshifted8>; + defm LD1RQ_H : sve_mem_ldqr_ss<0b01, "ld1rqh", Z_h, ZPR16, GPR64NoXZRshifted16>; + defm LD1RQ_W : sve_mem_ldqr_ss<0b10, "ld1rqw", Z_s, ZPR32, GPR64NoXZRshifted32>; + defm LD1RQ_D : sve_mem_ldqr_ss<0b11, "ld1rqd", Z_d, ZPR64, GPR64NoXZRshifted64>; + + // continuous load with reg+reg addressing. + defm LD1B : sve_mem_cld_ss<0b0000, "ld1b", Z_b, ZPR8, GPR64NoXZRshifted8>; + defm LD1B_H : sve_mem_cld_ss<0b0001, "ld1b", Z_h, ZPR16, GPR64NoXZRshifted8>; + defm LD1B_S : sve_mem_cld_ss<0b0010, "ld1b", Z_s, ZPR32, GPR64NoXZRshifted8>; + defm LD1B_D : sve_mem_cld_ss<0b0011, "ld1b", Z_d, ZPR64, GPR64NoXZRshifted8>; + defm LD1SW_D : sve_mem_cld_ss<0b0100, "ld1sw", Z_d, ZPR64, GPR64NoXZRshifted32>; + defm LD1H : sve_mem_cld_ss<0b0101, "ld1h", Z_h, ZPR16, GPR64NoXZRshifted16>; + defm LD1H_S : sve_mem_cld_ss<0b0110, "ld1h", Z_s, ZPR32, GPR64NoXZRshifted16>; + defm LD1H_D : sve_mem_cld_ss<0b0111, "ld1h", Z_d, ZPR64, GPR64NoXZRshifted16>; + defm LD1SH_D : sve_mem_cld_ss<0b1000, "ld1sh", Z_d, ZPR64, GPR64NoXZRshifted16>; + defm LD1SH_S : sve_mem_cld_ss<0b1001, "ld1sh", Z_s, ZPR32, GPR64NoXZRshifted16>; + defm LD1W : sve_mem_cld_ss<0b1010, "ld1w", Z_s, ZPR32, GPR64NoXZRshifted32>; + defm LD1W_D : sve_mem_cld_ss<0b1011, "ld1w", Z_d, ZPR64, GPR64NoXZRshifted32>; + defm LD1SB_D : sve_mem_cld_ss<0b1100, "ld1sb", Z_d, ZPR64, GPR64NoXZRshifted8>; + defm LD1SB_S : sve_mem_cld_ss<0b1101, "ld1sb", Z_s, ZPR32, GPR64NoXZRshifted8>; + defm LD1SB_H : sve_mem_cld_ss<0b1110, "ld1sb", Z_h, ZPR16, GPR64NoXZRshifted8>; + defm LD1D : sve_mem_cld_ss<0b1111, "ld1d", Z_d, ZPR64, GPR64NoXZRshifted64>; + + // non-faulting continuous load with reg+immediate + defm LDNF1B_IMM : sve_mem_cldnf_si<0b0000, "ldnf1b", Z_b, ZPR8>; + defm LDNF1B_H_IMM : sve_mem_cldnf_si<0b0001, "ldnf1b", Z_h, ZPR16>; + defm LDNF1B_S_IMM : sve_mem_cldnf_si<0b0010, "ldnf1b", Z_s, ZPR32>; + defm LDNF1B_D_IMM : sve_mem_cldnf_si<0b0011, "ldnf1b", Z_d, ZPR64>; + defm LDNF1SW_D_IMM : sve_mem_cldnf_si<0b0100, "ldnf1sw", Z_d, ZPR64>; + defm LDNF1H_IMM : sve_mem_cldnf_si<0b0101, "ldnf1h", Z_h, ZPR16>; + defm LDNF1H_S_IMM : sve_mem_cldnf_si<0b0110, "ldnf1h", Z_s, ZPR32>; + defm LDNF1H_D_IMM : sve_mem_cldnf_si<0b0111, "ldnf1h", Z_d, ZPR64>; + defm LDNF1SH_D_IMM : sve_mem_cldnf_si<0b1000, "ldnf1sh", Z_d, ZPR64>; + defm LDNF1SH_S_IMM : sve_mem_cldnf_si<0b1001, "ldnf1sh", Z_s, ZPR32>; + defm LDNF1W_IMM : sve_mem_cldnf_si<0b1010, "ldnf1w", Z_s, ZPR32>; + defm LDNF1W_D_IMM : sve_mem_cldnf_si<0b1011, "ldnf1w", Z_d, ZPR64>; + defm LDNF1SB_D_IMM : sve_mem_cldnf_si<0b1100, "ldnf1sb", Z_d, ZPR64>; + defm LDNF1SB_S_IMM : sve_mem_cldnf_si<0b1101, "ldnf1sb", Z_s, ZPR32>; + defm LDNF1SB_H_IMM : sve_mem_cldnf_si<0b1110, "ldnf1sb", Z_h, ZPR16>; + defm LDNF1D_IMM : sve_mem_cldnf_si<0b1111, "ldnf1d", Z_d, ZPR64>; + + // First-faulting loads with reg+reg addressing. 
+ defm LDFF1B : sve_mem_cldff_ss<0b0000, "ldff1b", Z_b, ZPR8, GPR64shifted8>; + defm LDFF1B_H : sve_mem_cldff_ss<0b0001, "ldff1b", Z_h, ZPR16, GPR64shifted8>; + defm LDFF1B_S : sve_mem_cldff_ss<0b0010, "ldff1b", Z_s, ZPR32, GPR64shifted8>; + defm LDFF1B_D : sve_mem_cldff_ss<0b0011, "ldff1b", Z_d, ZPR64, GPR64shifted8>; + defm LDFF1SW_D : sve_mem_cldff_ss<0b0100, "ldff1sw", Z_d, ZPR64, GPR64shifted32>; + defm LDFF1H : sve_mem_cldff_ss<0b0101, "ldff1h", Z_h, ZPR16, GPR64shifted16>; + defm LDFF1H_S : sve_mem_cldff_ss<0b0110, "ldff1h", Z_s, ZPR32, GPR64shifted16>; + defm LDFF1H_D : sve_mem_cldff_ss<0b0111, "ldff1h", Z_d, ZPR64, GPR64shifted16>; + defm LDFF1SH_D : sve_mem_cldff_ss<0b1000, "ldff1sh", Z_d, ZPR64, GPR64shifted16>; + defm LDFF1SH_S : sve_mem_cldff_ss<0b1001, "ldff1sh", Z_s, ZPR32, GPR64shifted16>; + defm LDFF1W : sve_mem_cldff_ss<0b1010, "ldff1w", Z_s, ZPR32, GPR64shifted32>; + defm LDFF1W_D : sve_mem_cldff_ss<0b1011, "ldff1w", Z_d, ZPR64, GPR64shifted32>; + defm LDFF1SB_D : sve_mem_cldff_ss<0b1100, "ldff1sb", Z_d, ZPR64, GPR64shifted8>; + defm LDFF1SB_S : sve_mem_cldff_ss<0b1101, "ldff1sb", Z_s, ZPR32, GPR64shifted8>; + defm LDFF1SB_H : sve_mem_cldff_ss<0b1110, "ldff1sb", Z_h, ZPR16, GPR64shifted8>; + defm LDFF1D : sve_mem_cldff_ss<0b1111, "ldff1d", Z_d, ZPR64, GPR64shifted64>; + + // LD(2|3|4) structured loads with reg+immediate + defm LD2B_IMM : sve_mem_eld_si<0b00, 0b01, ZZ_b, "ld2b", simm4s2>; + defm LD3B_IMM : sve_mem_eld_si<0b00, 0b10, ZZZ_b, "ld3b", simm4s3>; + defm LD4B_IMM : sve_mem_eld_si<0b00, 0b11, ZZZZ_b, "ld4b", simm4s4>; + defm LD2H_IMM : sve_mem_eld_si<0b01, 0b01, ZZ_h, "ld2h", simm4s2>; + defm LD3H_IMM : sve_mem_eld_si<0b01, 0b10, ZZZ_h, "ld3h", simm4s3>; + defm LD4H_IMM : sve_mem_eld_si<0b01, 0b11, ZZZZ_h, "ld4h", simm4s4>; + defm LD2W_IMM : sve_mem_eld_si<0b10, 0b01, ZZ_s, "ld2w", simm4s2>; + defm LD3W_IMM : sve_mem_eld_si<0b10, 0b10, ZZZ_s, "ld3w", simm4s3>; + defm LD4W_IMM : sve_mem_eld_si<0b10, 0b11, ZZZZ_s, "ld4w", simm4s4>; + defm LD2D_IMM : sve_mem_eld_si<0b11, 0b01, ZZ_d, "ld2d", simm4s2>; + defm LD3D_IMM : sve_mem_eld_si<0b11, 0b10, ZZZ_d, "ld3d", simm4s3>; + defm LD4D_IMM : sve_mem_eld_si<0b11, 0b11, ZZZZ_d, "ld4d", simm4s4>; + + // LD(2|3|4) structured loads (register + register) + def LD2B : sve_mem_eld_ss<0b00, 0b01, ZZ_b, "ld2b", GPR64NoXZRshifted8>; + def LD3B : sve_mem_eld_ss<0b00, 0b10, ZZZ_b, "ld3b", GPR64NoXZRshifted8>; + def LD4B : sve_mem_eld_ss<0b00, 0b11, ZZZZ_b, "ld4b", GPR64NoXZRshifted8>; + def LD2H : sve_mem_eld_ss<0b01, 0b01, ZZ_h, "ld2h", GPR64NoXZRshifted16>; + def LD3H : sve_mem_eld_ss<0b01, 0b10, ZZZ_h, "ld3h", GPR64NoXZRshifted16>; + def LD4H : sve_mem_eld_ss<0b01, 0b11, ZZZZ_h, "ld4h", GPR64NoXZRshifted16>; + def LD2W : sve_mem_eld_ss<0b10, 0b01, ZZ_s, "ld2w", GPR64NoXZRshifted32>; + def LD3W : sve_mem_eld_ss<0b10, 0b10, ZZZ_s, "ld3w", GPR64NoXZRshifted32>; + def LD4W : sve_mem_eld_ss<0b10, 0b11, ZZZZ_s, "ld4w", GPR64NoXZRshifted32>; + def LD2D : sve_mem_eld_ss<0b11, 0b01, ZZ_d, "ld2d", GPR64NoXZRshifted64>; + def LD3D : sve_mem_eld_ss<0b11, 0b10, ZZZ_d, "ld3d", GPR64NoXZRshifted64>; + def LD4D : sve_mem_eld_ss<0b11, 0b11, ZZZZ_d, "ld4d", GPR64NoXZRshifted64>; + + // Gathers using unscaled 32-bit offsets, e.g. 
+ // ld1h z0.s, p0/z, [x0, z0.s, uxtw] + defm GLD1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0000, "ld1sb", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>; + defm GLDFF1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0001, "ldff1sb", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>; + defm GLD1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0010, "ld1b", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>; + defm GLDFF1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0011, "ldff1b", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>; + defm GLD1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0100, "ld1sh", ZPR32ExtSXTW8, ZPR32ExtUXTW8>; + defm GLDFF1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0101, "ldff1sh", ZPR32ExtSXTW8, ZPR32ExtUXTW8>; + defm GLD1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0110, "ld1h", ZPR32ExtSXTW8, ZPR32ExtUXTW8>; + defm GLDFF1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0111, "ldff1h", ZPR32ExtSXTW8, ZPR32ExtUXTW8>; + defm GLD1W : sve_mem_32b_gld_vs_32_unscaled<0b1010, "ld1w", ZPR32ExtSXTW8, ZPR32ExtUXTW8>; + defm GLDFF1W : sve_mem_32b_gld_vs_32_unscaled<0b1011, "ldff1w", ZPR32ExtSXTW8, ZPR32ExtUXTW8>; + + // Gathers using scaled 32-bit offsets, e.g. + // ld1h z0.s, p0/z, [x0, z0.s, uxtw #1] + defm GLD1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0100, "ld1sh", ZPR32ExtSXTW16, ZPR32ExtUXTW16>; + defm GLDFF1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0101, "ldff1sh", ZPR32ExtSXTW16, ZPR32ExtUXTW16>; + defm GLD1H_S : sve_mem_32b_gld_sv_32_scaled<0b0110, "ld1h", ZPR32ExtSXTW16, ZPR32ExtUXTW16>; + defm GLDFF1H_S : sve_mem_32b_gld_sv_32_scaled<0b0111, "ldff1h", ZPR32ExtSXTW16, ZPR32ExtUXTW16>; + defm GLD1W : sve_mem_32b_gld_sv_32_scaled<0b1010, "ld1w", ZPR32ExtSXTW32, ZPR32ExtUXTW32>; + defm GLDFF1W : sve_mem_32b_gld_sv_32_scaled<0b1011, "ldff1w", ZPR32ExtSXTW32, ZPR32ExtUXTW32>; + + // Gathers using scaled 32-bit pointers with offset, e.g. + // ld1h z0.s, p0/z, [z0.s, #16] + defm GLD1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0000, "ld1sb", imm0_31>; + defm GLDFF1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0001, "ldff1sb", imm0_31>; + defm GLD1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0010, "ld1b", imm0_31>; + defm GLDFF1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0011, "ldff1b", imm0_31>; + defm GLD1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0100, "ld1sh", uimm5s2>; + defm GLDFF1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0101, "ldff1sh", uimm5s2>; + defm GLD1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0110, "ld1h", uimm5s2>; + defm GLDFF1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0111, "ldff1h", uimm5s2>; + defm GLD1W : sve_mem_32b_gld_vi_32_ptrs<0b1010, "ld1w", uimm5s4>; + defm GLDFF1W : sve_mem_32b_gld_vi_32_ptrs<0b1011, "ldff1w", uimm5s4>; + + // Gathers using scaled 64-bit pointers with offset, e.g. 
+ // ld1h z0.d, p0/z, [z0.d, #16] + defm GLD1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0000, "ld1sb", imm0_31>; + defm GLDFF1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0001, "ldff1sb", imm0_31>; + defm GLD1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0010, "ld1b", imm0_31>; + defm GLDFF1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0011, "ldff1b", imm0_31>; + defm GLD1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0100, "ld1sh", uimm5s2>; + defm GLDFF1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0101, "ldff1sh", uimm5s2>; + defm GLD1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0110, "ld1h", uimm5s2>; + defm GLDFF1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0111, "ldff1h", uimm5s2>; + defm GLD1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1000, "ld1sw", uimm5s4>; + defm GLDFF1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1001, "ldff1sw", uimm5s4>; + defm GLD1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1010, "ld1w", uimm5s4>; + defm GLDFF1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1011, "ldff1w", uimm5s4>; + defm GLD1D : sve_mem_64b_gld_vi_64_ptrs<0b1110, "ld1d", uimm5s8>; + defm GLDFF1D : sve_mem_64b_gld_vi_64_ptrs<0b1111, "ldff1d", uimm5s8>; + + // Gathers using unscaled 64-bit offsets, e.g. + // ld1h z0.d, p0/z, [x0, z0.d] + defm GLD1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0000, "ld1sb">; + defm GLDFF1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0001, "ldff1sb">; + defm GLD1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0010, "ld1b">; + defm GLDFF1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0011, "ldff1b">; + defm GLD1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0100, "ld1sh">; + defm GLDFF1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0101, "ldff1sh">; + defm GLD1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0110, "ld1h">; + defm GLDFF1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0111, "ldff1h">; + defm GLD1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1000, "ld1sw">; + defm GLDFF1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1001, "ldff1sw">; + defm GLD1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1010, "ld1w">; + defm GLDFF1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1011, "ldff1w">; + defm GLD1D : sve_mem_64b_gld_vs2_64_unscaled<0b1110, "ld1d">; + defm GLDFF1D : sve_mem_64b_gld_vs2_64_unscaled<0b1111, "ldff1d">; + + // Gathers using scaled 64-bit offsets, e.g. + // ld1h z0.d, p0/z, [x0, z0.d, lsl #1] + defm GLD1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0100, "ld1sh", ZPR64ExtLSL16>; + defm GLDFF1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0101, "ldff1sh", ZPR64ExtLSL16>; + defm GLD1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0110, "ld1h", ZPR64ExtLSL16>; + defm GLDFF1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0111, "ldff1h", ZPR64ExtLSL16>; + defm GLD1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1000, "ld1sw", ZPR64ExtLSL32>; + defm GLDFF1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1001, "ldff1sw", ZPR64ExtLSL32>; + defm GLD1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1010, "ld1w", ZPR64ExtLSL32>; + defm GLDFF1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1011, "ldff1w", ZPR64ExtLSL32>; + defm GLD1D : sve_mem_64b_gld_sv2_64_scaled<0b1110, "ld1d", ZPR64ExtLSL64>; + defm GLDFF1D : sve_mem_64b_gld_sv2_64_scaled<0b1111, "ldff1d", ZPR64ExtLSL64>; + + // Gathers using unscaled 32-bit offsets unpacked in 64-bits elements, e.g. 
+ // ld1h z0.d, p0/z, [x0, z0.d, uxtw] + defm GLD1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0000, "ld1sb", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>; + defm GLDFF1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0001, "ldff1sb", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>; + defm GLD1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0010, "ld1b", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>; + defm GLDFF1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0011, "ldff1b", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>; + defm GLD1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0100, "ld1sh", ZPR64ExtSXTW8, ZPR64ExtUXTW8>; + defm GLDFF1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0101, "ldff1sh", ZPR64ExtSXTW8, ZPR64ExtUXTW8>; + defm GLD1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0110, "ld1h", ZPR64ExtSXTW8, ZPR64ExtUXTW8>; + defm GLDFF1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0111, "ldff1h", ZPR64ExtSXTW8, ZPR64ExtUXTW8>; + defm GLD1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1000, "ld1sw", ZPR64ExtSXTW8, ZPR64ExtUXTW8>; + defm GLDFF1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1001, "ldff1sw", ZPR64ExtSXTW8, ZPR64ExtUXTW8>; + defm GLD1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1010, "ld1w", ZPR64ExtSXTW8, ZPR64ExtUXTW8>; + defm GLDFF1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1011, "ldff1w", ZPR64ExtSXTW8, ZPR64ExtUXTW8>; + defm GLD1D : sve_mem_64b_gld_vs_32_unscaled<0b1110, "ld1d", ZPR64ExtSXTW8, ZPR64ExtUXTW8>; + defm GLDFF1D : sve_mem_64b_gld_vs_32_unscaled<0b1111, "ldff1d", ZPR64ExtSXTW8, ZPR64ExtUXTW8>; + + // Gathers using scaled 32-bit offsets unpacked in 64-bits elements, e.g. + // ld1h z0.d, p0/z, [x0, z0.d, uxtw #1] + defm GLD1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0100, "ld1sh", ZPR64ExtSXTW16, ZPR64ExtUXTW16>; + defm GLDFF1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0101, "ldff1sh",ZPR64ExtSXTW16, ZPR64ExtUXTW16>; + defm GLD1H_D : sve_mem_64b_gld_sv_32_scaled<0b0110, "ld1h", ZPR64ExtSXTW16, ZPR64ExtUXTW16>; + defm GLDFF1H_D : sve_mem_64b_gld_sv_32_scaled<0b0111, "ldff1h", ZPR64ExtSXTW16, ZPR64ExtUXTW16>; + defm GLD1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1000, "ld1sw", ZPR64ExtSXTW32, ZPR64ExtUXTW32>; + defm GLDFF1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1001, "ldff1sw",ZPR64ExtSXTW32, ZPR64ExtUXTW32>; + defm GLD1W_D : sve_mem_64b_gld_sv_32_scaled<0b1010, "ld1w", ZPR64ExtSXTW32, ZPR64ExtUXTW32>; + defm GLDFF1W_D : sve_mem_64b_gld_sv_32_scaled<0b1011, "ldff1w", ZPR64ExtSXTW32, ZPR64ExtUXTW32>; + defm GLD1D : sve_mem_64b_gld_sv_32_scaled<0b1110, "ld1d", ZPR64ExtSXTW64, ZPR64ExtUXTW64>; + defm GLDFF1D : sve_mem_64b_gld_sv_32_scaled<0b1111, "ldff1d", ZPR64ExtSXTW64, ZPR64ExtUXTW64>; + + // Non-temporal contiguous loads (register + immediate) + defm LDNT1B_ZRI : sve_mem_cldnt_si<0b00, "ldnt1b", Z_b, ZPR8>; + defm LDNT1H_ZRI : sve_mem_cldnt_si<0b01, "ldnt1h", Z_h, ZPR16>; + defm LDNT1W_ZRI : sve_mem_cldnt_si<0b10, "ldnt1w", Z_s, ZPR32>; + defm LDNT1D_ZRI : sve_mem_cldnt_si<0b11, "ldnt1d", Z_d, ZPR64>; + + // Non-temporal contiguous loads (register + register) + defm LDNT1B_ZRR : sve_mem_cldnt_ss<0b00, "ldnt1b", Z_b, ZPR8, GPR64NoXZRshifted8>; + defm LDNT1H_ZRR : sve_mem_cldnt_ss<0b01, "ldnt1h", Z_h, ZPR16, GPR64NoXZRshifted16>; + defm LDNT1W_ZRR : sve_mem_cldnt_ss<0b10, "ldnt1w", Z_s, ZPR32, GPR64NoXZRshifted32>; + defm LDNT1D_ZRR : sve_mem_cldnt_ss<0b11, "ldnt1d", Z_d, ZPR64, GPR64NoXZRshifted64>; + + // contiguous store with immediates + defm ST1B_IMM : sve_mem_cst_si<0b00, 0b00, "st1b", Z_b, ZPR8>; + defm ST1B_H_IMM : sve_mem_cst_si<0b00, 0b01, "st1b", Z_h, ZPR16>; + defm ST1B_S_IMM : sve_mem_cst_si<0b00, 0b10, "st1b", Z_s, ZPR32>; + defm ST1B_D_IMM : sve_mem_cst_si<0b00, 0b11, "st1b", 
Z_d, ZPR64>; + defm ST1H_IMM : sve_mem_cst_si<0b01, 0b01, "st1h", Z_h, ZPR16>; + defm ST1H_S_IMM : sve_mem_cst_si<0b01, 0b10, "st1h", Z_s, ZPR32>; + defm ST1H_D_IMM : sve_mem_cst_si<0b01, 0b11, "st1h", Z_d, ZPR64>; + defm ST1W_IMM : sve_mem_cst_si<0b10, 0b10, "st1w", Z_s, ZPR32>; + defm ST1W_D_IMM : sve_mem_cst_si<0b10, 0b11, "st1w", Z_d, ZPR64>; + defm ST1D_IMM : sve_mem_cst_si<0b11, 0b11, "st1d", Z_d, ZPR64>; + + // contiguous store with reg+reg addressing. + defm ST1B : sve_mem_cst_ss<0b0000, "st1b", Z_b, ZPR8, GPR64NoXZRshifted8>; + defm ST1B_H : sve_mem_cst_ss<0b0001, "st1b", Z_h, ZPR16, GPR64NoXZRshifted8>; + defm ST1B_S : sve_mem_cst_ss<0b0010, "st1b", Z_s, ZPR32, GPR64NoXZRshifted8>; + defm ST1B_D : sve_mem_cst_ss<0b0011, "st1b", Z_d, ZPR64, GPR64NoXZRshifted8>; + defm ST1H : sve_mem_cst_ss<0b0101, "st1h", Z_h, ZPR16, GPR64NoXZRshifted16>; + defm ST1H_S : sve_mem_cst_ss<0b0110, "st1h", Z_s, ZPR32, GPR64NoXZRshifted16>; + defm ST1H_D : sve_mem_cst_ss<0b0111, "st1h", Z_d, ZPR64, GPR64NoXZRshifted16>; + defm ST1W : sve_mem_cst_ss<0b1010, "st1w", Z_s, ZPR32, GPR64NoXZRshifted32>; + defm ST1W_D : sve_mem_cst_ss<0b1011, "st1w", Z_d, ZPR64, GPR64NoXZRshifted32>; + defm ST1D : sve_mem_cst_ss<0b1111, "st1d", Z_d, ZPR64, GPR64NoXZRshifted64>; + + // Scatters using unscaled 32-bit offsets, e.g. + // st1h z0.s, p0, [x0, z0.s, uxtw] + // and unpacked: + // st1h z0.d, p0, [x0, z0.d, uxtw] + defm SST1B_D : sve_mem_sst_sv_32_unscaled<0b000, "st1b", Z_d, ZPR64, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>; + defm SST1B_S : sve_mem_sst_sv_32_unscaled<0b001, "st1b", Z_s, ZPR32, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>; + defm SST1H_D : sve_mem_sst_sv_32_unscaled<0b010, "st1h", Z_d, ZPR64, ZPR64ExtSXTW8, ZPR64ExtUXTW8>; + defm SST1H_S : sve_mem_sst_sv_32_unscaled<0b011, "st1h", Z_s, ZPR32, ZPR32ExtSXTW8, ZPR32ExtUXTW8>; + defm SST1W_D : sve_mem_sst_sv_32_unscaled<0b100, "st1w", Z_d, ZPR64, ZPR64ExtSXTW8, ZPR64ExtUXTW8>; + defm SST1W : sve_mem_sst_sv_32_unscaled<0b101, "st1w", Z_s, ZPR32, ZPR32ExtSXTW8, ZPR32ExtUXTW8>; + defm SST1D : sve_mem_sst_sv_32_unscaled<0b110, "st1d", Z_d, ZPR64, ZPR64ExtSXTW8, ZPR64ExtUXTW8>; + + // Scatters using scaled 32-bit offsets, e.g. + // st1h z0.s, p0, [x0, z0.s, uxtw #1] + // and unpacked: + // st1h z0.d, p0, [x0, z0.d, uxtw #1] + defm SST1H_D : sve_mem_sst_sv_32_scaled<0b010, "st1h", Z_d, ZPR64, ZPR64ExtSXTW16, ZPR64ExtUXTW16>; + defm SST1H_S : sve_mem_sst_sv_32_scaled<0b011, "st1h", Z_s, ZPR32, ZPR32ExtSXTW16, ZPR32ExtUXTW16>; + defm SST1W_D : sve_mem_sst_sv_32_scaled<0b100, "st1w", Z_d, ZPR64, ZPR64ExtSXTW32, ZPR64ExtUXTW32>; + defm SST1W : sve_mem_sst_sv_32_scaled<0b101, "st1w", Z_s, ZPR32, ZPR32ExtSXTW32, ZPR32ExtUXTW32>; + defm SST1D : sve_mem_sst_sv_32_scaled<0b110, "st1d", Z_d, ZPR64, ZPR64ExtSXTW64, ZPR64ExtUXTW64>; + + // Scatters using 32/64-bit pointers with offset, e.g. + // st1h z0.s, p0, [z0.s, #16] + // st1h z0.d, p0, [z0.d, #16] + defm SST1B_D : sve_mem_sst_vi_ptrs<0b000, "st1b", Z_d, ZPR64, imm0_31>; + defm SST1B_S : sve_mem_sst_vi_ptrs<0b001, "st1b", Z_s, ZPR32, imm0_31>; + defm SST1H_D : sve_mem_sst_vi_ptrs<0b010, "st1h", Z_d, ZPR64, uimm5s2>; + defm SST1H_S : sve_mem_sst_vi_ptrs<0b011, "st1h", Z_s, ZPR32, uimm5s2>; + defm SST1W_D : sve_mem_sst_vi_ptrs<0b100, "st1w", Z_d, ZPR64, uimm5s4>; + defm SST1W : sve_mem_sst_vi_ptrs<0b101, "st1w", Z_s, ZPR32, uimm5s4>; + defm SST1D : sve_mem_sst_vi_ptrs<0b110, "st1d", Z_d, ZPR64, uimm5s8>; + + // Scatters using unscaled 64-bit offsets, e.g. 
+ // st1h z0.d, p0, [x0, z0.d] + defm SST1B_D : sve_mem_sst_sv_64_unscaled<0b00, "st1b">; + defm SST1H_D : sve_mem_sst_sv_64_unscaled<0b01, "st1h">; + defm SST1W_D : sve_mem_sst_sv_64_unscaled<0b10, "st1w">; + defm SST1D : sve_mem_sst_sv_64_unscaled<0b11, "st1d">; + + // Scatters using scaled 64-bit offsets, e.g. + // st1h z0.d, p0, [x0, z0.d, lsl #1] + defm SST1H_D_SCALED : sve_mem_sst_sv_64_scaled<0b01, "st1h", ZPR64ExtLSL16>; + defm SST1W_D_SCALED : sve_mem_sst_sv_64_scaled<0b10, "st1w", ZPR64ExtLSL32>; + defm SST1D_SCALED : sve_mem_sst_sv_64_scaled<0b11, "st1d", ZPR64ExtLSL64>; + + // ST(2|3|4) structured stores (register + immediate) + defm ST2B_IMM : sve_mem_est_si<0b00, 0b01, ZZ_b, "st2b", simm4s2>; + defm ST3B_IMM : sve_mem_est_si<0b00, 0b10, ZZZ_b, "st3b", simm4s3>; + defm ST4B_IMM : sve_mem_est_si<0b00, 0b11, ZZZZ_b, "st4b", simm4s4>; + defm ST2H_IMM : sve_mem_est_si<0b01, 0b01, ZZ_h, "st2h", simm4s2>; + defm ST3H_IMM : sve_mem_est_si<0b01, 0b10, ZZZ_h, "st3h", simm4s3>; + defm ST4H_IMM : sve_mem_est_si<0b01, 0b11, ZZZZ_h, "st4h", simm4s4>; + defm ST2W_IMM : sve_mem_est_si<0b10, 0b01, ZZ_s, "st2w", simm4s2>; + defm ST3W_IMM : sve_mem_est_si<0b10, 0b10, ZZZ_s, "st3w", simm4s3>; + defm ST4W_IMM : sve_mem_est_si<0b10, 0b11, ZZZZ_s, "st4w", simm4s4>; + defm ST2D_IMM : sve_mem_est_si<0b11, 0b01, ZZ_d, "st2d", simm4s2>; + defm ST3D_IMM : sve_mem_est_si<0b11, 0b10, ZZZ_d, "st3d", simm4s3>; + defm ST4D_IMM : sve_mem_est_si<0b11, 0b11, ZZZZ_d, "st4d", simm4s4>; + + // ST(2|3|4) structured stores (register + register) + def ST2B : sve_mem_est_ss<0b00, 0b01, ZZ_b, "st2b", GPR64NoXZRshifted8>; + def ST3B : sve_mem_est_ss<0b00, 0b10, ZZZ_b, "st3b", GPR64NoXZRshifted8>; + def ST4B : sve_mem_est_ss<0b00, 0b11, ZZZZ_b, "st4b", GPR64NoXZRshifted8>; + def ST2H : sve_mem_est_ss<0b01, 0b01, ZZ_h, "st2h", GPR64NoXZRshifted16>; + def ST3H : sve_mem_est_ss<0b01, 0b10, ZZZ_h, "st3h", GPR64NoXZRshifted16>; + def ST4H : sve_mem_est_ss<0b01, 0b11, ZZZZ_h, "st4h", GPR64NoXZRshifted16>; + def ST2W : sve_mem_est_ss<0b10, 0b01, ZZ_s, "st2w", GPR64NoXZRshifted32>; + def ST3W : sve_mem_est_ss<0b10, 0b10, ZZZ_s, "st3w", GPR64NoXZRshifted32>; + def ST4W : sve_mem_est_ss<0b10, 0b11, ZZZZ_s, "st4w", GPR64NoXZRshifted32>; + def ST2D : sve_mem_est_ss<0b11, 0b01, ZZ_d, "st2d", GPR64NoXZRshifted64>; + def ST3D : sve_mem_est_ss<0b11, 0b10, ZZZ_d, "st3d", GPR64NoXZRshifted64>; + def ST4D : sve_mem_est_ss<0b11, 0b11, ZZZZ_d, "st4d", GPR64NoXZRshifted64>; + + // Non-temporal contiguous stores (register + immediate) + defm STNT1B_ZRI : sve_mem_cstnt_si<0b00, "stnt1b", Z_b, ZPR8>; + defm STNT1H_ZRI : sve_mem_cstnt_si<0b01, "stnt1h", Z_h, ZPR16>; + defm STNT1W_ZRI : sve_mem_cstnt_si<0b10, "stnt1w", Z_s, ZPR32>; + defm STNT1D_ZRI : sve_mem_cstnt_si<0b11, "stnt1d", Z_d, ZPR64>; + + // Non-temporal contiguous stores (register + register) + defm STNT1B_ZRR : sve_mem_cstnt_ss<0b00, "stnt1b", Z_b, ZPR8, GPR64NoXZRshifted8>; + defm STNT1H_ZRR : sve_mem_cstnt_ss<0b01, "stnt1h", Z_h, ZPR16, GPR64NoXZRshifted16>; + defm STNT1W_ZRR : sve_mem_cstnt_ss<0b10, "stnt1w", Z_s, ZPR32, GPR64NoXZRshifted32>; + defm STNT1D_ZRR : sve_mem_cstnt_ss<0b11, "stnt1d", Z_d, ZPR64, GPR64NoXZRshifted64>; + + // Fill/Spill + defm LDR_ZXI : sve_mem_z_fill<"ldr">; + defm LDR_PXI : sve_mem_p_fill<"ldr">; + defm STR_ZXI : sve_mem_z_spill<"str">; + defm STR_PXI : sve_mem_p_spill<"str">; + + // Contiguous prefetch (register + immediate) + defm PRFB_PRI : sve_mem_prfm_si<0b00, "prfb">; + defm PRFH_PRI : sve_mem_prfm_si<0b01, "prfh">; + defm PRFW_PRI : 
sve_mem_prfm_si<0b10, "prfw">; + defm PRFD_PRI : sve_mem_prfm_si<0b11, "prfd">; + + // Contiguous prefetch (register + register) + def PRFB_PRR : sve_mem_prfm_ss<0b001, "prfb", GPR64NoXZRshifted8>; + def PRFH_PRR : sve_mem_prfm_ss<0b011, "prfh", GPR64NoXZRshifted16>; + def PRFS_PRR : sve_mem_prfm_ss<0b101, "prfw", GPR64NoXZRshifted32>; + def PRFD_PRR : sve_mem_prfm_ss<0b111, "prfd", GPR64NoXZRshifted64>; + + // Gather prefetch using scaled 32-bit offsets, e.g. + // prfh pldl1keep, p0, [x0, z0.s, uxtw #1] + defm PRFB_S : sve_mem_32b_prfm_sv_scaled<0b00, "prfb", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>; + defm PRFH_S : sve_mem_32b_prfm_sv_scaled<0b01, "prfh", ZPR32ExtSXTW16, ZPR32ExtUXTW16>; + defm PRFW_S : sve_mem_32b_prfm_sv_scaled<0b10, "prfw", ZPR32ExtSXTW32, ZPR32ExtUXTW32>; + defm PRFD_S : sve_mem_32b_prfm_sv_scaled<0b11, "prfd", ZPR32ExtSXTW64, ZPR32ExtUXTW64>; + + // Gather prefetch using unpacked, scaled 32-bit offsets, e.g. + // prfh pldl1keep, p0, [x0, z0.d, uxtw #1] + defm PRFB_D : sve_mem_64b_prfm_sv_ext_scaled<0b00, "prfb", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>; + defm PRFH_D : sve_mem_64b_prfm_sv_ext_scaled<0b01, "prfh", ZPR64ExtSXTW16, ZPR64ExtUXTW16>; + defm PRFW_D : sve_mem_64b_prfm_sv_ext_scaled<0b10, "prfw", ZPR64ExtSXTW32, ZPR64ExtUXTW32>; + defm PRFD_D : sve_mem_64b_prfm_sv_ext_scaled<0b11, "prfd", ZPR64ExtSXTW64, ZPR64ExtUXTW64>; + + // Gather prefetch using scaled 64-bit offsets, e.g. + // prfh pldl1keep, p0, [x0, z0.d, lsl #1] + defm PRFB_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b00, "prfb", ZPR64ExtLSL8>; + defm PRFH_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b01, "prfh", ZPR64ExtLSL16>; + defm PRFW_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b10, "prfw", ZPR64ExtLSL32>; + defm PRFD_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b11, "prfd", ZPR64ExtLSL64>; + + // Gather prefetch using 32/64-bit pointers with offset, e.g. 
+ // prfh pldl1keep, p0, [z0.s, #16] + // prfh pldl1keep, p0, [z0.d, #16] + defm PRFB_S_PZI : sve_mem_32b_prfm_vi<0b00, "prfb", imm0_31>; + defm PRFH_S_PZI : sve_mem_32b_prfm_vi<0b01, "prfh", uimm5s2>; + defm PRFW_S_PZI : sve_mem_32b_prfm_vi<0b10, "prfw", uimm5s4>; + defm PRFD_S_PZI : sve_mem_32b_prfm_vi<0b11, "prfd", uimm5s8>; + + defm PRFB_D_PZI : sve_mem_64b_prfm_vi<0b00, "prfb", imm0_31>; + defm PRFH_D_PZI : sve_mem_64b_prfm_vi<0b01, "prfh", uimm5s2>; + defm PRFW_D_PZI : sve_mem_64b_prfm_vi<0b10, "prfw", uimm5s4>; + defm PRFD_D_PZI : sve_mem_64b_prfm_vi<0b11, "prfd", uimm5s8>; + + defm ADR_SXTW_ZZZ_D : sve_int_bin_cons_misc_0_a_sxtw<0b00, "adr">; + defm ADR_UXTW_ZZZ_D : sve_int_bin_cons_misc_0_a_uxtw<0b01, "adr">; + defm ADR_LSL_ZZZ_S : sve_int_bin_cons_misc_0_a_32_lsl<0b10, "adr">; + defm ADR_LSL_ZZZ_D : sve_int_bin_cons_misc_0_a_64_lsl<0b11, "adr">; + + defm TBL_ZZZ : sve_int_perm_tbl<"tbl">; + + defm ZIP1_ZZZ : sve_int_perm_bin_perm_zz<0b000, "zip1">; + defm ZIP2_ZZZ : sve_int_perm_bin_perm_zz<0b001, "zip2">; + defm UZP1_ZZZ : sve_int_perm_bin_perm_zz<0b010, "uzp1">; + defm UZP2_ZZZ : sve_int_perm_bin_perm_zz<0b011, "uzp2">; + defm TRN1_ZZZ : sve_int_perm_bin_perm_zz<0b100, "trn1">; + defm TRN2_ZZZ : sve_int_perm_bin_perm_zz<0b101, "trn2">; + + defm ZIP1_PPP : sve_int_perm_bin_perm_pp<0b000, "zip1">; + defm ZIP2_PPP : sve_int_perm_bin_perm_pp<0b001, "zip2">; + defm UZP1_PPP : sve_int_perm_bin_perm_pp<0b010, "uzp1">; + defm UZP2_PPP : sve_int_perm_bin_perm_pp<0b011, "uzp2">; + defm TRN1_PPP : sve_int_perm_bin_perm_pp<0b100, "trn1">; + defm TRN2_PPP : sve_int_perm_bin_perm_pp<0b101, "trn2">; + + defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs">; + defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi">; + defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge">; + defm CMPGT_PPzZZ : sve_int_cmp_0<0b101, "cmpgt">; + defm CMPEQ_PPzZZ : sve_int_cmp_0<0b110, "cmpeq">; + defm CMPNE_PPzZZ : sve_int_cmp_0<0b111, "cmpne">; + + defm CMPEQ_WIDE_PPzZZ : sve_int_cmp_0_wide<0b010, "cmpeq">; + defm CMPNE_WIDE_PPzZZ : sve_int_cmp_0_wide<0b011, "cmpne">; + defm CMPGE_WIDE_PPzZZ : sve_int_cmp_1_wide<0b000, "cmpge">; + defm CMPGT_WIDE_PPzZZ : sve_int_cmp_1_wide<0b001, "cmpgt">; + defm CMPLT_WIDE_PPzZZ : sve_int_cmp_1_wide<0b010, "cmplt">; + defm CMPLE_WIDE_PPzZZ : sve_int_cmp_1_wide<0b011, "cmple">; + defm CMPHS_WIDE_PPzZZ : sve_int_cmp_1_wide<0b100, "cmphs">; + defm CMPHI_WIDE_PPzZZ : sve_int_cmp_1_wide<0b101, "cmphi">; + defm CMPLO_WIDE_PPzZZ : sve_int_cmp_1_wide<0b110, "cmplo">; + defm CMPLS_WIDE_PPzZZ : sve_int_cmp_1_wide<0b111, "cmpls">; + + defm CMPGE_PPzZI : sve_int_scmp_vi<0b000, "cmpge">; + defm CMPGT_PPzZI : sve_int_scmp_vi<0b001, "cmpgt">; + defm CMPLT_PPzZI : sve_int_scmp_vi<0b010, "cmplt">; + defm CMPLE_PPzZI : sve_int_scmp_vi<0b011, "cmple">; + defm CMPEQ_PPzZI : sve_int_scmp_vi<0b100, "cmpeq">; + defm CMPNE_PPzZI : sve_int_scmp_vi<0b101, "cmpne">; + defm CMPHS_PPzZI : sve_int_ucmp_vi<0b00, "cmphs">; + defm CMPHI_PPzZI : sve_int_ucmp_vi<0b01, "cmphi">; + defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo">; + defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls">; + + defm FCMGE_PPzZZ : sve_fp_3op_p_pd<0b000, "fcmge">; + defm FCMGT_PPzZZ : sve_fp_3op_p_pd<0b001, "fcmgt">; + defm FCMEQ_PPzZZ : sve_fp_3op_p_pd<0b010, "fcmeq">; + defm FCMNE_PPzZZ : sve_fp_3op_p_pd<0b011, "fcmne">; + defm FCMUO_PPzZZ : sve_fp_3op_p_pd<0b100, "fcmuo">; + defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge">; + defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt">; + + defm FCMGE_PPzZ0 : sve_fp_2op_p_pd<0b000, "fcmge">; + defm FCMGT_PPzZ0 : 
sve_fp_2op_p_pd<0b001, "fcmgt">; + defm FCMLT_PPzZ0 : sve_fp_2op_p_pd<0b010, "fcmlt">; + defm FCMLE_PPzZ0 : sve_fp_2op_p_pd<0b011, "fcmle">; + defm FCMEQ_PPzZ0 : sve_fp_2op_p_pd<0b100, "fcmeq">; + defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne">; + + defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt">; + defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele">; + defm WHILELO_PWW : sve_int_while4_rr<0b110, "whilelo">; + defm WHILELS_PWW : sve_int_while4_rr<0b111, "whilels">; + + defm WHILELT_PXX : sve_int_while8_rr<0b010, "whilelt">; + defm WHILELE_PXX : sve_int_while8_rr<0b011, "whilele">; + defm WHILELO_PXX : sve_int_while8_rr<0b110, "whilelo">; + defm WHILELS_PXX : sve_int_while8_rr<0b111, "whilels">; + + def CTERMEQ_WW : sve_int_cterm<0b0, 0b0, "ctermeq", GPR32>; + def CTERMNE_WW : sve_int_cterm<0b0, 0b1, "ctermne", GPR32>; + def CTERMEQ_XX : sve_int_cterm<0b1, 0b0, "ctermeq", GPR64>; + def CTERMNE_XX : sve_int_cterm<0b1, 0b1, "ctermne", GPR64>; + + def RDVLI_XI : sve_int_read_vl_a<0b0, 0b11111, "rdvl">; + def ADDVL_XXI : sve_int_arith_vl<0b0, "addvl">; + def ADDPL_XXI : sve_int_arith_vl<0b1, "addpl">; + + defm CNTB_XPiI : sve_int_count<0b000, "cntb">; + defm CNTH_XPiI : sve_int_count<0b010, "cnth">; + defm CNTW_XPiI : sve_int_count<0b100, "cntw">; + defm CNTD_XPiI : sve_int_count<0b110, "cntd">; + defm CNTP_XPP : sve_int_pcount_pred<0b0000, "cntp">; + + defm INCB_XPiI : sve_int_pred_pattern_a<0b000, "incb">; + defm DECB_XPiI : sve_int_pred_pattern_a<0b001, "decb">; + defm INCH_XPiI : sve_int_pred_pattern_a<0b010, "inch">; + defm DECH_XPiI : sve_int_pred_pattern_a<0b011, "dech">; + defm INCW_XPiI : sve_int_pred_pattern_a<0b100, "incw">; + defm DECW_XPiI : sve_int_pred_pattern_a<0b101, "decw">; + defm INCD_XPiI : sve_int_pred_pattern_a<0b110, "incd">; + defm DECD_XPiI : sve_int_pred_pattern_a<0b111, "decd">; + + defm SQINCB_XPiWdI : sve_int_pred_pattern_b_s32<0b00000, "sqincb">; + defm UQINCB_WPiI : sve_int_pred_pattern_b_u32<0b00001, "uqincb">; + defm SQDECB_XPiWdI : sve_int_pred_pattern_b_s32<0b00010, "sqdecb">; + defm UQDECB_WPiI : sve_int_pred_pattern_b_u32<0b00011, "uqdecb">; + defm SQINCB_XPiI : sve_int_pred_pattern_b_x64<0b00100, "sqincb">; + defm UQINCB_XPiI : sve_int_pred_pattern_b_x64<0b00101, "uqincb">; + defm SQDECB_XPiI : sve_int_pred_pattern_b_x64<0b00110, "sqdecb">; + defm UQDECB_XPiI : sve_int_pred_pattern_b_x64<0b00111, "uqdecb">; + + defm SQINCH_XPiWdI : sve_int_pred_pattern_b_s32<0b01000, "sqinch">; + defm UQINCH_WPiI : sve_int_pred_pattern_b_u32<0b01001, "uqinch">; + defm SQDECH_XPiWdI : sve_int_pred_pattern_b_s32<0b01010, "sqdech">; + defm UQDECH_WPiI : sve_int_pred_pattern_b_u32<0b01011, "uqdech">; + defm SQINCH_XPiI : sve_int_pred_pattern_b_x64<0b01100, "sqinch">; + defm UQINCH_XPiI : sve_int_pred_pattern_b_x64<0b01101, "uqinch">; + defm SQDECH_XPiI : sve_int_pred_pattern_b_x64<0b01110, "sqdech">; + defm UQDECH_XPiI : sve_int_pred_pattern_b_x64<0b01111, "uqdech">; + + defm SQINCW_XPiWdI : sve_int_pred_pattern_b_s32<0b10000, "sqincw">; + defm UQINCW_WPiI : sve_int_pred_pattern_b_u32<0b10001, "uqincw">; + defm SQDECW_XPiWdI : sve_int_pred_pattern_b_s32<0b10010, "sqdecw">; + defm UQDECW_WPiI : sve_int_pred_pattern_b_u32<0b10011, "uqdecw">; + defm SQINCW_XPiI : sve_int_pred_pattern_b_x64<0b10100, "sqincw">; + defm UQINCW_XPiI : sve_int_pred_pattern_b_x64<0b10101, "uqincw">; + defm SQDECW_XPiI : sve_int_pred_pattern_b_x64<0b10110, "sqdecw">; + defm UQDECW_XPiI : sve_int_pred_pattern_b_x64<0b10111, "uqdecw">; + + defm SQINCD_XPiWdI : 
sve_int_pred_pattern_b_s32<0b11000, "sqincd">; + defm UQINCD_WPiI : sve_int_pred_pattern_b_u32<0b11001, "uqincd">; + defm SQDECD_XPiWdI : sve_int_pred_pattern_b_s32<0b11010, "sqdecd">; + defm UQDECD_WPiI : sve_int_pred_pattern_b_u32<0b11011, "uqdecd">; + defm SQINCD_XPiI : sve_int_pred_pattern_b_x64<0b11100, "sqincd">; + defm UQINCD_XPiI : sve_int_pred_pattern_b_x64<0b11101, "uqincd">; + defm SQDECD_XPiI : sve_int_pred_pattern_b_x64<0b11110, "sqdecd">; + defm UQDECD_XPiI : sve_int_pred_pattern_b_x64<0b11111, "uqdecd">; + + defm SQINCH_ZPiI : sve_int_countvlv<0b01000, "sqinch", ZPR16>; + defm UQINCH_ZPiI : sve_int_countvlv<0b01001, "uqinch", ZPR16>; + defm SQDECH_ZPiI : sve_int_countvlv<0b01010, "sqdech", ZPR16>; + defm UQDECH_ZPiI : sve_int_countvlv<0b01011, "uqdech", ZPR16>; + defm INCH_ZPiI : sve_int_countvlv<0b01100, "inch", ZPR16>; + defm DECH_ZPiI : sve_int_countvlv<0b01101, "dech", ZPR16>; + defm SQINCW_ZPiI : sve_int_countvlv<0b10000, "sqincw", ZPR32>; + defm UQINCW_ZPiI : sve_int_countvlv<0b10001, "uqincw", ZPR32>; + defm SQDECW_ZPiI : sve_int_countvlv<0b10010, "sqdecw", ZPR32>; + defm UQDECW_ZPiI : sve_int_countvlv<0b10011, "uqdecw", ZPR32>; + defm INCW_ZPiI : sve_int_countvlv<0b10100, "incw", ZPR32>; + defm DECW_ZPiI : sve_int_countvlv<0b10101, "decw", ZPR32>; + defm SQINCD_ZPiI : sve_int_countvlv<0b11000, "sqincd", ZPR64>; + defm UQINCD_ZPiI : sve_int_countvlv<0b11001, "uqincd", ZPR64>; + defm SQDECD_ZPiI : sve_int_countvlv<0b11010, "sqdecd", ZPR64>; + defm UQDECD_ZPiI : sve_int_countvlv<0b11011, "uqdecd", ZPR64>; + defm INCD_ZPiI : sve_int_countvlv<0b11100, "incd", ZPR64>; + defm DECD_ZPiI : sve_int_countvlv<0b11101, "decd", ZPR64>; + + defm SQINCP_XPWd : sve_int_count_r_s32<0b00000, "sqincp">; + defm SQINCP_XP : sve_int_count_r_x64<0b00010, "sqincp">; + defm UQINCP_WP : sve_int_count_r_u32<0b00100, "uqincp">; + defm UQINCP_XP : sve_int_count_r_x64<0b00110, "uqincp">; + defm SQDECP_XPWd : sve_int_count_r_s32<0b01000, "sqdecp">; + defm SQDECP_XP : sve_int_count_r_x64<0b01010, "sqdecp">; + defm UQDECP_WP : sve_int_count_r_u32<0b01100, "uqdecp">; + defm UQDECP_XP : sve_int_count_r_x64<0b01110, "uqdecp">; + defm INCP_XP : sve_int_count_r_x64<0b10000, "incp">; + defm DECP_XP : sve_int_count_r_x64<0b10100, "decp">; + + defm SQINCP_ZP : sve_int_count_v<0b00000, "sqincp">; + defm UQINCP_ZP : sve_int_count_v<0b00100, "uqincp">; + defm SQDECP_ZP : sve_int_count_v<0b01000, "sqdecp">; + defm UQDECP_ZP : sve_int_count_v<0b01100, "uqdecp">; + defm INCP_ZP : sve_int_count_v<0b10000, "incp">; + defm DECP_ZP : sve_int_count_v<0b10100, "decp">; + + defm INDEX_RR : sve_int_index_rr<"index">; + defm INDEX_IR : sve_int_index_ir<"index">; + defm INDEX_RI : sve_int_index_ri<"index">; + defm INDEX_II : sve_int_index_ii<"index">; + + // Unpredicated shifts + defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr">; + defm LSR_ZZI : sve_int_bin_cons_shift_imm_right<0b01, "lsr">; + defm LSL_ZZI : sve_int_bin_cons_shift_imm_left< 0b11, "lsl">; + + defm ASR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b00, "asr">; + defm LSR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b01, "lsr">; + defm LSL_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b11, "lsl">; + + // Predicated shifts + defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right<0b000, "asr">; + defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right<0b001, "lsr">; + defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b011, "lsl">; + defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b100, "asrd">; + + defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr">; + defm LSR_ZPmZ : 
sve_int_bin_pred_shift<0b001, "lsr">; + defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl">; + defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr">; + defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr">; + defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr">; + + defm ASR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b000, "asr">; + defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr">; + defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl">; + + def FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, ElementSizeS>; + def FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, ElementSizeS>; + def SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16, ElementSizeH>; + def SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32, ElementSizeS>; + def UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32, ElementSizeS>; + def UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16, ElementSizeH>; + def FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16, ElementSizeH>; + def FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32, ElementSizeS>; + def FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16, ElementSizeH>; + def FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32, ElementSizeS>; + def FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16, ElementSizeD>; + def FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64, ElementSizeD>; + def FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32, ElementSizeD>; + def FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64, ElementSizeD>; + def SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64, ElementSizeD>; + def UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64, ElementSizeD>; + def UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16, ElementSizeS>; + def SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32, ElementSizeD>; + def SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16, ElementSizeS>; + def SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16, ElementSizeD>; + def UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32, ElementSizeD>; + def UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16, ElementSizeD>; + def SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64, ElementSizeD>; + def UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64, ElementSizeD>; + def FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32, ElementSizeD>; + def FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32, ElementSizeD>; + def FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64, ElementSizeD>; + def FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32, ElementSizeS>; + def FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, ElementSizeD>; + def FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, ElementSizeS>; + def FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, ElementSizeD>; + def FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, ElementSizeD>; + def FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, ElementSizeD>; + def FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, ElementSizeD>; + + defm FRINTN_ZPmZ : 
sve_fp_2op_p_zd_HSD<0b00000, "frintn">; + defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp">; + defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm">; + defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz">; + defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta">; + defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx">; + defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti">; + defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx">; + defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt">; + + // InstAliases + def : InstAlias<"mov $Zd, $Zn", + (ORR_ZZZ ZPR64:$Zd, ZPR64:$Zn, ZPR64:$Zn), 1>; + def : InstAlias<"mov $Pd, $Pg/m, $Pn", + (SEL_PPPP PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, PPR8:$Pd), 1>; + def : InstAlias<"mov $Pd, $Pn", + (ORR_PPzPP PPR8:$Pd, PPR8:$Pn, PPR8:$Pn, PPR8:$Pn), 1>; + def : InstAlias<"mov $Pd, $Pg/z, $Pn", + (AND_PPzPP PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, PPR8:$Pn), 1>; + + def : InstAlias<"movs $Pd, $Pn", + (ORRS_PPzPP PPR8:$Pd, PPR8:$Pn, PPR8:$Pn, PPR8:$Pn), 1>; + def : InstAlias<"movs $Pd, $Pg/z, $Pn", + (ANDS_PPzPP PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, PPR8:$Pn), 1>; + + def : InstAlias<"not $Pd, $Pg/z, $Pn", + (EOR_PPzPP PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, PPRAny:$Pg), 1>; + + def : InstAlias<"nots $Pd, $Pg/z, $Pn", + (EORS_PPzPP PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, PPRAny:$Pg), 1>; + + def : InstAlias<"cmple $Zd, $Pg/z, $Zm, $Zn", + (CMPGE_PPzZZ_B PPR8:$Zd, PPR3bAny:$Pg, ZPR8:$Zn, ZPR8:$Zm), 0>; + def : InstAlias<"cmple $Zd, $Pg/z, $Zm, $Zn", + (CMPGE_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>; + def : InstAlias<"cmple $Zd, $Pg/z, $Zm, $Zn", + (CMPGE_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; + def : InstAlias<"cmple $Zd, $Pg/z, $Zm, $Zn", + (CMPGE_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; + + def : InstAlias<"cmplo $Zd, $Pg/z, $Zm, $Zn", + (CMPHI_PPzZZ_B PPR8:$Zd, PPR3bAny:$Pg, ZPR8:$Zn, ZPR8:$Zm), 0>; + def : InstAlias<"cmplo $Zd, $Pg/z, $Zm, $Zn", + (CMPHI_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>; + def : InstAlias<"cmplo $Zd, $Pg/z, $Zm, $Zn", + (CMPHI_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; + def : InstAlias<"cmplo $Zd, $Pg/z, $Zm, $Zn", + (CMPHI_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; + + def : InstAlias<"cmpls $Zd, $Pg/z, $Zm, $Zn", + (CMPHS_PPzZZ_B PPR8:$Zd, PPR3bAny:$Pg, ZPR8:$Zn, ZPR8:$Zm), 0>; + def : InstAlias<"cmpls $Zd, $Pg/z, $Zm, $Zn", + (CMPHS_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>; + def : InstAlias<"cmpls $Zd, $Pg/z, $Zm, $Zn", + (CMPHS_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; + def : InstAlias<"cmpls $Zd, $Pg/z, $Zm, $Zn", + (CMPHS_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; + + def : InstAlias<"cmplt $Zd, $Pg/z, $Zm, $Zn", + (CMPGT_PPzZZ_B PPR8:$Zd, PPR3bAny:$Pg, ZPR8:$Zn, ZPR8:$Zm), 0>; + def : InstAlias<"cmplt $Zd, $Pg/z, $Zm, $Zn", + (CMPGT_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>; + def : InstAlias<"cmplt $Zd, $Pg/z, $Zm, $Zn", + (CMPGT_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; + def : InstAlias<"cmplt $Zd, $Pg/z, $Zm, $Zn", + (CMPGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; + + def : InstAlias<"facle $Zd, $Pg/z, $Zm, $Zn", + (FACGE_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>; + def : InstAlias<"facle $Zd, $Pg/z, $Zm, $Zn", + (FACGE_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; + def : InstAlias<"facle $Zd, $Pg/z, $Zm, $Zn", + (FACGE_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; + 
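// Every reversed-condition alias in this block follows the same pattern: the
// "less/lower" mnemonic is accepted by the assembler, but it is encoded as the
// corresponding "greater/higher" comparison with its two vector source
// operands swapped, and the trailing 0 (the EmitPriority argument of
// InstAlias) keeps the printer from choosing the alias when disassembling.
// A minimal sketch of that shape, using a hypothetical HYP_CMPGT_PPzZZ_B
// record rather than one of the real comparison instructions:
//
//   def : InstAlias<"hyp_cmplt $Pd, $Pg/z, $Zm, $Zn",
//                   (HYP_CMPGT_PPzZZ_B PPR8:$Pd, PPR3bAny:$Pg,
//                                      ZPR8:$Zn, ZPR8:$Zm), 0>;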
+ def : InstAlias<"faclt $Zd, $Pg/z, $Zm, $Zn",
+ (FACGT_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>;
+ def : InstAlias<"faclt $Zd, $Pg/z, $Zm, $Zn",
+ (FACGT_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>;
+ def : InstAlias<"faclt $Zd, $Pg/z, $Zm, $Zn",
+ (FACGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
+
+ def : InstAlias<"fcmle $Zd, $Pg/z, $Zm, $Zn",
+ (FCMGE_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>;
+ def : InstAlias<"fcmle $Zd, $Pg/z, $Zm, $Zn",
+ (FCMGE_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>;
+ def : InstAlias<"fcmle $Zd, $Pg/z, $Zm, $Zn",
+ (FCMGE_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
+
+ def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn",
+ (FCMGT_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>;
+ def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn",
+ (FCMGT_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>;
+ def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn",
+ (FCMGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
+}
diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedA53.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedA53.td
new file mode 100644
index 000000000..f253a4f3e
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedA53.td
@@ -0,0 +1,295 @@
+//==- AArch64SchedA53.td - Cortex-A53 Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the ARM Cortex A53 processors.
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the simpler per-operand machine model.
+// This works with MachineScheduler. See MCSchedule.h for details.
+
+// Cortex-A53 machine model for scheduling and other instruction cost heuristics.
+def CortexA53Model : SchedMachineModel {
+  let MicroOpBufferSize = 0; // Explicitly set to zero since A53 is in-order.
+  let IssueWidth = 2;        // 2 micro-ops are dispatched per cycle.
+  let LoadLatency = 3;       // Optimistic load latency assuming bypass.
+                             // This is overridden by OperandCycles if the
+                             // Itineraries are queried instead.
+  let MispredictPenalty = 9; // Based on "Cortex-A53 Software Optimisation
+                             // Specification - Instruction Timings"
+                             // v 1.0 Spreadsheet
+  let CompleteModel = 1;
+
+  list<Predicate> UnsupportedFeatures = [HasSVE];
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available.
+
+// Modeling each pipeline as a ProcResource using the BufferSize = 0 since
+// Cortex-A53 is in-order.
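// The per-operand model above reduces to three ingredients: in-order pipes
// (ProcResource with BufferSize = 0), a latency per kind of write
// (WriteRes/SchedWriteRes), and optional operand forwarding (SchedReadAdvance).
// A minimal sketch of how the pieces fit together, using hypothetical MiniALU,
// MiniWriteALU and MiniReadALU names; the real Cortex-A53 units are defined
// immediately below, and the real write types live inside the
// "let SchedModel = CortexA53Model in" block that follows them.
def MiniALU : ProcResource<1> { let BufferSize = 0; }  // hypothetical in-order pipe
def MiniWriteALU : SchedWriteRes<[MiniALU]> {          // hypothetical write type
  let Latency = 3;                                     // result ready 3 cycles after issue
}
def MiniReadALU : SchedReadAdvance<1, [MiniWriteALU]>; // hypothetical read: this operand is not
                                                       // needed until 1 cycle after issue, so a
                                                       // MiniWriteALU producer effectively has
                                                       // latency 2 for it (a forwarding path)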
+ +def A53UnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU +def A53UnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC +def A53UnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division +def A53UnitLdSt : ProcResource<1> { let BufferSize = 0; } // Load/Store +def A53UnitB : ProcResource<1> { let BufferSize = 0; } // Branch +def A53UnitFPALU : ProcResource<1> { let BufferSize = 0; } // FP ALU +def A53UnitFPMDS : ProcResource<1> { let BufferSize = 0; } // FP Mult/Div/Sqrt + + +//===----------------------------------------------------------------------===// +// Subtarget-specific SchedWrite types which both map the ProcResources and +// set the latency. + +let SchedModel = CortexA53Model in { + +// ALU - Despite having a full latency of 4, most of the ALU instructions can +// forward a cycle earlier and then two cycles earlier in the case of a +// shift-only instruction. These latencies will be incorrect when the +// result cannot be forwarded, but modeling isn't rocket surgery. +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 2; } +def : WriteRes { let Latency = 3; } + +// MAC +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 4; } + +// Div +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 4; } + +// Load +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 4; } + +// Vector Load - Vector loads take 1-5 cycles to issue. For the WriteVecLd +// below, choosing the median of 3 which makes the latency 6. +// May model this more carefully in the future. The remaining +// A53WriteVLD# types represent the 1-5 cycle issues explicitly. +def : WriteRes { let Latency = 6; + let ResourceCycles = [3]; } +def A53WriteVLD1 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 4; } +def A53WriteVLD2 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 5; + let ResourceCycles = [2]; } +def A53WriteVLD3 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 6; + let ResourceCycles = [3]; } +def A53WriteVLD4 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 7; + let ResourceCycles = [4]; } +def A53WriteVLD5 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 8; + let ResourceCycles = [5]; } + +// Pre/Post Indexing - Performed as part of address generation which is already +// accounted for in the WriteST* latencies below +def : WriteRes { let Latency = 0; } + +// Store +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 4; } + +// Vector Store - Similar to vector loads, can take 1-3 cycles to issue. 
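// The "takes N cycles to issue" wording used for the vector load/store types
// is expressed through ResourceCycles: the write keeps the load/store pipe
// busy for N consecutive issue slots, independently of the Latency at which
// its result becomes available. A minimal sketch with a hypothetical name;
// the real vector-store write types follow directly below.
def MiniWriteVSTSlow : SchedWriteRes<[A53UnitLdSt]> {  // hypothetical write type
  let Latency = 6;           // result available 6 cycles after issue
  let ResourceCycles = [3];  // occupies the single LdSt pipe for 3 issue cycles
}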
+def : WriteRes { let Latency = 5; + let ResourceCycles = [2];} +def A53WriteVST1 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 4; } +def A53WriteVST2 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 5; + let ResourceCycles = [2]; } +def A53WriteVST3 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 6; + let ResourceCycles = [3]; } + +def : WriteRes { let Unsupported = 1; } + +// Branch +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// FP ALU +def : WriteRes { let Latency = 6; } +def : WriteRes { let Latency = 6; } +def : WriteRes { let Latency = 6; } +def : WriteRes { let Latency = 6; } +def : WriteRes { let Latency = 6; } +def : WriteRes { let Latency = 6; } + +// FP Mul, Div, Sqrt +def : WriteRes { let Latency = 6; } +def : WriteRes { let Latency = 33; + let ResourceCycles = [29]; } +def A53WriteFMAC : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 10; } +def A53WriteFDivSP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 18; + let ResourceCycles = [14]; } +def A53WriteFDivDP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 33; + let ResourceCycles = [29]; } +def A53WriteFSqrtSP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 17; + let ResourceCycles = [13]; } +def A53WriteFSqrtDP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 32; + let ResourceCycles = [28]; } + +//===----------------------------------------------------------------------===// +// Subtarget-specific SchedRead types. + +// No forwarding for these reads. +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable +// operands are needed one cycle later if and only if they are to be +// shifted. Otherwise, they too are needed two cycles later. This same +// ReadAdvance applies to Extended registers as well, even though there is +// a separate SchedPredicate for them. +def : ReadAdvance; +def A53ReadShifted : SchedReadAdvance<1, [WriteImm,WriteI, + WriteISReg, WriteIEReg,WriteIS, + WriteID32,WriteID64, + WriteIM32,WriteIM64]>; +def A53ReadNotShifted : SchedReadAdvance<2, [WriteImm,WriteI, + WriteISReg, WriteIEReg,WriteIS, + WriteID32,WriteID64, + WriteIM32,WriteIM64]>; +def A53ReadISReg : SchedReadVariant<[ + SchedVar, + SchedVar]>; +def : SchedAlias; + +def A53ReadIEReg : SchedReadVariant<[ + SchedVar, + SchedVar]>; +def : SchedAlias; + +// MAC - Operands are generally needed one cycle later in the MAC pipe. +// Accumulator operands are needed two cycles later. +def : ReadAdvance; +def : ReadAdvance; + +// Div +def : ReadAdvance; + +//===----------------------------------------------------------------------===// +// Subtarget-specific InstRWs. 
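// An InstRW entry overrides the per-operand defaults for the instructions it
// names, either by exact opcode (instrs) or by a regular expression over
// opcode names (instregex), and binds them to one of the write types defined
// above. A minimal sketch of the two forms: MiniWriteSlowLd is a hypothetical
// write type and the opcode choices are illustrative only; the real Cortex-A53
// overrides start with the Miscellaneous group below.
def MiniWriteSlowLd : SchedWriteRes<[A53UnitLdSt]> { let Latency = 7; }  // hypothetical
def : InstRW<[MiniWriteSlowLd], (instrs LDRXui)>;            // select a single opcode exactly
def : InstRW<[MiniWriteSlowLd], (instregex "LDRXro(W|X)$")>; // select opcodes by regex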
+ +//--- +// Miscellaneous +//--- +def : InstRW<[WriteI], (instrs COPY)>; + +//--- +// Vector Loads +//--- +def : InstRW<[A53WriteVLD1], (instregex "LD1i(8|16|32|64)$")>; +def : InstRW<[A53WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>; +def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +def : InstRW<[A53WriteVLD1], (instregex "LD2i(8|16|32|64)$")>; +def : InstRW<[A53WriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>; +def : InstRW<[A53WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>; +def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[A53WriteVLD2], (instregex "LD3i(8|16|32|64)$")>; +def : InstRW<[A53WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD4], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[A53WriteVLD3], (instregex "LD3Threev2d$")>; +def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>; +def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[A53WriteVLD3, WriteAdr], (instregex "LD3Threev2d_POST$")>; + +def : InstRW<[A53WriteVLD2], (instregex "LD4i(8|16|32|64)$")>; +def : InstRW<[A53WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD5], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[A53WriteVLD4], (instregex "LD4Fourv(2d)$")>; +def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>; +def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVLD5, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>; + +//--- +// Vector Stores +//--- +def : InstRW<[A53WriteVST1], (instregex "ST1i(8|16|32|64)$")>; +def : InstRW<[A53WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVST2], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>; +def : InstRW<[A53WriteVST1, WriteAdr], 
(instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +def : InstRW<[A53WriteVST1], (instregex "ST2i(8|16|32|64)$")>; +def : InstRW<[A53WriteVST1], (instregex "ST2Twov(8b|4h|2s)$")>; +def : InstRW<[A53WriteVST2], (instregex "ST2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>; +def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[A53WriteVST2], (instregex "ST3i(8|16|32|64)$")>; +def : InstRW<[A53WriteVST3], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[A53WriteVST2], (instregex "ST3Threev(2d)$")>; +def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>; +def : InstRW<[A53WriteVST3, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST3Threev(2d)_POST$")>; + +def : InstRW<[A53WriteVST2], (instregex "ST4i(8|16|32|64)$")>; +def : InstRW<[A53WriteVST3], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[A53WriteVST2], (instregex "ST4Fourv(2d)$")>; +def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>; +def : InstRW<[A53WriteVST3, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>; + +//--- +// Floating Point MAC, DIV, SQRT +//--- +def : InstRW<[A53WriteFMAC], (instregex "^FN?M(ADD|SUB).*")>; +def : InstRW<[A53WriteFMAC], (instregex "^FML(A|S).*")>; +def : InstRW<[A53WriteFDivSP], (instrs FDIVSrr)>; +def : InstRW<[A53WriteFDivDP], (instrs FDIVDrr)>; +def : InstRW<[A53WriteFDivSP], (instregex "^FDIVv.*32$")>; +def : InstRW<[A53WriteFDivDP], (instregex "^FDIVv.*64$")>; +def : InstRW<[A53WriteFSqrtSP], (instregex "^.*SQRT.*32$")>; +def : InstRW<[A53WriteFSqrtDP], (instregex "^.*SQRT.*64$")>; + +} diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedA57.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedA57.td new file mode 100644 index 000000000..ade03f23f --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedA57.td @@ -0,0 +1,668 @@ +//=- AArch64SchedA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for ARM Cortex-A57 to support +// instruction scheduling and other instruction cost heuristics. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// The Cortex-A57 is a traditional superscalar microprocessor with a +// conservative 3-wide in-order stage for decode and dispatch. Combined with the +// much wider out-of-order issue stage, this produced a need to carefully +// schedule micro-ops so that all three decoded each cycle are successfully +// issued as the reservation station(s) simply don't stay occupied for long. 
+// Therefore, IssueWidth is set to the narrower of the two at three, while still
+// modeling the machine as out-of-order.
+
+def CortexA57Model : SchedMachineModel {
+  let IssueWidth = 3;          // 3-way decode and dispatch
+  let MicroOpBufferSize = 128; // 128 micro-op re-order buffer
+  let LoadLatency = 4;         // Optimistic load latency
+  let MispredictPenalty = 14;  // Fetch + Decode/Rename/Dispatch + Branch
+
+  // Enable partial & runtime unrolling. The magic number is chosen based on
+  // experiments and benchmarking data.
+  let LoopMicroOpBufferSize = 16;
+  let CompleteModel = 1;
+
+  list<Predicate> UnsupportedFeatures = [HasSVE];
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on Cortex-A57.
+// Cortex-A57 has 8 pipelines, each of which has its own 8-entry queue where
+// micro-ops wait for their operands and then issue out-of-order.
+
+def A57UnitB : ProcResource<1>;  // Type B micro-ops
+def A57UnitI : ProcResource<2>;  // Type I micro-ops
+def A57UnitM : ProcResource<1>;  // Type M micro-ops
+def A57UnitL : ProcResource<1>;  // Type L micro-ops
+def A57UnitS : ProcResource<1>;  // Type S micro-ops
+def A57UnitX : ProcResource<1>;  // Type X micro-ops
+def A57UnitW : ProcResource<1>;  // Type W micro-ops
+let SchedModel = CortexA57Model in {
+  def A57UnitV : ProcResGroup<[A57UnitX, A57UnitW]>;  // Type V micro-ops
+}
+
+let SchedModel = CortexA57Model in {
+
+//===----------------------------------------------------------------------===//
+// Define customized scheduler read/write types specific to the Cortex-A57.
+
+include "AArch64SchedA57WriteRes.td"
+
+//===----------------------------------------------------------------------===//
+// Map the target-defined scheduler read/write resources and latency for
+// Cortex-A57. The Cortex-A57 types are directly associated with resources, so
+// defining the aliases precludes the need for mapping them using WriteRes. The
+// aliases are sufficient for creating a coarse, working model. As the model
+// evolves, InstRWs will be used to override some of these SchedAliases.
+//
+// WARNING: Using SchedAliases is convenient and works well for latency and
+//          resource lookup for instructions. However, this creates an entry in
+//          AArch64WriteLatencyTable with a WriteResourceID of 0, breaking
+//          any SchedReadAdvance since the lookup will fail.
+ +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 5; } +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; + +def : WriteRes { let Unsupported = 1; } + +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } + +def : WriteRes { let Latency = 4; } + +// Forwarding logic is only modeled for multiply and accumulate +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + + +//===----------------------------------------------------------------------===// +// Specialize the coarse model by associating instruction groups with the +// subtarget-defined types. As the modeled is refined, this will override most +// of the above ShchedAlias mappings. + +// Miscellaneous +// ----------------------------------------------------------------------------- + +def : InstRW<[WriteI], (instrs COPY)>; + + +// Branch Instructions +// ----------------------------------------------------------------------------- + +def : InstRW<[A57Write_1cyc_1B_1I], (instrs BL)>; +def : InstRW<[A57Write_2cyc_1B_1I], (instrs BLR)>; + + +// Shifted Register with Shift == 0 +// ---------------------------------------------------------------------------- + +def A57WriteISReg : SchedWriteVariant<[ + SchedVar, + SchedVar]>; +def : InstRW<[A57WriteISReg], (instregex ".*rs$")>; + + +// Divide and Multiply Instructions +// ----------------------------------------------------------------------------- + +// Multiply high +def : InstRW<[A57Write_6cyc_1M], (instrs SMULHrr, UMULHrr)>; + + +// Miscellaneous Data-Processing Instructions +// ----------------------------------------------------------------------------- + +def : InstRW<[A57Write_1cyc_1I], (instrs EXTRWrri)>; +def : InstRW<[A57Write_3cyc_1I_1M], (instrs EXTRXrri)>; +def : InstRW<[A57Write_2cyc_1M], (instregex "BFM")>; + + +// Cryptography Extensions +// ----------------------------------------------------------------------------- + +def A57ReadAES : SchedReadAdvance<3, [A57Write_3cyc_1W]>; +def : InstRW<[A57Write_3cyc_1W], (instregex "^AES[DE]")>; +def : InstRW<[A57Write_3cyc_1W, A57ReadAES], (instregex "^AESI?MC")>; +def : InstRW<[A57Write_6cyc_2V], (instregex "^SHA1SU0")>; +def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA1(H|SU1)")>; +def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA1[CMP]")>; +def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA256SU0")>; +def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA256(H|H2|SU1)")>; +def : InstRW<[A57Write_3cyc_1W], (instregex "^CRC32")>; + + +// Vector Load +// ----------------------------------------------------------------------------- + +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD1i(8|16|32)$")>; +def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD1i(8|16|32)_POST$")>; +def : InstRW<[A57Write_5cyc_1L], (instregex "LD1i(64)$")>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1i(64)_POST$")>; + +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s)$")>; +def : 
InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Rv(1d)$")>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Rv(1d)_POST$")>; +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Onev(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Onev(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_7cyc_3L], (instregex "LD1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_7cyc_3L, WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_8cyc_4L], (instregex "LD1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_8cyc_4L, WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD2i(8|16)$")>; +def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD2i(8|16)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD2i(32)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD2i(32)_POST$")>; +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD2i(64)$")>; +def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD2i(64)_POST$")>; + +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD2Rv(8b|4h|2s)$")>; +def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD2Rv(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_5cyc_1L], (instregex "LD2Rv(1d)$")>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD2Rv(1d)_POST$")>; +def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD2Twov(8b|4h|2s)$")>; +def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_9cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s)$")>; +def : InstRW<[A57Write_9cyc_2L_2V, WriteAdr], (instregex "LD2Twov(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD2Twov(2d)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD2Twov(2d)_POST$")>; + +def : InstRW<[A57Write_9cyc_1L_3V], (instregex "LD3i(8|16)$")>; +def : InstRW<[A57Write_9cyc_1L_3V, WriteAdr], (instregex "LD3i(8|16)_POST$")>; +def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD3i(32)$")>; +def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD3i(32)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD3i(64)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD3i(64)_POST$")>; + +def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD3Rv(8b|4h|2s)$")>; +def : InstRW<[A57Write_8cyc_1L_2V, 
WriteAdr], (instregex "LD3Rv(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD3Rv(1d)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD3Rv(1d)_POST$")>; +def : InstRW<[A57Write_9cyc_1L_3V], (instregex "LD3Rv(16b|8h|4s)$")>; +def : InstRW<[A57Write_9cyc_1L_3V, WriteAdr], (instregex "LD3Rv(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD3Rv(2d)$")>; +def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD3Rv(2d)_POST$")>; + +def : InstRW<[A57Write_9cyc_2L_2V], (instregex "LD3Threev(8b|4h|2s)$")>; +def : InstRW<[A57Write_9cyc_2L_2V, WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_10cyc_3L_4V], (instregex "LD3Threev(16b|8h|4s)$")>; +def : InstRW<[A57Write_10cyc_3L_4V, WriteAdr], (instregex "LD3Threev(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_8cyc_4L], (instregex "LD3Threev(2d)$")>; +def : InstRW<[A57Write_8cyc_4L, WriteAdr], (instregex "LD3Threev(2d)_POST$")>; + +def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD4i(8|16)$")>; +def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD4i(8|16)_POST$")>; +def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD4i(32)$")>; +def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD4i(32)_POST$")>; +def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD4i(64)$")>; +def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD4i(64)_POST$")>; + +def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD4Rv(8b|4h|2s)$")>; +def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD4Rv(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD4Rv(1d)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD4Rv(1d)_POST$")>; +def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD4Rv(16b|8h|4s)$")>; +def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD4Rv(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_9cyc_2L_4V], (instregex "LD4Rv(2d)$")>; +def : InstRW<[A57Write_9cyc_2L_4V, WriteAdr], (instregex "LD4Rv(2d)_POST$")>; + +def : InstRW<[A57Write_9cyc_2L_2V], (instregex "LD4Fourv(8b|4h|2s)$")>; +def : InstRW<[A57Write_9cyc_2L_2V, WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_11cyc_4L_4V], (instregex "LD4Fourv(16b|8h|4s)$")>; +def : InstRW<[A57Write_11cyc_4L_4V, WriteAdr], (instregex "LD4Fourv(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_8cyc_4L], (instregex "LD4Fourv(2d)$")>; +def : InstRW<[A57Write_8cyc_4L, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>; + +// Vector Store +// ----------------------------------------------------------------------------- + +def : InstRW<[A57Write_1cyc_1S], (instregex "ST1i(8|16|32)$")>; +def : InstRW<[A57Write_1cyc_1S, WriteAdr], (instregex "ST1i(8|16|32)_POST$")>; +def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST1i(64)$")>; +def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST1i(64)_POST$")>; + +def : InstRW<[A57Write_1cyc_1S], (instregex "ST1Onev(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_1cyc_1S, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_2cyc_2S], (instregex "ST1Onev(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_2cyc_2S, WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[A57Write_2cyc_2S], (instregex "ST1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_2cyc_2S, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_4cyc_4S], (instregex "ST1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[A57Write_3cyc_3S], (instregex 
"ST1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_3cyc_3S, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_6cyc_6S], (instregex "ST1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_6cyc_6S, WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[A57Write_4cyc_4S], (instregex "ST1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_8cyc_8S], (instregex "ST1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_8cyc_8S, WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST2i(8|16|32)$")>; +def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST2i(8|16|32)_POST$")>; +def : InstRW<[A57Write_2cyc_2S], (instregex "ST2i(64)$")>; +def : InstRW<[A57Write_2cyc_2S, WriteAdr], (instregex "ST2i(64)_POST$")>; + +def : InstRW<[A57Write_3cyc_2S_1V], (instregex "ST2Twov(8b|4h|2s)$")>; +def : InstRW<[A57Write_3cyc_2S_1V, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_4cyc_4S_2V], (instregex "ST2Twov(16b|8h|4s)$")>; +def : InstRW<[A57Write_4cyc_4S_2V, WriteAdr], (instregex "ST2Twov(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_4cyc_4S], (instregex "ST2Twov(2d)$")>; +def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST2Twov(2d)_POST$")>; + +def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST3i(8|16)$")>; +def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST3i(8|16)_POST$")>; +def : InstRW<[A57Write_3cyc_3S], (instregex "ST3i(32)$")>; +def : InstRW<[A57Write_3cyc_3S, WriteAdr], (instregex "ST3i(32)_POST$")>; +def : InstRW<[A57Write_3cyc_2S_1V], (instregex "ST3i(64)$")>; +def : InstRW<[A57Write_3cyc_2S_1V, WriteAdr], (instregex "ST3i(64)_POST$")>; + +def : InstRW<[A57Write_3cyc_3S_2V], (instregex "ST3Threev(8b|4h|2s)$")>; +def : InstRW<[A57Write_3cyc_3S_2V, WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_6cyc_6S_4V], (instregex "ST3Threev(16b|8h|4s)$")>; +def : InstRW<[A57Write_6cyc_6S_4V, WriteAdr], (instregex "ST3Threev(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_6cyc_6S], (instregex "ST3Threev(2d)$")>; +def : InstRW<[A57Write_6cyc_6S, WriteAdr], (instregex "ST3Threev(2d)_POST$")>; + +def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST4i(8|16)$")>; +def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST4i(8|16)_POST$")>; +def : InstRW<[A57Write_4cyc_4S], (instregex "ST4i(32)$")>; +def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST4i(32)_POST$")>; +def : InstRW<[A57Write_3cyc_2S_1V], (instregex "ST4i(64)$")>; +def : InstRW<[A57Write_3cyc_2S_1V, WriteAdr], (instregex "ST4i(64)_POST$")>; + +def : InstRW<[A57Write_4cyc_4S_2V], (instregex "ST4Fourv(8b|4h|2s)$")>; +def : InstRW<[A57Write_4cyc_4S_2V, WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_8cyc_8S_4V], (instregex "ST4Fourv(16b|8h|4s)$")>; +def : InstRW<[A57Write_8cyc_8S_4V, WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_8cyc_8S], (instregex "ST4Fourv(2d)$")>; +def : InstRW<[A57Write_8cyc_8S, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>; + +// Vector - Integer +// ----------------------------------------------------------------------------- + +// Reference for forms in this group +// D form - v8i8, v4i16, v2i32 +// Q form - v16i8, v8i16, v4i32 +// D form - v1i8, v1i16, v1i32, v1i64 +// Q form - v16i8, v8i16, v4i32, v2i64 +// D form - v8i8_v8i16, v4i16_v4i32, v2i32_v2i64 +// Q form - v16i8_v8i16, v8i16_v4i32, v4i32_v2i64 + +// ASIMD 
absolute diff accum, D-form +def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>; +// ASIMD absolute diff accum, Q-form +def : InstRW<[A57Write_5cyc_2X], (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>; +// ASIMD absolute diff accum long +def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]ABAL")>; + +// ASIMD arith, reduce, 4H/4S +def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>; +// ASIMD arith, reduce, 8B/8H +def : InstRW<[A57Write_7cyc_1V_1X], (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>; +// ASIMD arith, reduce, 16B +def : InstRW<[A57Write_8cyc_2X], (instregex "^[SU]?ADDL?Vv16i8v$")>; + +// ASIMD max/min, reduce, 4H/4S +def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>; +// ASIMD max/min, reduce, 8B/8H +def : InstRW<[A57Write_7cyc_1V_1X], (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>; +// ASIMD max/min, reduce, 16B +def : InstRW<[A57Write_8cyc_2X], (instregex "^[SU](MIN|MAX)Vv16i8v$")>; + +// ASIMD multiply, D-form +def : InstRW<[A57Write_5cyc_1W], (instregex "^(P?MUL|SQR?DMULH)(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)(_indexed)?$")>; +// ASIMD multiply, Q-form +def : InstRW<[A57Write_6cyc_2W], (instregex "^(P?MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>; + +// ASIMD multiply accumulate, D-form +def : InstRW<[A57Write_5cyc_1W], (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>; +// ASIMD multiply accumulate, Q-form +def : InstRW<[A57Write_6cyc_2W], (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>; + +// ASIMD multiply accumulate long +// ASIMD multiply accumulate saturating long +def A57WriteIVMA : SchedWriteRes<[A57UnitW]> { let Latency = 5; } +def A57ReadIVMA4 : SchedReadAdvance<4, [A57WriteIVMA]>; +def : InstRW<[A57WriteIVMA, A57ReadIVMA4], (instregex "^(S|U|SQD)ML[AS]L")>; + +// ASIMD multiply long +def : InstRW<[A57Write_5cyc_1W], (instregex "^(S|U|SQD)MULL")>; +def : InstRW<[A57Write_5cyc_1W], (instregex "^PMULL(v8i8|v16i8)")>; +def : InstRW<[A57Write_3cyc_1W], (instregex "^PMULL(v1i64|v2i64)")>; + +// ASIMD pairwise add and accumulate +// ASIMD shift accumulate +def A57WriteIVA : SchedWriteRes<[A57UnitX]> { let Latency = 4; } +def A57ReadIVA3 : SchedReadAdvance<3, [A57WriteIVA]>; +def : InstRW<[A57WriteIVA, A57ReadIVA3], (instregex "^[SU]ADALP")>; +def : InstRW<[A57WriteIVA, A57ReadIVA3], (instregex "^(S|SR|U|UR)SRA")>; + +// ASIMD shift by immed, complex +def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]?(Q|R){1,2}SHR")>; +def : InstRW<[A57Write_4cyc_1X], (instregex "^SQSHLU")>; + + +// ASIMD shift by register, basic, Q-form +def : InstRW<[A57Write_4cyc_2X], (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>; + +// ASIMD shift by register, complex, D-form +def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU][QR]{1,2}SHL(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>; + +// ASIMD shift by register, complex, Q-form +def : InstRW<[A57Write_5cyc_2X], (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>; + + +// Vector - Floating Point +// ----------------------------------------------------------------------------- + +// Reference for forms in this group +// D form - v2f32 +// Q form - v4f32, v2f64 +// D form - 32, 64 +// D form - v1i32, v1i64 +// D form - v2i32 +// Q form - v4i32, v2i64 + +// ASIMD FP arith, normal, D-form +def : InstRW<[A57Write_5cyc_1V], (instregex "^(FABD|FADD|FSUB)(v2f32|32|64|v2i32p)")>; +// ASIMD FP arith, normal, Q-form +def : InstRW<[A57Write_5cyc_2V], (instregex "^(FABD|FADD|FSUB)(v4f32|v2f64|v2i64p)")>; + +// ASIMD FP arith, pairwise, D-form +def : 
InstRW<[A57Write_5cyc_1V], (instregex "^FADDP(v2f32|32|64|v2i32)")>; +// ASIMD FP arith, pairwise, Q-form +def : InstRW<[A57Write_9cyc_3V], (instregex "^FADDP(v4f32|v2f64|v2i64)")>; + +// ASIMD FP compare, D-form +def : InstRW<[A57Write_5cyc_1V], (instregex "^(FACGE|FACGT|FCMEQ|FCMGE|FCMGT|FCMLE|FCMLT)(v2f32|32|64|v1i32|v2i32|v1i64)")>; +// ASIMD FP compare, Q-form +def : InstRW<[A57Write_5cyc_2V], (instregex "^(FACGE|FACGT|FCMEQ|FCMGE|FCMGT|FCMLE|FCMLT)(v4f32|v2f64|v4i32|v2i64)")>; + +// ASIMD FP convert, long and narrow +def : InstRW<[A57Write_8cyc_3V], (instregex "^FCVT(L|N|XN)v")>; +// ASIMD FP convert, other, D-form +def : InstRW<[A57Write_5cyc_1V], (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>; +// ASIMD FP convert, other, Q-form +def : InstRW<[A57Write_5cyc_2V], (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>; + +// ASIMD FP divide, D-form, F32 +def : InstRW<[A57Write_17cyc_1W], (instregex "FDIVv2f32")>; +// ASIMD FP divide, Q-form, F32 +def : InstRW<[A57Write_34cyc_2W], (instregex "FDIVv4f32")>; +// ASIMD FP divide, Q-form, F64 +def : InstRW<[A57Write_64cyc_2W], (instregex "FDIVv2f64")>; + +// Note: These were simply duplicated from ASIMD FDIV because of missing documentation +// ASIMD FP square root, D-form, F32 +def : InstRW<[A57Write_17cyc_1W], (instregex "FSQRTv2f32")>; +// ASIMD FP square root, Q-form, F32 +def : InstRW<[A57Write_34cyc_2W], (instregex "FSQRTv4f32")>; +// ASIMD FP square root, Q-form, F64 +def : InstRW<[A57Write_64cyc_2W], (instregex "FSQRTv2f64")>; + +// ASIMD FP max/min, normal, D-form +def : InstRW<[A57Write_5cyc_1V], (instregex "^(FMAX|FMIN)(NM)?(v2f32)")>; +// ASIMD FP max/min, normal, Q-form +def : InstRW<[A57Write_5cyc_2V], (instregex "^(FMAX|FMIN)(NM)?(v4f32|v2f64)")>; +// ASIMD FP max/min, pairwise, D-form +def : InstRW<[A57Write_5cyc_1V], (instregex "^(FMAX|FMIN)(NM)?P(v2f32|v2i32)")>; +// ASIMD FP max/min, pairwise, Q-form +def : InstRW<[A57Write_9cyc_3V], (instregex "^(FMAX|FMIN)(NM)?P(v4f32|v2f64|v2i64)")>; +// ASIMD FP max/min, reduce +def : InstRW<[A57Write_10cyc_3V], (instregex "^(FMAX|FMIN)(NM)?Vv")>; + +// ASIMD FP multiply, D-form, FZ +def : InstRW<[A57Write_5cyc_1V], (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>; +// ASIMD FP multiply, Q-form, FZ +def : InstRW<[A57Write_5cyc_2V], (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>; + +// ASIMD FP multiply accumulate, D-form, FZ +// ASIMD FP multiply accumulate, Q-form, FZ +def A57WriteFPVMAD : SchedWriteRes<[A57UnitV]> { let Latency = 9; } +def A57WriteFPVMAQ : SchedWriteRes<[A57UnitV, A57UnitV]> { let Latency = 10; } +def A57ReadFPVMA5 : SchedReadAdvance<5, [A57WriteFPVMAD, A57WriteFPVMAQ]>; +def : InstRW<[A57WriteFPVMAD, A57ReadFPVMA5], (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>; +def : InstRW<[A57WriteFPVMAQ, A57ReadFPVMA5], (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>; + +// ASIMD FP round, D-form +def : InstRW<[A57Write_5cyc_1V], (instregex "^FRINT[AIMNPXZ](v2f32)")>; +// ASIMD FP round, Q-form +def : InstRW<[A57Write_5cyc_2V], (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>; + + +// Vector - Miscellaneous +// ----------------------------------------------------------------------------- + +// Reference for forms in this group +// D form - v8i8, v4i16, v2i32 +// Q form - v16i8, v8i16, v4i32 +// D form - v1i8, v1i16, v1i32, v1i64 +// Q form - v16i8, v8i16, v4i32, v2i64 + +// ASIMD bitwise insert, Q-form +def : InstRW<[A57Write_3cyc_2V], (instregex "^(BIF|BIT|BSL)v16i8")>; + +// ASIMD duplicate, gen reg, D-form and Q-form +def : 
InstRW<[A57Write_8cyc_1L_1V], (instregex "^CPY")>; +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^DUPv.+gpr")>; + +// ASIMD move, saturating +def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]QXTU?N")>; + +// ASIMD reciprocal estimate, D-form +def : InstRW<[A57Write_5cyc_1V], (instregex "^[FU](RECP|RSQRT)(E|X)(v2f32|v1i32|v2i32|v1i64)")>; +// ASIMD reciprocal estimate, Q-form +def : InstRW<[A57Write_5cyc_2V], (instregex "^[FU](RECP|RSQRT)(E|X)(v2f64|v4f32|v4i32)")>; + +// ASIMD reciprocal step, D-form, FZ +def : InstRW<[A57Write_9cyc_1V], (instregex "^F(RECP|RSQRT)S(v2f32|v1i32|v2i32|v1i64|32|64)")>; +// ASIMD reciprocal step, Q-form, FZ +def : InstRW<[A57Write_9cyc_2V], (instregex "^F(RECP|RSQRT)S(v2f64|v4f32|v4i32)")>; + +// ASIMD table lookup, D-form +def : InstRW<[A57Write_3cyc_1V], (instregex "^TB[LX]v8i8One")>; +def : InstRW<[A57Write_6cyc_2V], (instregex "^TB[LX]v8i8Two")>; +def : InstRW<[A57Write_9cyc_3V], (instregex "^TB[LX]v8i8Three")>; +def : InstRW<[A57Write_12cyc_4V], (instregex "^TB[LX]v8i8Four")>; +// ASIMD table lookup, Q-form +def : InstRW<[A57Write_6cyc_3V], (instregex "^TB[LX]v16i8One")>; +def : InstRW<[A57Write_9cyc_5V], (instregex "^TB[LX]v16i8Two")>; +def : InstRW<[A57Write_12cyc_7V], (instregex "^TB[LX]v16i8Three")>; +def : InstRW<[A57Write_15cyc_9V], (instregex "^TB[LX]v16i8Four")>; + +// ASIMD transfer, element to gen reg +def : InstRW<[A57Write_6cyc_1I_1L], (instregex "^[SU]MOVv")>; + +// ASIMD transfer, gen reg to element +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^INSv")>; + +// ASIMD unzip/zip, Q-form +def : InstRW<[A57Write_6cyc_3V], (instregex "^(UZP|ZIP)(1|2)(v16i8|v8i16|v4i32|v2i64)")>; + + +// Remainder +// ----------------------------------------------------------------------------- + +def : InstRW<[A57Write_5cyc_1V], (instregex "^F(ADD|SUB)[DS]rr")>; + +def A57WriteFPMA : SchedWriteRes<[A57UnitV]> { let Latency = 9; } +def A57ReadFPMA5 : SchedReadAdvance<5, [A57WriteFPMA]>; +def A57ReadFPM : SchedReadAdvance<0>; +def : InstRW<[A57WriteFPMA, A57ReadFPM, A57ReadFPM, A57ReadFPMA5], (instregex "^FN?M(ADD|SUB)[DS]rrr")>; + +def : InstRW<[A57Write_10cyc_1L_1V], (instregex "^[FSU]CVT[AMNPZ][SU](_Int)?[SU]?[XW]?[DS]?[rds]i?")>; +def : InstRW<[A57Write_10cyc_1L_1V], (instregex "^[SU]CVTF")>; + +def : InstRW<[A57Write_32cyc_1W], (instrs FDIVDrr)>; +def : InstRW<[A57Write_17cyc_1W], (instrs FDIVSrr)>; + +def : InstRW<[A57Write_5cyc_1V], (instregex "^F(MAX|MIN).+rr")>; + +def : InstRW<[A57Write_5cyc_1V], (instregex "^FRINT.+r")>; + +def : InstRW<[A57Write_32cyc_1W], (instrs FSQRTDr)>; +def : InstRW<[A57Write_17cyc_1W], (instrs FSQRTSr)>; + +def : InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDNPDi)>; +def : InstRW<[A57Write_6cyc_2L, WriteLDHi], (instrs LDNPQi)>; +def : InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDNPSi)>; +def : InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDPDi)>; +def : InstRW<[A57Write_5cyc_1L, WriteLDHi, WriteAdr], (instrs LDPDpost)>; +def : InstRW<[A57Write_5cyc_1L, WriteLDHi, WriteAdr], (instrs LDPDpre)>; +def : InstRW<[A57Write_6cyc_2L, WriteLDHi], (instrs LDPQi)>; +def : InstRW<[A57Write_6cyc_2L, WriteLDHi, WriteAdr], (instrs LDPQpost)>; +def : InstRW<[A57Write_6cyc_2L, WriteLDHi, WriteAdr], (instrs LDPQpre)>; +def : InstRW<[A57Write_5cyc_1I_2L, WriteLDHi], (instrs LDPSWi)>; +def : InstRW<[A57Write_5cyc_1I_2L, WriteLDHi, WriteAdr], (instrs LDPSWpost)>; +def : InstRW<[A57Write_5cyc_1I_2L, WriteLDHi, WriteAdr], (instrs LDPSWpre)>; +def : InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDPSi)>; +def : InstRW<[A57Write_5cyc_1L, 
WriteLDHi, WriteAdr], (instrs LDPSpost)>; +def : InstRW<[A57Write_5cyc_1L, WriteLDHi, WriteAdr], (instrs LDPSpre)>; +def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRBpost)>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRBpre)>; +def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRBroW)>; +def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRBroX)>; +def : InstRW<[A57Write_5cyc_1L], (instrs LDRBui)>; +def : InstRW<[A57Write_5cyc_1L], (instrs LDRDl)>; +def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRDpost)>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRDpre)>; +def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRDroW)>; +def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRDroX)>; +def : InstRW<[A57Write_5cyc_1L], (instrs LDRDui)>; +def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRHHroW)>; +def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRHHroX)>; +def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRHpost)>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRHpre)>; +def : InstRW<[A57Write_6cyc_1I_1L, ReadAdrBase], (instrs LDRHroW)>; +def : InstRW<[A57Write_6cyc_1I_1L, ReadAdrBase], (instrs LDRHroX)>; +def : InstRW<[A57Write_5cyc_1L], (instrs LDRHui)>; +def : InstRW<[A57Write_5cyc_1L], (instrs LDRQl)>; +def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRQpost)>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRQpre)>; +def : InstRW<[A57Write_6cyc_1I_1L, ReadAdrBase], (instrs LDRQroW)>; +def : InstRW<[A57Write_6cyc_1I_1L, ReadAdrBase], (instrs LDRQroX)>; +def : InstRW<[A57Write_5cyc_1L], (instrs LDRQui)>; +def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRSHWroW)>; +def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRSHWroX)>; +def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRSHXroW)>; +def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRSHXroX)>; +def : InstRW<[A57Write_5cyc_1L], (instrs LDRSl)>; +def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRSpost)>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRSpre)>; +def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRSroW)>; +def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRSroX)>; +def : InstRW<[A57Write_5cyc_1L], (instrs LDRSui)>; +def : InstRW<[A57Write_5cyc_1L], (instrs LDURBi)>; +def : InstRW<[A57Write_5cyc_1L], (instrs LDURDi)>; +def : InstRW<[A57Write_5cyc_1L], (instrs LDURHi)>; +def : InstRW<[A57Write_5cyc_1L], (instrs LDURQi)>; +def : InstRW<[A57Write_5cyc_1L], (instrs LDURSi)>; + +def : InstRW<[A57Write_2cyc_2S], (instrs STNPDi)>; +def : InstRW<[A57Write_4cyc_1I_4S], (instrs STNPQi)>; +def : InstRW<[A57Write_2cyc_2S], (instrs STNPXi)>; +def : InstRW<[A57Write_2cyc_2S], (instrs STPDi)>; +def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S], (instrs STPDpost)>; +def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S], (instrs STPDpre)>; +def : InstRW<[A57Write_4cyc_1I_4S], (instrs STPQi)>; +def : InstRW<[WriteAdr, A57Write_4cyc_1I_4S], (instrs STPQpost)>; +def : InstRW<[WriteAdr, A57Write_4cyc_2I_4S], (instrs STPQpre)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STPSpost)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STPSpre)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STPWpost)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STPWpre)>; +def : InstRW<[A57Write_2cyc_2S], (instrs STPXi)>; +def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S], (instrs STPXpost)>; +def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S], (instrs STPXpre)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], 
(instrs STRBBpost)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRBBpre)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRBpost)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STRBpre)>; +def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRBroW)>; +def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRBroX)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRDpost)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STRDpre)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRHHpost)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRHHpre)>; +def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRHHroW)>; +def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRHHroX)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRHpost)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STRHpre)>; +def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRHroW)>; +def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRHroX)>; +def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S, ReadAdrBase], (instrs STRQpost)>; +def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S], (instrs STRQpre)>; +def : InstRW<[A57Write_2cyc_1I_2S, ReadAdrBase], (instrs STRQroW)>; +def : InstRW<[A57Write_2cyc_1I_2S, ReadAdrBase], (instrs STRQroX)>; +def : InstRW<[A57Write_2cyc_1I_2S], (instrs STRQui)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRSpost)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STRSpre)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRWpost)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRWpre)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRXpost)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRXpre)>; +def : InstRW<[A57Write_2cyc_2S], (instrs STURQi)>; + +} // SchedModel = CortexA57Model diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedA57WriteRes.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedA57WriteRes.td new file mode 100644 index 000000000..55005e1d9 --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedA57WriteRes.td @@ -0,0 +1,544 @@ +//=- AArch64SchedA57WriteRes.td - ARM Cortex-A57 Write Res ---*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Contains all of the Cortex-A57 specific SchedWriteRes types. The approach +// below is to define a generic SchedWriteRes for every combination of +// latency and microOps. The naming conventions is to use a prefix, one field +// for latency, and one or more microOp count/type designators. +// Prefix: A57Write +// Latency: #cyc +// MicroOp Count/Types: #(B|I|M|L|S|X|W|V) +// +// e.g. A57Write_6cyc_1I_6S_4V means the total latency is 6 and there are +// 11 micro-ops to be issued down one I pipe, six S pipes and four V pipes. 
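// To make the convention concrete: a hypothetical name that does not appear in
// this file, A57Write_7cyc_1M_2V, would decode to a 3 micro-op write (one M
// micro-op plus two V micro-ops) whose result is ready after 7 cycles, i.e.
// roughly:
//
//   def A57Write_7cyc_1M_2V : SchedWriteRes<[A57UnitM, A57UnitV, A57UnitV]> {
//     let Latency = 7;
//     let NumMicroOps = 3;
//   }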
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Define Generic 1 micro-op types + +def A57Write_5cyc_1L : SchedWriteRes<[A57UnitL]> { let Latency = 5; } +def A57Write_5cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 5; } +def A57Write_5cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 5; } +def A57Write_5cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 5; } +def A57Write_10cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 10; } +def A57Write_17cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 17; + let ResourceCycles = [17]; } +def A57Write_19cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 19; + let ResourceCycles = [19]; } +def A57Write_1cyc_1B : SchedWriteRes<[A57UnitB]> { let Latency = 1; } +def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1; } +def A57Write_1cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 1; } +def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2; } +def A57Write_32cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 32; + let ResourceCycles = [32]; } +def A57Write_35cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 35; + let ResourceCycles = [35]; } +def A57Write_3cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 3; } +def A57Write_3cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 3; } +def A57Write_3cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 3; } +def A57Write_3cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 3; } +def A57Write_4cyc_1L : SchedWriteRes<[A57UnitL]> { let Latency = 4; } +def A57Write_4cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 4; } +def A57Write_9cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 9; } +def A57Write_6cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 6; } +def A57Write_6cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 6; } + + +//===----------------------------------------------------------------------===// +// Define Generic 2 micro-op types + +def A57Write_64cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> { + let Latency = 64; + let NumMicroOps = 2; + let ResourceCycles = [32, 32]; +} +def A57Write_6cyc_1I_1L : SchedWriteRes<[A57UnitI, + A57UnitL]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_7cyc_1V_1X : SchedWriteRes<[A57UnitV, + A57UnitX]> { + let Latency = 7; + let NumMicroOps = 2; +} +def A57Write_8cyc_1L_1V : SchedWriteRes<[A57UnitL, + A57UnitV]> { + let Latency = 8; + let NumMicroOps = 2; +} +def A57Write_9cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 2; +} +def A57Write_8cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 8; + let NumMicroOps = 2; +} +def A57Write_6cyc_2L : SchedWriteRes<[A57UnitL, A57UnitL]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_6cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_6cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_5cyc_1I_1L : SchedWriteRes<[A57UnitI, + A57UnitL]> { + let Latency = 5; + let NumMicroOps = 2; +} +def A57Write_5cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 5; + let NumMicroOps = 2; +} +def A57Write_5cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 5; + let NumMicroOps = 2; +} +def A57Write_10cyc_1L_1V : SchedWriteRes<[A57UnitL, + A57UnitV]> { + let Latency = 10; + let NumMicroOps = 2; +} +def A57Write_10cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 
10; + let NumMicroOps = 2; +} +def A57Write_1cyc_1B_1I : SchedWriteRes<[A57UnitB, + A57UnitI]> { + let Latency = 1; + let NumMicroOps = 2; +} +def A57Write_1cyc_1I_1S : SchedWriteRes<[A57UnitI, + A57UnitS]> { + let Latency = 1; + let NumMicroOps = 2; +} +def A57Write_2cyc_1B_1I : SchedWriteRes<[A57UnitB, + A57UnitI]> { + let Latency = 2; + let NumMicroOps = 2; +} +def A57Write_2cyc_2S : SchedWriteRes<[A57UnitS, A57UnitS]> { + let Latency = 2; + let NumMicroOps = 2; +} +def A57Write_2cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 2; + let NumMicroOps = 2; +} +def A57Write_34cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> { + let Latency = 34; + let NumMicroOps = 2; + let ResourceCycles = [17, 17]; +} +def A57Write_3cyc_1I_1M : SchedWriteRes<[A57UnitI, + A57UnitM]> { + let Latency = 3; + let NumMicroOps = 2; +} +def A57Write_3cyc_1I_1S : SchedWriteRes<[A57UnitI, + A57UnitS]> { + let Latency = 3; + let NumMicroOps = 2; +} +def A57Write_3cyc_1S_1V : SchedWriteRes<[A57UnitS, + A57UnitV]> { + let Latency = 3; + let NumMicroOps = 2; +} +def A57Write_3cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 3; + let NumMicroOps = 2; +} +def A57Write_4cyc_1I_1L : SchedWriteRes<[A57UnitI, + A57UnitL]> { + let Latency = 4; + let NumMicroOps = 2; +} +def A57Write_4cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 4; + let NumMicroOps = 2; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 3 micro-op types + +def A57Write_10cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 10; + let NumMicroOps = 3; +} +def A57Write_2cyc_1I_2S : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS]> { + let Latency = 2; + let NumMicroOps = 3; +} +def A57Write_3cyc_1I_1S_1V : SchedWriteRes<[A57UnitI, + A57UnitS, + A57UnitV]> { + let Latency = 3; + let NumMicroOps = 3; +} +def A57Write_3cyc_1M_2S : SchedWriteRes<[A57UnitM, + A57UnitS, A57UnitS]> { + let Latency = 3; + let NumMicroOps = 3; +} +def A57Write_3cyc_3S : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS]> { + let Latency = 3; + let NumMicroOps = 3; +} +def A57Write_3cyc_2S_1V : SchedWriteRes<[A57UnitS, A57UnitS, + A57UnitV]> { + let Latency = 3; + let NumMicroOps = 3; +} +def A57Write_5cyc_1I_2L : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL]> { + let Latency = 5; + let NumMicroOps = 3; +} +def A57Write_6cyc_1I_2L : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL]> { + let Latency = 6; + let NumMicroOps = 3; +} +def A57Write_6cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 6; + let NumMicroOps = 3; +} +def A57Write_7cyc_3L : SchedWriteRes<[A57UnitL, A57UnitL, A57UnitL]> { + let Latency = 7; + let NumMicroOps = 3; +} +def A57Write_8cyc_1I_1L_1V : SchedWriteRes<[A57UnitI, + A57UnitL, + A57UnitV]> { + let Latency = 8; + let NumMicroOps = 3; +} +def A57Write_8cyc_1L_2V : SchedWriteRes<[A57UnitL, + A57UnitV, A57UnitV]> { + let Latency = 8; + let NumMicroOps = 3; +} +def A57Write_8cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 8; + let NumMicroOps = 3; +} +def A57Write_9cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 3; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 4 micro-op types + +def A57Write_2cyc_2I_2S : SchedWriteRes<[A57UnitI, A57UnitI, + A57UnitS, A57UnitS]> { + let Latency = 2; + let NumMicroOps = 4; +} +def A57Write_3cyc_2I_2S : SchedWriteRes<[A57UnitI, A57UnitI, + A57UnitS, A57UnitS]> { + let Latency 
= 3; + let NumMicroOps = 4; +} +def A57Write_3cyc_1I_3S : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, A57UnitS]> { + let Latency = 3; + let NumMicroOps = 4; +} +def A57Write_3cyc_1I_2S_1V : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, + A57UnitV]> { + let Latency = 3; + let NumMicroOps = 4; +} +def A57Write_4cyc_4S : SchedWriteRes<[A57UnitS, A57UnitS, + A57UnitS, A57UnitS]> { + let Latency = 4; + let NumMicroOps = 4; +} +def A57Write_7cyc_1I_3L : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL, A57UnitL]> { + let Latency = 7; + let NumMicroOps = 4; +} +def A57Write_5cyc_2I_2L : SchedWriteRes<[A57UnitI, A57UnitI, + A57UnitL, A57UnitL]> { + let Latency = 5; + let NumMicroOps = 4; +} +def A57Write_8cyc_1I_1L_2V : SchedWriteRes<[A57UnitI, + A57UnitL, + A57UnitV, A57UnitV]> { + let Latency = 8; + let NumMicroOps = 4; +} +def A57Write_8cyc_4L : SchedWriteRes<[A57UnitL, A57UnitL, + A57UnitL, A57UnitL]> { + let Latency = 8; + let NumMicroOps = 4; +} +def A57Write_9cyc_2L_2V : SchedWriteRes<[A57UnitL, A57UnitL, + A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 4; +} +def A57Write_9cyc_1L_3V : SchedWriteRes<[A57UnitL, + A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 4; +} +def A57Write_12cyc_4V : SchedWriteRes<[A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 12; + let NumMicroOps = 4; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 5 micro-op types + +def A57Write_3cyc_3S_2V : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS, + A57UnitV, A57UnitV]> { + let Latency = 3; + let NumMicroOps = 5; +} +def A57Write_8cyc_1I_4L : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL, + A57UnitL, A57UnitL]> { + let Latency = 8; + let NumMicroOps = 5; +} +def A57Write_4cyc_1I_4S : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS]> { + let Latency = 4; + let NumMicroOps = 5; +} +def A57Write_9cyc_1I_2L_2V : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL, + A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 5; +} +def A57Write_9cyc_1I_1L_3V : SchedWriteRes<[A57UnitI, + A57UnitL, + A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 5; +} +def A57Write_9cyc_2L_3V : SchedWriteRes<[A57UnitL, A57UnitL, + A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 5; +} +def A57Write_9cyc_5V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 5; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 6 micro-op types + +def A57Write_3cyc_1I_3S_2V : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, A57UnitS, + A57UnitV, A57UnitV]> { + let Latency = 3; + let NumMicroOps = 6; +} +def A57Write_4cyc_2I_4S : SchedWriteRes<[A57UnitI, A57UnitI, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS]> { + let Latency = 4; + let NumMicroOps = 6; +} +def A57Write_4cyc_4S_2V : SchedWriteRes<[A57UnitS, A57UnitS, + A57UnitS, A57UnitS, + A57UnitV, A57UnitV]> { + let Latency = 4; + let NumMicroOps = 6; +} +def A57Write_6cyc_6S : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS, + A57UnitS, A57UnitS, A57UnitS]> { + let Latency = 6; + let NumMicroOps = 6; +} +def A57Write_9cyc_1I_2L_3V : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL, + A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 6; +} +def A57Write_9cyc_1I_1L_4V : SchedWriteRes<[A57UnitI, + A57UnitL, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 6; +} +def 
A57Write_9cyc_2L_4V : SchedWriteRes<[A57UnitL, A57UnitL, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 6; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 7 micro-op types + +def A57Write_10cyc_3L_4V : SchedWriteRes<[A57UnitL, A57UnitL, A57UnitL, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 10; + let NumMicroOps = 7; +} +def A57Write_4cyc_1I_4S_2V : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS, + A57UnitV, A57UnitV]> { + let Latency = 4; + let NumMicroOps = 7; +} +def A57Write_6cyc_1I_6S : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, A57UnitS, + A57UnitS, A57UnitS, A57UnitS]> { + let Latency = 6; + let NumMicroOps = 7; +} +def A57Write_9cyc_1I_2L_4V : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 7; +} +def A57Write_12cyc_7V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 12; + let NumMicroOps = 7; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 8 micro-op types + +def A57Write_10cyc_1I_3L_4V : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL, A57UnitL, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 10; + let NumMicroOps = 8; +} +def A57Write_11cyc_4L_4V : SchedWriteRes<[A57UnitL, A57UnitL, + A57UnitL, A57UnitL, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 11; + let NumMicroOps = 8; +} +def A57Write_8cyc_8S : SchedWriteRes<[A57UnitS, A57UnitS, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS]> { + let Latency = 8; + let NumMicroOps = 8; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 9 micro-op types + +def A57Write_8cyc_1I_8S : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS]> { + let Latency = 8; + let NumMicroOps = 9; +} +def A57Write_11cyc_1I_4L_4V : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL, + A57UnitL, A57UnitL, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 11; + let NumMicroOps = 9; +} +def A57Write_15cyc_9V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV, + A57UnitV, A57UnitV, A57UnitV, + A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 15; + let NumMicroOps = 9; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 10 micro-op types + +def A57Write_6cyc_6S_4V : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS, + A57UnitS, A57UnitS, A57UnitS, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 6; + let NumMicroOps = 10; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 11 micro-op types + +def A57Write_6cyc_1I_6S_4V : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, A57UnitS, + A57UnitS, A57UnitS, A57UnitS, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 6; + let NumMicroOps = 11; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 12 micro-op types + +def A57Write_8cyc_8S_4V : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS, A57UnitS, + A57UnitS, A57UnitS, A57UnitS, A57UnitS, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 8; + let NumMicroOps = 12; +} + +//===----------------------------------------------------------------------===// +// Define Generic 13 
micro-op types + +def A57Write_8cyc_1I_8S_4V : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, A57UnitS, + A57UnitS, A57UnitS, A57UnitS, + A57UnitS, A57UnitS, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 8; + let NumMicroOps = 13; +} + diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedCyclone.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedCyclone.td new file mode 100644 index 000000000..7a474ba8e --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedCyclone.td @@ -0,0 +1,871 @@ +//=- AArch64SchedCyclone.td - Cyclone Scheduling Definitions -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for AArch64 Cyclone to support +// instruction scheduling and other instruction cost heuristics. +// +//===----------------------------------------------------------------------===// + +def CycloneModel : SchedMachineModel { + let IssueWidth = 6; // 6 micro-ops are dispatched per cycle. + let MicroOpBufferSize = 192; // Based on the reorder buffer. + let LoadLatency = 4; // Optimistic load latency. + let MispredictPenalty = 16; // 14-19 cycles are typical. + let CompleteModel = 1; + + list UnsupportedFeatures = [HasSVE]; +} + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available on Cyclone. + +// 4 integer pipes +def CyUnitI : ProcResource<4> { + let BufferSize = 48; +} + +// 2 branch units: I[0..1] +def CyUnitB : ProcResource<2> { + let Super = CyUnitI; + let BufferSize = 24; +} + +// 1 indirect-branch unit: I[0] +def CyUnitBR : ProcResource<1> { + let Super = CyUnitB; +} + +// 2 shifter pipes: I[2..3] +// When an instruction consumes a CyUnitIS, it also consumes a CyUnitI +def CyUnitIS : ProcResource<2> { + let Super = CyUnitI; + let BufferSize = 24; +} + +// 1 mul pipe: I[0] +def CyUnitIM : ProcResource<1> { + let Super = CyUnitBR; + let BufferSize = 32; +} + +// 1 div pipe: I[1] +def CyUnitID : ProcResource<1> { + let Super = CyUnitB; + let BufferSize = 16; +} + +// 1 integer division unit. This is driven by the ID pipe, but only +// consumes the pipe for one cycle at issue and another cycle at writeback. +def CyUnitIntDiv : ProcResource<1>; + +// 2 ld/st pipes. +def CyUnitLS : ProcResource<2> { + let BufferSize = 28; +} + +// 3 fp/vector pipes. +def CyUnitV : ProcResource<3> { + let BufferSize = 48; +} +// 2 fp/vector arithmetic and multiply pipes: V[0-1] +def CyUnitVM : ProcResource<2> { + let Super = CyUnitV; + let BufferSize = 32; +} +// 1 fp/vector division/sqrt pipe: V[2] +def CyUnitVD : ProcResource<1> { + let Super = CyUnitV; + let BufferSize = 16; +} +// 1 fp compare pipe: V[0] +def CyUnitVC : ProcResource<1> { + let Super = CyUnitVM; + let BufferSize = 16; +} + +// 2 fp division/square-root units. These are driven by the VD pipe, +// but only consume the pipe for one cycle at issue and a cycle at writeback. +def CyUnitFloatDiv : ProcResource<2>; + +//===----------------------------------------------------------------------===// +// Define scheduler read/write resources and latency on Cyclone. +// This mirrors sections 7.7-7.9 of the Tuning Guide v1.0.1. + +let SchedModel = CycloneModel in { + +//--- +// 7.8.1. Moves +//--- + +// A single nop micro-op (uX). 
+def WriteX : SchedWriteRes<[]> { let Latency = 0; } + +// Move zero is a register rename (to machine register zero). +// The move is replaced by a single nop micro-op. +// MOVZ Rd, #0 +// AND Rd, Rzr, #imm +def WriteZPred : SchedPredicate<[{TII->isGPRZero(*MI)}]>; +def WriteImmZ : SchedWriteVariant<[ + SchedVar, + SchedVar]>; +def : InstRW<[WriteImmZ], (instrs MOVZWi,MOVZXi,ANDWri,ANDXri)>; + +// Move GPR is a register rename and single nop micro-op. +// ORR Xd, XZR, Xm +// ADD Xd, Xn, #0 +def WriteIMovPred : SchedPredicate<[{TII->isGPRCopy(*MI)}]>; +def WriteVMovPred : SchedPredicate<[{TII->isFPRCopy(*MI)}]>; +def WriteMov : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar]>; +def : InstRW<[WriteMov], (instrs COPY,ORRXrr,ADDXrr)>; + +// Move non-zero immediate is an integer ALU op. +// MOVN,MOVZ,MOVK +def : WriteRes; + +//--- +// 7.8.2-7.8.5. Arithmetic and Logical, Comparison, Conditional, +// Shifts and Bitfield Operations +//--- + +// ADR,ADRP +// ADD(S)ri,SUB(S)ri,AND(S)ri,EORri,ORRri +// ADD(S)rr,SUB(S)rr,AND(S)rr,BIC(S)rr,EONrr,EORrr,ORNrr,ORRrr +// ADC(S),SBC(S) +// Aliases: CMN, CMP, TST +// +// Conditional operations. +// CCMNi,CCMPi,CCMNr,CCMPr, +// CSEL,CSINC,CSINV,CSNEG +// +// Bit counting and reversal operations. +// CLS,CLZ,RBIT,REV,REV16,REV32 +def : WriteRes; + +// ADD with shifted register operand is a single micro-op that +// consumes a shift pipeline for two cycles. +// ADD(S)rs,SUB(S)rs,AND(S)rs,BIC(S)rs,EONrs,EORrs,ORNrs,ORRrs +// EXAMPLE: ADDrs Xn, Xm LSL #imm +def : WriteRes { + let Latency = 2; + let ResourceCycles = [2]; +} + +// ADD with extended register operand is the same as shifted reg operand. +// ADD(S)re,SUB(S)re +// EXAMPLE: ADDXre Xn, Xm, UXTB #1 +def : WriteRes { + let Latency = 2; + let ResourceCycles = [2]; +} + +// Variable shift and bitfield operations. +// ASRV,LSLV,LSRV,RORV,BFM,SBFM,UBFM +def : WriteRes; + +// EXTR Shifts a pair of registers and requires two micro-ops. +// The second micro-op is delayed, as modeled by ReadExtrHi. +// EXTR Xn, Xm, #imm +def : WriteRes { + let Latency = 2; + let NumMicroOps = 2; +} + +// EXTR's first register read is delayed by one cycle, effectively +// shortening its writer's latency. +// EXTR Xn, Xm, #imm +def : ReadAdvance; + +//--- +// 7.8.6. Multiplies +//--- + +// MUL/MNEG are aliases for MADD/MSUB. +// MADDW,MSUBW,SMADDL,SMSUBL,UMADDL,UMSUBL +def : WriteRes { + let Latency = 4; +} +// MADDX,MSUBX,SMULH,UMULH +def : WriteRes { + let Latency = 5; +} + +//--- +// 7.8.7. Divide +//--- + +// 32-bit divide takes 7-13 cycles. 10 cycles covers a 20-bit quotient. +// The ID pipe is consumed for 2 cycles: issue and writeback. +// SDIVW,UDIVW +def : WriteRes { + let Latency = 10; + let ResourceCycles = [2, 10]; +} +// 64-bit divide takes 7-21 cycles. 13 cycles covers a 32-bit quotient. +// The ID pipe is consumed for 2 cycles: issue and writeback. +// SDIVX,UDIVX +def : WriteRes { + let Latency = 13; + let ResourceCycles = [2, 13]; +} + +//--- +// 7.8.8,7.8.10. Load/Store, single element +//--- + +// Integer loads take 4 cycles and use one LS unit for one cycle. +def : WriteRes { + let Latency = 4; +} + +// Store-load forwarding is 4 cycles. +// +// Note: The store-exclusive sequence incorporates this +// latency. However, general heuristics should not model the +// dependence between a store and subsequent may-alias load because +// hardware speculation works. +def : WriteRes { + let Latency = 4; +} + +// Load from base address plus an optionally scaled register offset. 
+// Rt latency is latency WriteIS + WriteLD. +// EXAMPLE: LDR Xn, Xm [, lsl 3] +def CyWriteLDIdx : SchedWriteVariant<[ + SchedVar, // Load from scaled register. + SchedVar]>; // Load from register offset. +def : SchedAlias; // Map AArch64->Cyclone type. + +// EXAMPLE: STR Xn, Xm [, lsl 3] +def CyWriteSTIdx : SchedWriteVariant<[ + SchedVar, // Store to scaled register. + SchedVar]>; // Store to register offset. +def : SchedAlias; // Map AArch64->Cyclone type. + +// Read the (unshifted) base register Xn in the second micro-op one cycle later. +// EXAMPLE: LDR Xn, Xm [, lsl 3] +def ReadBaseRS : SchedReadAdvance<1>; +def CyReadAdrBase : SchedReadVariant<[ + SchedVar, // Read base reg after shifting offset. + SchedVar]>; // Read base reg with no shift. +def : SchedAlias; // Map AArch64->Cyclone type. + +//--- +// 7.8.9,7.8.11. Load/Store, paired +//--- + +// Address pre/post increment is a simple ALU op with one cycle latency. +def : WriteRes; + +// LDP high register write is fused with the load, but a nop micro-op remains. +def : WriteRes { + let Latency = 4; +} + +// STP is a vector op and store, except for QQ, which is just two stores. +def : SchedAlias; +def : InstRW<[WriteST, WriteST], (instrs STPQi)>; + +//--- +// 7.8.13. Branches +//--- + +// Branches take a single micro-op. +// The misprediction penalty is defined as a SchedMachineModel property. +def : WriteRes {let Latency = 0;} +def : WriteRes {let Latency = 0;} + +//--- +// 7.8.14. Never-issued Instructions, Barrier and Hint Operations +//--- + +// NOP,SEV,SEVL,WFE,WFI,YIELD +def : WriteRes {let Latency = 0;} +// ISB +def : InstRW<[WriteI], (instrs ISB)>; +// SLREX,DMB,DSB +def : WriteRes; + +// System instructions get an invalid latency because the latency of +// other operations across them is meaningless. +def : WriteRes {let Latency = -1;} + +//===----------------------------------------------------------------------===// +// 7.9 Vector Unit Instructions + +// Simple vector operations take 2 cycles. +def : WriteRes {let Latency = 2;} + +// Define some longer latency vector op types for Cyclone. +def CyWriteV3 : SchedWriteRes<[CyUnitV]> {let Latency = 3;} +def CyWriteV4 : SchedWriteRes<[CyUnitV]> {let Latency = 4;} +def CyWriteV5 : SchedWriteRes<[CyUnitV]> {let Latency = 5;} +def CyWriteV6 : SchedWriteRes<[CyUnitV]> {let Latency = 6;} + +// Simple floating-point operations take 2 cycles. +def : WriteRes {let Latency = 2;} + +//--- +// 7.9.1 Vector Moves +//--- + +// TODO: Add Cyclone-specific zero-cycle zeros. LLVM currently +// generates expensive int-float conversion instead: +// FMOVDi Dd, #0.0 +// FMOVv2f64ns Vd.2d, #0.0 + +// FMOVSi,FMOVDi +def : WriteRes {let Latency = 2;} + +// MOVI,MVNI are WriteV +// FMOVv2f32ns,FMOVv2f64ns,FMOVv4f32ns are WriteV + +// Move FPR is a register rename and single nop micro-op. +// ORR.16b Vd,Vn,Vn +// COPY is handled above in the WriteMov Variant. +def WriteVMov : SchedWriteVariant<[ + SchedVar, + SchedVar]>; +def : InstRW<[WriteVMov], (instrs ORRv16i8)>; + +// FMOVSr,FMOVDr are WriteF. + +// MOV V,V is a WriteV. + +// CPY D,V[x] is a WriteV + +// INS V[x],V[y] is a WriteV. 
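+
+// Illustrative sketch (editorial, not from the upstream file): each SchedVar
+// inside a SchedWriteVariant pairs a SchedPredicate with the write types to
+// use when that predicate matches; the first matching entry wins and
+// NoSchedPred is the catch-all. The SchedVar operands above were lost in
+// extraction, so the def below only illustrates the shape, assuming the
+// FPR-copy case costs a single nop micro-op (WriteX) and everything else a
+// regular vector op (WriteV). The name is hypothetical.
+def CyExampleMovSketch : SchedWriteVariant<[
+  SchedVar<WriteVMovPred, [WriteX]>, // Register rename: one nop micro-op.
+  SchedVar<NoSchedPred, [WriteV]>]>; // Otherwise a plain vector ALU op.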
+ +// FMOVWSr,FMOVXDr,FMOVXDHighr +def : WriteRes { + let Latency = 5; +} + +// FMOVSWr,FMOVDXr +def : InstRW<[WriteLD], (instrs FMOVSWr,FMOVDXr,FMOVDXHighr)>; + +// INS V[x],R +def CyWriteCopyToFPR : WriteSequence<[WriteVLD, WriteV]>; +def : InstRW<[CyWriteCopyToFPR], (instregex "INSv")>; + +// SMOV,UMOV R,V[x] +def CyWriteCopyToGPR : WriteSequence<[WriteLD, WriteI]>; +def : InstRW<[CyWriteCopyToGPR], (instregex "SMOVv","UMOVv")>; + +// DUP V,R +def : InstRW<[CyWriteCopyToFPR], (instregex "DUPv")>; + +// DUP V,V[x] is a WriteV. + +//--- +// 7.9.2 Integer Arithmetic, Logical, and Comparisons +//--- + +// BIC,ORR V,#imm are WriteV + +def : InstRW<[CyWriteV3], (instregex "ABSv")>; + +// MVN,NEG,NOT are WriteV + +def : InstRW<[CyWriteV3], (instregex "SQABSv","SQNEGv")>; + +// ADDP is a WriteV. +def CyWriteVADDLP : SchedWriteRes<[CyUnitV]> {let Latency = 2;} +def : InstRW<[CyWriteVADDLP], (instregex "SADDLPv","UADDLPv")>; + +def : InstRW<[CyWriteV3], + (instregex "ADDVv","SMAXVv","UMAXVv","SMINVv","UMINVv")>; + +def : InstRW<[CyWriteV3], (instregex "SADDLV","UADDLV")>; + +// ADD,SUB are WriteV + +// Forward declare. +def CyWriteVABD : SchedWriteRes<[CyUnitV]> {let Latency = 3;} + +// Add/Diff and accumulate uses the vector multiply unit. +def CyWriteVAccum : SchedWriteRes<[CyUnitVM]> {let Latency = 3;} +def CyReadVAccum : SchedReadAdvance<1, + [CyWriteVAccum, CyWriteVADDLP, CyWriteVABD]>; + +def : InstRW<[CyWriteVAccum, CyReadVAccum], + (instregex "SADALP","UADALP")>; + +def : InstRW<[CyWriteVAccum, CyReadVAccum], + (instregex "SABAv","UABAv","SABALv","UABALv")>; + +def : InstRW<[CyWriteV3], (instregex "SQADDv","SQSUBv","UQADDv","UQSUBv")>; + +def : InstRW<[CyWriteV3], (instregex "SUQADDv","USQADDv")>; + +def : InstRW<[CyWriteV4], (instregex "ADDHNv","RADDHNv", "RSUBHNv", "SUBHNv")>; + +// WriteV includes: +// AND,BIC,CMTST,EOR,ORN,ORR +// ADDP +// SHADD,SHSUB,SRHADD,UHADD,UHSUB,URHADD +// SADDL,SSUBL,UADDL,USUBL +// SADDW,SSUBW,UADDW,USUBW + +def : InstRW<[CyWriteV3], (instregex "CMEQv","CMGEv","CMGTv", + "CMLEv","CMLTv", + "CMHIv","CMHSv")>; + +def : InstRW<[CyWriteV3], (instregex "SMAXv","SMINv","UMAXv","UMINv", + "SMAXPv","SMINPv","UMAXPv","UMINPv")>; + +def : InstRW<[CyWriteVABD], (instregex "SABDv","UABDv", + "SABDLv","UABDLv")>; + +//--- +// 7.9.3 Floating Point Arithmetic and Comparisons +//--- + +// FABS,FNEG are WriteF + +def : InstRW<[CyWriteV4], (instrs FADDPv2i32p)>; +def : InstRW<[CyWriteV5], (instrs FADDPv2i64p)>; + +def : InstRW<[CyWriteV3], (instregex "FMAXPv2i","FMAXNMPv2i", + "FMINPv2i","FMINNMPv2i")>; + +def : InstRW<[CyWriteV4], (instregex "FMAXVv","FMAXNMVv","FMINVv","FMINNMVv")>; + +def : InstRW<[CyWriteV4], (instrs FADDSrr,FADDv2f32,FADDv4f32, + FSUBSrr,FSUBv2f32,FSUBv4f32, + FADDPv2f32,FADDPv4f32, + FABD32,FABDv2f32,FABDv4f32)>; +def : InstRW<[CyWriteV5], (instrs FADDDrr,FADDv2f64, + FSUBDrr,FSUBv2f64, + FADDPv2f64, + FABD64,FABDv2f64)>; + +def : InstRW<[CyWriteV3], (instregex "FCMEQ","FCMGT","FCMLE","FCMLT")>; + +def : InstRW<[CyWriteV3], (instregex "FACGE","FACGT", + "FMAXS","FMAXD","FMAXv", + "FMINS","FMIND","FMINv", + "FMAXNMS","FMAXNMD","FMAXNMv", + "FMINNMS","FMINNMD","FMINNMv", + "FMAXPv2f","FMAXPv4f", + "FMINPv2f","FMINPv4f", + "FMAXNMPv2f","FMAXNMPv4f", + "FMINNMPv2f","FMINNMPv4f")>; + +// FCMP,FCMPE,FCCMP,FCCMPE +def : WriteRes {let Latency = 4;} + +// FCSEL is a WriteF. 
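+
+// Illustrative sketch (editorial, not from the upstream file): SchedReadAdvance
+// models operand forwarding. A read defined as SchedReadAdvance<1, [W]> sees
+// the result of a producer of type W one cycle early, so a consumer behind a
+// 3-cycle producer observes an effective latency of 3 - 1 = 2 cycles; that is
+// the pattern CyReadVAccum uses above. The names below are hypothetical.
+def CyExampleProducer : SchedWriteRes<[CyUnitV]> { let Latency = 3; }
+def CyExampleFwdRead : SchedReadAdvance<1, [CyExampleProducer]>;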
+ +//--- +// 7.9.4 Shifts and Bitfield Operations +//--- + +// SHL is a WriteV + +def CyWriteVSHR : SchedWriteRes<[CyUnitV]> {let Latency = 2;} +def : InstRW<[CyWriteVSHR], (instregex "SSHRv","USHRv")>; + +def CyWriteVSRSHR : SchedWriteRes<[CyUnitV]> {let Latency = 3;} +def : InstRW<[CyWriteVSRSHR], (instregex "SRSHRv","URSHRv")>; + +// Shift and accumulate uses the vector multiply unit. +def CyWriteVShiftAcc : SchedWriteRes<[CyUnitVM]> {let Latency = 3;} +def CyReadVShiftAcc : SchedReadAdvance<1, + [CyWriteVShiftAcc, CyWriteVSHR, CyWriteVSRSHR]>; +def : InstRW<[CyWriteVShiftAcc, CyReadVShiftAcc], + (instregex "SRSRAv","SSRAv","URSRAv","USRAv")>; + +// SSHL,USHL are WriteV. + +def : InstRW<[CyWriteV3], (instregex "SRSHLv","URSHLv")>; + +// SQSHL,SQSHLU,UQSHL are WriteV. + +def : InstRW<[CyWriteV3], (instregex "SQRSHLv","UQRSHLv")>; + +// WriteV includes: +// SHLL,SSHLL,USHLL +// SLI,SRI +// BIF,BIT,BSL +// EXT +// CLS,CLZ,CNT,RBIT,REV16,REV32,REV64,XTN +// XTN2 + +def : InstRW<[CyWriteV4], + (instregex "RSHRNv","SHRNv", + "SQRSHRNv","SQRSHRUNv","SQSHRNv","SQSHRUNv", + "UQRSHRNv","UQSHRNv","SQXTNv","SQXTUNv","UQXTNv")>; + +//--- +// 7.9.5 Multiplication +//--- + +def CyWriteVMul : SchedWriteRes<[CyUnitVM]> { let Latency = 4;} +def : InstRW<[CyWriteVMul], (instregex "MULv","SMULLv","UMULLv", + "SQDMULLv","SQDMULHv","SQRDMULHv")>; + +// FMUL,FMULX,FNMUL default to WriteFMul. +def : WriteRes { let Latency = 4;} + +def CyWriteV64Mul : SchedWriteRes<[CyUnitVM]> { let Latency = 5;} +def : InstRW<[CyWriteV64Mul], (instrs FMULDrr,FMULv2f64,FMULv2i64_indexed, + FNMULDrr,FMULX64,FMULXv2f64,FMULXv2i64_indexed)>; + +def CyReadVMulAcc : SchedReadAdvance<1, [CyWriteVMul, CyWriteV64Mul]>; +def : InstRW<[CyWriteVMul, CyReadVMulAcc], + (instregex "MLA","MLS","SMLAL","SMLSL","UMLAL","UMLSL", + "SQDMLAL","SQDMLSL")>; + +def CyWriteSMul : SchedWriteRes<[CyUnitVM]> { let Latency = 8;} +def CyWriteDMul : SchedWriteRes<[CyUnitVM]> { let Latency = 10;} +def CyReadSMul : SchedReadAdvance<4, [CyWriteSMul]>; +def CyReadDMul : SchedReadAdvance<5, [CyWriteDMul]>; + +def : InstRW<[CyWriteSMul, CyReadSMul], + (instrs FMADDSrrr,FMSUBSrrr,FNMADDSrrr,FNMSUBSrrr, + FMLAv2f32,FMLAv4f32, + FMLAv1i32_indexed,FMLAv1i64_indexed,FMLAv2i32_indexed)>; +def : InstRW<[CyWriteDMul, CyReadDMul], + (instrs FMADDDrrr,FMSUBDrrr,FNMADDDrrr,FNMSUBDrrr, + FMLAv2f64,FMLAv2i64_indexed, + FMLSv2f64,FMLSv2i64_indexed)>; + +def CyWritePMUL : SchedWriteRes<[CyUnitVD]> { let Latency = 3; } +def : InstRW<[CyWritePMUL], (instregex "PMULv", "PMULLv")>; + +//--- +// 7.9.6 Divide and Square Root +//--- + +// FDIV,FSQRT +// TODO: Add 64-bit variant with 19 cycle latency. +// TODO: Specialize FSQRT for longer latency. +def : WriteRes { + let Latency = 17; + let ResourceCycles = [2, 17]; +} + +def : InstRW<[CyWriteV4], (instregex "FRECPEv","FRECPXv","URECPEv","URSQRTEv")>; + +def WriteFRSQRTE : SchedWriteRes<[CyUnitVM]> { let Latency = 4; } +def : InstRW<[WriteFRSQRTE], (instregex "FRSQRTEv")>; + +def WriteFRECPS : SchedWriteRes<[CyUnitVM]> { let Latency = 8; } +def WriteFRSQRTS : SchedWriteRes<[CyUnitVM]> { let Latency = 10; } +def : InstRW<[WriteFRECPS], (instregex "FRECPSv")>; +def : InstRW<[WriteFRSQRTS], (instregex "FRSQRTSv")>; + +//--- +// 7.9.7 Integer-FP Conversions +//--- + +// FCVT lengthen f16/s32 +def : InstRW<[WriteV], (instrs FCVTSHr,FCVTDHr,FCVTDSr)>; + +// FCVT,FCVTN,FCVTXN +// SCVTF,UCVTF V,V +// FRINT(AIMNPXZ) V,V +def : WriteRes {let Latency = 4;} + +// SCVT/UCVT S/D, Rd = VLD5+V4: 9 cycles. 
+def CyWriteCvtToFPR : WriteSequence<[WriteVLD, CyWriteV4]>; +def : InstRW<[CyWriteCopyToFPR], (instregex "FCVT[AMNPZ][SU][SU][WX][SD]r")>; + +// FCVT Rd, S/D = V6+LD4: 10 cycles +def CyWriteCvtToGPR : WriteSequence<[CyWriteV6, WriteLD]>; +def : InstRW<[CyWriteCvtToGPR], (instregex "[SU]CVTF[SU][WX][SD]r")>; + +// FCVTL is a WriteV + +//--- +// 7.9.8-7.9.10 Cryptography, Data Transposition, Table Lookup +//--- + +def CyWriteCrypto2 : SchedWriteRes<[CyUnitVD]> {let Latency = 2;} +def : InstRW<[CyWriteCrypto2], (instrs AESIMCrr, AESMCrr, SHA1Hrr, + AESDrr, AESErr, SHA1SU1rr, SHA256SU0rr, + SHA1SU0rrr)>; + +def CyWriteCrypto3 : SchedWriteRes<[CyUnitVD]> {let Latency = 3;} +def : InstRW<[CyWriteCrypto3], (instrs SHA256SU1rrr)>; + +def CyWriteCrypto6 : SchedWriteRes<[CyUnitVD]> {let Latency = 6;} +def : InstRW<[CyWriteCrypto6], (instrs SHA1Crrr, SHA1Mrrr, SHA1Prrr, + SHA256Hrrr,SHA256H2rrr)>; + +// TRN,UZP,ZUP are WriteV. + +// TBL,TBX are WriteV. + +//--- +// 7.9.11-7.9.14 Load/Store, single element and paired +//--- + +// Loading into the vector unit takes 5 cycles vs 4 for integer loads. +def : WriteRes { + let Latency = 5; +} + +// Store-load forwarding is 4 cycles. +def : WriteRes { + let Latency = 4; +} + +// WriteVLDPair/VSTPair sequences are expanded by the target description. + +//--- +// 7.9.15 Load, element operations +//--- + +// Only the first WriteVLD and WriteAdr for writeback matches def operands. +// Subsequent WriteVLDs consume resources. Since all loaded values have the +// same latency, this is acceptable. + +// Vd is read 5 cycles after issuing the vector load. +def : ReadAdvance; + +def : InstRW<[WriteVLD], + (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLD, WriteAdr], + (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>; + +// Register writes from the load's high half are fused micro-ops. 
+def : InstRW<[WriteVLD], + (instregex "LD1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[WriteVLD, WriteAdr], + (instregex "LD1Twov(8b|4h|2s|1d)_POST")>; +def : InstRW<[WriteVLD, WriteVLD], + (instregex "LD1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLD, WriteAdr, WriteVLD], + (instregex "LD1Twov(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVLD, WriteVLD], + (instregex "LD1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[WriteVLD, WriteAdr, WriteVLD], + (instregex "LD1Threev(8b|4h|2s|1d)_POST")>; +def : InstRW<[WriteVLD, WriteVLD, WriteVLD], + (instregex "LD1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLD, WriteAdr, WriteVLD, WriteVLD], + (instregex "LD1Threev(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVLD, WriteVLD], + (instregex "LD1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[WriteVLD, WriteAdr, WriteVLD], + (instregex "LD1Fourv(8b|4h|2s|1d)_POST")>; +def : InstRW<[WriteVLD, WriteVLD, WriteVLD, WriteVLD], + (instregex "LD1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLD, WriteAdr, WriteVLD, WriteVLD, WriteVLD], + (instregex "LD1Fourv(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVLDShuffle, ReadVLD], + (instregex "LD1i(8|16|32)$")>; +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr], + (instregex "LD1i(8|16|32)_POST")>; + +def : InstRW<[WriteVLDShuffle, ReadVLD], (instrs LD1i64)>; +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr],(instrs LD1i64_POST)>; + +def : InstRW<[WriteVLDShuffle], + (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLDShuffle, WriteAdr], + (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +def : InstRW<[WriteVLDShuffle, WriteV], + (instregex "LD2Twov(8b|4h|2s)$")>; +def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV], + (instregex "LD2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[WriteVLDShuffle, WriteVLDShuffle], + (instregex "LD2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle], + (instregex "LD2Twov(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV], + (instregex "LD2i(8|16|32)$")>; +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV], + (instregex "LD2i(8|16|32)_POST")>; +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV], + (instregex "LD2i64$")>; +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV], + (instregex "LD2i64_POST")>; + +def : InstRW<[WriteVLDShuffle, WriteV], + (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV], + (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV], + (instregex "LD3Threev(8b|4h|2s)$")>; +def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV], + (instregex "LD3Threev(8b|4h|2s)_POST")>; +def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVLDShuffle], + (instregex "LD3Threev(16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVLDShuffle], + (instregex "LD3Threev(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV], + (instregex "LD3i(8|16|32)$")>; +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV], + (instregex "LD3i(8|16|32)_POST")>; + +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV], + (instregex "LD3i64$")>; +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV], + (instregex "LD3i64_POST")>; + +def : InstRW<[WriteVLDShuffle, WriteV, WriteV], + (instregex "LD3Rv(8b|4h|2s|16b|8h|4s)$")>; +def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV], + (instregex "LD3Rv(8b|4h|2s|16b|8h|4s)_POST")>; + +def : InstRW<[WriteVLDShuffle, 
WriteVLDShuffle, WriteV], + (instrs LD3Rv1d,LD3Rv2d)>; +def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV], + (instrs LD3Rv1d_POST,LD3Rv2d_POST)>; + +def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV], + (instregex "LD4Fourv(8b|4h|2s)$")>; +def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV], + (instregex "LD4Fourv(8b|4h|2s)_POST")>; +def : InstRW<[WriteVLDPairShuffle, WriteVLDPairShuffle, + WriteVLDPairShuffle, WriteVLDPairShuffle], + (instregex "LD4Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLDPairShuffle, WriteAdr, WriteVLDPairShuffle, + WriteVLDPairShuffle, WriteVLDPairShuffle], + (instregex "LD4Fourv(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV, WriteV], + (instregex "LD4i(8|16|32)$")>; +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV, WriteV], + (instregex "LD4i(8|16|32)_POST")>; + + +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV, WriteV], + (instrs LD4i64)>; +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV], + (instrs LD4i64_POST)>; + +def : InstRW<[WriteVLDShuffle, WriteV, WriteV, WriteV], + (instregex "LD4Rv(8b|4h|2s|16b|8h|4s)$")>; +def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV, WriteV], + (instregex "LD4Rv(8b|4h|2s|16b|8h|4s)_POST")>; + +def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV], + (instrs LD4Rv1d,LD4Rv2d)>; +def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV], + (instrs LD4Rv1d_POST,LD4Rv2d_POST)>; + +//--- +// 7.9.16 Store, element operations +//--- + +// Only the WriteAdr for writeback matches a def operands. +// Subsequent WriteVLDs only consume resources. + +def : InstRW<[WriteVST], + (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[WriteAdr, WriteVST], + (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVSTShuffle], + (instregex "ST1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[WriteAdr, WriteVSTShuffle], + (instregex "ST1Twov(8b|4h|2s|1d)_POST")>; +def : InstRW<[WriteVST, WriteVST], + (instregex "ST1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[WriteAdr, WriteVST, WriteVST], + (instregex "ST1Twov(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVSTShuffle, WriteVST], + (instregex "ST1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVST], + (instregex "ST1Threev(8b|4h|2s|1d)_POST")>; +def : InstRW<[WriteVST, WriteVST, WriteVST], + (instregex "ST1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[WriteAdr, WriteVST, WriteVST, WriteVST], + (instregex "ST1Threev(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], + (instregex "ST1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], + (instregex "ST1Fourv(8b|4h|2s|1d)_POST")>; +def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST], + (instregex "ST1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[WriteAdr, WriteVST, WriteVST, WriteVST, WriteVST], + (instregex "ST1Fourv(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVSTShuffle], (instregex "ST1i(8|16|32)$")>; +def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST1i(8|16|32)_POST")>; + +def : InstRW<[WriteVSTShuffle], (instrs ST1i64)>; +def : InstRW<[WriteAdr, WriteVSTShuffle], (instrs ST1i64_POST)>; + +def : InstRW<[WriteVSTShuffle], + (instregex "ST2Twov(8b|4h|2s)$")>; +def : InstRW<[WriteAdr, WriteVSTShuffle], + (instregex "ST2Twov(8b|4h|2s)_POST")>; +def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], + (instregex "ST2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[WriteAdr, WriteVSTShuffle, 
WriteVSTShuffle], + (instregex "ST2Twov(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVSTShuffle], (instregex "ST2i(8|16|32)$")>; +def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST2i(8|16|32)_POST")>; +def : InstRW<[WriteVSTShuffle], (instrs ST2i64)>; +def : InstRW<[WriteAdr, WriteVSTShuffle], (instrs ST2i64_POST)>; + +def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], + (instregex "ST3Threev(8b|4h|2s)$")>; +def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], + (instregex "ST3Threev(8b|4h|2s)_POST")>; +def : InstRW<[WriteVSTShuffle, WriteVSTShuffle, WriteVSTShuffle], + (instregex "ST3Threev(16b|8h|4s|2d)$")>; +def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle, WriteVSTShuffle], + (instregex "ST3Threev(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVSTShuffle], (instregex "ST3i(8|16|32)$")>; +def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST3i(8|16|32)_POST")>; + +def :InstRW<[WriteVSTShuffle, WriteVSTShuffle], (instrs ST3i64)>; +def :InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], (instrs ST3i64_POST)>; + +def : InstRW<[WriteVSTPairShuffle, WriteVSTPairShuffle], + (instregex "ST4Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[WriteAdr, WriteVSTPairShuffle, WriteVSTPairShuffle], + (instregex "ST4Fourv(8b|4h|2s|1d)_POST")>; +def : InstRW<[WriteVSTPairShuffle, WriteVSTPairShuffle, + WriteVSTPairShuffle, WriteVSTPairShuffle], + (instregex "ST4Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[WriteAdr, WriteVSTPairShuffle, WriteVSTPairShuffle, + WriteVSTPairShuffle, WriteVSTPairShuffle], + (instregex "ST4Fourv(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVSTPairShuffle], (instregex "ST4i(8|16|32)$")>; +def : InstRW<[WriteAdr, WriteVSTPairShuffle], (instregex "ST4i(8|16|32)_POST")>; + +def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], (instrs ST4i64)>; +def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],(instrs ST4i64_POST)>; + +// Atomic operations are not supported. +def : WriteRes { let Unsupported = 1; } + +//--- +// Unused SchedRead types +//--- + +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +} // SchedModel = CycloneModel diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedExynosM1.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedExynosM1.td new file mode 100644 index 000000000..ecc68aed1 --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedExynosM1.td @@ -0,0 +1,847 @@ +//=- AArch64SchedExynosM1.td - Samsung Exynos M1 Sched Defs --*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for the Samsung Exynos M1 to support +// instruction scheduling and other instruction cost heuristics. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// The Exynos-M1 is a traditional superscalar microprocessor with a +// 4-wide in-order stage for decode and dispatch and a wider issue stage. +// The execution units and loads and stores are out-of-order. + +def ExynosM1Model : SchedMachineModel { + let IssueWidth = 4; // Up to 4 uops per cycle. + let MicroOpBufferSize = 96; // ROB size. + let LoopMicroOpBufferSize = 24; // Based on the instruction queue size. 
+ let LoadLatency = 4; // Optimistic load cases. + let MispredictPenalty = 14; // Minimum branch misprediction penalty. + let CompleteModel = 1; // Use the default model otherwise. + + list UnsupportedFeatures = [HasSVE]; +} + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available on the Exynos-M1, +// which has 9 pipelines, each with its own queue with out-of-order dispatch. + +let SchedModel = ExynosM1Model in { + +def M1UnitA : ProcResource<2>; // Simple integer +def M1UnitC : ProcResource<1>; // Simple and complex integer +def M1UnitD : ProcResource<1>; // Integer division (inside C, serialized) +def M1UnitB : ProcResource<2>; // Branch +def M1UnitL : ProcResource<1>; // Load +def M1UnitS : ProcResource<1>; // Store +def M1PipeF0 : ProcResource<1>; // FP #0 +let Super = M1PipeF0 in { + def M1UnitFMAC : ProcResource<1>; // FP multiplication + def M1UnitNAL0 : ProcResource<1>; // Simple vector + def M1UnitNMISC : ProcResource<1>; // Miscellanea + def M1UnitFCVT : ProcResource<1>; // FP conversion + def M1UnitNCRYPT : ProcResource<1>; // Cryptographic +} +def M1PipeF1 : ProcResource<1>; // FP #1 +let Super = M1PipeF1 in { + def M1UnitFADD : ProcResource<1>; // Simple FP + def M1UnitNAL1 : ProcResource<1>; // Simple vector + def M1UnitFVAR : ProcResource<1>; // FP division & square root (serialized) + def M1UnitFST : ProcResource<1>; // FP store +} + +def M1UnitALU : ProcResGroup<[M1UnitA, + M1UnitC]>; // All integer +def M1UnitNALU : ProcResGroup<[M1UnitNAL0, + M1UnitNAL1]>; // All simple vector + +//===----------------------------------------------------------------------===// +// Predicates. + +def M1BranchLinkFastPred : SchedPredicate<[{MI->getOpcode() == AArch64::BLR && + MI->getOperand(0).getReg() != AArch64::LR}]>; +def M1ShiftLeftFastPred : SchedPredicate<[{TII->isExynosShiftLeftFast(*MI)}]>; + +//===----------------------------------------------------------------------===// +// Coarse scheduling model. 
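+
+// Editorial note (not from the upstream file): every M1Write* def below is a
+// SchedWriteRes naming the pipelines it occupies, the result latency, and the
+// number of micro-ops it decodes into. A hypothetical two-micro-op write that
+// uses a load pipe followed by a simple-integer pipe could be spelled:
+def M1WriteExampleSketch : SchedWriteRes<[M1UnitL,
+                                          M1UnitA]> { let Latency = 5;
+                                                      let NumMicroOps = 2; }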
+ +def M1WriteA1 : SchedWriteRes<[M1UnitALU]> { let Latency = 1; } +def M1WriteA2 : SchedWriteRes<[M1UnitALU]> { let Latency = 2; } +def M1WriteAA : SchedWriteRes<[M1UnitALU]> { let Latency = 2; + let ResourceCycles = [2]; } +def M1WriteAB : SchedWriteRes<[M1UnitALU, + M1UnitC]> { let Latency = 1; + let NumMicroOps = 2; } +def M1WriteAC : SchedWriteRes<[M1UnitALU, + M1UnitALU, + M1UnitC]> { let Latency = 2; + let NumMicroOps = 3; } +def M1WriteAD : SchedWriteRes<[M1UnitALU, + M1UnitC]> { let Latency = 2; + let NumMicroOps = 2; } +def M1WriteAX : SchedWriteVariant<[SchedVar, + SchedVar]>; +def M1WriteC1 : SchedWriteRes<[M1UnitC]> { let Latency = 1; } +def M1WriteC2 : SchedWriteRes<[M1UnitC]> { let Latency = 2; } + +def M1WriteB1 : SchedWriteRes<[M1UnitB]> { let Latency = 1; } +def M1WriteBX : SchedWriteVariant<[SchedVar, + SchedVar]>; + +def M1WriteL5 : SchedWriteRes<[M1UnitL]> { let Latency = 5; } +def M1WriteL6 : SchedWriteRes<[M1UnitL]> { let Latency = 6; } +def M1WriteLA : SchedWriteRes<[M1UnitL]> { let Latency = 6; + let ResourceCycles = [2]; } +def M1WriteLB : SchedWriteRes<[M1UnitL, + M1UnitA]> { let Latency = 4; + let NumMicroOps = 2; } +def M1WriteLC : SchedWriteRes<[M1UnitL, + M1UnitA]> { let Latency = 5; + let NumMicroOps = 2; } +def M1WriteLD : SchedWriteRes<[M1UnitL, + M1UnitA]> { let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [2, 1]; } +def M1WriteLH : SchedWriteRes<[]> { let Latency = 5; + let NumMicroOps = 0; } +def M1WriteLX : SchedWriteVariant<[SchedVar, + SchedVar]>; +def M1WriteLY : SchedWriteVariant<[SchedVar, + SchedVar]>; + +def M1WriteS1 : SchedWriteRes<[M1UnitS]> { let Latency = 1; } +def M1WriteS3 : SchedWriteRes<[M1UnitS]> { let Latency = 3; } +def M1WriteS4 : SchedWriteRes<[M1UnitS]> { let Latency = 4; } +def M1WriteSA : SchedWriteRes<[M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST]> { let Latency = 1; + let NumMicroOps = 2; } +def M1WriteSB : SchedWriteRes<[M1UnitS, + M1UnitFST, + M1UnitA]> { let Latency = 3; + let NumMicroOps = 2; } +def M1WriteSC : SchedWriteRes<[M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST, + M1UnitA]> { let Latency = 3; + let NumMicroOps = 3; } +def M1WriteSD : SchedWriteRes<[M1UnitS, + M1UnitFST, + M1UnitA]> { let Latency = 1; + let NumMicroOps = 2; } +def M1WriteSE : SchedWriteRes<[M1UnitS, + M1UnitA]> { let Latency = 2; + let NumMicroOps = 2; } +def M1WriteSX : SchedWriteVariant<[SchedVar, + SchedVar]>; +def M1WriteSY : SchedWriteVariant<[SchedVar, + SchedVar]>; + +def M1ReadAdrBase : SchedReadVariant<[SchedVar, + SchedVar]>; + +// Branch instructions. +def : WriteRes { let Latency = 0; } +def : WriteRes { let Latency = 1; } + +// Arithmetic and logical integer instructions. +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } + +// Move instructions. +def : WriteRes { let Latency = 1; } + +// Divide and multiply instructions. +def : WriteRes { let Latency = 13; + let ResourceCycles = [1, 13]; } +def : WriteRes { let Latency = 21; + let ResourceCycles = [1, 21]; } +// TODO: Long multiplication take 5 cycles and also the ALU. +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 4; + let ResourceCycles = [2]; } + +// Miscellaneous instructions. +def : WriteRes { let Latency = 2; + let NumMicroOps = 2; } + +// Addressing modes. +def : WriteRes { let Latency = 1; + let NumMicroOps = 0; } +def : SchedAlias; + +// Load instructions. 
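+
+// Editorial note (not from the upstream file): a write type with
+// NumMicroOps = 0 carries a result latency but consumes no issue slot or
+// pipeline; M1WriteLH above has that shape and is paired below with the
+// second destination register of load-pair instructions. Hypothetical name,
+// same shape:
+def M1WriteExampleFreeHi : SchedWriteRes<[]> { let Latency = 4;
+                                               let NumMicroOps = 0; }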
+def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 4; + let NumMicroOps = 0; } +def : SchedAlias; + +// Store instructions. +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } +def : SchedAlias; + +// FP data instructions. +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 15; + let ResourceCycles = [15]; } +def : WriteRes { let Latency = 4; } + +// FP miscellaneous instructions. +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 4; } + +// FP load instructions. +def : WriteRes { let Latency = 5; } + +// FP store instructions. +def : WriteRes { let Latency = 1; + let NumMicroOps = 1; } + +// ASIMD FP instructions. +def : WriteRes { let Latency = 3; } + +// Other miscellaneous instructions. +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } + +//===----------------------------------------------------------------------===// +// Fast forwarding. + +// TODO: Add FP register forwarding rules. +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +// TODO: The forwarding for WriteIM32 saves actually 2 cycles. +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +//===----------------------------------------------------------------------===// +// Finer scheduling model. + +def M1WriteNEONA : SchedWriteRes<[M1UnitNALU, + M1UnitNALU, + M1UnitFADD]> { let Latency = 9; + let NumMicroOps = 3; } +def M1WriteNEONB : SchedWriteRes<[M1UnitNALU, + M1UnitFST]> { let Latency = 5; + let NumMicroOps = 2;} +def M1WriteNEONC : SchedWriteRes<[M1UnitNALU, + M1UnitFST]> { let Latency = 6; + let NumMicroOps = 2; } +def M1WriteNEOND : SchedWriteRes<[M1UnitNALU, + M1UnitFST, + M1UnitL]> { let Latency = 10; + let NumMicroOps = 3; } +def M1WriteNEONE : SchedWriteRes<[M1UnitFCVT, + M1UnitFST]> { let Latency = 8; + let NumMicroOps = 2; } +def M1WriteNEONF : SchedWriteRes<[M1UnitFCVT, + M1UnitFST, + M1UnitL]> { let Latency = 13; + let NumMicroOps = 3; } +def M1WriteNEONG : SchedWriteRes<[M1UnitNMISC, + M1UnitFST]> { let Latency = 6; + let NumMicroOps = 2; } +def M1WriteNEONH : SchedWriteRes<[M1UnitNALU, + M1UnitFST]> { let Latency = 3; + let NumMicroOps = 2; } +def M1WriteNEONI : SchedWriteRes<[M1UnitFST, + M1UnitL]> { let Latency = 9; + let NumMicroOps = 2; } +def M1WriteNEONJ : SchedWriteRes<[M1UnitNMISC, + M1UnitFMAC]> { let Latency = 6; + let NumMicroOps = 2; } +def M1WriteNEONK : SchedWriteRes<[M1UnitNMISC, + M1UnitFMAC]> { let Latency = 7; + let NumMicroOps = 2; } +def M1WriteNEONL : SchedWriteRes<[M1UnitNALU]> { let Latency = 2; + let ResourceCycles = [2]; } +def M1WriteFADD3 : SchedWriteRes<[M1UnitFADD]> { let Latency = 3; } +def M1WriteFCVT3 : SchedWriteRes<[M1UnitFCVT]> { let Latency = 3; } +def M1WriteFCVT4 : SchedWriteRes<[M1UnitFCVT]> { let Latency = 4; } +def M1WriteFMAC4 : SchedWriteRes<[M1UnitFMAC]> { let Latency = 4; } +def M1WriteFMAC5 : SchedWriteRes<[M1UnitFMAC]> { let Latency = 5; } +// TODO +def M1WriteFVAR15 : SchedWriteRes<[M1UnitFVAR]> { let Latency = 15; + let ResourceCycles = [15]; } +def M1WriteFVAR23 : SchedWriteRes<[M1UnitFVAR]> { let Latency = 23; + let ResourceCycles = [23]; } +def M1WriteNALU1 : SchedWriteRes<[M1UnitNALU]> { let Latency = 1; } +def M1WriteNALU2 : SchedWriteRes<[M1UnitNALU]> { let Latency = 2; } +def M1WriteNAL11 : 
SchedWriteRes<[M1UnitNAL1]> { let Latency = 1; } +def M1WriteNAL12 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 2; } +def M1WriteNAL13 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 3; } +def M1WriteNCRYPT1 : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; } +def M1WriteNCRYPT5 : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 5; } +def M1WriteNMISC1 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 1; } +def M1WriteNMISC2 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 2; } +def M1WriteNMISC3 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 3; } +def M1WriteNMISC4 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 4; } +def M1WriteTB : SchedWriteRes<[M1UnitC, + M1UnitALU]> { let Latency = 2; + let NumMicroOps = 2; } +def M1WriteVLDA : SchedWriteRes<[M1UnitL, + M1UnitL]> { let Latency = 6; + let NumMicroOps = 2; } +def M1WriteVLDB : SchedWriteRes<[M1UnitL, + M1UnitL, + M1UnitL]> { let Latency = 7; + let NumMicroOps = 3; } +def M1WriteVLDC : SchedWriteRes<[M1UnitL, + M1UnitL, + M1UnitL, + M1UnitL]> { let Latency = 8; + let NumMicroOps = 4; } +def M1WriteVLDD : SchedWriteRes<[M1UnitL, + M1UnitNALU]> { let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [2, 1]; } +def M1WriteVLDE : SchedWriteRes<[M1UnitL, + M1UnitNALU]> { let Latency = 6; + let NumMicroOps = 2; } +def M1WriteVLDF : SchedWriteRes<[M1UnitL, + M1UnitL]> { let Latency = 10; + let NumMicroOps = 2; + let ResourceCycles = [1, 1]; } +def M1WriteVLDG : SchedWriteRes<[M1UnitL, + M1UnitNALU, + M1UnitNALU]> { let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [2, 1, 1]; } +def M1WriteVLDH : SchedWriteRes<[M1UnitL, + M1UnitNALU, + M1UnitNALU]> { let Latency = 6; + let NumMicroOps = 3; } +def M1WriteVLDI : SchedWriteRes<[M1UnitL, + M1UnitL, + M1UnitL]> { let Latency = 12; + let NumMicroOps = 3; + let ResourceCycles = [2, 2, 2]; } +def M1WriteVLDJ : SchedWriteRes<[M1UnitL, + M1UnitNALU, + M1UnitNALU, + M1UnitNALU]> { let Latency = 9; + let NumMicroOps = 4; + let ResourceCycles = [2, 1, 1, 1]; } +def M1WriteVLDK : SchedWriteRes<[M1UnitL, + M1UnitNALU, + M1UnitNALU, + M1UnitNALU, + M1UnitNALU]> { let Latency = 9; + let NumMicroOps = 5; + let ResourceCycles = [2, 1, 1, 1, 1]; } +def M1WriteVLDL : SchedWriteRes<[M1UnitL, + M1UnitNALU, + M1UnitNALU, + M1UnitL, + M1UnitNALU]> { let Latency = 7; + let NumMicroOps = 5; + let ResourceCycles = [1, 1, 1, 1, 1]; } +def M1WriteVLDM : SchedWriteRes<[M1UnitL, + M1UnitNALU, + M1UnitNALU, + M1UnitL, + M1UnitNALU, + M1UnitNALU]> { let Latency = 7; + let NumMicroOps = 6; + let ResourceCycles = [1, 1, 1, 1, 1, 1]; } +def M1WriteVLDN : SchedWriteRes<[M1UnitL, + M1UnitL, + M1UnitL, + M1UnitL]> { let Latency = 14; + let NumMicroOps = 4; + let ResourceCycles = [2, 1, 2, 1]; } +def M1WriteVSTA : WriteSequence<[WriteVST], 2>; +def M1WriteVSTB : WriteSequence<[WriteVST], 3>; +def M1WriteVSTC : WriteSequence<[WriteVST], 4>; +def M1WriteVSTD : SchedWriteRes<[M1UnitS, + M1UnitFST, + M1UnitFST]> { let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [7, 1, 1]; } +def M1WriteVSTE : SchedWriteRes<[M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST, + M1UnitFST]> { let Latency = 8; + let NumMicroOps = 3; + let ResourceCycles = [7, 1, 1, 1, 1]; } +def M1WriteVSTF : SchedWriteRes<[M1UnitNALU, + M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST, + M1UnitFST, + M1UnitFST]> { let Latency = 15; + let NumMicroOps = 5; + let ResourceCycles = [1, 7, 1, 7, 1, 1, 1]; } +def M1WriteVSTG : SchedWriteRes<[M1UnitNALU, + M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST, + M1UnitFST, + M1UnitFST]> { let 
Latency = 16; + let NumMicroOps = 6; + let ResourceCycles = [1, 7, 1, 7, 1, 1, 1, 1, 1]; } +def M1WriteVSTH : SchedWriteRes<[M1UnitNALU, + M1UnitS, + M1UnitFST, + M1UnitFST, + M1UnitFST]> { let Latency = 14; + let NumMicroOps = 4; + let ResourceCycles = [1, 7, 1, 7, 1]; } +def M1WriteVSTI : SchedWriteRes<[M1UnitNALU, + M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST, + M1UnitFST, + M1UnitFST]> { let Latency = 17; + let NumMicroOps = 7; + let ResourceCycles = [1, 7, 1, 7, 1, 1, 1, 1, 1, 1, 1]; } + +// Branch instructions +def : InstRW<[M1WriteB1], (instrs Bcc)>; +def : InstRW<[M1WriteA1], (instrs BL)>; +def : InstRW<[M1WriteBX], (instrs BLR)>; +def : InstRW<[M1WriteC1], (instregex "^CBN?Z[WX]")>; +def : InstRW<[M1WriteAD], (instregex "^TBN?Z[WX]")>; + +// Arithmetic and logical integer instructions. +def : InstRW<[M1WriteA1], (instrs COPY)>; +def : InstRW<[M1WriteAX], (instregex ".+r[sx](64)?$")>; + +// Divide and multiply instructions. + +// Miscellaneous instructions. + +// Load instructions. +def : InstRW<[M1WriteLB, + WriteLDHi, + WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>; +def : InstRW<[M1WriteLX, + ReadAdrBase], (instregex "^PRFMro[WX]")>; + +// Store instructions. + +// FP data instructions. +def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)[DS]r")>; +def : InstRW<[M1WriteFADD3], (instregex "^F(ADD|SUB)[DS]rr")>; +def : InstRW<[M1WriteNEONG], (instregex "^FCCMPE?[DS]rr")>; +def : InstRW<[M1WriteNMISC4], (instregex "^FCMPE?[DS]r")>; +def : InstRW<[M1WriteFVAR15], (instrs FDIVSrr)>; +def : InstRW<[M1WriteFVAR23], (instrs FDIVDrr)>; +def : InstRW<[M1WriteNMISC2], (instregex "^F(MAX|MIN).+rr")>; +def : InstRW<[M1WriteFMAC4], (instregex "^FN?MUL[DS]rr")>; +def : InstRW<[M1WriteFMAC5], (instregex "^FN?M(ADD|SUB)[DS]rrr")>; +def : InstRW<[M1WriteFCVT3], (instregex "^FRINT.+r")>; +def : InstRW<[M1WriteNEONH], (instregex "^FCSEL[DS]rrr")>; +def : InstRW<[M1WriteFVAR15], (instrs FSQRTSr)>; +def : InstRW<[M1WriteFVAR23], (instrs FSQRTDr)>; + +// FP miscellaneous instructions. +def : InstRW<[M1WriteFCVT3], (instregex "^FCVT[DS][DS]r")>; +def : InstRW<[M1WriteNEONF], (instregex "^[FSU]CVT[AMNPZ][SU](_Int)?[SU]?[XW]?[DS]?[rds]i?")>; +def : InstRW<[M1WriteNEONE], (instregex "^[SU]CVTF[SU]")>; +def : InstRW<[M1WriteNALU1], (instregex "^FMOV[DS][ir]")>; +def : InstRW<[M1WriteFCVT4], (instregex "^[FU](RECP|RSQRT)Ev1")>; +def : InstRW<[M1WriteNMISC1], (instregex "^FRECPXv1")>; +def : InstRW<[M1WriteFMAC5], (instregex "^F(RECP|RSQRT)S(16|32|64)")>; +def : InstRW<[M1WriteS4], (instregex "^FMOV[WX][DS](High)?r")>; +def : InstRW<[M1WriteNEONI], (instregex "^FMOV[DS][WX](High)?r")>; + +// FP load instructions. +def : InstRW<[WriteVLD], (instregex "^LDR[DSQ]l")>; +def : InstRW<[WriteVLD], (instregex "^LDUR[BDHSQ]i")>; +def : InstRW<[WriteVLD, + WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>; +def : InstRW<[WriteVLD], (instregex "^LDR[BDHSQ]ui")>; +def : InstRW<[M1WriteLY, + ReadAdrBase], (instregex "^LDR[BDHS]ro[WX]")>; +def : InstRW<[M1WriteLD, + ReadAdrBase], (instregex "^LDRQro[WX]")>; +def : InstRW<[WriteVLD, + M1WriteLH], (instregex "^LDN?P[DS]i")>; +def : InstRW<[M1WriteLA, + M1WriteLH], (instregex "^LDN?PQi")>; +def : InstRW<[M1WriteLC, + M1WriteLH, + WriteAdr], (instregex "^LDP[DS](post|pre)")>; +def : InstRW<[M1WriteLD, + M1WriteLH, + WriteAdr], (instregex "^LDPQ(post|pre)")>; + +// FP store instructions. 
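+
+// Editorial note (not from the upstream file): ResourceCycles maps entry by
+// entry onto the resource list of a SchedWriteRes, so M1WriteVSTD above holds
+// M1UnitS for 7 cycles while each FP-store unit is held for one, limiting how
+// soon the store pipe can accept another store independently of the 7-cycle
+// latency. Hypothetical name, same shape:
+def M1WriteExampleStSketch : SchedWriteRes<[M1UnitS,
+                                            M1UnitFST]> { let Latency = 7;
+                                                          let ResourceCycles = [7, 1]; }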
+def : InstRW<[WriteVST], (instregex "^STUR[BDHSQ]i")>; +def : InstRW<[WriteVST, + WriteAdr], (instregex "^STR[BDHSQ](post|pre)")>; +def : InstRW<[WriteVST], (instregex "^STR[BDHSQ]ui")>; +def : InstRW<[M1WriteSY, + ReadAdrBase], (instregex "^STR[BDHS]ro[WX]")>; +def : InstRW<[M1WriteSB, + ReadAdrBase], (instregex "^STRQro[WX]")>; +def : InstRW<[WriteVST], (instregex "^STN?P[DSQ]i")>; +def : InstRW<[WriteVST, + WriteAdr], (instregex "^STP[DS](post|pre)")>; +def : InstRW<[M1WriteSC, + WriteAdr], (instregex "^STPQ(post|pre)")>; + +// ASIMD instructions. +def : InstRW<[M1WriteNMISC3], (instregex "^[SU]ABAL?v")>; +def : InstRW<[M1WriteNMISC1], (instregex "^[SU]ABDL?v")>; +def : InstRW<[M1WriteNMISC1], (instregex "^(SQ)?ABSv")>; +def : InstRW<[M1WriteNMISC1], (instregex "^SQNEGv")>; +def : InstRW<[M1WriteNALU1], (instregex "^(ADD|NEG|SUB)v")>; +def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?H(ADD|SUB)v")>; +def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?AD[AD](L|LP|P|W)V?2?v")>; +def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?SUB[LW]2?v")>; +def : InstRW<[M1WriteNMISC3], (instregex "^R?(ADD|SUB)HN?2?v")>; +def : InstRW<[M1WriteNMISC3], (instregex "^[SU]+Q(ADD|SUB)v")>; +def : InstRW<[M1WriteNMISC3], (instregex "^[SU]RHADDv")>; +def : InstRW<[M1WriteNMISC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>; +def : InstRW<[M1WriteNALU1], (instregex "^CMTSTv")>; +def : InstRW<[M1WriteNALU1], (instregex "^(AND|BIC|EOR|MVNI|NOT|ORN|ORR)v")>; +def : InstRW<[M1WriteNMISC1], (instregex "^[SU](MIN|MAX)v")>; +def : InstRW<[M1WriteNMISC2], (instregex "^[SU](MIN|MAX)Pv")>; +def : InstRW<[M1WriteNMISC3], (instregex "^[SU](MIN|MAX)Vv")>; +def : InstRW<[M1WriteNMISC4], (instregex "^(MUL|SQR?DMULH)v")>; +def : InstRW<[M1WriteNMISC4], (instregex "^ML[AS]v")>; +def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD|SQRD)ML[AS][HL]v")>; +def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD)MULLv")>; +def : InstRW<[M1WriteNAL13], (instregex "^(S|SR|U|UR)SRAv")>; +def : InstRW<[M1WriteNALU1], (instregex "^SHL[dv]")>; +def : InstRW<[M1WriteNALU1], (instregex "^[SU]SH[LR][dv]")>; +def : InstRW<[M1WriteNALU1], (instregex "^S[RS]I[dv]")>; +def : InstRW<[M1WriteNAL13], (instregex "^(([SU]Q)?R)?SHRU?N[bhsv]")>; +def : InstRW<[M1WriteNAL13], (instregex "^[SU]RSH[LR][dv]")>; +def : InstRW<[M1WriteNAL13], (instregex "^[SU]QR?SHLU?[bdhsv]")>; + +// ASIMD FP instructions. +def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)v")>; +def : InstRW<[M1WriteNMISC3], (instregex "^F(ABD|ADD|SUB)v")>; +def : InstRW<[M1WriteNEONA], (instregex "^FADDP")>; +def : InstRW<[M1WriteNMISC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v[^1]")>; +def : InstRW<[M1WriteFCVT3], (instregex "^[FVSU]CVTX?[AFLMNPZ][SU]?(_Int)?v")>; +def : InstRW<[M1WriteFVAR15], (instregex "FDIVv.f32")>; +def : InstRW<[M1WriteFVAR23], (instregex "FDIVv2f64")>; +def : InstRW<[M1WriteFVAR15], (instregex "FSQRTv.f32")>; +def : InstRW<[M1WriteFVAR23], (instregex "FSQRTv2f64")>; +def : InstRW<[M1WriteNMISC1], (instregex "^F(MAX|MIN)(NM)?V?v")>; +def : InstRW<[M1WriteNMISC2], (instregex "^F(MAX|MIN)(NM)?Pv")>; +def : InstRW<[M1WriteNEONJ], (instregex "^FMULX?v.i")>; +def : InstRW<[M1WriteFMAC4], (instregex "^FMULX?v.f")>; +def : InstRW<[M1WriteNEONK], (instregex "^FML[AS]v.i")>; +def : InstRW<[M1WriteFMAC5], (instregex "^FML[AS]v.f")>; +def : InstRW<[M1WriteFCVT3], (instregex "^FRINT[AIMNPXZ]v")>; + +// ASIMD miscellaneous instructions. 
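+
+// Editorial note (not from the upstream file): WriteSequence chains write
+// types back to back, and the optional repeat count expands the list, so
+// WriteSequence<[W], 3> issues W three times in sequence; that is how the
+// multi-register TB[LX] lookups below are modelled. Hypothetical name:
+def M1WriteExampleTblSketch : WriteSequence<[M1WriteNAL12], 3>;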
+def : InstRW<[M1WriteNALU1], (instregex "^RBITv")>; +def : InstRW<[M1WriteNAL11], (instregex "^(BIF|BIT|BSL)v")>; +def : InstRW<[M1WriteNEONB], (instregex "^DUPv.+gpr")>; +def : InstRW<[M1WriteNALU1], (instregex "^DUPv.+lane")>; +def : InstRW<[M1WriteNALU1], (instregex "^EXTv8")>; +def : InstRW<[M1WriteNEONL], (instregex "^EXTv16")>; +def : InstRW<[M1WriteNAL13], (instregex "^[SU]?Q?XTU?Nv")>; +def : InstRW<[M1WriteNALU1], (instregex "^CPY")>; +def : InstRW<[M1WriteNALU1], (instregex "^INSv.+lane")>; +def : InstRW<[M1WriteNALU1], (instregex "^MOVI[Dv]")>; +def : InstRW<[M1WriteNALU1], (instregex "^FMOVv")>; +def : InstRW<[M1WriteFCVT4], (instregex "^[FU](RECP|RSQRT)Ev[248]")>; +def : InstRW<[M1WriteFMAC5], (instregex "^F(RECP|RSQRT)Sv")>; +def : InstRW<[M1WriteNALU1], (instregex "^REV(16|32|64)v")>; +def : InstRW<[M1WriteNAL11], (instregex "^TB[LX]v8i8One")>; +def : InstRW<[WriteSequence<[M1WriteNAL11], 2>], + (instregex "^TB[LX]v8i8Two")>; +def : InstRW<[WriteSequence<[M1WriteNAL11], 3>], + (instregex "^TB[LX]v8i8Three")>; +def : InstRW<[WriteSequence<[M1WriteNAL11], 4>], + (instregex "^TB[LX]v8i8Four")>; +def : InstRW<[M1WriteNAL12], (instregex "^TB[LX]v16i8One")>; +def : InstRW<[WriteSequence<[M1WriteNAL12], 2>], + (instregex "^TB[LX]v16i8Two")>; +def : InstRW<[WriteSequence<[M1WriteNAL12], 3>], + (instregex "^TB[LX]v16i8Three")>; +def : InstRW<[WriteSequence<[M1WriteNAL12], 4>], + (instregex "^TB[LX]v16i8Four")>; +def : InstRW<[M1WriteNEOND], (instregex "^[SU]MOVv")>; +def : InstRW<[M1WriteNEONC], (instregex "^INSv.+gpr")>; +def : InstRW<[M1WriteNALU1], (instregex "^(TRN|UZP)[12](v8i8|v4i16|v2i32)")>; +def : InstRW<[M1WriteNALU2], (instregex "^(TRN|UZP)[12](v16i8|v8i16|v4i32|v2i64)")>; +def : InstRW<[M1WriteNALU1], (instregex "^ZIP[12]v")>; + +// ASIMD load instructions. 
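+
+// Editorial note (not from the upstream file): the write list of each InstRW
+// below lines up with the instruction's def operands, so the "_POST"
+// (post-increment) variants carry one extra write, WriteAdr, for the updated
+// base register in addition to the loaded register(s).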
+def : InstRW<[M1WriteVLDD], (instregex "LD1i(8|16|32)$")>; +def : InstRW<[M1WriteVLDD, + WriteAdr], (instregex "LD1i(8|16|32)_POST$")>; +def : InstRW<[M1WriteVLDE], (instregex "LD1i(64)$")>; +def : InstRW<[M1WriteVLDE, + WriteAdr], (instregex "LD1i(64)_POST$")>; + +def : InstRW<[M1WriteL5], (instregex "LD1Rv(8b|4h|2s)$")>; +def : InstRW<[M1WriteL5, + WriteAdr], (instregex "LD1Rv(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteL5], (instregex "LD1Rv(1d)$")>; +def : InstRW<[M1WriteL5, + WriteAdr], (instregex "LD1Rv(1d)_POST$")>; +def : InstRW<[M1WriteL5], (instregex "LD1Rv(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteL5, + WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[M1WriteL5], (instregex "LD1Onev(8b|4h|2s|1d)$")>; +def : InstRW<[M1WriteL5, + WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[M1WriteL5], (instregex "LD1Onev(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteL5, + WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[M1WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[M1WriteVLDA, + WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[M1WriteVLDA], (instregex "LD1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteVLDA, + WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[M1WriteVLDB], (instregex "LD1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[M1WriteVLDB, + WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[M1WriteVLDB], (instregex "LD1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteVLDB, + WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[M1WriteVLDC], (instregex "LD1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[M1WriteVLDC, + WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[M1WriteVLDC], (instregex "LD1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteVLDC, + WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[M1WriteVLDG], (instregex "LD2i(8|16)$")>; +def : InstRW<[M1WriteVLDG, + WriteAdr], (instregex "LD2i(8|16)_POST$")>; +def : InstRW<[M1WriteVLDG], (instregex "LD2i(32)$")>; +def : InstRW<[M1WriteVLDG, + WriteAdr], (instregex "LD2i(32)_POST$")>; +def : InstRW<[M1WriteVLDH], (instregex "LD2i(64)$")>; +def : InstRW<[M1WriteVLDH, + WriteAdr], (instregex "LD2i(64)_POST$")>; + +def : InstRW<[M1WriteVLDA], (instregex "LD2Rv(8b|4h|2s)$")>; +def : InstRW<[M1WriteVLDA, + WriteAdr], (instregex "LD2Rv(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVLDA], (instregex "LD2Rv(1d)$")>; +def : InstRW<[M1WriteVLDA, + WriteAdr], (instregex "LD2Rv(1d)_POST$")>; +def : InstRW<[M1WriteVLDA], (instregex "LD2Rv(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteVLDA, + WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[M1WriteVLDF], (instregex "LD2Twov(8b|4h|2s)$")>; +def : InstRW<[M1WriteVLDF, + WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVLDF], (instregex "LD2Twov(16b|8h|4s)$")>; +def : InstRW<[M1WriteVLDF, + WriteAdr], (instregex "LD2Twov(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVLDF], (instregex "LD2Twov(2d)$")>; +def : InstRW<[M1WriteVLDF, + WriteAdr], (instregex "LD2Twov(2d)_POST$")>; + +def : InstRW<[M1WriteVLDJ], (instregex "LD3i(8|16)$")>; +def : InstRW<[M1WriteVLDJ, + WriteAdr], (instregex "LD3i(8|16)_POST$")>; +def : InstRW<[M1WriteVLDJ], (instregex "LD3i(32)$")>; +def : InstRW<[M1WriteVLDJ, + WriteAdr], (instregex "LD3i(32)_POST$")>; +def : InstRW<[M1WriteVLDL], (instregex "LD3i(64)$")>; +def : InstRW<[M1WriteVLDL, + WriteAdr], (instregex "LD3i(64)_POST$")>; + +def : InstRW<[M1WriteVLDB], 
(instregex "LD3Rv(8b|4h|2s)$")>; +def : InstRW<[M1WriteVLDB, + WriteAdr], (instregex "LD3Rv(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(1d)$")>; +def : InstRW<[M1WriteVLDB, + WriteAdr], (instregex "LD3Rv(1d)_POST$")>; +def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(16b|8h|4s)$")>; +def : InstRW<[M1WriteVLDB, + WriteAdr], (instregex "LD3Rv(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(2d)$")>; +def : InstRW<[M1WriteVLDB, + WriteAdr], (instregex "LD3Rv(2d)_POST$")>; + +def : InstRW<[M1WriteVLDI], (instregex "LD3Threev(8b|4h|2s)$")>; +def : InstRW<[M1WriteVLDI, + WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVLDI], (instregex "LD3Threev(16b|8h|4s)$")>; +def : InstRW<[M1WriteVLDI, + WriteAdr], (instregex "LD3Threev(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVLDI], (instregex "LD3Threev(2d)$")>; +def : InstRW<[M1WriteVLDI, + WriteAdr], (instregex "LD3Threev(2d)_POST$")>; + +def : InstRW<[M1WriteVLDK], (instregex "LD4i(8|16)$")>; +def : InstRW<[M1WriteVLDK, + WriteAdr], (instregex "LD4i(8|16)_POST$")>; +def : InstRW<[M1WriteVLDK], (instregex "LD4i(32)$")>; +def : InstRW<[M1WriteVLDK, + WriteAdr], (instregex "LD4i(32)_POST$")>; +def : InstRW<[M1WriteVLDM], (instregex "LD4i(64)$")>; +def : InstRW<[M1WriteVLDM, + WriteAdr], (instregex "LD4i(64)_POST$")>; + +def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(8b|4h|2s)$")>; +def : InstRW<[M1WriteVLDC, + WriteAdr], (instregex "LD4Rv(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(1d)$")>; +def : InstRW<[M1WriteVLDC, + WriteAdr], (instregex "LD4Rv(1d)_POST$")>; +def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(16b|8h|4s)$")>; +def : InstRW<[M1WriteVLDC, + WriteAdr], (instregex "LD4Rv(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(2d)$")>; +def : InstRW<[M1WriteVLDC, + WriteAdr], (instregex "LD4Rv(2d)_POST$")>; + +def : InstRW<[M1WriteVLDN], (instregex "LD4Fourv(8b|4h|2s)$")>; +def : InstRW<[M1WriteVLDN, + WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVLDN], (instregex "LD4Fourv(16b|8h|4s)$")>; +def : InstRW<[M1WriteVLDN, + WriteAdr], (instregex "LD4Fourv(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVLDN], (instregex "LD4Fourv(2d)$")>; +def : InstRW<[M1WriteVLDN, + WriteAdr], (instregex "LD4Fourv(2d)_POST$")>; + +// ASIMD store instructions. 
+def : InstRW<[M1WriteVSTD], (instregex "ST1i(8|16|32)$")>; +def : InstRW<[M1WriteVSTD, + WriteAdr], (instregex "ST1i(8|16|32)_POST$")>; +def : InstRW<[M1WriteVSTD], (instregex "ST1i(64)$")>; +def : InstRW<[M1WriteVSTD, + WriteAdr], (instregex "ST1i(64)_POST$")>; + +def : InstRW<[WriteVST], (instregex "ST1Onev(8b|4h|2s|1d)$")>; +def : InstRW<[WriteVST, + WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[WriteVST], (instregex "ST1Onev(16b|8h|4s|2d)$")>; +def : InstRW<[WriteVST, + WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[M1WriteVSTA], (instregex "ST1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[M1WriteVSTA, + WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[M1WriteVSTA], (instregex "ST1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteVSTA, + WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[M1WriteVSTB], (instregex "ST1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[M1WriteVSTB, + WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[M1WriteVSTB], (instregex "ST1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteVSTB, + WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[M1WriteVSTC], (instregex "ST1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[M1WriteVSTC, + WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[M1WriteVSTC], (instregex "ST1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteVSTC, + WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[M1WriteVSTD], (instregex "ST2i(8|16|32)$")>; +def : InstRW<[M1WriteVSTD, + WriteAdr], (instregex "ST2i(8|16|32)_POST$")>; +def : InstRW<[M1WriteVSTD], (instregex "ST2i(64)$")>; +def : InstRW<[M1WriteVSTD, + WriteAdr], (instregex "ST2i(64)_POST$")>; + +def : InstRW<[M1WriteVSTD], (instregex "ST2Twov(8b|4h|2s)$")>; +def : InstRW<[M1WriteVSTD, + WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVSTE], (instregex "ST2Twov(16b|8h|4s)$")>; +def : InstRW<[M1WriteVSTE, + WriteAdr], (instregex "ST2Twov(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVSTE], (instregex "ST2Twov(2d)$")>; +def : InstRW<[M1WriteVSTE, + WriteAdr], (instregex "ST2Twov(2d)_POST$")>; + +def : InstRW<[M1WriteVSTH], (instregex "ST3i(8|16)$")>; +def : InstRW<[M1WriteVSTH, + WriteAdr], (instregex "ST3i(8|16)_POST$")>; +def : InstRW<[M1WriteVSTH], (instregex "ST3i(32)$")>; +def : InstRW<[M1WriteVSTH, + WriteAdr], (instregex "ST3i(32)_POST$")>; +def : InstRW<[M1WriteVSTF], (instregex "ST3i(64)$")>; +def : InstRW<[M1WriteVSTF, + WriteAdr], (instregex "ST3i(64)_POST$")>; + +def : InstRW<[M1WriteVSTF], (instregex "ST3Threev(8b|4h|2s)$")>; +def : InstRW<[M1WriteVSTF, + WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVSTG], (instregex "ST3Threev(16b|8h|4s)$")>; +def : InstRW<[M1WriteVSTG, + WriteAdr], (instregex "ST3Threev(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVSTG], (instregex "ST3Threev(2d)$")>; +def : InstRW<[M1WriteVSTG, + WriteAdr], (instregex "ST3Threev(2d)_POST$")>; + +def : InstRW<[M1WriteVSTH], (instregex "ST4i(8|16)$")>; +def : InstRW<[M1WriteVSTH, + WriteAdr], (instregex "ST4i(8|16)_POST$")>; +def : InstRW<[M1WriteVSTH], (instregex "ST4i(32)$")>; +def : InstRW<[M1WriteVSTH, + WriteAdr], (instregex "ST4i(32)_POST$")>; +def : InstRW<[M1WriteVSTF], (instregex "ST4i(64)$")>; +def : InstRW<[M1WriteVSTF, + WriteAdr], (instregex "ST4i(64)_POST$")>; + +def : InstRW<[M1WriteVSTF], (instregex "ST4Fourv(8b|4h|2s)$")>; +def : InstRW<[M1WriteVSTF, + WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>; +def : 
InstRW<[M1WriteVSTI], (instregex "ST4Fourv(16b|8h|4s)$")>; +def : InstRW<[M1WriteVSTI, + WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVSTI], (instregex "ST4Fourv(2d)$")>; +def : InstRW<[M1WriteVSTI, + WriteAdr], (instregex "ST4Fourv(2d)_POST$")>; + +// Cryptography instructions. +def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; } +def M1ReadAES : SchedReadAdvance<1, [M1WriteAES]>; +def : InstRW<[M1WriteAES], (instregex "^AES[DE]")>; +def : InstRW<[M1WriteAES, M1ReadAES], (instregex "^AESI?MC")>; + +def : InstRW<[M1WriteNCRYPT1], (instregex "^PMUL")>; +def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA1(H|SU)")>; +def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA1[CMP]")>; +def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA256SU0")>; +def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA256(H|SU1)")>; + +// CRC instructions. +def : InstRW<[M1WriteC2], (instregex "^CRC32")>; + +} // SchedModel = ExynosM1Model diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedExynosM3.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedExynosM3.td new file mode 100644 index 000000000..5e5369a5a --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedExynosM3.td @@ -0,0 +1,860 @@ +//=- AArch64SchedExynosM3.td - Samsung Exynos M3 Sched Defs --*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for the Samsung Exynos M3 to support +// instruction scheduling and other instruction cost heuristics. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// The Exynos-M3 is an advanced superscalar microprocessor with a 6-wide +// in-order stage for decode and dispatch and a wider issue stage. +// The execution units and loads and stores are out-of-order. + +def ExynosM3Model : SchedMachineModel { + let IssueWidth = 6; // Up to 6 uops per cycle. + let MicroOpBufferSize = 228; // ROB size. + let LoopMicroOpBufferSize = 40; // Based on the instruction queue size. + let LoadLatency = 4; // Optimistic load cases. + let MispredictPenalty = 16; // Minimum branch misprediction penalty. + let CompleteModel = 1; // Use the default model otherwise. + + list UnsupportedFeatures = [HasSVE]; + + // FIXME: Remove when all errors have been fixed. + let FullInstRWOverlapCheck = 0; +} + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available on the Exynos-M3, +// which has 12 pipelines, each with its own queue with out-of-order dispatch. 
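// Units declared under "let Super = <pipe> in { ... }" below are sub-resources
// of that FP pipe: a micro-op issued to one of them also occupies the parent
// pipe for the cycle.  ProcResGroup then pools the per-pipe copies into one
// schedulable resource, e.g. (from the definitions that follow):
//   def M3UnitFMAC : ProcResGroup<[M3UnitFMAC0, M3UnitFMAC1, M3UnitFMAC2]>;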
+ +let SchedModel = ExynosM3Model in { + +def M3UnitA : ProcResource<2>; // Simple integer +def M3UnitC : ProcResource<2>; // Simple and complex integer +def M3UnitD : ProcResource<1>; // Integer division (inside C0, serialized) +def M3UnitB : ProcResource<2>; // Branch +def M3UnitL : ProcResource<2>; // Load +def M3UnitS : ProcResource<1>; // Store +def M3PipeF0 : ProcResource<1>; // FP #0 +let Super = M3PipeF0 in { + def M3UnitFMAC0 : ProcResource<1>; // FP multiplication + def M3UnitFADD0 : ProcResource<1>; // Simple FP + def M3UnitFCVT0 : ProcResource<1>; // FP conversion + def M3UnitFSQR : ProcResource<2>; // FP square root (serialized) + def M3UnitNALU0 : ProcResource<1>; // Simple vector + def M3UnitNMSC : ProcResource<1>; // FP and vector miscellanea + def M3UnitNSHT0 : ProcResource<1>; // Vector shifting + def M3UnitNSHF0 : ProcResource<1>; // Vector shuffling +} +def M3PipeF1 : ProcResource<1>; // FP #1 +let Super = M3PipeF1 in { + def M3UnitFMAC1 : ProcResource<1>; // FP multiplication + def M3UnitFADD1 : ProcResource<1>; // Simple FP + def M3UnitFDIV0 : ProcResource<2>; // FP division (serialized) + def M3UnitFCVT1 : ProcResource<1>; // FP conversion + def M3UnitFST0 : ProcResource<1>; // FP store + def M3UnitNALU1 : ProcResource<1>; // Simple vector + def M3UnitNCRY0 : ProcResource<1>; // Cryptographic + def M3UnitNMUL : ProcResource<1>; // Vector multiplication + def M3UnitNSHT1 : ProcResource<1>; // Vector shifting + def M3UnitNSHF1 : ProcResource<1>; // Vector shuffling +} +def M3PipeF2 : ProcResource<1>; // FP #2 +let Super = M3PipeF2 in { + def M3UnitFMAC2 : ProcResource<1>; // FP multiplication + def M3UnitFADD2 : ProcResource<1>; // Simple FP + def M3UnitFDIV1 : ProcResource<2>; // FP division (serialized) + def M3UnitFST1 : ProcResource<1>; // FP store + def M3UnitNALU2 : ProcResource<1>; // Simple vector + def M3UnitNCRY1 : ProcResource<1>; // Cryptographic + def M3UnitNSHT2 : ProcResource<1>; // Vector shifting + def M3UnitNSHF2 : ProcResource<1>; // Vector shuffling +} + + +def M3UnitALU : ProcResGroup<[M3UnitA, + M3UnitC]>; +def M3UnitFMAC : ProcResGroup<[M3UnitFMAC0, + M3UnitFMAC1, + M3UnitFMAC2]>; +def M3UnitFADD : ProcResGroup<[M3UnitFADD0, + M3UnitFADD1, + M3UnitFADD2]>; +def M3UnitFDIV : ProcResGroup<[M3UnitFDIV0, + M3UnitFDIV1]>; +def M3UnitFCVT : ProcResGroup<[M3UnitFCVT0, + M3UnitFCVT1]>; +def M3UnitFST : ProcResGroup<[M3UnitFST0, + M3UnitFST1]>; +def M3UnitNALU : ProcResGroup<[M3UnitNALU0, + M3UnitNALU1, + M3UnitNALU2]>; +def M3UnitNCRY : ProcResGroup<[M3UnitNCRY0, + M3UnitNCRY1]>; +def M3UnitNSHT : ProcResGroup<[M3UnitNSHT0, + M3UnitNSHT1, + M3UnitNSHT2]>; +def M3UnitNSHF : ProcResGroup<[M3UnitNSHF0, + M3UnitNSHF1, + M3UnitNSHF2]>; + +//===----------------------------------------------------------------------===// +// Predicates. + +def M3BranchLinkFastPred : SchedPredicate<[{MI->getOpcode() == AArch64::BLR && + MI->getOperand(0).isReg() && + MI->getOperand(0).getReg() != AArch64::LR}]>; +def M3ResetFastPred : SchedPredicate<[{TII->isExynosResetFast(*MI)}]>; +def M3RotateRightFastPred : SchedPredicate<[{(MI->getOpcode() == AArch64::EXTRWrri || + MI->getOpcode() == AArch64::EXTRXrri) && + MI->getOperand(1).isReg() && MI->getOperand(2).isReg() && + MI->getOperand(1).getReg() == MI->getOperand(2).getReg()}]>; +def M3ShiftLeftFastPred : SchedPredicate<[{TII->isExynosShiftLeftFast(*MI)}]>; + +//===----------------------------------------------------------------------===// +// Coarse scheduling model. 
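// The coarse model binds each generic SchedWrite/SchedRead type to the units
// above: WriteRes names the type and the units it occupies, SchedAlias maps
// one write type onto another, and ReadAdvance lets a read see its operand a
// few cycles early.  Illustrative syntax only -- the type/unit/latency
// pairings shown here are placeholders, not the Exynos-M3 values:
//   def : WriteRes<WriteImm, [M3UnitALU]> { let Latency = 1; }
//   def : SchedAlias<WriteBr, M3WriteZ0>;
//   def : ReadAdvance<ReadI, 0>;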
+ +def M3WriteZ0 : SchedWriteRes<[]> { let Latency = 0; + let NumMicroOps = 1; } + +def M3WriteA1 : SchedWriteRes<[M3UnitALU]> { let Latency = 1; } +def M3WriteAA : SchedWriteRes<[M3UnitALU]> { let Latency = 2; + let ResourceCycles = [2]; } +def M3WriteAB : SchedWriteRes<[M3UnitALU, + M3UnitC]> { let Latency = 1; + let NumMicroOps = 2; } +def M3WriteAC : SchedWriteRes<[M3UnitALU, + M3UnitALU, + M3UnitC]> { let Latency = 2; + let NumMicroOps = 3; } +def M3WriteAD : SchedWriteRes<[M3UnitALU, + M3UnitC]> { let Latency = 2; + let NumMicroOps = 2; } +def M3WriteC1 : SchedWriteRes<[M3UnitC]> { let Latency = 1; } +def M3WriteC2 : SchedWriteRes<[M3UnitC]> { let Latency = 2; } +def M3WriteAX : SchedWriteVariant<[SchedVar, + SchedVar, + SchedVar]>; +def M3WriteAY : SchedWriteVariant<[SchedVar, + SchedVar]>; + +def M3WriteB1 : SchedWriteRes<[M3UnitB]> { let Latency = 1; } +def M3WriteBX : SchedWriteVariant<[SchedVar, + SchedVar]>; + +def M3WriteL4 : SchedWriteRes<[M3UnitL]> { let Latency = 4; } +def M3WriteL5 : SchedWriteRes<[M3UnitL]> { let Latency = 5; } +def M3WriteLA : SchedWriteRes<[M3UnitL, + M3UnitL]> { let Latency = 5; + let NumMicroOps = 1; } +def M3WriteLB : SchedWriteRes<[M3UnitA, + M3UnitL]> { let Latency = 5; + let NumMicroOps = 2; } +def M3WriteLC : SchedWriteRes<[M3UnitA, + M3UnitL, + M3UnitL]> { let Latency = 5; + let NumMicroOps = 2; } +def M3WriteLD : SchedWriteRes<[M3UnitA, + M3UnitL]> { let Latency = 4; + let NumMicroOps = 2; } +def M3WriteLH : SchedWriteRes<[]> { let Latency = 5; + let NumMicroOps = 0; } + +def M3WriteLX : SchedWriteVariant<[SchedVar, + SchedVar]>; + +def M3WriteS1 : SchedWriteRes<[M3UnitS]> { let Latency = 1; } +def M3WriteSA : SchedWriteRes<[M3UnitA, + M3UnitS, + M3UnitFST]> { let Latency = 2; + let NumMicroOps = 2; } +def M3WriteSB : SchedWriteRes<[M3UnitA, + M3UnitS]> { let Latency = 1; + let NumMicroOps = 2; } +def M3WriteSC : SchedWriteRes<[M3UnitA, + M3UnitS]> { let Latency = 2; + let NumMicroOps = 2; } + +def M3WriteSX : SchedWriteVariant<[SchedVar, + SchedVar]>; +def M3WriteSY : SchedWriteVariant<[SchedVar, + SchedVar]>; + +def M3ReadAdrBase : SchedReadVariant<[SchedVar, + SchedVar]>; + +// Branch instructions. +def : SchedAlias; +def : WriteRes { let Latency = 1; } + +// Arithmetic and logical integer instructions. +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } + +// Move instructions. +def : WriteRes { let Latency = 1; } + +// Divide and multiply instructions. +def : WriteRes { let Latency = 12; + let ResourceCycles = [1, 12]; } +def : WriteRes { let Latency = 21; + let ResourceCycles = [1, 21]; } +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 4; + let ResourceCycles = [2]; } + +// Miscellaneous instructions. +def : WriteRes { let Latency = 1; + let NumMicroOps = 2; } + +// Addressing modes. +def : WriteRes { let Latency = 1; + let NumMicroOps = 0; } +def : SchedAlias; + +// Load instructions. +def : SchedAlias; +def : WriteRes { let Latency = 4; + let NumMicroOps = 0; } +def : SchedAlias; + +// Store instructions. +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; + +// FP data instructions. +def : WriteRes { let Latency = 2; } +def : WriteRes { let Latency = 2; } +def : WriteRes { let Latency = 12; + let ResourceCycles = [12]; } +def : WriteRes { let Latency = 4; } + +// FP miscellaneous instructions. +// TODO: Conversion between register files is much different. 
+def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } + +// FP load instructions. +def : SchedAlias; + +// FP store instructions. +def : WriteRes { let Latency = 1; + let NumMicroOps = 1; } + +// ASIMD FP instructions. +def : WriteRes { let Latency = 3; } + +// Other miscellaneous instructions. +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } + +//===----------------------------------------------------------------------===// +// Generic fast forwarding. + +// TODO: Add FP register forwarding rules. + +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +// TODO: The forwarding for 32 bits actually saves 2 cycles. +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +//===----------------------------------------------------------------------===// +// Finer scheduling model. + +def M3WriteNEONA : SchedWriteRes<[M3UnitNSHF, + M3UnitFADD]> { let Latency = 3; + let NumMicroOps = 2; } +def M3WriteNEONB : SchedWriteRes<[M3UnitNALU, + M3UnitFST]> { let Latency = 10; + let NumMicroOps = 2; } +def M3WriteNEOND : SchedWriteRes<[M3UnitNSHF, + M3UnitFST]> { let Latency = 6; + let NumMicroOps = 2; } +def M3WriteNEONH : SchedWriteRes<[M3UnitNALU, + M3UnitS]> { let Latency = 5; + let NumMicroOps = 2; } +def M3WriteNEONI : SchedWriteRes<[M3UnitNSHF, + M3UnitS]> { let Latency = 5; + let NumMicroOps = 2; } +def M3WriteNEONV : SchedWriteRes<[M3UnitFDIV0, + M3UnitFDIV1]> { let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [8, 8]; } +def M3WriteNEONW : SchedWriteRes<[M3UnitFDIV0, + M3UnitFDIV1]> { let Latency = 12; + let NumMicroOps = 2; + let ResourceCycles = [13, 13]; } +def M3WriteNEONX : SchedWriteRes<[M3UnitFSQR, + M3UnitFSQR]> { let Latency = 18; + let NumMicroOps = 2; + let ResourceCycles = [19, 19]; } +def M3WriteNEONY : SchedWriteRes<[M3UnitFSQR, + M3UnitFSQR]> { let Latency = 25; + let NumMicroOps = 2; + let ResourceCycles = [26, 26]; } +def M3WriteNEONZ : SchedWriteRes<[M3UnitNMSC, + M3UnitNMSC]> { let Latency = 5; + let NumMicroOps = 2; } +def M3WriteFADD2 : SchedWriteRes<[M3UnitFADD]> { let Latency = 2; } +def M3WriteFCVT2 : SchedWriteRes<[M3UnitFCVT]> { let Latency = 2; } +def M3WriteFCVT3 : SchedWriteRes<[M3UnitFCVT]> { let Latency = 3; } +def M3WriteFCVT3A : SchedWriteRes<[M3UnitFCVT0]> { let Latency = 3; } +def M3WriteFCVT4A : SchedWriteRes<[M3UnitFCVT0]> { let Latency = 4; } +def M3WriteFCVT4 : SchedWriteRes<[M3UnitFCVT]> { let Latency = 4; } +def M3WriteFDIV10 : SchedWriteRes<[M3UnitFDIV]> { let Latency = 7; + let ResourceCycles = [8]; } +def M3WriteFDIV12 : SchedWriteRes<[M3UnitFDIV]> { let Latency = 12; + let ResourceCycles = [13]; } +def M3WriteFMAC3 : SchedWriteRes<[M3UnitFMAC]> { let Latency = 3; } +def M3WriteFMAC4 : SchedWriteRes<[M3UnitFMAC]> { let Latency = 4; } +def M3WriteFMAC5 : SchedWriteRes<[M3UnitFMAC]> { let Latency = 5; } +def M3WriteFSQR17 : SchedWriteRes<[M3UnitFSQR]> { let Latency = 18; + let ResourceCycles = [19]; } +def M3WriteFSQR25 : SchedWriteRes<[M3UnitFSQR]> { let Latency = 25; + let ResourceCycles = [26]; } +def M3WriteNALU1 : SchedWriteRes<[M3UnitNALU]> { let Latency = 1; } +def M3WriteNCRY1A : SchedWriteRes<[M3UnitNCRY0]> { let Latency = 1; } +def M3WriteNCRY3A : SchedWriteRes<[M3UnitNCRY0]> { let Latency = 3; } +def M3WriteNCRY5A : SchedWriteRes<[M3UnitNCRY]> { let Latency = 5; } +def M3WriteNMSC1 : 
SchedWriteRes<[M3UnitNMSC]> { let Latency = 1; } +def M3WriteNMSC2 : SchedWriteRes<[M3UnitNMSC]> { let Latency = 2; } +def M3WriteNMSC3 : SchedWriteRes<[M3UnitNMSC]> { let Latency = 3; } +def M3WriteNMUL3 : SchedWriteRes<[M3UnitNMUL]> { let Latency = 3; } +def M3WriteNSHF1 : SchedWriteRes<[M3UnitNSHF]> { let Latency = 1; } +def M3WriteNSHF3 : SchedWriteRes<[M3UnitNSHF]> { let Latency = 3; } +def M3WriteNSHT1 : SchedWriteRes<[M3UnitNSHT]> { let Latency = 1; } +def M3WriteNSHT2 : SchedWriteRes<[M3UnitNSHT]> { let Latency = 2; } +def M3WriteNSHT3 : SchedWriteRes<[M3UnitNSHT]> { let Latency = 3; } +def M3WriteVLDA : SchedWriteRes<[M3UnitL, + M3UnitL]> { let Latency = 5; + let NumMicroOps = 2; } +def M3WriteVLDB : SchedWriteRes<[M3UnitL, + M3UnitL, + M3UnitL]> { let Latency = 6; + let NumMicroOps = 3; } +def M3WriteVLDC : SchedWriteRes<[M3UnitL, + M3UnitL, + M3UnitL, + M3UnitL]> { let Latency = 6; + let NumMicroOps = 4; } +def M3WriteVLDD : SchedWriteRes<[M3UnitL, + M3UnitNALU]> { let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [2, 1]; } +def M3WriteVLDE : SchedWriteRes<[M3UnitL, + M3UnitNALU]> { let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [2, 1]; } +def M3WriteVLDF : SchedWriteRes<[M3UnitL, + M3UnitL]> { let Latency = 10; + let NumMicroOps = 2; + let ResourceCycles = [5, 5]; } +def M3WriteVLDG : SchedWriteRes<[M3UnitL, + M3UnitNALU, + M3UnitNALU]> { let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [2, 1, 1]; } +def M3WriteVLDH : SchedWriteRes<[M3UnitL, + M3UnitNALU, + M3UnitNALU]> { let Latency = 6; + let NumMicroOps = 3; + let ResourceCycles = [2, 1, 1]; } +def M3WriteVLDI : SchedWriteRes<[M3UnitL, + M3UnitL, + M3UnitL]> { let Latency = 12; + let NumMicroOps = 3; + let ResourceCycles = [6, 6, 6]; } +def M3WriteVLDJ : SchedWriteRes<[M3UnitL, + M3UnitNALU, + M3UnitNALU, + M3UnitNALU]> { let Latency = 7; + let NumMicroOps = 4; + let ResourceCycles = [2, 1, 1, 1]; } +def M3WriteVLDK : SchedWriteRes<[M3UnitL, + M3UnitNALU, + M3UnitNALU, + M3UnitNALU, + M3UnitNALU]> { let Latency = 9; + let NumMicroOps = 5; + let ResourceCycles = [4, 1, 1, 1, 1]; } +def M3WriteVLDL : SchedWriteRes<[M3UnitL, + M3UnitNALU, + M3UnitNALU, + M3UnitL, + M3UnitNALU]> { let Latency = 6; + let NumMicroOps = 5; + let ResourceCycles = [6, 1, 1, 6, 1]; } +def M3WriteVLDM : SchedWriteRes<[M3UnitL, + M3UnitNALU, + M3UnitNALU, + M3UnitL, + M3UnitNALU, + M3UnitNALU]> { let Latency = 7; + let NumMicroOps = 6; + let ResourceCycles = [6, 1, 1, 6, 1, 1]; } +def M3WriteVLDN : SchedWriteRes<[M3UnitL, + M3UnitL, + M3UnitL, + M3UnitL]> { let Latency = 14; + let NumMicroOps = 4; + let ResourceCycles = [6, 6, 6, 6]; } +def M3WriteVSTA : WriteSequence<[WriteVST], 2>; +def M3WriteVSTB : WriteSequence<[WriteVST], 3>; +def M3WriteVSTC : WriteSequence<[WriteVST], 4>; +def M3WriteVSTD : SchedWriteRes<[M3UnitS, + M3UnitFST, + M3UnitS, + M3UnitFST]> { let Latency = 7; + let NumMicroOps = 4; + let ResourceCycles = [1, 3, 1, 3]; } +def M3WriteVSTE : SchedWriteRes<[M3UnitS, + M3UnitFST, + M3UnitS, + M3UnitFST, + M3UnitS, + M3UnitFST]> { let Latency = 8; + let NumMicroOps = 6; + let ResourceCycles = [1, 3, 1, 3, 1, 3]; } +def M3WriteVSTF : SchedWriteRes<[M3UnitNALU, + M3UnitFST, + M3UnitFST, + M3UnitS, + M3UnitFST, + M3UnitS, + M3UnitFST]> { let Latency = 15; + let NumMicroOps = 7; + let ResourceCycles = [1, 3, 3, 1, 3, 1, 3]; } +def M3WriteVSTG : SchedWriteRes<[M3UnitNALU, + M3UnitFST, + M3UnitFST, + M3UnitS, + M3UnitFST, + M3UnitS, + M3UnitFST, + M3UnitS, + M3UnitFST]> { let Latency = 16; + let 
NumMicroOps = 9; + let ResourceCycles = [1, 3, 3, 1, 3, 1, 3, 1, 3]; } +def M3WriteVSTH : SchedWriteRes<[M3UnitNALU, + M3UnitFST, + M3UnitFST, + M3UnitS, + M3UnitFST]> { let Latency = 14; + let NumMicroOps = 5; + let ResourceCycles = [1, 3, 3, 1, 3]; } +def M3WriteVSTI : SchedWriteRes<[M3UnitNALU, + M3UnitFST, + M3UnitFST, + M3UnitS, + M3UnitFST, + M3UnitS, + M3UnitFST, + M3UnitS, + M3UnitFST]> { let Latency = 17; + let NumMicroOps = 9; + let ResourceCycles = [1, 3, 3, 1, 3, 1, 3, 1, 3]; } + +// Special cases. +def M3WriteAES : SchedWriteRes<[M3UnitNCRY]> { let Latency = 1; } +def M3ReadAES : SchedReadAdvance<1, [M3WriteAES]>; +def M3ReadFMAC : SchedReadAdvance<1, [M3WriteFMAC4, + M3WriteFMAC5]>; +def M3WriteMOVI : SchedWriteVariant<[SchedVar, + SchedVar]>; +def M3ReadNMUL : SchedReadAdvance<1, [M3WriteNMUL3]>; + +// Branch instructions +def : InstRW<[M3WriteB1], (instrs Bcc)>; +def : InstRW<[M3WriteA1], (instrs BL)>; +def : InstRW<[M3WriteBX], (instrs BLR)>; +def : InstRW<[M3WriteC1], (instregex "^CBN?Z[WX]")>; +def : InstRW<[M3WriteAD], (instregex "^TBN?Z[WX]")>; + +// Arithmetic and logical integer instructions. +def : InstRW<[M3WriteA1], (instrs COPY)>; +def : InstRW<[M3WriteAX], (instregex "^(ADD|SUB)S?Xrx64")>; +def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)[WX]r[sx]$")>; +def : InstRW<[M3WriteAX], (instregex "^(ADD|BIC|SUB)S[WX]r[sx]$")>; +def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|EOR|ORR|SUB)[WX]ri")>; + +// Move instructions. +def : InstRW<[M3WriteZ0], (instrs ADR, ADRP)>; +def : InstRW<[M3WriteZ0], (instregex "^MOV[NZ][WX]i")>; + +// Divide and multiply instructions. + +// Miscellaneous instructions. +def : InstRW<[M3WriteAY], (instrs EXTRWrri, EXTRXrri)>; + +// Load instructions. +def : InstRW<[M3WriteLD, + WriteLDHi, + WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>; +def : InstRW<[M3WriteLX, + ReadAdrBase], (instregex "^PRFMro[WX]")>; + +// Store instructions. + +// FP data instructions. +def : InstRW<[M3WriteNSHF1], (instregex "^FABS[DS]r")>; +def : InstRW<[M3WriteFADD2], (instregex "^F(ADD|SUB)[DS]rr")>; +def : InstRW<[M3WriteFDIV10], (instrs FDIVSrr)>; +def : InstRW<[M3WriteFDIV12], (instrs FDIVDrr)>; +def : InstRW<[M3WriteNMSC1], (instregex "^F(MAX|MIN).+rr")>; +def : InstRW<[M3WriteFMAC3], (instregex "^FN?MUL[DS]rr")>; +def : InstRW<[M3WriteFMAC4, + M3ReadFMAC], (instregex "^FN?M(ADD|SUB)[DS]rrr")>; +def : InstRW<[M3WriteNALU1], (instregex "^FNEG[DS]r")>; +def : InstRW<[M3WriteFCVT3A], (instregex "^FRINT.+r")>; +def : InstRW<[M3WriteNEONH], (instregex "^FCSEL[DS]rrr")>; +def : InstRW<[M3WriteFSQR17], (instrs FSQRTSr)>; +def : InstRW<[M3WriteFSQR25], (instrs FSQRTDr)>; + +// FP miscellaneous instructions. +def : InstRW<[M3WriteFCVT3], (instregex "^FCVT[DHS][DHS]r")>; +def : InstRW<[M3WriteFCVT4A], (instregex "^[SU]CVTF[SU][XW][DHS]ri")>; +def : InstRW<[M3WriteFCVT3A], (instregex "^FCVT[AMNPZ][SU]U[XW][DHS]r")>; +def : InstRW<[M3WriteFCVT3A], (instregex "^FCVTZ[SU][dhs]")>; +def : InstRW<[M3WriteNALU1], (instregex "^FMOV[DS][ir]")>; +def : InstRW<[M3WriteFCVT4], (instregex "^[FU](RECP|RSQRT)Ev1")>; +def : InstRW<[M3WriteNMSC1], (instregex "^FRECPXv1")>; +def : InstRW<[M3WriteFMAC4, + M3ReadFMAC], (instregex "^F(RECP|RSQRT)S(16|32|64)")>; +def : InstRW<[M3WriteNALU1], (instregex "^FMOV[WX][DS]r")>; +def : InstRW<[M3WriteNALU1], (instregex "^FMOV[DS][WX]r")>; +def : InstRW<[M3WriteNEONI], (instregex "^FMOV(DX|XD)Highr")>; + +// FP load instructions. 
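// In the FP load entries below, paired loads list one write per destination
// register (M3WriteLH models the second result without adding micro-ops), and
// the pre/post-indexed forms append WriteAdr for the base-register writeback,
// e.g. (from the entries that follow):
//   def : InstRW<[M3WriteLB, M3WriteLH, WriteAdr],
//                (instregex "^LDP[DS](post|pre)")>;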
+def : InstRW<[WriteVLD], (instregex "^LDR[DSQ]l")>; +def : InstRW<[WriteVLD], (instregex "^LDUR[BDHSQ]i")>; +def : InstRW<[WriteVLD, + WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>; +def : InstRW<[WriteVLD], (instregex "^LDR[BDHSQ]ui")>; +def : InstRW<[M3WriteLX, + ReadAdrBase], (instregex "^LDR[BDHS]ro[WX]")>; +def : InstRW<[M3WriteLB, + ReadAdrBase], (instregex "^LDRQro[WX]")>; +def : InstRW<[WriteVLD, + M3WriteLH], (instregex "^LDN?P[DS]i")>; +def : InstRW<[M3WriteLA, + M3WriteLH], (instregex "^LDN?PQi")>; +def : InstRW<[M3WriteLB, + M3WriteLH, + WriteAdr], (instregex "^LDP[DS](post|pre)")>; +def : InstRW<[M3WriteLC, + M3WriteLH, + WriteAdr], (instregex "^LDPQ(post|pre)")>; + +// FP store instructions. +def : InstRW<[WriteVST], (instregex "^STUR[BDHSQ]i")>; +def : InstRW<[WriteVST, + WriteAdr], (instregex "^STR[BDHSQ](post|pre)")>; +def : InstRW<[WriteVST], (instregex "^STR[BDHSQ]ui")>; +def : InstRW<[M3WriteSY, + ReadAdrBase], (instregex "^STR[BDHS]ro[WX]")>; +def : InstRW<[M3WriteSA, + ReadAdrBase], (instregex "^STRQro[WX]")>; +def : InstRW<[WriteVST], (instregex "^STN?P[DSQ]i")>; +def : InstRW<[WriteVST, + WriteAdr], (instregex "^STP[DS](post|pre)")>; +def : InstRW<[M3WriteSA, + WriteAdr], (instregex "^STPQ(post|pre)")>; + +// ASIMD instructions. +def : InstRW<[M3WriteNMSC3], (instregex "^[SU]ABAL?v")>; +def : InstRW<[M3WriteNMSC1], (instregex "^[SU]ABDL?v")>; +def : InstRW<[M3WriteNMSC1], (instregex "^(SQ)?(ABS|NEG)v")>; +def : InstRW<[M3WriteNALU1], (instregex "^(ADD|NEG|SUB)v")>; +def : InstRW<[M3WriteNMSC3], (instregex "^[SU]?ADDL?Pv")>; +def : InstRW<[M3WriteNMSC3], (instregex "^[SU]H(ADD|SUB)v")>; +def : InstRW<[M3WriteNMSC3], (instregex "^[SU](ADD|SUB)[LW]V?v")>; +def : InstRW<[M3WriteNMSC3], (instregex "^R?(ADD|SUB)HN2?v")>; +def : InstRW<[M3WriteNMSC3], (instregex "^[SU]Q(ADD|SUB)v")>; +def : InstRW<[M3WriteNMSC3], (instregex "^(SU|US)QADDv")>; +def : InstRW<[M3WriteNMSC3], (instregex "^[SU]RHADDv")>; +def : InstRW<[M3WriteNMSC3], (instregex "^[SU]?ADDL?Vv")>; +def : InstRW<[M3WriteNMSC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>; +def : InstRW<[M3WriteNALU1], (instregex "^CMTSTv")>; +def : InstRW<[M3WriteNALU1], (instregex "^(AND|BIC|EOR|MVNI|NOT|ORN|ORR)v")>; +def : InstRW<[M3WriteNMSC1], (instregex "^[SU](MIN|MAX)v")>; +def : InstRW<[M3WriteNMSC2], (instregex "^[SU](MIN|MAX)Pv")>; +def : InstRW<[M3WriteNMSC3], (instregex "^[SU](MIN|MAX)Vv")>; +def : InstRW<[M3WriteNMUL3], (instregex "^(MUL|SQR?DMULH)v")>; +def : InstRW<[M3WriteNMUL3, + M3ReadNMUL], (instregex "^ML[AS]v")>; +def : InstRW<[M3WriteNMUL3], (instregex "^[SU]ML[AS]Lv")>; +def : InstRW<[M3WriteNMUL3], (instregex "^SQDML[AS]L")>; +def : InstRW<[M3WriteNMUL3], (instregex "^(S|U|SQD)MULLv")>; +def : InstRW<[M3WriteNMSC3], (instregex "^[SU]ADALPv")>; +def : InstRW<[M3WriteNSHT3], (instregex "^[SU]R?SRAv")>; +def : InstRW<[M3WriteNSHT1], (instregex "^SHL[dv]")>; +def : InstRW<[M3WriteNSHT1], (instregex "^[SU]SH[LR][dv]")>; +def : InstRW<[M3WriteNSHT1], (instregex "^S[RS]I[dv]")>; +def : InstRW<[M3WriteNSHT2], (instregex "^[SU]?SHLLv")>; +def : InstRW<[M3WriteNSHT3], (instregex "^(([SU]Q)?R)?SHRU?N[bhsv]")>; +def : InstRW<[M3WriteNSHT3], (instregex "^[SU]RSH[LR][dv]")>; +def : InstRW<[M3WriteNSHT3], (instregex "^[SU]QR?SHLU?[bdhsv]")>; + +// ASIMD FP instructions. 
+def : InstRW<[M3WriteNSHF1], (instregex "^FABSv")>; +def : InstRW<[M3WriteFADD2], (instregex "^F(ABD|ADD|SUB)v")>; +def : InstRW<[M3WriteNEONA], (instregex "^FADDP")>; +def : InstRW<[M3WriteNMSC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v[^1]")>; +def : InstRW<[M3WriteFCVT3], (instregex "^FCVT(L|N|XN)v")>; +def : InstRW<[M3WriteFCVT2], (instregex "^FCVT[AMNPZ][SU]v")>; +def : InstRW<[M3WriteFCVT2], (instregex "^[SU]CVTFv")>; +def : InstRW<[M3WriteFDIV10], (instrs FDIVv2f32)>; +def : InstRW<[M3WriteNEONV], (instrs FDIVv4f32)>; +def : InstRW<[M3WriteNEONW], (instrs FDIVv2f64)>; +def : InstRW<[M3WriteNMSC1], (instregex "^F(MAX|MIN)(NM)?v")>; +def : InstRW<[M3WriteNMSC2], (instregex "^F(MAX|MIN)(NM)?Pv")>; +def : InstRW<[M3WriteNEONZ], (instregex "^F(MAX|MIN)(NM)?Vv")>; +def : InstRW<[M3WriteFMAC3], (instregex "^FMULX?v.[fi]")>; +def : InstRW<[M3WriteFMAC4, + M3ReadFMAC], (instregex "^FML[AS]v.f")>; +def : InstRW<[M3WriteFMAC5, + M3ReadFMAC], (instregex "^FML[AS]v.i")>; +def : InstRW<[M3WriteNALU1], (instregex "^FNEGv")>; +def : InstRW<[M3WriteFCVT3A], (instregex "^FRINT[AIMNPXZ]v")>; +def : InstRW<[M3WriteFSQR17], (instrs FSQRTv2f32)>; +def : InstRW<[M3WriteNEONX], (instrs FSQRTv4f32)>; +def : InstRW<[M3WriteNEONY], (instrs FSQRTv2f64)>; + +// ASIMD miscellaneous instructions. +def : InstRW<[M3WriteNALU1], (instregex "^RBITv")>; +def : InstRW<[M3WriteNALU1], (instregex "^(BIF|BIT|BSL)v")>; +def : InstRW<[M3WriteNEONB], (instregex "^DUPv.+gpr")>; +def : InstRW<[M3WriteNSHF1], (instregex "^DUPv.+lane")>; +def : InstRW<[M3WriteNSHF1], (instregex "^EXTv")>; +def : InstRW<[M3WriteNSHF1], (instregex "^[SU]?Q?XTU?Nv")>; +def : InstRW<[M3WriteNSHF1], (instregex "^CPY")>; +def : InstRW<[M3WriteNSHF1], (instregex "^INSv.+lane")>; +def : InstRW<[M3WriteMOVI], (instregex "^MOVI")>; +def : InstRW<[M3WriteNALU1], (instregex "^FMOVv")>; +def : InstRW<[M3WriteFCVT4], (instregex "^[FU](RECP|RSQRT)Ev[248]")>; +def : InstRW<[M3WriteFMAC4, + M3ReadFMAC], (instregex "^F(RECP|RSQRT)Sv")>; +def : InstRW<[M3WriteNSHF1], (instregex "^REV(16|32|64)v")>; +def : InstRW<[M3WriteNSHF1], (instregex "^TB[LX]v")>; +def : InstRW<[M3WriteNEOND], (instregex "^[SU]MOVv")>; +def : InstRW<[M3WriteNSHF3], (instregex "^INSv.+gpr")>; +def : InstRW<[M3WriteNSHF1], (instregex "^(TRN|UZP|ZIP)[12]v")>; + +// ASIMD load instructions. 
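// Each ASIMD load below is mapped twice: the "$"-anchored pattern catches the
// plain addressing form, and the matching "_POST" pattern adds WriteAdr for
// the post-indexed base-register update, e.g. (from the entries that follow):
//   def : InstRW<[M3WriteVLDA],           (instregex "LD1Twov(8b|4h|2s|1d)$")>;
//   def : InstRW<[M3WriteVLDA, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST")>;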
+def : InstRW<[M3WriteL5], (instregex "LD1Onev(8b|4h|2s|1d)$")>; +def : InstRW<[M3WriteL5, + WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST")>; +def : InstRW<[M3WriteL5], (instregex "LD1Onev(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteL5, + WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[M3WriteVLDA, + WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST")>; +def : InstRW<[M3WriteVLDA], (instregex "LD1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteVLDA, + WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVLDB], (instregex "LD1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[M3WriteVLDB, + WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST")>; +def : InstRW<[M3WriteVLDB], (instregex "LD1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteVLDB, + WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVLDC], (instregex "LD1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[M3WriteVLDC, + WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST")>; +def : InstRW<[M3WriteVLDC], (instregex "LD1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteVLDC, + WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVLDD], (instregex "LD1i(8|16|32)$")>; +def : InstRW<[M3WriteVLDD, + WriteAdr], (instregex "LD1i(8|16|32)_POST")>; +def : InstRW<[M3WriteVLDE], (instregex "LD1i(64)$")>; +def : InstRW<[M3WriteVLDE, + WriteAdr], (instregex "LD1i(64)_POST")>; + +def : InstRW<[M3WriteL5], (instregex "LD1Rv(8b|4h|2s|1d)$")>; +def : InstRW<[M3WriteL5, + WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d)_POST")>; +def : InstRW<[M3WriteL5], (instregex "LD1Rv(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteL5, + WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVLDF], (instregex "LD2Twov(8b|4h|2s)$")>; +def : InstRW<[M3WriteVLDF, + WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST")>; +def : InstRW<[M3WriteVLDF], (instregex "LD2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteVLDF, + WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVLDG], (instregex "LD2i(8|16|32)$")>; +def : InstRW<[M3WriteVLDG, + WriteAdr], (instregex "LD2i(8|16|32)_POST")>; +def : InstRW<[M3WriteVLDH], (instregex "LD2i(64)$")>; +def : InstRW<[M3WriteVLDH, + WriteAdr], (instregex "LD2i(64)_POST")>; + +def : InstRW<[M3WriteVLDA], (instregex "LD2Rv(8b|4h|2s|1d)$")>; +def : InstRW<[M3WriteVLDA, + WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d)_POST")>; +def : InstRW<[M3WriteVLDA], (instregex "LD2Rv(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteVLDA, + WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVLDI], (instregex "LD3Threev(8b|4h|2s)$")>; +def : InstRW<[M3WriteVLDI, + WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST")>; +def : InstRW<[M3WriteVLDI], (instregex "LD3Threev(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteVLDI, + WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVLDJ], (instregex "LD3i(8|16|32)$")>; +def : InstRW<[M3WriteVLDJ, + WriteAdr], (instregex "LD3i(8|16|32)_POST")>; +def : InstRW<[M3WriteVLDL], (instregex "LD3i(64)$")>; +def : InstRW<[M3WriteVLDL, + WriteAdr], (instregex "LD3i(64)_POST")>; + +def : InstRW<[M3WriteVLDB], (instregex "LD3Rv(8b|4h|2s|1d)$")>; +def : InstRW<[M3WriteVLDB, + WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d)_POST")>; +def : InstRW<[M3WriteVLDB], (instregex "LD3Rv(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteVLDB, + WriteAdr], (instregex "LD3Rv(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVLDN], (instregex 
"LD4Fourv(8b|4h|2s)$")>; +def : InstRW<[M3WriteVLDN, + WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST")>; +def : InstRW<[M3WriteVLDN], (instregex "LD4Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteVLDN, + WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVLDK], (instregex "LD4i(8|16|32)$")>; +def : InstRW<[M3WriteVLDK, + WriteAdr], (instregex "LD4i(8|16|32)_POST")>; +def : InstRW<[M3WriteVLDM], (instregex "LD4i(64)$")>; +def : InstRW<[M3WriteVLDM, + WriteAdr], (instregex "LD4i(64)_POST")>; + +def : InstRW<[M3WriteVLDC], (instregex "LD4Rv(8b|4h|2s|1d)$")>; +def : InstRW<[M3WriteVLDC, + WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d)_POST")>; +def : InstRW<[M3WriteVLDC], (instregex "LD4Rv(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteVLDC, + WriteAdr], (instregex "LD4Rv(16b|8h|4s|2d)_POST")>; + +// ASIMD store instructions. +def : InstRW<[WriteVST], (instregex "ST1Onev(8b|4h|2s|1d)$")>; +def : InstRW<[WriteVST, + WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST")>; +def : InstRW<[WriteVST], (instregex "ST1Onev(16b|8h|4s|2d)$")>; +def : InstRW<[WriteVST, + WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVSTA], (instregex "ST1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[M3WriteVSTA, + WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST")>; +def : InstRW<[M3WriteVSTA], (instregex "ST1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteVSTA, + WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVSTB], (instregex "ST1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[M3WriteVSTB, + WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST")>; +def : InstRW<[M3WriteVSTB], (instregex "ST1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteVSTB, + WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVSTC], (instregex "ST1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[M3WriteVSTC, + WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST")>; +def : InstRW<[M3WriteVSTC], (instregex "ST1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteVSTC, + WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVSTD], (instregex "ST1i(8|16|32|64)$")>; +def : InstRW<[M3WriteVSTD, + WriteAdr], (instregex "ST1i(8|16|32|64)_POST")>; + +def : InstRW<[M3WriteVSTD], (instregex "ST2Twov(8b|4h|2s)$")>; +def : InstRW<[M3WriteVSTD, + WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST")>; +def : InstRW<[M3WriteVSTE], (instregex "ST2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteVSTE, + WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVSTD], (instregex "ST2i(8|16|32)$")>; +def : InstRW<[M3WriteVSTD, + WriteAdr], (instregex "ST2i(8|16|32)_POST")>; +def : InstRW<[M3WriteVSTD], (instregex "ST2i(64)$")>; +def : InstRW<[M3WriteVSTD, + WriteAdr], (instregex "ST2i(64)_POST")>; + +def : InstRW<[M3WriteVSTF], (instregex "ST3Threev(8b|4h|2s)$")>; +def : InstRW<[M3WriteVSTF, + WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST")>; +def : InstRW<[M3WriteVSTG], (instregex "ST3Threev(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteVSTG, + WriteAdr], (instregex "ST3Threev(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVSTH], (instregex "ST3i(8|16|32)$")>; +def : InstRW<[M3WriteVSTH, + WriteAdr], (instregex "ST3i(8|16|32)_POST")>; +def : InstRW<[M3WriteVSTF], (instregex "ST3i(64)$")>; +def : InstRW<[M3WriteVSTF, + WriteAdr], (instregex "ST3i(64)_POST")>; + +def : InstRW<[M3WriteVSTF], (instregex "ST4Fourv(8b|4h|2s)$")>; +def : InstRW<[M3WriteVSTF, + WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST")>; +def : InstRW<[M3WriteVSTI], (instregex 
"ST4Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteVSTI, + WriteAdr], (instregex "ST4Fourv(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVSTF], (instregex "ST4i(8|16|32|64)$")>; +def : InstRW<[M3WriteVSTF, + WriteAdr], (instregex "ST4i(8|16|32|64)_POST")>; + +// Cryptography instructions. +def : InstRW<[M3WriteAES], (instregex "^AES[DE]")>; +def : InstRW<[M3WriteAES, + M3ReadAES], (instregex "^AESI?MC")>; + +def : InstRW<[M3WriteNCRY3A], (instregex "^PMULL?v")>; + +def : InstRW<[M3WriteNCRY1A], (instregex "^SHA1([CHMP]|SU[01])")>; +def : InstRW<[M3WriteNCRY1A], (instregex "^SHA256SU0")>; +def : InstRW<[M3WriteNCRY5A], (instregex "^SHA256(H2?|SU1)")>; + +// CRC instructions. +def : InstRW<[M3WriteC2], (instregex "^CRC32")>; + +} // SchedModel = ExynosM3Model diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedFalkor.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedFalkor.td new file mode 100644 index 000000000..84825458e --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedFalkor.td @@ -0,0 +1,119 @@ +//==- AArch64SchedFalkor.td - Falkor Scheduling Definitions -*- tablegen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for Qualcomm Falkor to support +// instruction scheduling and other instruction cost heuristics. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Define the SchedMachineModel and provide basic properties for coarse grained +// instruction cost model. + +def FalkorModel : SchedMachineModel { + let IssueWidth = 8; // 8 uops are dispatched per cycle. + let MicroOpBufferSize = 128; // Out-of-order with temporary unified issue buffer. + let LoopMicroOpBufferSize = 16; + let LoadLatency = 3; // Optimistic load latency. + let MispredictPenalty = 11; // Minimum branch misprediction penalty. + let CompleteModel = 1; + + list UnsupportedFeatures = [HasSVE]; + + // FIXME: Remove when all errors have been fixed. + let FullInstRWOverlapCheck = 0; +} + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available on Falkor. + +let SchedModel = FalkorModel in { + + def FalkorUnitB : ProcResource<1>; // Branch + def FalkorUnitLD : ProcResource<1>; // Load pipe + def FalkorUnitSD : ProcResource<1>; // Store data + def FalkorUnitST : ProcResource<1>; // Store pipe + def FalkorUnitX : ProcResource<1>; // Complex arithmetic + def FalkorUnitY : ProcResource<1>; // Simple arithmetic + def FalkorUnitZ : ProcResource<1>; // Simple arithmetic + + def FalkorUnitVSD : ProcResource<1>; // Vector store data + def FalkorUnitVX : ProcResource<1>; // Vector X-pipe + def FalkorUnitVY : ProcResource<1>; // Vector Y-pipe + + def FalkorUnitGTOV : ProcResource<1>; // Scalar to Vector + def FalkorUnitVTOG : ProcResource<1>; // Vector to Scalar + + // Define the resource groups. 
+ def FalkorUnitXY : ProcResGroup<[FalkorUnitX, FalkorUnitY]>; + def FalkorUnitXYZ : ProcResGroup<[FalkorUnitX, FalkorUnitY, FalkorUnitZ]>; + def FalkorUnitXYZB : ProcResGroup<[FalkorUnitX, FalkorUnitY, FalkorUnitZ, + FalkorUnitB]>; + def FalkorUnitZB : ProcResGroup<[FalkorUnitZ, FalkorUnitB]>; + def FalkorUnitVXVY : ProcResGroup<[FalkorUnitVX, FalkorUnitVY]>; + +} + +//===----------------------------------------------------------------------===// +// Map the target-defined scheduler read/write resources and latency for +// Falkor. + +let SchedModel = FalkorModel in { + +// These WriteRes entries are not used in the Falkor sched model. +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } + +// These ReadAdvance entries are not used in the Falkor sched model. +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +// Detailed Refinements +// ----------------------------------------------------------------------------- +include "AArch64SchedFalkorDetails.td" + +} diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedFalkorDetails.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedFalkorDetails.td new file mode 100644 index 000000000..ff14e639d --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedFalkorDetails.td @@ -0,0 +1,1292 @@ +//==- AArch64SchedFalkorDetails.td - Falkor Scheduling Defs -*- tablegen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the uop and latency details for the machine model for the +// Qualcomm Falkor subtarget. +// +//===----------------------------------------------------------------------===// + +// Contains all of the Falkor specific SchedWriteRes types. The approach +// below is to define a generic SchedWriteRes for every combination of +// latency and microOps. The naming conventions is to use a prefix, one field +// for latency, and one or more microOp count/type designators. 
+// Prefix: FalkorWr +// MicroOp Count/Types: #(B|X|Y|Z|LD|ST|SD|VX|VY|VSD) +// Latency: #cyc +// +// e.g. FalkorWr_1Z_6SD_4VX_6cyc means there are 11 micro-ops to be issued +// down one Z pipe, six SD pipes, four VX pipes and the total latency is +// six cycles. +// +// Contains all of the Falkor specific ReadAdvance types for forwarding logic. +// +// Contains all of the Falkor specific WriteVariant types for immediate zero +// and LSLFast. +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Define 0 micro-op types +def FalkorWr_LdInc_none_2cyc : SchedWriteRes<[]> { + let Latency = 2; + let NumMicroOps = 0; +} +def FalkorWr_StInc_none_2cyc : SchedWriteRes<[]> { + let Latency = 2; + let NumMicroOps = 0; +} +def FalkorWr_none_3cyc : SchedWriteRes<[]> { + let Latency = 3; + let NumMicroOps = 0; +} +def FalkorWr_none_4cyc : SchedWriteRes<[]> { + let Latency = 4; + let NumMicroOps = 0; +} + +//===----------------------------------------------------------------------===// +// Define 1 micro-op types + +def FalkorWr_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 2; } +def FalkorWr_IMUL32_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; } +def FalkorWr_IMUL64_1X_4cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; } +def FalkorWr_IMUL64_1X_5cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 5; } +def FalkorWr_1Z_0cyc : SchedWriteRes<[FalkorUnitZ]> { let Latency = 0; } +def FalkorWr_1ZB_0cyc : SchedWriteRes<[FalkorUnitZB]> { let Latency = 0; } +def FalkorWr_1LD_3cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 3; } +def FalkorWr_1LD_4cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 4; } +def FalkorWr_1XYZ_0cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 0; } +def FalkorWr_1XYZ_1cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 1; } +def FalkorWr_1XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 2; } +def FalkorWr_1XYZB_0cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 0; } +def FalkorWr_1XYZB_1cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 1; } +def FalkorWr_1none_0cyc : SchedWriteRes<[]> { let Latency = 0; } + +def FalkorWr_1VXVY_0cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 0; } +def FalkorWr_1VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 1; } +def FalkorWr_1VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 2; } +def FalkorWr_1VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 3; } +def FalkorWr_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; } +def FalkorWr_VMUL32_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; } +def FalkorWr_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; } +def FalkorWr_FMUL32_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; } +def FalkorWr_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; } +def FalkorWr_FMUL64_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; } + +def FalkorWr_1LD_0cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 0; } +def FalkorWr_1ST_0cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 0; } +def FalkorWr_1ST_3cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 3; } + +def FalkorWr_1GTOV_0cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 0; } +def FalkorWr_1GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 1; } +def FalkorWr_1GTOV_4cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 4; } +def FalkorWr_1VTOG_1cyc : SchedWriteRes<[FalkorUnitVTOG]>{ let 
Latency = 1; } + +//===----------------------------------------------------------------------===// +// Define 2 micro-op types + +def FalkorWr_2VXVY_0cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 0; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 1; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 2; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 3; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 2; +} +def FalkorWr_VMUL32_2VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 5; + let NumMicroOps = 2; +} +def FalkorWr_FMUL32_2VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 5; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 6; + let NumMicroOps = 2; +} +def FalkorWr_FMUL64_2VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def FalkorWr_1LD_1VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 2; +} +def FalkorWr_1XYZ_1LD_4cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> { + let Latency = 4; + let NumMicroOps = 2; +} +def FalkorWr_2LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_5cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 5; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_2cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_4cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 4; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_10cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 10; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_12cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 12; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_14cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 14; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_21cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 21; + let NumMicroOps = 2; +} + +def FalkorWr_1GTOV_1VXVY_2cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitVXVY]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def FalkorWr_2GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitGTOV]> { + let Latency = 1; + let NumMicroOps = 2; +} + +def FalkorWr_1XYZ_1ST_4cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST]> { + let Latency = 4; + let NumMicroOps = 2; +} +def FalkorWr_1XYZ_1LD_5cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> { + let Latency = 5; + let NumMicroOps = 2; +} + +def FalkorWr_2XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitXYZ]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def FalkorWr_1Z_1XY_0cyc : SchedWriteRes<[FalkorUnitZ, FalkorUnitXY]> { + let Latency = 0; + let NumMicroOps = 2; +} + +def FalkorWr_1X_1Z_8cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> { + let Latency = 8; + let NumMicroOps = 2; + let ResourceCycles = [2, 8]; +} + +def FalkorWr_1X_1Z_11cyc : SchedWriteRes<[FalkorUnitX, 
FalkorUnitZ]> { + let Latency = 11; + let NumMicroOps = 2; + let ResourceCycles = [2, 11]; +} + +def FalkorWr_1LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitZ]> { + let Latency = 3; + let NumMicroOps = 2; +} + +def FalkorWr_1LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 2; +} + +def FalkorWr_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitSD, FalkorUnitST]> { + let Latency = 0; + let NumMicroOps = 2; +} + +def FalkorWr_1VSD_1ST_0cyc: SchedWriteRes<[FalkorUnitVSD, FalkorUnitST]> { + let Latency = 0; + let NumMicroOps = 2; +} + +//===----------------------------------------------------------------------===// +// Define 3 micro-op types + +def FalkorWr_1ST_1SD_1LD_0cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD, + FalkorUnitLD]> { + let Latency = 0; + let NumMicroOps = 3; +} + +def FalkorWr_1ST_1SD_1LD_3cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD, + FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 3; +} + +def FalkorWr_3VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 3; + let NumMicroOps = 3; +} + +def FalkorWr_3VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 3; +} + +def FalkorWr_3VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 5; + let NumMicroOps = 3; +} + +def FalkorWr_3VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 6; + let NumMicroOps = 3; +} + +def FalkorWr_1LD_2VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 3; +} + +def FalkorWr_2LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 3; +} + +def FalkorWr_3LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 3; +} + +def FalkorWr_2LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitZ]> { + let Latency = 3; + let NumMicroOps = 3; +} + +def FalkorWr_1XYZ_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitSD, FalkorUnitST]> { + let Latency = 0; + let NumMicroOps = 3; +} +def FalkorWr_1XYZ_1VSD_1ST_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitVSD, FalkorUnitST]> { + let Latency = 0; + let NumMicroOps = 3; +} +//===----------------------------------------------------------------------===// +// Define 4 micro-op types + +def FalkorWr_2VX_2VY_14cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY, + FalkorUnitVX, FalkorUnitVY]> { + let Latency = 14; + let NumMicroOps = 4; +} + +def FalkorWr_2VX_2VY_20cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY, + FalkorUnitVX, FalkorUnitVY]> { + let Latency = 20; + let NumMicroOps = 4; +} + +def FalkorWr_2VX_2VY_21cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY, + FalkorUnitVX, FalkorUnitVY]> { + let Latency = 21; + let NumMicroOps = 4; +} + +def FalkorWr_2VX_2VY_24cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY, + FalkorUnitVX, FalkorUnitVY]> { + let Latency = 24; + let NumMicroOps = 4; +} + +def FalkorWr_4VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 2; + let NumMicroOps = 4; +} +def FalkorWr_4VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 3; + let NumMicroOps = 4; +} +def FalkorWr_4VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 4; +} +def FalkorWr_4VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY, 
FalkorUnitVXVY]> { + let Latency = 6; + let NumMicroOps = 4; +} + +def FalkorWr_4LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitLD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 4; +} + +def FalkorWr_1LD_3VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 4; +} + +def FalkorWr_2LD_2none_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 4; +} + +def FalkorWr_2LD_1ST_1SD_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitST, + FalkorUnitSD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 4; +} + +def FalkorWr_2VSD_2ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 4; +} + +//===----------------------------------------------------------------------===// +// Define 5 micro-op types + +def FalkorWr_1LD_4VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 5; +} +def FalkorWr_2LD_2VXVY_1none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 5; +} +def FalkorWr_5VXVY_7cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY]> { + let Latency = 7; + let NumMicroOps = 5; +} +def FalkorWr_1XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST, + FalkorUnitVSD, FalkorUnitST, + FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 5; +} +def FalkorWr_1VXVY_2ST_2VSD_0cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST, + FalkorUnitVSD, FalkorUnitST, + FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 5; +} +//===----------------------------------------------------------------------===// +// Define 6 micro-op types + +def FalkorWr_2LD_2VXVY_2none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 6; +} + +def FalkorWr_2XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST, + FalkorUnitVSD, FalkorUnitXYZ, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 6; +} + +def FalkorWr_2VXVY_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST, + FalkorUnitVSD, FalkorUnitVXVY, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 6; +} + +def FalkorWr_3VSD_3ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 6; +} + +//===----------------------------------------------------------------------===// +// Define 8 micro-op types + +def FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitLD, FalkorUnitLD, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 8; +} + +def FalkorWr_4VSD_4ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 8; +} + +//===----------------------------------------------------------------------===// +// Define 9 micro-op types + +def FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, + FalkorUnitLD, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitLD, + FalkorUnitLD, FalkorUnitXYZ, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 9; +} + +def 
FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, + FalkorUnitLD, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitXYZ, + FalkorUnitLD, FalkorUnitLD, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 9; +} + +//===----------------------------------------------------------------------===// +// Define 10 micro-op types + +def FalkorWr_2VXVY_4ST_4VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST, + FalkorUnitVSD, FalkorUnitVXVY, + FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 10; +} + +//===----------------------------------------------------------------------===// +// Define 12 micro-op types + +def FalkorWr_4VXVY_4ST_4VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST, + FalkorUnitVSD, FalkorUnitVXVY, + FalkorUnitST, FalkorUnitVSD, + FalkorUnitVXVY, FalkorUnitST, + FalkorUnitVSD, FalkorUnitVXVY, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 12; +} + +// Forwarding logic is modeled for multiply add/accumulate and +// load/store base register increment. +// ----------------------------------------------------------------------------- +def FalkorReadIMA32 : SchedReadAdvance<3, [FalkorWr_IMUL32_1X_2cyc]>; +def FalkorReadIMA64 : SchedReadAdvance<4, [FalkorWr_IMUL64_1X_4cyc, FalkorWr_IMUL64_1X_5cyc]>; +def FalkorReadVMA : SchedReadAdvance<3, [FalkorWr_VMUL32_1VXVY_4cyc, FalkorWr_VMUL32_2VXVY_4cyc]>; +def FalkorReadFMA32 : SchedReadAdvance<1, [FalkorWr_FMUL32_1VXVY_5cyc, FalkorWr_FMUL32_2VXVY_5cyc]>; +def FalkorReadFMA64 : SchedReadAdvance<2, [FalkorWr_FMUL64_1VXVY_6cyc, FalkorWr_FMUL64_2VXVY_6cyc]>; + +def FalkorReadIncLd : SchedReadAdvance<1, [FalkorWr_LdInc_none_2cyc]>; +def FalkorReadIncSt : SchedReadAdvance<1, [FalkorWr_StInc_none_2cyc]>; + +// SchedPredicates and WriteVariants for Immediate Zero and LSLFast/ASRFast +// ----------------------------------------------------------------------------- +def FalkorImmZPred : SchedPredicate<[{MI->getOperand(1).isImm() && + MI->getOperand(1).getImm() == 0}]>; +def FalkorOp1ZrReg : SchedPredicate<[{MI->getOperand(1).getReg() == AArch64::WZR || + + MI->getOperand(1).getReg() == AArch64::XZR}]>; +def FalkorShiftExtFastPred : SchedPredicate<[{TII->isFalkorShiftExtFast(*MI)}]>; + +def FalkorWr_FMOV : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def FalkorWr_MOVZ : SchedWriteVariant<[ + SchedVar, + SchedVar]>; // imm fwd + + +def FalkorWr_ADDSUBsx : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def FalkorWr_LDRro : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def FalkorWr_LDRSro : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def FalkorWr_ORRi : SchedWriteVariant<[ + SchedVar, // imm fwd + SchedVar]>; + +def FalkorWr_PRFMro : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def FalkorWr_STRVro : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def FalkorWr_STRQro : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def FalkorWr_STRro : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +//===----------------------------------------------------------------------===// +// Specialize the coarse model by associating instruction groups with the +// subtarget-defined types. As the modeled is refined, this will override most +// of the earlier mappings. + +// Miscellaneous +// ----------------------------------------------------------------------------- + +// FIXME: This could be better modeled by looking at the regclasses of the operands. 
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instrs COPY)>; + +// SIMD Floating-point Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)v2f32$")>; + +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT))(v2f32|v2i32p)$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FAC(GE|GT)(32|64)$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|GE|GT)(32|64|v2f32|v2i32)$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v1i32|v1i64|v2i32)rz$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)v2f32$")>; + +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^F(MAX|MIN)(NM)?Vv4i32v$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FABD|FADD|FSUB)v2f32$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FADDP(v2i32p|v2i64p|v2f32)$")>; + +def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v1i32|v1i64|v2f32)$")>; +def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs FCVTXNv1i64)>; +def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVTZ(S|U)v2i32(_shift)?$")>; + +def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], + (instregex "^(FMUL|FMULX)(v2f32|(v1i32_indexed|v2i32_indexed))$")>; +def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], + (instrs FMULX32)>; + +def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], + (instregex "^(FMUL|FMULX)v1i64_indexed$")>; +def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], + (instrs FMULX64)>; + +def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(FABS|FNEG)(v2f64|v4f32)$")>; + +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2f64|v4f32|v2i64p)$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v2i64|v4i32)rz$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instrs FCVTLv4i16, FCVTLv2i32)>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32)$")>; + +def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instrs FDIVv2f32)>; +def : InstRW<[FalkorWr_1VX_1VY_12cyc],(instrs FSQRTv2f32)>; + +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(FABD|FADD(P)?|FSUB)(v2f64|v4f32)$")>; + +def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v2f64|v4f32)$")>; +def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs FCVTLv8i16, FCVTLv4i32)>; +def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVTZ(S|U)(v2i64|v4i32)(_shift)?$")>; + +def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc], + (instregex "^(FMUL|FMULX)(v2f64|v4f32|v4i32_indexed)$")>; + +def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc], + (instregex "^(FMUL|FMULX)v2i64_indexed$")>; + +def : InstRW<[FalkorWr_3VXVY_4cyc], (instrs FCVTNv4i16, FCVTNv2i32, FCVTXNv2f32)>; +def : InstRW<[FalkorWr_3VXVY_5cyc], (instrs FCVTNv8i16, FCVTNv4i32, FCVTXNv4f32)>; + +def : InstRW<[FalkorWr_2VX_2VY_14cyc],(instrs FDIVv2f64)>; +def : InstRW<[FalkorWr_2VX_2VY_20cyc],(instrs FDIVv4f32)>; +def : InstRW<[FalkorWr_2VX_2VY_21cyc],(instrs FSQRTv2f64)>; +def : InstRW<[FalkorWr_2VX_2VY_24cyc],(instrs FSQRTv4f32)>; + +def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], + (instregex "^ML(A|S)(v8i8|v4i16|v2i32)(_indexed)?$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], + (instregex "^ML(A|S)(v16i8|v8i16|v4i32|v2i64)(_indexed)?$")>; + +def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, FalkorReadFMA32], + (instregex "^FML(A|S)(v2f32|(v1i32_indexed|v2i32_indexed))$")>; +def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, FalkorReadFMA64], + (instregex "^FML(A|S)v1i64_indexed$")>; +def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc, 
FalkorReadFMA32], + (instregex "^FML(A|S)(v4f32|v4i32_indexed)$")>; +def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc, FalkorReadFMA64], + (instregex "^FML(A|S)(v2f64|v2i64_indexed)$")>; + +// SIMD Integer Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^ADD(v1i64|v2i32|v4i16|v8i8)$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs ADDPv2i64p)>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(AND|ORR|ORN|BIC|EOR)v8i8$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIC|ORR)(v2i32|v4i16)$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^NEG(v1i64|v2i32|v4i16|v8i8)$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^SUB(v1i64|v2i32|v4i16|v8i8)$")>; + +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v2i32|v4i16|v8i8)(_v.*)?$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHLv1i64$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHR(v2i32|v4i16|v8i8)_shift$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHRd$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^((S|U)?(MAX|MIN)P?|ABS|ADDP|CM(EQ|GE|HS|GT|HI))(v1i64|v2i32|v4i16|v8i8)$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CM(EQ|GE|HS|GT|HI)(v1i64|v2i32|v4i16|v8i8)$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CM(EQ|LE|GE|GT|LT)(v1i64|v2i32|v4i16|v8i8)rz$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CMTST(v1i64|v2i32|v4i16|v8i8)$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs PMULv8i8)>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^SHL(v2i32|v4i16|v8i8)_shift$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^SHLd$")>; + +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQNEG(v2i32|v4i16|v8i8)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)R?SRA(d|(v2i32|v4i16|v8i8)_shift)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)(ABD|ADALP)(v8i8|v4i16|v2i32)(_v.*)?$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)ADDLVv4i16v$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)QSHLU?(d|s|h|b|(v8i8|v4i16|v2i32)_shift)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)(QSHL|RSHL|QRSHL)(v1i8|v1i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN)(s|h|b)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)QSUB(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RHADD(v2i32|v4i16|v8i8)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RSHR(v2i32|v4i16|v8i8)_shift$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RSHRd$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^R?SHRN(v2i32|v4i16|v8i8)_shift$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(SU|US)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)?(MAX|MIN)V(v4i16v|v4i32v)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs ADDVv4i16v)>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^S(L|R)I(d|(v8i8|v4i16|v2i32)_shift)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQABS(v1i8|v1i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQNEG(v1i8|v1i16|v1i32|v1i64)$")>; + +def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)ADDLVv8i8v$")>; +def : InstRW<[FalkorWr_1VXVY_4cyc], 
(instregex "^(S|U)?(MAX|MIN)V(v8i8v|v8i16v)$")>; +def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs ADDVv8i8v)>; +def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc], + (instregex "^MUL(v2i32|v4i16|v8i8)(_indexed)?$")>; +def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc], + (instregex "^SQR?DMULH(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>; +def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc], + (instregex "^SQDMULL(i16|i32)$")>; +def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], + (instregex "^SQRDML(A|S)H(i16|i32|v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>; + +def : InstRW<[FalkorWr_1VXVY_5cyc], (instregex "^(S|U)?(MAX|MIN)Vv16i8v$")>; + +def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs ADDVv4i32v)>; + +def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs ADDVv8i16v)>; +def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(ADD|SUB)HNv.*$")>; +def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)ABA(v2i32|v4i16|v8i8)$")>; + +def : InstRW<[FalkorWr_2VXVY_5cyc], (instrs ADDVv16i8v)>; + +def : InstRW<[FalkorWr_2VXVY_6cyc], (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32)_shift?$")>; +def : InstRW<[FalkorWr_2VXVY_6cyc], (instregex "^R(ADD|SUB)HNv.*$")>; + +def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^ADD(v16i8|v8i16|v4i32|v2i64)$")>; +def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs ADDPv2i64)>; // sz==11 +def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(AND|ORR|ORN|BIC|EOR)v16i8$")>; +def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(BIC|ORR)(v8i16|v4i32)$")>; +def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(NEG|SUB)(v16i8|v8i16|v4i32|v2i64)$")>; + +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)ADDLv.*$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v16i8|v2i64|v4i32|v8i16)(_v.*)?$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)SHR(v16i8|v8i16|v4i32|v2i64)_shift$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)SUBLv.*$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^((S|U)?(MAX|MIN)P?|ABS)(v16i8|v2i64|v4i32|v8i16)$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^ADDP(v4i32|v8i16|v16i8)$")>; // sz!=11 +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^CM(EQ|GE|HS|GT|HI)(v16i8|v2i64|v4i32|v8i16)$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^CM(EQ|LE|GE|GT|LT)(v16i8|v2i64|v4i32|v8i16)rz$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(CMTST|PMUL)(v16i8|v2i64|v4i32|v8i16)$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^PMULL(v8i8|v16i8)$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^SHL(v16i8|v8i16|v4i32|v2i64)_shift$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>; + +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)R?SRA(v2i64|v4i32|v8i16|v16i8)_shift$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)ABD(v16i8|v8i16|v4i32|v2i64)$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)ABDLv.*$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)(ADALP|QADD)(v16i8|v8i16|v4i32|v2i64)(_v.*)?$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)QSHLU?(v2i64|v4i32|v8i16|v16i8)_shift$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)(QSHL|RSHL|QRSHL|QSUB|RHADD)(v16i8|v8i16|v4i32|v2i64)$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)RSHR(v2i64|v4i32|v8i16|v16i8)_shift$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^R?SHRN(v2i64|v4i32|v8i16|v16i8)_shift$")>; +def : 
InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(SU|US)QADD(v16i8|v8i16|v4i32|v2i64)$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^PMULL(v1i64|v2i64)$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^S(L|R)I(v16i8|v8i16|v4i32|v2i64)_shift$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^SQ(ABS|NEG)(v16i8|v8i16|v4i32|v2i64)$")>; + +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], + (instregex "^(MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], + (instregex "^SQDMULLv.*$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], + (instregex "^SQRDML(A|S)H(v16i8|v8i16|v4i32)(_indexed)?$")>; + +def : InstRW<[FalkorWr_3VXVY_3cyc], (instregex "^(S|U)ADDLVv4i32v$")>; + +def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^(S|U)ADDLVv8i16v$")>; + +def : InstRW<[FalkorWr_3VXVY_6cyc], (instregex "^(S|U)ADDLVv16i8v$")>; + +def : InstRW<[FalkorWr_4VXVY_2cyc], (instregex "^(S|U)(ADD|SUB)Wv.*$")>; + +def : InstRW<[FalkorWr_4VXVY_3cyc], (instregex "^(S|U)ABALv.*$")>; + +def : InstRW<[FalkorWr_4VXVY_4cyc], (instregex "^(S|U)ABA(v16i8|v8i16|v4i32)$")>; + +def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], + (instregex "^SQD(MLAL|MLSL)(i16|i32|v1i32_indexed|v1i64_indexed)$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], + (instregex "^SQD(MLAL|MLSL)v[248].*$")>; + +// SIMD Load Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))_POST$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], (instrs LD2i64)>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instrs LD2i64_POST)>; + +def : InstRW<[FalkorWr_1LD_1VXVY_4cyc, FalkorReadIncLd], (instregex "^LD1i(8|16|32)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD1i(8|16|32)_POST$")>; + +def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], + (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD2Twov(8b|4h|2s)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], + (instregex "^LD2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD2Rv(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], + (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>; + +def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instregex "^LD1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd], + (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instregex "^LD2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd], + (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instregex "^LD2Rv(16b|8h|4s|2d)$")>; +def : 
InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd], + (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instrs LD3i64)>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd], + (instrs LD3i64_POST)>; +def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instrs LD4i64)>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd], + (instrs LD4i64_POST)>; + +def : InstRW<[FalkorWr_1LD_2VXVY_4cyc, FalkorReadIncLd], (instregex "^LD2i(8|16|32)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_2VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD2i(8|16|32)_POST$")>; + +def : InstRW<[FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], + (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD3Rv(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], + (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>; + +def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd], (instregex "^LD1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd], + (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd], (instrs LD3Threev2d)>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd], + (instrs LD3Threev2d_POST)>; +def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd], (instregex "^LD3Rv(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd], + (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[FalkorWr_1LD_3VXVY_4cyc, FalkorReadIncLd], (instregex "^LD3i(8|16|32)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD3i(8|16|32)_POST$")>; + +def : InstRW<[FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], + (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], (instregex "^LD4Rv(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], + (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>; + +def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd], + (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd], (instrs LD4Fourv2d)>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd], + (instrs LD4Fourv2d_POST)>; +def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd], (instregex "^LD4Rv(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd], + (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[FalkorWr_1LD_4VXVY_4cyc, FalkorReadIncLd], (instregex "^LD4i(8|16|32)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_4VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD4i(8|16|32)_POST$")>; + +def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc, FalkorReadIncLd], + (instregex "^LD3Threev(8b|4h|2s)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_1none_4cyc, FalkorReadIncLd], + (instregex "^LD3Threev(8b|4h|2s)_POST$")>; + +def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc, 
FalkorReadIncLd], + (instregex "^LD4Fourv(8b|4h|2s)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_2none_4cyc, FalkorReadIncLd], + (instregex "^LD4Fourv(8b|4h|2s)_POST$")>; + +def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD3Threev(16b|8h|4s)$")>; + +def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD4Fourv(16b|8h|4s)$")>; + +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD3Threev(16b|8h|4s)_POST$")>; + +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD4Fourv(16b|8h|4s)_POST$")>; + +// Arithmetic and Logical Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(CCMN|CCMP)(W|X)(r|i)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ADC(S)?(W|X)r$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ADD(S)?(W|X)r(r|i)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(CSEL|CSINC|CSINV|CSNEG)(W|X)r$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^AND(S)?(W|X)r(i|r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^BIC(S)?(W|X)r(r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EON(W|X)r(r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EOR(W|X)r(i|r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORN(W|X)r(r|s)$")>; +def : InstRW<[FalkorWr_ORRi], (instregex "^ORR(W|X)ri$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORR(W|X)r(r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^SBC(S)?(W|X)r$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^SUB(S)?(W|X)r(r|i)$")>; +def : InstRW<[FalkorWr_ADDSUBsx], (instregex "^ADD(S)?(W|X)r(s|x|x64)$")>; +def : InstRW<[FalkorWr_ADDSUBsx], (instregex "^SUB(S)?(W|X)r(s|x|x64)$")>; + +// SIMD Miscellaneous Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^DUP(v8i8|v4i16|v2i32)(gpr|lane)$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^DUP(v16i8|v8i16)(gpr|lane)$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^CPY(i8|i16|i32|i64)$")>; +def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^INSv(i8|i16)(gpr|lane)$")>; +def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^(S|U)MOVv.*$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v8i8$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs EXTv8i8)>; +def : InstRW<[FalkorWr_1VXVY_0cyc], (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)$")>; // imm fwd +def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs TBLv8i8One)>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs NOTv8i8)>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^REV(16|32|64)v.*$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(TRN1|TRN2|ZIP1|UZP1|UZP2|ZIP2|XTN)(v2i32|v2i64|v4i16|v4i32|v8i8|v8i16|v16i8)$")>; + +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(CLS|CLZ|CNT|RBIT)(v2i32|v4i16|v8i8)$")>; + +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "(S|U)QXTU?Nv.*$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FRECPEv1i32, FRECPEv1i64, FRSQRTEv1i32, FRSQRTEv1i64, FRECPEv2f32, FRSQRTEv2f32)>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FRECPXv1i32, FRECPXv1i64)>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs URECPEv2i32, URSQRTEv2i32)>; + +def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], + (instrs FRECPS32, FRSQRTS32, FRECPSv2f32, FRSQRTSv2f32)>; + +def : 
InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], + (instrs FRECPS64, FRSQRTS64)>; + +def : InstRW<[FalkorWr_1GTOV_1VXVY_2cyc], + (instregex "^INSv(i32|i64)(gpr|lane)$")>; +def : InstRW<[FalkorWr_2GTOV_1cyc], (instregex "^DUP(v4i32|v2i64)(gpr|lane)$")>; +def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v16i8$")>; +def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs EXTv16i8)>; +def : InstRW<[FalkorWr_2VXVY_0cyc], (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>; // imm fwd +def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs NOTv16i8)>; +def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs TBLv16i8One)>; + +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(CLS|CLZ|CNT|RBIT)(v4i32|v8i16|v16i8)$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs FRECPEv2f64, FRECPEv4f32, FRSQRTEv2f64, FRSQRTEv4f32)>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs URECPEv4i32, URSQRTEv4i32)>; + +def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs TBLv8i8Two)>; +def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^TBX(v8|v16)i8One$")>; + +def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc], + (instrs FRECPSv4f32, FRSQRTSv4f32)>; + +def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc], + (instrs FRECPSv2f64, FRSQRTSv2f64)>; + +def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^TBL(v8i8Three|v16i8Two)$")>; +def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^TBX(v8i8Two|v16i8Two)$")>; + +def : InstRW<[FalkorWr_4VXVY_6cyc], (instregex "^TBL(v8i8Four|v16i8Three)$")>; +def : InstRW<[FalkorWr_4VXVY_6cyc], (instregex "^TBX(v8i8Three|v16i8Three)$")>; + +def : InstRW<[FalkorWr_5VXVY_7cyc], (instrs TBLv16i8Four)>; +def : InstRW<[FalkorWr_5VXVY_7cyc], (instregex "^TBX(v8i8Four|v16i8Four)$")>; + +// SIMD Store Instructions +// ----------------------------------------------------------------------------- + +def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^STR(Q|D|S|H|B)ui$")>; +def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^STR(Q|D|S|H|B)(post|pre)$")>; +def : InstRW<[FalkorWr_STRVro, ReadDefault, FalkorReadIncSt], + (instregex "^STR(D|S|H|B)ro(W|X)$")>; +def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STPQi$")>; +def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STPQ(post|pre)$")>; +def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STP(D|S)(i)$")>; +def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STP(D|S)(post|pre)$")>; +def : InstRW<[FalkorWr_STRQro, ReadDefault, FalkorReadIncSt], + (instregex "^STRQro(W|X)$")>; +def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^STUR(Q|D|S|B|H)i$")>; +def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instrs STNPDi, STNPSi)>; +def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instrs STNPQi)>; + +def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1(One(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64)|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>; +def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1(One(v8b|v4h|v2s|v1d)_POST|(i8|i16|i32|i64)_POST)$")>; +def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST2(Two(v8b|v4h|v2s)|(i8|i16|i32|i64))$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc, 
FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST2(Two(v8b|v4h|v2s)|(i8|i16|i32|i64))_POST$")>; + +def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>; +def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST2Two(v16b|v8h|v4s|v2d)$")>; +def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST3(i8|i16|i32|i64)$")>; +def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST4(i8|i16|i32|i64)$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))_POST$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST2Two(v16b|v8h|v4s|v2d)_POST$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST3(i8|i16|i32|i64)_POST$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST4(i8|i16|i32|i64)_POST$")>; + +def : InstRW<[FalkorWr_1VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST3Three(v8b|v4h|v2s)$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST3Three(v8b|v4h|v2s)_POST$")>; + +def : InstRW<[FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1Three(v16b|v8h|v4s|v2d)$")>; +def : InstRW<[FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt], + (instrs ST3Threev2d)>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1Three(v16b|v8h|v4s|v2d)_POST$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt], + (instrs ST3Threev2d_POST)>; + +def : InstRW<[FalkorWr_2VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST4Four(v8b|v4h|v2s)$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST4Four(v8b|v4h|v2s)_POST$")>; + +def : InstRW<[FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1Four(v16b|v8h|v4s|v2d)$")>; +def : InstRW<[FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt], + (instrs ST4Fourv2d)>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). 
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1Four(v16b|v8h|v4s|v2d)_POST$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt], + (instrs ST4Fourv2d_POST)>; + +def : InstRW<[FalkorWr_2VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST3Three(v16b|v8h|v4s)$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST3Three(v16b|v8h|v4s)_POST$")>; + +def : InstRW<[FalkorWr_4VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST4Four(v16b|v8h|v4s)$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST4Four(v16b|v8h|v4s)_POST$")>; + +// Branch Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_1none_0cyc], (instrs B, TCRETURNdi)>; +def : InstRW<[FalkorWr_1Z_0cyc], (instregex "^(BR|RET|(CBZ|CBNZ|TBZ|TBNZ)(W|X))$")>; +def : InstRW<[FalkorWr_1Z_0cyc], (instrs RET_ReallyLR, TCRETURNri)>; +def : InstRW<[FalkorWr_1ZB_0cyc], (instrs Bcc)>; +def : InstRW<[FalkorWr_1XYZB_0cyc], (instrs BL)>; +def : InstRW<[FalkorWr_1Z_1XY_0cyc], (instrs BLR)>; + +// Cryptography Extensions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs SHA1Hrr)>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs AESIMCrr, AESMCrr)>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs AESDrr, AESErr)>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instrs SHA1SU0rrr, SHA1SU1rr, SHA256SU0rr)>; +def : InstRW<[FalkorWr_1VX_1VY_4cyc], (instregex "^SHA1(C|M|P)rrr$")>; +def : InstRW<[FalkorWr_1VX_1VY_5cyc], (instrs SHA256H2rrr, SHA256Hrrr)>; +def : InstRW<[FalkorWr_4VXVY_3cyc], (instrs SHA256SU1rrr)>; + +// FP Load Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LDR((Q|D|S|H|B)ui|(Q|D|S)l)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LDR(Q|D|S|H|B)(post|pre)$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LDUR(Q|D|S|H|B)i$")>; +def : InstRW<[FalkorWr_LDRro, FalkorReadIncLd], + (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>; +def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instrs LDNPQi)>; +def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instrs LDPQi)>; +def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instregex "LDNP(D|S)i$")>; +def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instregex "LDP(D|S)i$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instregex "LDP(D|S)(pre|post)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instregex "^LDPQ(pre|post)$")>; + +// FP Data Processing Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCCMP(E)?(S|D)rr$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex 
"^FCMP(E)?(S|D)r(r|i)$")>; +def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVT(A|M|N|P|Z)(S|U)U(W|X)(S|D)r$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)(S|D)r$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCSEL(S|D)rrr$")>; + +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?(S|D)rr$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?Pv2i(32|64)p$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs FCVTSHr, FCVTDHr)>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(S|D)r$")>; + +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FABD(32|64)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FADD|FSUB)(S|D)rr$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FCVTHSr, FCVTHDr)>; + +def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs FCVTSDr, FCVTDSr)>; + +def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], + (instregex "^F(N)?MULSrr$")>; + +def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], + (instregex "^F(N)?MULDrr$")>; + +def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instrs FDIVSrr)>; +def : InstRW<[FalkorWr_1VX_1VY_14cyc],(instrs FDIVDrr)>; +def : InstRW<[FalkorWr_1VX_1VY_12cyc],(instrs FSQRTSr)>; +def : InstRW<[FalkorWr_1VX_1VY_21cyc],(instrs FSQRTDr)>; + +def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, ReadDefault, ReadDefault, FalkorReadFMA32], + (instregex "^F(N)?M(ADD|SUB)Srrr$")>; +def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, ReadDefault, ReadDefault, FalkorReadFMA64], + (instregex "^F(N)?M(ADD|SUB)Drrr$")>; + +// FP Miscellaneous Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_FMOV], (instregex "^FMOV(WS|XD|XDHigh)r$")>; +def : InstRW<[FalkorWr_1GTOV_0cyc], (instregex "^FMOV(S|D)i$")>; // imm fwd +def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)S(W|X)(D|S)ri$")>; +def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)(d|s)$")>; +def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FMOV(SW|DX|DXHigh)r$")>; +def : InstRW<[FalkorWr_1VXVY_0cyc], (instregex "^FMOV(Sr|Dr|v.*_ns)$")>; // imm fwd +// FIXME: We are currently generating movi v0.2d, #0 for these, which is worse than fmov wzr/xzr +def : InstRW<[FalkorWr_2VXVY_0cyc], (instrs FMOVD0, FMOVS0)>; // imm fwd + +def : InstRW<[FalkorWr_1GTOV_4cyc], (instregex "^(S|U)CVTF(S|U)(W|X)(D|S)ri$")>; +def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)CVTF(v1i32|v2i32|v1i64|v2f32|d|s)(_shift)?")>; + +def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)CVTF(v2i64|v4i32|v2f64|v4f32)(_shift)?")>; + +// Load Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFMui, PRFMl)>; +def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFUMi)>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instregex "^LDNP(W|X)i$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instregex "^LDP(W|X)i$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instregex "^LDP(W|X)(post|pre)$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LDR(BB|HH|W|X)ui$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LDR(BB|HH|W|X)(post|pre)$")>; +def : InstRW<[FalkorWr_LDRro, FalkorReadIncLd], + (instregex "^LDR(BB|HH|W|X)ro(W|X)$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LDR(W|X)l$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LDTR(B|H|W|X)i$")>; 
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LDUR(BB|HH|W|X)i$")>; +def : InstRW<[FalkorWr_PRFMro], (instregex "^PRFMro(W|X)$")>; +def : InstRW<[FalkorWr_1LD_4cyc, FalkorWr_none_4cyc, FalkorReadIncLd], + (instrs LDPSWi)>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_4cyc, FalkorWr_none_4cyc, FalkorReadIncLd], + (instregex "^LDPSW(post|pre)$")>; +def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd], + (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_4cyc, FalkorReadIncLd], + (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>; +def : InstRW<[FalkorWr_LDRSro, FalkorReadIncLd], + (instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>; +def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd], + (instrs LDRSWl)>; +def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd], + (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>; +def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd], + (instregex "^LDURS(BW|BX|HW|HX|W)i$")>; + +// Miscellaneous Data-Processing Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(S|U)?BFM(W|X)ri$")>; +def : InstRW<[FalkorWr_1X_2cyc], (instregex "^CRC32.*$")>; +def : InstRW<[FalkorWr_1XYZ_2cyc], (instregex "^(CLS|CLZ|RBIT|REV|REV16|REV32)(W|X)r$")>; +def : InstRW<[FalkorWr_2XYZ_2cyc], (instregex "^EXTR(W|X)rri$")>; + +// Divide and Multiply Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_IMUL64_1X_4cyc, ReadDefault, ReadDefault, FalkorReadIMA64], + (instregex "^(S|U)M(ADD|SUB)Lrrr$")>; +def : InstRW<[FalkorWr_IMUL32_1X_2cyc, ReadDefault, ReadDefault, FalkorReadIMA32], + (instregex "^M(ADD|SUB)Wrrr$")>; + +def : InstRW<[FalkorWr_IMUL64_1X_5cyc], (instregex "^(S|U)MULHrr$")>; +def : InstRW<[FalkorWr_IMUL64_1X_5cyc, ReadDefault, ReadDefault, FalkorReadIMA64], + (instregex "^M(ADD|SUB)Xrrr$")>; + +def : InstRW<[FalkorWr_1X_1Z_8cyc], (instregex "^(S|U)DIVWr$")>; +def : InstRW<[FalkorWr_1X_1Z_11cyc], (instregex "^(S|U)DIVXr$")>; + +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], + (instregex "^(S|U)MULLv.*$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], + (instregex "^(S|U)(MLAL|MLSL)v.*$")>; + +// Move and Shift Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(LSLV|LSRV|ASRV|RORV)(W|X)r$")>; +def : InstRW<[FalkorWr_1XYZ_0cyc], (instregex "^MOVK(W|X)i$")>; // imm fwd +def : InstRW<[FalkorWr_1XYZB_0cyc], (instregex "^ADRP?$")>; // imm fwd +def : InstRW<[FalkorWr_1XYZB_0cyc], (instregex "^MOVN(W|X)i$")>; // imm fwd +def : InstRW<[FalkorWr_MOVZ], (instregex "^MOVZ(W|X)i$")>; +def : InstRW<[FalkorWr_1XYZ_0cyc], (instrs MOVi32imm, MOVi64imm)>; // imm fwd (approximation) +def : InstRW<[WriteSequence<[FalkorWr_1XYZ_1cyc, FalkorWr_1XYZ_1cyc]>], + (instrs MOVaddr, MOVaddrBA, MOVaddrCP, MOVaddrEXT, MOVaddrJT, MOVaddrTLS)>; +def : InstRW<[WriteSequence<[FalkorWr_1LD_3cyc, FalkorWr_1XYZ_1cyc]>], + (instrs LOADgot)>; + +// Other Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_1LD_0cyc], (instrs CLREX, DMB, DSB)>; +def : InstRW<[FalkorWr_1none_0cyc], (instrs BRK, DCPS1, DCPS2, DCPS3, HINT, HLT, HVC, ISB, SMC, SVC)>; +def : InstRW<[FalkorWr_1ST_0cyc], (instrs SYSxt, SYSLxt)>; +def : InstRW<[FalkorWr_1Z_0cyc], (instrs MSRpstateImm1, MSRpstateImm4)>; + +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex 
"^(LDAR(B|H|W|X)|LDAXR(B|H|W|X)|LDXR(B|H|W|X))$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instregex "^(LDAXP(W|X)|LDXP(W|X))$")>; +def : InstRW<[FalkorWr_1LD_3cyc], (instrs MRS, MOVbaseTLS)>; + +def : InstRW<[FalkorWr_1LD_1Z_3cyc], (instrs DRPS)>; + +def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs MSR)>; +def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instrs STNPWi, STNPXi)>; +def : InstRW<[FalkorWr_2LD_1Z_3cyc], (instrs ERET)>; + +def : InstRW<[FalkorWr_1ST_1SD_1LD_3cyc], (instregex "^LDC.*$")>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^STLR(B|H|W|X)$")>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STXP(W|X)$")>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STXR(B|H|W|X)$")>; + +def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc, ReadDefault, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STLXP(W|X)$")>; +def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STLXR(B|H|W|X)$")>; + +// Store Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STP(W|X)i$")>; +def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STP(W|X)(post|pre)$")>; +def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^STR(BB|HH|W|X)ui$")>; +def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^STR(BB|HH|W|X)(post|pre)$")>; +def : InstRW<[FalkorWr_STRro, ReadDefault, FalkorReadIncSt], + (instregex "^STR(BB|HH|W|X)ro(W|X)$")>; +def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^STTR(B|H|W|X)i$")>; +def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^STUR(BB|HH|W|X)i$")>; + diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedKryo.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedKryo.td new file mode 100644 index 000000000..68de3e077 --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedKryo.td @@ -0,0 +1,138 @@ +//==- AArch64SchedKryo.td - Qualcomm Kryo Scheduling Defs ---*- tablegen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for Qualcomm Kryo to support +// instruction scheduling and other instruction cost heuristics. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// The issue width is set to five, matching the five issue queues for expanded +// uops. Now, the latency spreadsheet has information based on fragmented uops, +// but these do not actually take up an issue queue. 
+ +def KryoModel : SchedMachineModel { + let IssueWidth = 5; // 5-wide issue for expanded uops + let MicroOpBufferSize = 128; // Out-of-order with temporary unified issue buffer + let LoadLatency = 4; // Optimistic load latency + let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch + + // Enable partial & runtime unrolling. The magic number is chosen based on + // experiments and benchmarking data. + let LoopMicroOpBufferSize = 16; + let CompleteModel = 1; + + list UnsupportedFeatures = [HasSVE]; + + // FIXME: Remove when all errors have been fixed. + let FullInstRWOverlapCheck = 0; +} + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available on Kryo. + +let SchedModel = KryoModel in { + def KryoUnitXA : ProcResource<1>; // Type X(A) micro-ops + def KryoUnitXB : ProcResource<1>; // Type X(B) micro-ops + def KryoUnitYA : ProcResource<1>; // Type Y(A) micro-ops + def KryoUnitYB : ProcResource<1>; // Type Y(B) micro-ops + def KryoUnitX : ProcResGroup<[KryoUnitXA, // Type X micro-ops + KryoUnitXB]>; + def KryoUnitY : ProcResGroup<[KryoUnitYA, // Type Y micro-ops + KryoUnitYB]>; + def KryoUnitXY : ProcResGroup<[KryoUnitXA, // Type XY micro-ops + KryoUnitXB, + KryoUnitYA, + KryoUnitYB]>; + def KryoUnitLSA : ProcResource<1>; // Type LS(A) micro-ops + def KryoUnitLSB : ProcResource<1>; // Type LS(B) micro-ops + def KryoUnitLS : ProcResGroup<[KryoUnitLSA, // Type LS micro-ops + KryoUnitLSB]>; +} + +let SchedModel = KryoModel in { + +//===----------------------------------------------------------------------===// +// Map the target-defined scheduler read/write resources and latency for +// Kryo. + +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } +def : WriteRes + { let Latency = 2; let NumMicroOps = 2; } +def : WriteRes + { let Latency = 2; let NumMicroOps = 2; } +def : WriteRes + { let Latency = 2; let NumMicroOps = 2; } +def : WriteRes { let Latency = 2; } +def : WriteRes + { let Latency = 8; let NumMicroOps = 1; } // Fragent -1 +def : WriteRes + { let Latency = 8; let NumMicroOps = 1; } // Fragent -1 +def : WriteRes { let Latency = 5; } +def : WriteRes { let Latency = 5; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 6; } +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 4; } +def : WriteRes + { let Latency = 3; let NumMicroOps = 2; } +def : WriteRes { let Latency = 2; } +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 6; } +def : WriteRes { let Latency = 6; } +def : WriteRes + { let Latency = 6; let NumMicroOps = 2; } +def : WriteRes + { let Latency = 12; let NumMicroOps = 2; } // Fragent -1 / NoRSV +1 +def : WriteRes { let Latency = 6; } +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 4; } + +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } + +def : WriteRes { let Latency = 4; } + +def : WriteRes { let Unsupported = 1; } + +// No forwarding logic is modelled yet. 
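+// Each ReadAdvance below pairs one of the generic SchedRead types with the
+// number of cycles by which a dependent operand may begin early; since no
+// forwarding is modelled, every advance here is zero, i.e. consumers observe
+// the full producer latency.  Assuming the standard LLVM syntax, a fully
+// spelled-out entry looks like:
+//
+//   def : ReadAdvance<ReadI, 0>;        // integer operand reads: no early start
+//   def : ReadAdvance<ReadAdrBase, 0>;  // address-base reads: no early start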
+def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + + +//===----------------------------------------------------------------------===// +// Specialize the coarse model by associating instruction groups with the +// subtarget-defined types. As the modeled is refined, this will override most +// of the above SchedWriteRes and SchedAlias mappings. + +// Miscellaneous +// ----------------------------------------------------------------------------- + +def : InstRW<[WriteI], (instrs COPY)>; + + +// Detailed Refinedments +// ----------------------------------------------------------------------------- +include "AArch64SchedKryoDetails.td" + + +} // SchedModel = KryoModel diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedKryoDetails.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedKryoDetails.td new file mode 100644 index 000000000..cf4cdabb8 --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedKryoDetails.td @@ -0,0 +1,2378 @@ +//=- AArch64SchedKryoDetails.td - QC Kryo Scheduling Defs ----*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the uop and latency details for the machine model for the +// Qualcomm Kryo subtarget. +// +//===----------------------------------------------------------------------===// + +def KryoWrite_3cyc_X_noRSV_138ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_X_noRSV_138ln], + (instregex "(S|U)R?SRA(d|(v2i32|v4i16|v8i8)_shift)")>; + +def KryoWrite_3cyc_X_X_139ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_X_X_139ln], + (instregex "(S|U)R?SRA(v2i64|v4i32|v8i16|v16i8)_shift")>; + +def KryoWrite_4cyc_XY_XY_noRSV_172ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 4; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_4cyc_XY_XY_noRSV_172ln], + (instregex "(S|U)ABA(v8i8|v4i16|v2i32)")>; +def KryoWrite_4cyc_XY_XY_XY_XY_178ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitXY, KryoUnitXY]> { + let Latency = 4; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_4cyc_XY_XY_XY_XY_178ln], + (instregex "(S|U)ABA(v16i8|v8i16|v4i32)")>; +def KryoWrite_3cyc_XY_XY_XY_XY_177ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitXY, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_XY_XY_177ln], + (instregex "(S|U)ABALv.*")>; +def KryoWrite_3cyc_XY_XY_166ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_166ln], + (instregex "(S|U)(ABD|QSUB|RHADD)(v16i8|v8i16|v4i32|v2i64)")>; +def KryoWrite_3cyc_XY_noRSV_159ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_159ln], + (instregex "(S|U)(ABD|RHADD)(v8i8|v4i16|v2i32)")>; +def KryoWrite_3cyc_XY_XY_165ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_165ln], + (instregex "(S|U)ABDLv.*")>; +def KryoWrite_3cyc_X_noRSV_154ln : + SchedWriteRes<[KryoUnitX]> { +let Latency = 3; let NumMicroOps = 2; +} +def : 
InstRW<[KryoWrite_3cyc_X_noRSV_154ln], + (instregex "(S|U)ADALP(v8i8|v4i16|v2i32)_v.*")>; +def KryoWrite_3cyc_X_X_155ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_X_X_155ln], + (instregex "(S|U)ADALP(v16i8|v8i16|v4i32)_v.*")>; +def KryoWrite_2cyc_XY_XY_151ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_151ln], + (instregex "(S|U)(ADD|SUB)Lv.*")>; +def KryoWrite_2cyc_XY_noRSV_148ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_148ln], + (instregex "((S|U)ADDLP|ABS)(v2i32|v4i16|v8i8)(_v.*)?")>; +def KryoWrite_2cyc_XY_XY_150ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_150ln], + (instregex "((S|U)ADDLP|ABS)(v2i64|v4i32|v8i16|v16i8)(_v.*)?")>; +def KryoWrite_3cyc_XY_XY_XY_noRSV_179ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_XY_noRSV_179ln], + (instrs SADDLVv4i32v, UADDLVv4i32v)>; +def KryoWrite_5cyc_XY_XY_XY_noRSV_180ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitXY]> { + let Latency = 5; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_5cyc_XY_XY_XY_noRSV_180ln], + (instrs SADDLVv8i16v, UADDLVv8i16v)>; +def KryoWrite_6cyc_XY_XY_X_noRSV_181ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitX]> { + let Latency = 6; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_6cyc_XY_XY_X_noRSV_181ln], + (instrs SADDLVv16i8v, UADDLVv16i8v)>; +def KryoWrite_3cyc_XY_noRSV_158ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_158ln], + (instrs SADDLVv4i16v, UADDLVv4i16v, ADDVv4i16v)>; +def KryoWrite_4cyc_X_noRSV_169ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_noRSV_169ln], + (instrs SADDLVv8i8v, UADDLVv8i8v, ADDVv8i8v)>; +def KryoWrite_2cyc_XY_XY_XY_XY_176ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_XY_XY_176ln], + (instregex "(S|U)(ADDW|SUBW)v.*")>; +def KryoWrite_4cyc_X_noRSV_40ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_noRSV_40ln], + (instregex "(S|U)CVTFS(W|X)(D|S)ri")>; +def KryoWrite_4cyc_X_noRSV_97ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_noRSV_97ln], + (instregex "(S|U)CVTFU(W|X)(D|S)ri")>; +def KryoWrite_4cyc_X_noRSV_110ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_noRSV_110ln], + (instregex "(S|U)CVTF(v1i32|v2i32|v1i64|v2f32|d|s)(_shift)?")>; +def KryoWrite_4cyc_X_X_114ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_X_114ln], + (instregex "(S|U)CVTF(v2i64|v4i32|v2f64|v4f32)(_shift)?")>; +def KryoWrite_1cyc_XA_Y_98ln : + SchedWriteRes<[KryoUnitXA, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XA_Y_98ln], + (instregex "(S|U)DIV(_Int)?(W|X)r")>; +def KryoWrite_2cyc_XY_XY_152ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_152ln], + (instregex 
"(S|U)H(ADD|SUB)(v16i8|v8i16|v4i32)")>; +def KryoWrite_2cyc_XY_noRSV_149ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_149ln], + (instregex "((S|U)H(ADD|SUB)|ADDP)(v8i8|v4i16|v2i32)")>; +def KryoWrite_4cyc_X_70ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_4cyc_X_70ln], + (instregex "(S|U)(MADDL|MSUBL)rrr")>; +def KryoWrite_4cyc_X_X_191ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_X_191ln], + (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>; +def KryoWrite_1cyc_XY_195ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_XY_195ln], + (instregex "(S|U)MOVv.*")>; +def KryoWrite_5cyc_X_71ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 5; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_5cyc_X_71ln], + (instrs SMULHrr, UMULHrr)>; +def KryoWrite_3cyc_XY_noRSV_186ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_186ln], + (instregex "^(S|U)QADD(v8i8|v4i16|v2i32)")>; +def KryoWrite_3cyc_XY_XY_187ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_187ln], + (instregex "^(S|U)QADD(v16i8|v8i16|v4i32|v2i64)")>; +def KryoWrite_3cyc_XY_noRSV_69ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_69ln], + (instregex "(S|U|SU|US)QADD(v1i8|v1i16|v2i16|v1i32|v1i64)")>; +def KryoWrite_3cyc_XY_noRSV_248ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_248ln], + (instregex "(S|U)QSHLU?(d|s|h|b|(v8i8|v4i16|v2i32)_shift)$")>; +def KryoWrite_3cyc_XY_XY_250ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_250ln], + (instregex "(S|U)(QSHLU?|RSHR)(v16i8|v8i16|v4i32|v2i64)_shift$")>; +def KryoWrite_3cyc_XY_noRSV_246ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_246ln], + (instregex "(S|U)(QSHL|RSHL|QRSHL)(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32)$")>; +def KryoWrite_3cyc_XY_XY_251ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_251ln], + (instregex "(S|U)(QSHL|RSHL|QRSHL)(v16i8|v8i16|v4i32|v2i64)$")>; +def KryoWrite_6cyc_XY_X_238ln : + SchedWriteRes<[KryoUnitXY, KryoUnitX]> { + let Latency = 6; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_6cyc_XY_X_238ln], + (instregex "((S|U)QR?SHRN|SQR?SHRUN)(v16i8|v8i16|v4i32)_shift$")>; +def KryoWrite_3cyc_XY_noRSV_249ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_249ln], + (instregex "((S|U)QR?SHRN|SQR?SHRUN)(s|h|b)?")>; +def KryoWrite_6cyc_XY_X_noRSV_252ln : + SchedWriteRes<[KryoUnitXY, KryoUnitX]> { + let Latency = 6; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_6cyc_XY_X_noRSV_252ln], + (instregex "((S|U)QR?SHRN|SQR?SHRUN)(v8i8|v4i16|v2i32)_shift?")>; +def KryoWrite_3cyc_XY_noRSV_161ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_161ln], + (instregex "(S|U)QSUB(v8i8|v4i16|v2i32|v1i64|v1i32|v1i16|v1i8)")>; +def KryoWrite_3cyc_XY_noRSV_163ln : + SchedWriteRes<[KryoUnitXY]> { + 
let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_163ln], + (instregex "(S|U)QXTU?N(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)")>; +def KryoWrite_3cyc_XY_noRSV_162ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_162ln], + (instregex "(S|U)QXTU?N(v1i8|v1i16|v1i32)")>; +def KryoWrite_3cyc_XY_noRSV_247ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_247ln], + (instregex "(S|U)RSHR(d|(v8i8|v4i16|v2i32)_shift)$")>; +def KryoWrite_2cyc_XY_noRSV_239ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_239ln], + (instregex "(S|U)SHL(d|v8i8|v4i16|v2i32|v1i64)$")>; +def KryoWrite_2cyc_XY_XY_243ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_243ln], + (instregex "(S|U)SHL(v16i8|v8i16|v4i32|v2i64)$")>; +def KryoWrite_2cyc_XY_XY_241ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_241ln], + (instregex "(S|U)?SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>; +def KryoWrite_2cyc_XY_noRSV_240ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_240ln], + (instregex "((S|U)SHR|SHL)(d|(v8i8|v4i16|v2i32)_shift)$")>; +def KryoWrite_2cyc_XY_XY_242ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_242ln], + (instregex "((S|U)SHR|SHL)(v16i8|v8i16|v4i32|v2i64)_shift$")>; +def KryoWrite_2cyc_XY_XY_183ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_183ln], + (instregex "(S|U)(MAX|MIN)P?(v16i8|v8i16|v4i32)")>; +def KryoWrite_2cyc_XY_noRSV_182ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_182ln], + (instregex "(S|U)(MAX|MIN)P?(v8i8|v4i16|v2i32)")>; +def KryoWrite_3cyc_XY_noRSV_184ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_184ln], + (instregex "(S|U)(MAX|MIN)V(v4i16v|v8i8v|v4i32)")>; +def KryoWrite_4cyc_X_noRSV_185ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_noRSV_185ln], + (instregex "(S|U)(MAX|MIN)V(v16i8v|v8i16v)")>; +def KryoWrite_2cyc_XY_noRSV_67ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_67ln], + (instrs ABSv1i64)>; +def KryoWrite_1cyc_XY_63ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_XY_63ln, ReadI, ReadI], + (instregex "ADC.*")>; +def KryoWrite_1cyc_XY_63_1ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_XY_63_1ln], + (instregex "ADR.*")>; +def KryoWrite_1cyc_XY_62ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_XY_62ln, ReadI], + (instregex "ADDS?(W|X)ri")>; +def KryoWrite_2cyc_XY_XY_64ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_64ln, ReadI, ReadI], + (instregex "ADDS?(W|X)r(r|s|x)(64)?")>; +def KryoWrite_1cyc_XY_noRSV_65ln : + SchedWriteRes<[KryoUnitXY]> { + 
let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_65ln], + (instrs ADDv1i64)>; +def KryoWrite_1cyc_XY_noRSV_144ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_144ln], + (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>; +def KryoWrite_1cyc_XY_XY_146ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_XY_146ln], + (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>; +def KryoWrite_4cyc_XY_X_noRSV_171ln : + SchedWriteRes<[KryoUnitXY, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_4cyc_XY_X_noRSV_171ln], + (instregex "(ADD|SUB)HNv.*")>; +def KryoWrite_1cyc_XY_noRSV_66ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_66ln], + (instrs ADDPv2i64p)>; +def KryoWrite_2cyc_XY_XY_153ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_153ln], + (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>; +def KryoWrite_3cyc_XY_XY_noRSV_170ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_noRSV_170ln], + (instrs ADDVv4i32v)>; +def KryoWrite_4cyc_XY_XY_noRSV_173ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 4; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_4cyc_XY_XY_noRSV_173ln], + (instrs ADDVv8i16v)>; +def KryoWrite_5cyc_XY_X_noRSV_174ln : + SchedWriteRes<[KryoUnitXY, KryoUnitX]> { + let Latency = 5; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_5cyc_XY_X_noRSV_174ln], + (instrs ADDVv16i8v)>; +def KryoWrite_3cyc_XY_XY_X_X_27ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitX, KryoUnitX]> { + let Latency = 3; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_X_X_27ln], + (instrs AESDrr, AESErr)>; +def KryoWrite_2cyc_X_X_22ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_X_X_22ln], + (instrs AESIMCrr, AESMCrr)>; +def KryoWrite_1cyc_XY_noRSV_76ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_76ln], + (instregex "((AND|ORN|EOR|EON)S?(Wr[rsi]|v8i8|v4i16|v2i32)|(ORR|BIC)S?(Wr[rs]|v8i8|v4i16|v2i32))")>; +def KryoWrite_1cyc_XY_XY_79ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_XY_79ln], + (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>; +def KryoWrite_1cyc_X_72ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_X_72ln], + (instregex "(S|U)?BFM.*")>; +def KryoWrite_1cyc_XY_noRSV_77ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_77ln], + (instregex "(BIC|ORR)S?Wri")>; +def KryoWrite_1cyc_XY_XY_78ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_XY_78ln], + (instregex "(BIC|ORR)S?Xri")>; +def KryoWrite_1cyc_X_noRSV_74ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_X_noRSV_74ln], + (instrs BIFv8i8, BITv8i8, BSLv8i8)>; +def KryoWrite_1cyc_X_X_75ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 1; let NumMicroOps = 2; +} +def : 
InstRW<[KryoWrite_1cyc_X_X_75ln], + (instrs BIFv16i8, BITv16i8, BSLv16i8)>; +def KryoWrite_0cyc_noRSV_11ln : + SchedWriteRes<[]> { + let Latency = 0; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_0cyc_noRSV_11ln], + (instrs BRK, DCPS1, DCPS2, DCPS3, HLT, HVC, ISB, HINT, SMC, SVC)>; +def KryoWrite_0cyc_XY_16ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 0; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_0cyc_XY_16ln, ReadI], + (instregex "(CCMN|CCMP)(W|X)i")>; +def KryoWrite_0cyc_XY_16_1ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 0; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_0cyc_XY_16_1ln, ReadI, ReadI], + (instregex "(CCMN|CCMP)(W|X)r")>; +def KryoWrite_2cyc_XY_3ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_2cyc_XY_3ln, ReadI], + (instregex "(CLS|CLZ)(W|X)r")>; +def KryoWrite_2cyc_XY_noRSV_7ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_7ln], + (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>; +def KryoWrite_2cyc_XY_XY_8ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_8ln], + (instregex "(CLS|CLZ|CNT)(v2i32|v4i16|v8i8)")>; +def KryoWrite_2cyc_XY_noRSV_80ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_80ln], + (instregex "CM(EQ|GE|HS|GT|HI|TST)(v8i8|v4i16|v2i32|v1i64)$")>; +def KryoWrite_2cyc_XY_XY_83ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_83ln], + (instregex "CM(EQ|GE|HS|GT|HI|TST)(v16i8|v8i16|v4i32|v2i64)$")>; +def KryoWrite_2cyc_XY_noRSV_81ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_81ln], + (instregex "CM(EQ|LE|GE|GT|LT)(v8i8|v4i16|v2i32|v1i64)rz$")>; +def KryoWrite_2cyc_XY_XY_82ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_82ln], + (instregex "CM(EQ|LE|GE|GT|LT)(v16i8|v8i16|v4i32|v2i64)rz$")>; +def KryoWrite_3cyc_XY_4ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_3cyc_XY_4ln, ReadI, ReadISReg], + (instregex "CRC32.*")>; +def KryoWrite_1cyc_XY_20ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_XY_20ln, ReadI, ReadI], + (instregex "CSEL(W|X)r")>; +def KryoWrite_1cyc_X_17ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_X_17ln, ReadI, ReadI], + (instregex "(CSINC|CSNEG)(W|X)r")>; +def KryoWrite_1cyc_XY_18ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_XY_18ln, ReadI, ReadI], + (instregex "(CSINV)(W|X)r")>; +def KryoWrite_3cyc_LS_X_13ln : + SchedWriteRes<[KryoUnitLS, KryoUnitX]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_X_13ln], + (instrs DRPS)>; +def KryoWrite_0cyc_LS_10ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 0; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_0cyc_LS_10ln], + (instrs DSB, DMB, CLREX)>; +def KryoWrite_1cyc_X_noRSV_196ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_X_noRSV_196ln], + (instregex "DUP(v8i8|v4i16|v2i32)(gpr|lane)")>; +def KryoWrite_1cyc_X_X_197ln : + SchedWriteRes<[KryoUnitX, 
KryoUnitX]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_X_X_197ln], + (instregex "DUP(v16i8|v8i16|v4i32|v2i64)(gpr|lane)")>; +def KryoWrite_3cyc_LS_LS_X_15ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitX]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_X_15ln], + (instrs ERET)>; +def KryoWrite_1cyc_X_noRSV_207ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_X_noRSV_207ln], + (instrs EXTv8i8)>; +def KryoWrite_1cyc_X_X_212ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_X_X_212ln], + (instrs EXTv16i8)>; +def KryoWrite_2cyc_XY_X_136ln : + SchedWriteRes<[KryoUnitXY, KryoUnitX]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_X_136ln], + (instrs EXTRWrri, EXTRXrri)>; +def KryoWrite_2cyc_XY_noRSV_35ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_35ln], + (instregex "F(MAX|MIN)(NM)?P?(D|S)rr")>; +def KryoWrite_2cyc_XY_XY_106ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_106ln], + (instregex "(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2i64p|v2f64|v4f32)")>; +def KryoWrite_2cyc_XY_noRSV_104ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_104ln], + (instregex "(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2f32|v2i32p)")>; +def KryoWrite_3cyc_XY_noRSV_107ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_107ln], + (instregex "F(MAX|MIN)(NM)?Vv4i32v")>; +def KryoWrite_3cyc_XY_noRSV_101ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_101ln], + (instregex "FABD(32|64|v2f32)")>; +def KryoWrite_3cyc_XY_XY_103ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_103ln], + (instregex "(FABD|FADD|FSUB|FADDP)(v4f32|v2f64)")>; +def KryoWrite_1cyc_XY_noRSV_48ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_48ln], + (instregex "F(ABS|NEG)(D|S)r")>; +def KryoWrite_1cyc_XY_noRSV_124ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_124ln], + (instregex "F(ABS|NEG)v2f32")>; +def KryoWrite_1cyc_XY_XY_125ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_XY_125ln], + (instregex "F(ABS|NEG)(v2f64|v4f32)")>; +def KryoWrite_2cyc_XY_noRSV_33ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_33ln], + (instregex "(FAC(GE|GT)|FCM(EQ|GE|GT))(32|64)")>; +def KryoWrite_3cyc_XY_noRSV_30ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_30ln], + (instregex "(FADD|FSUB)(D|S)rr")>; +def KryoWrite_3cyc_XY_noRSV_100ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_100ln], + (instregex "(FADD|FSUB|FADDP)v2f32")>; +def KryoWrite_3cyc_XY_noRSV_29ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_29ln], 
+ (instregex "FADDP(v2i32p|v2i64p)")>; +def KryoWrite_0cyc_XY_31ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 0; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_0cyc_XY_31ln], + (instregex "FCCMPE?(D|S)rr")>; +def KryoWrite_2cyc_XY_noRSV_34ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_34ln], + (instregex "FCM(EQ|LE|GE|GT|LT)(v1i32|v1i64)rz")>; +def KryoWrite_2cyc_XY_XY_36ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_36ln], + (instregex "FCM(EQ|LE|GE|GT|LT)(v2i64|v4i32)rz")>; +def KryoWrite_2cyc_XY_noRSV_105ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_105ln], + (instregex "FCM(EQ|LE|GE|GT|LT)v2i32rz")>; +def KryoWrite_0cyc_XY_32ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 0; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_0cyc_XY_32ln], + (instregex "FCMPE?(D|S)r(r|i)")>; +def KryoWrite_1cyc_XY_noRSV_49ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_49ln], + (instrs FCSELDrrr, FCSELSrrr)>; +def KryoWrite_4cyc_X_noRSV_41ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_noRSV_41ln], + (instrs FCVTDHr, FCVTDSr, FCVTHDr, FCVTHSr, FCVTSDr, FCVTSHr)>; +def KryoWrite_4cyc_X_38ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_4cyc_X_38ln], + (instregex "FCVT(((A|N|M|P)(S|U)(S|U)|Z(S|U)_Int(S|U))(W|X)(D|S)ri?|Z(S|U)(d|s))$")>; +def KryoWrite_4cyc_X_noRSV_113ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_noRSV_113ln], + (instregex "FCVT((A|N|M|P)(S|U)|Z(S|U)_Int)(v1i32|v1i64|v2f32)$")>; +def KryoWrite_4cyc_X_X_117ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_X_117ln], + (instregex "FCVT((A|N|M|P)(S|U)|Z(S|U)_Int)(v4f32|v2f64)$")>; +def KryoWrite_5cyc_X_X_XY_noRSV_119ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitXY]> { + let Latency = 5; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_5cyc_X_X_XY_noRSV_119ln], + (instregex "FCVTX?N(v2f32|v4f32|v2i32|v4i16|v4i32|v8i16)$")>; +def KryoWrite_4cyc_X_X_116ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_X_116ln], + (instregex "FCVTL(v2i32|v4i16|v4i32|v8i16)$")>; +def KryoWrite_4cyc_X_noRSV_112ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_noRSV_112ln], + (instrs FCVTXNv1i64)>; +def KryoWrite_4cyc_X_37ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_4cyc_X_37ln], + (instregex "FCVTZ(S|U)(S|U)(W|X)(D|S)ri?$")>; +def KryoWrite_4cyc_X_noRSV_111ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_noRSV_111ln], + (instregex "FCVTZ(S|U)(v2f32|v1i32|v1i64|v2i32(_shift)?)$")>; +def KryoWrite_4cyc_X_X_115ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_X_115ln], + (instregex "FCVTZ(S|U)(v2f64|v4f32|(v2i64|v4i32)(_shift)?)$")>; +def KryoWrite_10cyc_XA_Y_noRSV_43ln : + SchedWriteRes<[KryoUnitXA, KryoUnitY]> { + let Latency = 10; let NumMicroOps = 3; +} +def : 
InstRW<[KryoWrite_10cyc_XA_Y_noRSV_43ln], + (instrs FDIVSrr)>; +def KryoWrite_14cyc_XA_Y_noRSV_43ln : + SchedWriteRes<[KryoUnitXA, KryoUnitY]> { + let Latency = 14; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_14cyc_XA_Y_noRSV_43ln], + (instrs FDIVDrr)>; +def KryoWrite_10cyc_XA_Y_noRSV_121ln : + SchedWriteRes<[KryoUnitXA, KryoUnitY]> { + let Latency = 10; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_10cyc_XA_Y_noRSV_121ln], + (instrs FDIVv2f32)>; +def KryoWrite_14cyc_XA_Y_XA_Y_123ln : + SchedWriteRes<[KryoUnitXA, KryoUnitY, KryoUnitXA, KryoUnitY]> { + let Latency = 14; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_14cyc_XA_Y_XA_Y_123ln], + (instrs FDIVv2f64, FDIVv4f32)>; +def KryoWrite_5cyc_X_noRSV_55ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 5; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_5cyc_X_noRSV_55ln], + (instregex "FN?M(ADD|SUB)Srrr")>; +def KryoWrite_6cyc_X_noRSV_57ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 6; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_6cyc_X_noRSV_57ln], + (instregex "FN?M(ADD|SUB)Drrr")>; +def KryoWrite_5cyc_X_noRSV_51ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 5; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_5cyc_X_noRSV_51ln], + (instrs FMLAv2f32, FMLSv2f32, FMLAv1i32_indexed, FMLSv1i32_indexed)>; +def KryoWrite_5cyc_X_X_56ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 5; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_5cyc_X_X_56ln], + (instrs FMLAv4f32, FMLSv4f32)>; +def KryoWrite_6cyc_X_X_61ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 6; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_6cyc_X_X_61ln], + (instrs FMLAv2f64, FMLSv2f64)>; +def KryoWrite_5cyc_X_noRSV_128ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 5; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_5cyc_X_noRSV_128ln], + (instrs FMLAv2i32_indexed, FMLSv2i32_indexed)>; +def KryoWrite_5cyc_X_X_131ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 5; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_5cyc_X_X_131ln], + (instrs FMLAv4i32_indexed, FMLSv4i32_indexed)>; +def KryoWrite_6cyc_X_X_134ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 6; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_6cyc_X_X_134ln], + (instrs FMLAv2i64_indexed, FMLSv2i64_indexed)>; +def KryoWrite_6cyc_X_noRSV_60ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 6; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_6cyc_X_noRSV_60ln], + (instrs FMLAv1i64_indexed, FMLSv1i64_indexed, FMULv1i64_indexed, FMULXv1i64_indexed)>; +def KryoWrite_1cyc_XY_45ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_XY_45ln], + (instregex "FMOV(XDHigh|DXHigh|DX)r")>; +def KryoWrite_1cyc_XY_noRSV_47ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_47ln], + (instregex "FMOV(Di|Dr|Si|Sr|SWr|WSr|XDr|v.*_ns)")>; +def KryoWrite_5cyc_X_noRSV_53ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 5; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_5cyc_X_noRSV_53ln], + (instrs FMULv1i32_indexed, FMULXv1i32_indexed)>; +def KryoWrite_5cyc_X_noRSV_127ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 5; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_5cyc_X_noRSV_127ln], + (instrs FMULv2f32, FMULXv2f32, FMULv2i32_indexed, FMULXv2i32_indexed)>; +def KryoWrite_5cyc_X_X_130ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 5; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_5cyc_X_X_130ln], + (instrs 
FMULv4f32, FMULXv4f32, FMULv4i32_indexed, FMULXv4i32_indexed)>; +def KryoWrite_6cyc_X_X_133ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 6; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_6cyc_X_X_133ln], + (instrs FMULv2f64, FMULXv2f64, FMULv2i64_indexed, FMULXv2i64_indexed)>; +def KryoWrite_5cyc_X_noRSV_54ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 5; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_5cyc_X_noRSV_54ln], + (instrs FMULSrr, FNMULSrr, FMULX32)>; +def KryoWrite_6cyc_X_noRSV_59ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 6; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_6cyc_X_noRSV_59ln], + (instrs FMULDrr, FNMULDrr, FMULX64)>; +def KryoWrite_3cyc_XY_noRSV_28ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_28ln], + (instrs FRECPEv1i32, FRECPEv1i64, FRSQRTEv1i32, FRSQRTEv1i64 )>; +def KryoWrite_3cyc_XY_noRSV_99ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_99ln], + (instrs FRECPEv2f32, FRSQRTEv2f32)>; +def KryoWrite_3cyc_XY_XY_102ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_102ln], + (instrs FRECPEv2f64, FRECPEv4f32, FRSQRTEv2f64, FRSQRTEv4f32)>; +def KryoWrite_5cyc_X_noRSV_52ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 5; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_5cyc_X_noRSV_52ln], + (instrs FRECPS32, FRSQRTS32)>; +def KryoWrite_6cyc_X_noRSV_58ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 6; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_6cyc_X_noRSV_58ln], + (instrs FRECPS64, FRSQRTS64)>; +def KryoWrite_5cyc_X_noRSV_126ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 5; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_5cyc_X_noRSV_126ln], + (instrs FRECPSv2f32, FRSQRTSv2f32)>; +def KryoWrite_5cyc_X_X_129ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 5; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_5cyc_X_X_129ln], + (instrs FRECPSv4f32, FRSQRTSv4f32)>; +def KryoWrite_6cyc_X_X_132ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 6; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_6cyc_X_X_132ln], + (instrs FRECPSv2f64, FRSQRTSv2f64)>; +def KryoWrite_3cyc_XY_noRSV_50ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_50ln], + (instrs FRECPXv1i32, FRECPXv1i64)>; +def KryoWrite_2cyc_XY_noRSV_39ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_39ln], + (instregex "FRINT(A|I|M|N|P|X|Z)(S|D)r")>; +def KryoWrite_2cyc_XY_noRSV_108ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_108ln], + (instregex "FRINT(A|I|M|N|P|X|Z)v2f32")>; +def KryoWrite_2cyc_XY_XY_109ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_109ln], + (instregex "FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32)")>; +def KryoWrite_12cyc_XA_Y_noRSV_42ln : + SchedWriteRes<[KryoUnitXA, KryoUnitY]> { + let Latency = 12; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_12cyc_XA_Y_noRSV_42ln], + (instrs FSQRTSr)>; +def KryoWrite_21cyc_XA_Y_noRSV_42ln : + SchedWriteRes<[KryoUnitXA, KryoUnitY]> { + let Latency = 21; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_21cyc_XA_Y_noRSV_42ln], + (instrs FSQRTDr)>; +def KryoWrite_12cyc_XA_Y_noRSV_120ln : + 
SchedWriteRes<[KryoUnitXA, KryoUnitY]> { + let Latency = 12; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_12cyc_XA_Y_noRSV_120ln], + (instrs FSQRTv2f32)>; +def KryoWrite_21cyc_XA_Y_XA_Y_122ln : + SchedWriteRes<[KryoUnitXA, KryoUnitY, KryoUnitXA, KryoUnitY]> { + let Latency = 21; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_21cyc_XA_Y_XA_Y_122ln], + (instrs FSQRTv4f32)>; +def KryoWrite_36cyc_XA_Y_XA_Y_122ln : + SchedWriteRes<[KryoUnitXA, KryoUnitY, KryoUnitXA, KryoUnitY]> { + let Latency = 36; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_36cyc_XA_Y_XA_Y_122ln], + (instrs FSQRTv2f64)>; +def KryoWrite_1cyc_X_201ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_X_201ln], + (instregex "INSv.*")>; +def KryoWrite_3cyc_LS_255ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_3cyc_LS_255ln], + (instregex "LD1(One(v16b|v8h|v4s|v2d)|i64)$")>; +def KryoWrite_4cyc_LS_X_270ln : + SchedWriteRes<[KryoUnitLS, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_LS_X_270ln], + (instregex "LD1(i8|i16|i32)$")>; +def KryoWrite_3cyc_LS_noRSV_285ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_noRSV_285ln], + (instregex "LD1One(v8b|v4h|v2s|v1d)$")>; +def KryoWrite_3cyc_LS_XY_289ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_289ln, WriteAdr], + (instregex "LD1(One(v16b|v8h|v4s|v2d)|i64)_POST$")>; +def KryoWrite_4cyc_LS_XY_X_298ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_4cyc_LS_XY_X_298ln, WriteAdr], + (instregex "LD1(i8|i16|i32)_POST$")>; +def KryoWrite_3cyc_LS_LS_LS_308ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_LS_308ln], + (instregex "LD1Three(v16b|v8h|v4s|v2d)$")>; +def KryoWrite_3cyc_LS_XY_noRSV_317ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_noRSV_317ln, WriteAdr], + (instregex "LD1One(v8b|v4h|v2s|v1d)_POST$")>; +def KryoWrite_3cyc_LS_LS_LS_LS_328ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_LS_LS_328ln, WriteAdr], + (instregex "LD1Four(v16b|v8h|v4s|v2d)_POST$")>; +def KryoWrite_3cyc_LS_XY_LS_LS_332ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_LS_LS_332ln, WriteAdr], + (instregex "LD1Three(v16b|v8h|v4s|v2d)_POST$")>; +def KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_348ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 5; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_348ln], + (instregex "LD1Three(v8b|v4h|v2s|v1d)$")>; +def KryoWrite_3cyc_LS_XY_LS_LS_LS_351ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 5; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_LS_LS_LS_351ln], + (instregex "LD1Four(v16b|v8h|v4s|v2d)$")>; +def KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_noRSV_358ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 6; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_noRSV_358ln], + (instregex 
"LD1Four(v8b|v4h|v2s|v1d)$")>; +def KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_360ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 6; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_360ln, WriteAdr], + (instregex "LD1Three(v8b|v4h|v2s|v1d)_POST$")>; +def KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_noRSV_368ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 7; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_noRSV_368ln, WriteAdr], + (instregex "LD1Four(v8b|v4h|v2s|v1d)_POST$")>; +def KryoWrite_3cyc_LS_LS_281ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_281ln], + (instregex "LD(1|2)Two(v16b|v8h|v4s|v2d)$")>; +def KryoWrite_3cyc_LS_noRSV_noRSV_311ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_noRSV_noRSV_311ln], + (instregex "LD(1|2)Two(v8b|v4h|v2s|v1d)$")>; +def KryoWrite_3cyc_LS_XY_LS_313ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_LS_313ln, WriteAdr], + (instregex "LD(1|2)Two(v16b|v8h|v4s|v2d)_POST$")>; +def KryoWrite_3cyc_LS_XY_noRSV_noRSV_334ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_noRSV_noRSV_334ln, WriteAdr], + (instregex "LD(1|2)Two(v8b|v4h|v2s|v1d)_POST$")>; +def KryoWrite_3cyc_LS_256ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_3cyc_LS_256ln], + (instregex "LD1R(v16b|v8h|v4s|v2d)$")>; +def KryoWrite_3cyc_LS_noRSV_286ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_noRSV_286ln], + (instregex "LD1R(v8b|v4h|v2s|v1d)$")>; +def KryoWrite_3cyc_LS_XY_290ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_290ln, WriteAdr], + (instregex "LD1R(v16b|v8h|v4s|v2d)_POST$")>; +def KryoWrite_3cyc_LS_XY_noRSV_318ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_noRSV_318ln, WriteAdr], + (instregex "LD1R(v8b|v4h|v2s|v1d)_POST$")>; +def KryoWrite_3cyc_LS_257ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_3cyc_LS_257ln], + (instregex "LD2i64$")>; +def KryoWrite_3cyc_LS_XY_291ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_291ln, WriteAdr], + (instregex "LD2i64_POST$")>; +def KryoWrite_4cyc_LS_X_X_296ln : + SchedWriteRes<[KryoUnitLS, KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_4cyc_LS_X_X_296ln], + (instregex "LD2(i8|i16|i32)$")>; +def KryoWrite_4cyc_LS_XY_X_X_321ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_4cyc_LS_XY_X_X_321ln, WriteAdr], + (instregex "LD2(i8|i16|i32)_POST$")>; +def KryoWrite_3cyc_LS_LS_282ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_282ln], + (instregex "LD2R(v16b|v8h|v4s|v2d)$")>; +def KryoWrite_3cyc_LS_noRSV_noRSV_312ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 3; +} +def : 
InstRW<[KryoWrite_3cyc_LS_noRSV_noRSV_312ln], + (instregex "LD2R(v8b|v4h|v2s|v1d)$")>; +def KryoWrite_3cyc_LS_XY_LS_314ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_LS_314ln, WriteAdr], + (instregex "LD2R(v16b|v8h|v4s|v2d)_POST$")>; +def KryoWrite_3cyc_LS_XY_noRSV_noRSV_335ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_noRSV_noRSV_335ln, WriteAdr], + (instregex "LD2R(v8b|v4h|v2s|v1d)_POST$")>; +def KryoWrite_3cyc_LS_LS_283ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_283ln], + (instregex "LD3i64$")>; +def KryoWrite_3cyc_LS_LS_LS_309ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_LS_309ln], + (instregex "LD3Threev2d$")>; +def KryoWrite_3cyc_LS_XY_LS_315ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_LS_315ln, WriteAdr], + (instregex "LD3i64_POST$")>; +def KryoWrite_4cyc_LS_X_X_X_320ln : + SchedWriteRes<[KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_4cyc_LS_X_X_X_320ln], + (instregex "LD3(i8|i16|i32)$")>; +def KryoWrite_3cyc_LS_XY_LS_LS_331ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_LS_LS_331ln, WriteAdr], + (instregex "LD3Threev2d_POST$")>; +def KryoWrite_4cyc_LS_XY_X_X_X_338ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitX, KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 5; +} +def : InstRW<[KryoWrite_4cyc_LS_XY_X_X_X_338ln, WriteAdr], + (instregex "LD3(i8|i16|i32)_POST$")>; +def KryoWrite_4cyc_LS_LS_X_X_X_noRSV_noRSV_noRSV_373ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 8; +} +def : InstRW<[KryoWrite_4cyc_LS_LS_X_X_X_noRSV_noRSV_noRSV_373ln], + (instregex "LD3Three(v8b|v4h|v2s)$")>; +def KryoWrite_4cyc_LS_XY_LS_X_X_X_noRSV_noRSV_noRSV_380ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitX, KryoUnitX, + KryoUnitX]> { + let Latency = 4; let NumMicroOps = 9; +} +def : InstRW<[KryoWrite_4cyc_LS_XY_LS_X_X_X_noRSV_noRSV_noRSV_380ln, WriteAdr], + (instregex "LD3Three(v8b|v4h|v2s)_POST$")>; +def KryoWrite_4cyc_LS_LS_X_X_X_LS_LS_X_X_X_381ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 10; +} +def : InstRW<[KryoWrite_4cyc_LS_LS_X_X_X_LS_LS_X_X_X_381ln], + (instregex "LD3Three(v16b|v8h|v4s)$")>; +def KryoWrite_4cyc_LS_LS_X_X_X_LS_XY_LS_X_X_X_383ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitX, KryoUnitX, + KryoUnitX]> { + let Latency = 4; let NumMicroOps = 11; +} +def : InstRW<[KryoWrite_4cyc_LS_LS_X_X_X_LS_XY_LS_X_X_X_383ln, WriteAdr], + (instregex "LD3Three(v16b|v8h|v4s)_POST$")>; +def KryoWrite_3cyc_LS_LS_LS_310ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_LS_310ln], + (instregex "LD3R(v16b|v8h|v4s|v2d)$")>; +def KryoWrite_3cyc_LS_XY_LS_LS_333ln : + SchedWriteRes<[KryoUnitLS, 
KryoUnitXY, KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_LS_LS_333ln, WriteAdr], + (instregex "LD3R(v16b|v8h|v4s|v2d)_POST$")>; +def KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_349ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 5; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_349ln], + (instregex "LD3R(v8b|v4h|v2s|v1d)$")>; +def KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_361ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 6; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_361ln, WriteAdr], + (instregex "LD3R(v8b|v4h|v2s|v1d)_POST$")>; +def KryoWrite_3cyc_LS_LS_284ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_284ln], + (instregex "LD4i64$")>; +def KryoWrite_3cyc_LS_XY_LS_316ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_LS_316ln, WriteAdr], + (instregex "LD4i64_POST$")>; +def KryoWrite_3cyc_LS_LS_LS_LS_329ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_LS_LS_329ln], + (instregex "LD4Four(v2d)$")>; +def KryoWrite_4cyc_LS_X_X_X_X_337ln : + SchedWriteRes<[KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 5; +} +def : InstRW<[KryoWrite_4cyc_LS_X_X_X_X_337ln], + (instregex "LD4(i8|i16|i32)$")>; +def KryoWrite_3cyc_LS_XY_LS_LS_LS_350ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 5; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_LS_LS_LS_350ln, WriteAdr], + (instregex "LD4Four(v2d)_POST$")>; +def KryoWrite_4cyc_LS_XY_X_X_X_X_355ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitX]> { + let Latency = 4; let NumMicroOps = 6; +} +def : InstRW<[KryoWrite_4cyc_LS_XY_X_X_X_X_355ln, WriteAdr], + (instregex "LD4(i8|i16|i32)_POST$")>; +def KryoWrite_4cyc_LS_LS_X_X_X_X_noRSV_noRSV_noRSV_noRSV_382ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitX]> { + let Latency = 4; let NumMicroOps = 10; +} +def : InstRW<[KryoWrite_4cyc_LS_LS_X_X_X_X_noRSV_noRSV_noRSV_noRSV_382ln], + (instregex "LD4Four(v8b|v4h|v2s)$")>; +def KryoWrite_4cyc_LS_XY_LS_X_X_X_X_noRSV_noRSV_noRSV_noRSV_384ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitX, KryoUnitX, + KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 11; +} +def : InstRW<[KryoWrite_4cyc_LS_XY_LS_X_X_X_X_noRSV_noRSV_noRSV_noRSV_384ln, WriteAdr], + (instregex "LD4Four(v8b|v4h|v2s)_POST$")>; +def KryoWrite_4cyc_LS_LS_X_X_X_X_LS_LS_X_X_X_X_386ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitX, KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX, + KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 12; +} +def : InstRW<[KryoWrite_4cyc_LS_LS_X_X_X_X_LS_LS_X_X_X_X_386ln], + (instregex "LD4Four(v16b|v8h|v4s)$")>; +def KryoWrite_4cyc_LS_LS_X_X_X_X_LS_XY_LS_X_X_X_X_389ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitX, KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitX, + KryoUnitX, KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 13; +} +def : InstRW<[KryoWrite_4cyc_LS_LS_X_X_X_X_LS_XY_LS_X_X_X_X_389ln, WriteAdr], + (instregex 
"LD4Four(v16b|v8h|v4s)_POST$")>; +def KryoWrite_3cyc_LS_LS_LS_LS_330ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_LS_LS_330ln], + (instregex "LD4R(v16b|v8h|v4s|v2d)$")>; +def KryoWrite_3cyc_LS_XY_LS_LS_LS_352ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 5; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_LS_LS_LS_352ln, WriteAdr], + (instregex "LD4R(v16b|v8h|v4s|v2d)_POST$")>; +def KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_noRSV_359ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 6; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_noRSV_359ln], + (instregex "LD4R(v8b|v4h|v2s|v1d)$")>; +def KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_noRSV_369ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 7; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_noRSV_369ln, WriteAdr], + (instregex "LD4R(v8b|v4h|v2s|v1d)_POST$")>; +def KryoWrite_3cyc_LS_LS_400ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_400ln], + (instregex "LDAX?R(B|H|W|X)")>; +def : InstRW<[KryoWrite_3cyc_LS_LS_400ln, WriteLDHi], + (instregex "LDAXP(W|X)")>; +def KryoWrite_3cyc_LS_LS_401ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_401ln, WriteLDHi], + (instrs LDNPQi)>; +def KryoWrite_3cyc_LS_noRSV_noRSV_408ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_noRSV_noRSV_408ln, WriteLDHi], + (instrs LDNPDi, LDNPSi)>; +def KryoWrite_3cyc_LS_394ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_3cyc_LS_394ln, WriteLDHi], + (instrs LDNPWi, LDNPXi)>; +def KryoWrite_3cyc_LS_LS_402ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_402ln, WriteLDHi], + (instrs LDPQi)>; +def KryoWrite_3cyc_LS_noRSV_noRSV_409ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_noRSV_noRSV_409ln, WriteLDHi], + (instrs LDPDi, LDPSi)>; +def KryoWrite_3cyc_LS_XY_LS_410ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_LS_410ln, WriteLDHi, WriteAdr], + (instregex "LDPQ(post|pre)")>; +def KryoWrite_3cyc_LS_XY_noRSV_noRSV_411ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_noRSV_noRSV_411ln, WriteLDHi, WriteAdr], + (instregex "LDP(D|S)(post|pre)")>; +def KryoWrite_3cyc_LS_393ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_3cyc_LS_393ln, WriteLDHi], + (instrs LDPWi, LDPXi)>; +def KryoWrite_3cyc_LS_XY_403ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_403ln, WriteLDHi, WriteAdr], + (instregex "LDP(W|X)(post|pre)")>; +def KryoWrite_4cyc_LS_395ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 4; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_4cyc_LS_395ln, WriteLDHi], + (instrs LDPSWi)>; +def KryoWrite_4cyc_LS_XY_405ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { + let Latency = 4; let NumMicroOps = 
2; +} +def : InstRW<[KryoWrite_4cyc_LS_XY_405ln, WriteLDHi, WriteAdr], + (instrs LDPSWpost, LDPSWpre)>; +def KryoWrite_3cyc_LS_264ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_3cyc_LS_264ln], + (instrs LDRQui, LDRQl)>; +def KryoWrite_4cyc_X_LS_271ln : + SchedWriteRes<[KryoUnitX, KryoUnitLS]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_LS_271ln], + (instrs LDRQroW, LDRQroX)>; +def KryoWrite_3cyc_LS_noRSV_287ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_noRSV_287ln], + (instregex "LDR((D|S)l|(D|S|H|B)ui)")>; +def KryoWrite_3cyc_LS_XY_293ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_293ln, WriteAdr], + (instrs LDRQpost, LDRQpre)>; +def KryoWrite_4cyc_X_LS_noRSV_297ln : + SchedWriteRes<[KryoUnitX, KryoUnitLS]> { + let Latency = 4; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_4cyc_X_LS_noRSV_297ln], + (instregex "LDR(D|S|H|B)ro(W|X)")>; +def KryoWrite_3cyc_LS_XY_noRSV_319ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_noRSV_319ln, WriteAdr], + (instregex "LDR(D|S|H|B)(post|pre)")>; +def KryoWrite_3cyc_LS_261ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_3cyc_LS_261ln], + (instregex "LDR(BB|HH|W|X)ui")>; +def KryoWrite_3cyc_LS_XY_292ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_292ln, WriteAdr], + (instregex "LDR(BB|HH|W|X)(post|pre)")>; +def KryoWrite_4cyc_X_LS_272ln : + SchedWriteRes<[KryoUnitX, KryoUnitLS]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_LS_272ln], + (instregex "(LDR(BB|HH|W|X)ro(W|X)|PRFMro(W|X))")>; +def KryoWrite_3cyc_LS_262ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_3cyc_LS_262ln], + (instrs LDRWl, LDRXl)>; +def KryoWrite_4cyc_LS_268ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 4; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_4cyc_LS_268ln], + (instregex "LDRS(BW|BX|HW|HX|W)ui")>; +def KryoWrite_5cyc_X_LS_273ln : + SchedWriteRes<[KryoUnitX, KryoUnitLS]> { + let Latency = 5; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_5cyc_X_LS_273ln], + (instregex "LDRS(BW|BX|HW|HX|W)ro(W|X)")>; +def KryoWrite_4cyc_LS_XY_294ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_LS_XY_294ln, WriteAdr], + (instregex "LDRS(BW|BX|HW|HX|W)(post|pre)")>; +def KryoWrite_4cyc_LS_269ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 4; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_4cyc_LS_269ln], + (instrs LDRSWl)>; +def KryoWrite_3cyc_LS_260ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_3cyc_LS_260ln], + (instregex "LDTR(B|H|W|X)i")>; +def KryoWrite_4cyc_LS_267ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 4; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_4cyc_LS_267ln], + (instregex "LDTRS(BW|BX|HW|HX|W)i")>; +def KryoWrite_3cyc_LS_263ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_3cyc_LS_263ln], + (instrs LDURQi)>; +def KryoWrite_3cyc_LS_noRSV_288ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 2; +} +def : 
InstRW<[KryoWrite_3cyc_LS_noRSV_288ln], + (instregex "LDUR(D|S|H|B)i")>; +def KryoWrite_3cyc_LS_259ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_3cyc_LS_259ln], + (instregex "LDUR(BB|HH|W|X)i")>; +def KryoWrite_4cyc_LS_266ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 4; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_4cyc_LS_266ln], + (instregex "LDURS(B|H)?(W|X)i")>; +def KryoWrite_3cyc_LS_258ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_3cyc_LS_258ln, WriteLDHi], + (instregex "LDXP(W|X)")>; +def KryoWrite_3cyc_LS_258_1ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_3cyc_LS_258_1ln], + (instregex "LDXR(B|H|W|X)")>; +def KryoWrite_2cyc_XY_XY_137ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_137ln], + (instrs LSLVWr, LSLVXr)>; +def KryoWrite_1cyc_XY_135ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_XY_135ln], + (instregex "(LS|AS|RO)RV(W|X)r")>; +def KryoWrite_4cyc_X_84ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_4cyc_X_84ln], + (instrs MADDWrrr, MSUBWrrr)>; +def KryoWrite_5cyc_X_85ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 5; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_5cyc_X_85ln], + (instrs MADDXrrr, MSUBXrrr)>; +def KryoWrite_4cyc_X_noRSV_188ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_noRSV_188ln], + (instregex "(MLA|MLS|MUL)(v8i8|v4i16|v2i32)(_indexed)?")>; +def KryoWrite_4cyc_X_X_192ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_X_192ln], + (instregex "(MLA|MLS|MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?")>; +def KryoWrite_1cyc_XY_noRSV_198ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_198ln], + (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)")>; +def KryoWrite_1cyc_XY_XY_199ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_XY_199ln], + (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)")>; +def KryoWrite_1cyc_X_89ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_X_89ln], + (instrs MOVKWi, MOVKXi)>; +def KryoWrite_1cyc_XY_91ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_XY_91ln], + (instrs MOVNWi, MOVNXi)>; +def KryoWrite_1cyc_XY_90ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_XY_90ln], + (instrs MOVZWi, MOVZXi)>; +def KryoWrite_2cyc_XY_93ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_2cyc_XY_93ln], + (instrs MRS)>; +def KryoWrite_0cyc_X_87ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 0; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_0cyc_X_87ln], + (instrs MSRpstateImm4)>; +def : InstRW<[KryoWrite_0cyc_X_87ln], + (instrs MSRpstateImm1)>; +def KryoWrite_0cyc_XY_88ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 0; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_0cyc_XY_88ln], + (instrs MSR)>; +def KryoWrite_1cyc_XY_noRSV_143ln : + 
SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_143ln], + (instregex "NEG(v8i8|v4i16|v2i32|v1i64)")>; +def KryoWrite_1cyc_XY_XY_145ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_XY_145ln], + (instregex "NEG(v16i8|v8i16|v4i32|v2i64)")>; +def KryoWrite_1cyc_XY_noRSV_193ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_193ln], + (instrs NOTv8i8)>; +def KryoWrite_1cyc_XY_XY_194ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_XY_194ln], + (instrs NOTv16i8)>; +def KryoWrite_2cyc_XY_noRSV_234ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_234ln], + (instrs PMULv8i8)>; +def KryoWrite_2cyc_XY_XY_236ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_236ln], + (instrs PMULv16i8)>; +def KryoWrite_2cyc_XY_XY_235ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_235ln], + (instrs PMULLv8i8, PMULLv16i8)>; +def KryoWrite_3cyc_XY_XY_237ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_237ln], + (instrs PMULLv1i64, PMULLv2i64)>; +def KryoWrite_0cyc_LS_254ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 0; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_0cyc_LS_254ln], + (instrs PRFMl, PRFMui)>; +def KryoWrite_0cyc_LS_253ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 0; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_0cyc_LS_253ln], + (instrs PRFUMi)>; +def KryoWrite_6cyc_XY_X_noRSV_175ln : + SchedWriteRes<[KryoUnitXY, KryoUnitX]> { + let Latency = 6; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_6cyc_XY_X_noRSV_175ln], + (instregex "R(ADD|SUB)HNv.*")>; +def KryoWrite_2cyc_XY_204ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_2cyc_XY_204ln], + (instrs RBITWr, RBITXr)>; +def KryoWrite_2cyc_XY_noRSV_218ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_218ln], + (instrs RBITv8i8)>; +def KryoWrite_2cyc_XY_XY_219ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_219ln], + (instrs RBITv16i8)>; +def KryoWrite_1cyc_X_202ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_X_202ln], + (instregex "REV(16|32)?(W|X)r")>; +def KryoWrite_1cyc_XY_noRSV_214ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_214ln], + (instregex "REV(16|32|64)(v8i8|v4i16|v2i32)")>; +def KryoWrite_1cyc_XY_XY_216ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_XY_216ln], + (instregex "REV(16|32|64)(v16i8|v8i16|v4i32)")>; +def KryoWrite_3cyc_X_noRSV_244ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_X_noRSV_244ln], + (instregex "S(L|R)I(d|(v8i8|v4i16|v2i32)_shift)")>; +def KryoWrite_3cyc_X_X_245ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 3; let NumMicroOps = 2; +} +def : 
InstRW<[KryoWrite_3cyc_X_X_245ln], + (instregex "S(L|R)I(v16i8|v8i16|v4i32|v2i64)_shift")>; +def KryoWrite_1cyc_XY_2ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_XY_2ln, ReadI, ReadI], + (instregex "SBCS?(W|X)r")>; +def KryoWrite_2cyc_XA_XA_XA_24ln : + SchedWriteRes<[KryoUnitXA, KryoUnitXA, KryoUnitXA]> { + let Latency = 2; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_2cyc_XA_XA_XA_24ln], + (instrs SHA1Crrr, SHA1Mrrr, SHA1Prrr)>; +def KryoWrite_1cyc_XY_noRSV_21ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_21ln], + (instrs SHA1Hrr)>; +def KryoWrite_2cyc_X_X_23ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_X_X_23ln], + (instrs SHA1SU0rrr, SHA1SU1rr, SHA256SU0rr)>; +def KryoWrite_4cyc_XA_XA_XA_25ln : + SchedWriteRes<[KryoUnitXA, KryoUnitXA, KryoUnitXA]> { + let Latency = 4; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_4cyc_XA_XA_XA_25ln], + (instrs SHA256Hrrr, SHA256H2rrr)>; +def KryoWrite_3cyc_XY_XY_X_X_26ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitX, KryoUnitX]> { + let Latency = 3; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_X_X_26ln], + (instrs SHA256SU1rrr)>; +def KryoWrite_4cyc_X_noRSV_189ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_noRSV_189ln], + (instregex "SQR?DMULH(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?")>; +def KryoWrite_3cyc_XY_noRSV_68ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_68ln], + (instregex "SQ(ABS|NEG)(v1i8|v1i16|v1i32|v1i64)")>; +def KryoWrite_3cyc_XY_noRSV_157ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_157ln], + (instregex "SQ(ABS|NEG)(v8i8|v4i16|v2i32)")>; +def KryoWrite_3cyc_XY_XY_164ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_164ln], + (instregex "SQ(ABS|NEG)(v16i8|v8i16|v4i32|v2i64)")>; +def KryoWrite_4cyc_X_noRSV_190ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_noRSV_190ln], + (instregex "SQD(MLAL|MLSL|MULL)(i16|i32)")>; +def KryoWrite_0cyc_LS_Y_274ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_274ln], + (instregex "ST1(One(v8b|v4h|v2s|v1d|v16b|v8h|v4s|v2d)|(i8|i16|i32|i64)|Two(v8b|v4h|v2s|v1d))$")>; +def KryoWrite_1cyc_LS_Y_X_301ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitX]> { + let Latency = 1; let NumMicroOps = 3; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_X_301ln], + (instregex "ST1(One(v8b|v4h|v2s|v1d|v16b|v8h|v4s|v2d)|(i8|i16|i32|i64)|Two(v8b|v4h|v2s|v1d))_POST$")>; +def KryoWrite_1cyc_LS_Y_XY_305ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 3; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_XY_305ln], + (instregex "ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>; +def KryoWrite_0cyc_LS_Y_LS_Y_323ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 4; +} +def : InstRW<[WriteAdr, KryoWrite_0cyc_LS_Y_LS_Y_323ln], + (instregex "ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))_POST$")>; +def KryoWrite_1cyc_LS_Y_XY_LS_Y_345ln : + 
SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitLS, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 5; +} +def : InstRW<[KryoWrite_1cyc_LS_Y_XY_LS_Y_345ln], + (instregex "ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>; +def KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_356ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY, KryoUnitLS, + KryoUnitY]> { + let Latency = 0; let NumMicroOps = 6; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_356ln], + (instregex "ST1Three(v16b|v8h|v4s|v2d)$")>; +def KryoWrite_1cyc_LS_Y_XY_LS_Y_LS_Y_366ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitLS, KryoUnitY, + KryoUnitLS, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 7; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_XY_LS_Y_LS_Y_366ln], + (instregex "ST1Three(v16b|v8h|v4s|v2d)_POST$")>; +def KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_LS_Y_371ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY, KryoUnitLS, + KryoUnitY, KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 8; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_LS_Y_371ln], + (instregex "ST1Four(v16b|v8h|v4s|v2d)$")>; +def KryoWrite_0cyc_LS_Y_LS_Y_XY_LS_Y_LS_Y_377ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY, KryoUnitXY, + KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 9; +} +def : InstRW<[WriteAdr, KryoWrite_0cyc_LS_Y_LS_Y_XY_LS_Y_LS_Y_377ln], + (instregex "ST1Four(v16b|v8h|v4s|v2d)_POST$")>; +def KryoWrite_0cyc_LS_Y_275ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_275ln], + (instregex "ST2(Two(v8b|v4h|v2s|v1d|v16b|v8h|v4s|v2d)|(i8|i16|i32|i64))$")>; +def KryoWrite_1cyc_LS_Y_XY_306ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 3; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_XY_306ln], + (instregex "ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))_POST$")>; +def KryoWrite_0cyc_LS_Y_LS_Y_322ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_322ln], + (instregex "ST2Two(v16b|v8h|v4s|v2d)$")>; +def KryoWrite_1cyc_LS_Y_XY_LS_Y_344ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitLS, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 5; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_XY_LS_Y_344ln], + (instregex "ST2Two(v16b|v8h|v4s|v2d)_POST$")>; +def KryoWrite_0cyc_LS_Y_LS_Y_324ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_324ln], + (instregex "ST3(Threev1d|(i8|i16|i32|i64))$")>; +def KryoWrite_1cyc_LS_Y_XY_LS_Y_346ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitLS, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 5; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_XY_LS_Y_346ln], + (instregex "ST3(Threev1d|(i8|i16|i32|i64))_POST$")>; +def KryoWrite_1cyc_X_X_LS_Y_LS_Y_353ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitLS, + KryoUnitY]> { + let Latency = 1; let NumMicroOps = 6; +} +def : InstRW<[KryoWrite_1cyc_X_X_LS_Y_LS_Y_353ln], + (instregex "ST3Three(v8b|v4h|v2s)$")>; +def KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_357ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY, KryoUnitLS, + KryoUnitY]> { + let Latency = 0; let NumMicroOps = 6; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_357ln], + (instregex "ST3Threev2d$")>; +def 
KryoWrite_1cyc_X_X_LS_Y_XY_LS_Y_363ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitXY, + KryoUnitLS, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 7; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_X_X_LS_Y_XY_LS_Y_363ln], + (instregex "ST3Three(v8b|v4h|v2s)_POST$")>; +def KryoWrite_1cyc_LS_Y_XY_LS_Y_LS_Y_367ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitLS, KryoUnitY, + KryoUnitLS, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 7; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_XY_LS_Y_LS_Y_367ln], + (instregex "ST3Threev2d_POST$")>; +def KryoWrite_1cyc_X_X_LS_Y_LS_Y_X_X_LS_Y_LS_Y_385ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitLS, + KryoUnitY, KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, + KryoUnitLS, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 12; +} +def : InstRW<[KryoWrite_1cyc_X_X_LS_Y_LS_Y_X_X_LS_Y_LS_Y_385ln], + (instregex "ST3Three(v16b|v8h|v4s)$")>; +def KryoWrite_1cyc_X_X_LS_Y_LS_Y_X_X_LS_Y_XY_LS_Y_388ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitLS, + KryoUnitY, KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, + KryoUnitXY, KryoUnitLS, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 13; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_X_X_LS_Y_LS_Y_X_X_LS_Y_XY_LS_Y_388ln], + (instregex "ST3Three(v16b|v8h|v4s)_POST$")>; +def KryoWrite_0cyc_LS_Y_LS_Y_325ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_325ln], + (instregex "ST4(Fourv1d|(i8|i16|i32|i64))$")>; +def KryoWrite_1cyc_LS_Y_XY_LS_Y_347ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitLS, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 5; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_XY_LS_Y_347ln], + (instregex "ST4(Fourv1d|(i8|i16|i32|i64))_POST$")>; +def KryoWrite_1cyc_X_X_LS_Y_X_X_LS_Y_370ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitX, + KryoUnitX, KryoUnitLS, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 8; +} +def : InstRW<[KryoWrite_1cyc_X_X_LS_Y_X_X_LS_Y_370ln], + (instregex "ST4Four(v8b|v4h|v2s)$")>; +def KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_LS_Y_372ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY, KryoUnitLS, + KryoUnitY, KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 8; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_LS_Y_372ln], + (instregex "ST4Fourv2d$")>; +def KryoWrite_1cyc_X_X_LS_Y_XY_X_X_LS_Y_375ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitXY, + KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 9; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_X_X_LS_Y_XY_X_X_LS_Y_375ln], + (instregex "ST4Four(v8b|v4h|v2s)_POST$")>; +def KryoWrite_0cyc_LS_Y_LS_Y_XY_LS_Y_LS_Y_379ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY, KryoUnitXY, + KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 9; +} +def : InstRW<[WriteAdr, KryoWrite_0cyc_LS_Y_LS_Y_XY_LS_Y_LS_Y_379ln], + (instregex "ST4Fourv2d_POST$")>; +def KryoWrite_1cyc_X_X_LS_Y_X_X_LS_Y_X_X_LS_Y_X_X_LS_Y_390ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitX, + KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitX, KryoUnitX, + KryoUnitLS, KryoUnitY, KryoUnitX, KryoUnitX, KryoUnitLS, + KryoUnitY]> { + let Latency = 1; let NumMicroOps = 16; +} +def : InstRW<[KryoWrite_1cyc_X_X_LS_Y_X_X_LS_Y_X_X_LS_Y_X_X_LS_Y_390ln], + (instregex "ST4Four(v16b|v8h|v4s)$")>; 
+def KryoWrite_1cyc_X_X_LS_Y_X_X_LS_Y_X_X_LS_Y_XY_X_X_LS_Y_392ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitX, + KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitX, KryoUnitX, + KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitX, KryoUnitX, + KryoUnitLS, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 17; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_X_X_LS_Y_X_X_LS_Y_X_X_LS_Y_XY_X_X_LS_Y_392ln], + (instregex "ST4Four(v16b|v8h|v4s)_POST$")>; +def KryoWrite_0cyc_LS_LS_Y_299ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_0cyc_LS_LS_Y_299ln], + (instregex "STLR(B|H|W|X)")>; +def KryoWrite_3cyc_LS_LS_Y_307ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitY]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_Y_307ln], + (instregex "STLX(P(W|X)|R(B|H|W|X))")>; +def KryoWrite_0cyc_LS_Y_276ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_276ln], + (instrs STNPDi, STNPSi)>; +def KryoWrite_0cyc_LS_Y_LS_Y_326ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_326ln], + (instrs STNPQi)>; +def KryoWrite_0cyc_LS_Y_280ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_280ln], + (instrs STNPWi, STNPXi)>; +def KryoWrite_0cyc_LS_Y_277ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_277ln], + (instregex "STP(D|S)i")>; +def KryoWrite_1cyc_LS_Y_X_303ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitX]> { + let Latency = 1; let NumMicroOps = 3; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_X_303ln], + (instregex "STP(D|S)(post|pre)")>; +def KryoWrite_0cyc_LS_Y_LS_Y_327ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_327ln], + (instrs STPQi)>; +def KryoWrite_1cyc_LS_Y_X_LS_Y_343ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitX, KryoUnitLS, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 5; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_X_LS_Y_343ln], + (instrs STPQpost, STPQpre)>; +def KryoWrite_0cyc_LS_Y_279ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_279ln], + (instregex "STP(W|X)i")>; +def KryoWrite_1cyc_LS_X_Y_300ln : + SchedWriteRes<[KryoUnitLS, KryoUnitX, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 3; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_X_Y_300ln], + (instregex "STP(W|X)(post|pre)")>; +def KryoWrite_0cyc_LS_Y_278ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_278ln], + (instregex "STR(Q|D|S|H|B)ui")>; +def KryoWrite_1cyc_X_LS_Y_295ln : + SchedWriteRes<[KryoUnitX, KryoUnitLS, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_1cyc_X_LS_Y_295ln], + (instregex "STR(D|S|H|B)ro(W|X)")>; +def KryoWrite_1cyc_LS_Y_X_304ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitX]> { + let Latency = 1; let NumMicroOps = 3; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_X_304ln], + (instregex "STR(Q|D|S|H|B)(post|pre)")>; +def KryoWrite_2cyc_X_LS_Y_XY_LS_Y_354ln : + SchedWriteRes<[KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitLS, + 
KryoUnitY]> { + let Latency = 2; let NumMicroOps = 6; +} +def : InstRW<[KryoWrite_2cyc_X_LS_Y_XY_LS_Y_354ln], + (instregex "STRQro(W|X)")>; +def KryoWrite_0cyc_LS_Y_399ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_399ln], + (instregex "STR(BB|HH|W|X)ui")>; +def KryoWrite_1cyc_X_LS_Y_406ln : + SchedWriteRes<[KryoUnitX, KryoUnitLS, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_1cyc_X_LS_Y_406ln], + (instregex "STR(BB|HH|W|X)ro(W|X)")>; +def KryoWrite_1cyc_LS_X_Y_407ln : + SchedWriteRes<[KryoUnitLS, KryoUnitX, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 3; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_X_Y_407ln], + (instregex "STR(BB|HH|W|X)(post|pre)")>; +def KryoWrite_0cyc_LS_Y_398ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_398ln], + (instregex "STTR(B|H|W|X)i")>; +def KryoWrite_0cyc_LS_Y_396ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_396ln], + (instregex "STUR(Q|D|S|H|B)i")>; +def KryoWrite_0cyc_LS_Y_397ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_397ln], + (instregex "STUR(BB|HH|W|X)i")>; +def KryoWrite_3cyc_LS_Y_404ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_Y_404ln], + (instregex "STX(P(W|X)|R(B|H|W|X))")>; +def KryoWrite_3cyc_XY_noRSV_160ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_160ln], + (instregex "^(SU|US)QADD(v8i8|v4i16|v2i32)")>; +def KryoWrite_3cyc_XY_XY_167ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_167ln], + (instregex "^(SU|US)QADD(v16i8|v8i16|v4i32|v2i64)")>; +def KryoWrite_1cyc_XY_1ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_XY_1ln, ReadI], + (instregex "SUBS?(W|X)ri")>; +def KryoWrite_2cyc_XY_XY_5ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_5ln, ReadI, ReadIEReg], + (instregex "SUBS?(W|X)rx")>; +def KryoWrite_2cyc_XY_XY_5_1ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_5_1ln, ReadI, ReadISReg], + (instregex "SUBS?(W|X)rs")>; +def KryoWrite_1cyc_XY_noRSV_6ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_6ln, ReadI, ReadI], + (instregex "SUBS?(W|X)rr")>; +def KryoWrite_0cyc_LS_9ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 0; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_0cyc_LS_9ln], + (instregex "SYSL?xt")>; +def KryoWrite_1cyc_X_noRSV_205ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_X_noRSV_205ln], + (instrs TBLv8i8One)>; +def KryoWrite_1cyc_X_X_208ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_X_X_208ln], + (instrs TBLv16i8One)>; +def KryoWrite_2cyc_X_X_X_noRSV_222ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX]> { + let Latency = 2; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_2cyc_X_X_X_noRSV_222ln], + (instrs TBLv8i8Two)>; 
+def KryoWrite_2cyc_X_X_X_X_X_X_224ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitX]> { + let Latency = 2; let NumMicroOps = 6; +} +def : InstRW<[KryoWrite_2cyc_X_X_X_X_X_X_224ln], + (instrs TBLv16i8Two)>; +def KryoWrite_3cyc_X_X_X_X_X_noRSV_225ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX]> { + let Latency = 3; let NumMicroOps = 6; +} +def : InstRW<[KryoWrite_3cyc_X_X_X_X_X_noRSV_225ln], + (instrs TBLv8i8Three)>; +def KryoWrite_3cyc_X_X_X_X_X_X_X_noRSV_228ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitX, KryoUnitX]> { + let Latency = 3; let NumMicroOps = 8; +} +def : InstRW<[KryoWrite_3cyc_X_X_X_X_X_X_X_noRSV_228ln], + (instrs TBLv8i8Four)>; +def KryoWrite_4cyc_X_X_X_X_X_X_X_X_XY_X_X_230ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitXY, KryoUnitX, + KryoUnitX]> { + let Latency = 4; let NumMicroOps = 11; +} +def : InstRW<[KryoWrite_4cyc_X_X_X_X_X_X_X_X_XY_X_X_230ln], + (instrs TBLv16i8Three)>; +def KryoWrite_4cyc_X_X_X_X_X_X_X_X_X_X_XY_X_X_X_X_232ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitXY, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 15; +} +def : InstRW<[KryoWrite_4cyc_X_X_X_X_X_X_X_X_X_X_XY_X_X_X_X_232ln], + (instrs TBLv16i8Four)>; +def KryoWrite_2cyc_X_X_noRSV_220ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 2; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_2cyc_X_X_noRSV_220ln], + (instrs TBXv8i8One)>; +def KryoWrite_2cyc_X_X_X_X_221ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX]> { + let Latency = 2; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_2cyc_X_X_X_X_221ln], + (instrs TBXv16i8One)>; +def KryoWrite_3cyc_X_X_X_X_noRSV_223ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX]> { + let Latency = 3; let NumMicroOps = 5; +} +def : InstRW<[KryoWrite_3cyc_X_X_X_X_noRSV_223ln], + (instrs TBXv8i8Two)>; +def KryoWrite_4cyc_X_X_X_X_X_X_noRSV_226ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitX]> { + let Latency = 4; let NumMicroOps = 7; +} +def : InstRW<[KryoWrite_4cyc_X_X_X_X_X_X_noRSV_226ln], + (instrs TBXv8i8Three)>; +def KryoWrite_3cyc_X_X_X_X_X_X_X_X_227ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitX, KryoUnitX, KryoUnitX]> { + let Latency = 3; let NumMicroOps = 8; +} +def : InstRW<[KryoWrite_3cyc_X_X_X_X_X_X_X_X_227ln], + (instrs TBXv16i8Two)>; +def KryoWrite_4cyc_X_X_X_X_X_X_X_X_noRSV_229ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitX, KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 9; +} +def : InstRW<[KryoWrite_4cyc_X_X_X_X_X_X_X_X_noRSV_229ln], + (instrs TBXv8i8Four)>; +def KryoWrite_5cyc_X_X_X_X_X_X_X_X_X_XY_X_X_X_231ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitXY, + KryoUnitX, KryoUnitX, KryoUnitX]> { + let Latency = 5; let NumMicroOps = 13; +} +def : InstRW<[KryoWrite_5cyc_X_X_X_X_X_X_X_X_X_XY_X_X_X_231ln], + (instrs TBXv16i8Three)>; +def KryoWrite_5cyc_X_X_X_X_X_X_X_X_X_X_X_XY_X_X_X_X_X_233ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitX, KryoUnitXY, KryoUnitX, KryoUnitX, KryoUnitX, + 
KryoUnitX, KryoUnitX]> { + let Latency = 5; let NumMicroOps = 17; +} +def : InstRW<[KryoWrite_5cyc_X_X_X_X_X_X_X_X_X_X_X_XY_X_X_X_X_X_233ln], + (instrs TBXv16i8Four)>; +def KryoWrite_1cyc_XY_XY_217ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_XY_217ln], + (instregex "((TRN1|TRN2|ZIP1|UZP1|UZP2)v2i64|ZIP2(v2i64|v4i32|v8i16|v16i8))")>; +def KryoWrite_1cyc_X_X_211ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_X_X_211ln], + (instregex "(TRN1|TRN2)(v4i32|v8i16|v16i8)")>; +def KryoWrite_1cyc_X_XY_213ln : + SchedWriteRes<[KryoUnitX, KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_X_XY_213ln], + (instregex "(TRN1|TRN2)(v2i32|v4i16|v8i8)")>; +def KryoWrite_3cyc_XY_noRSV_156ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_156ln], + (instrs URECPEv2i32, URSQRTEv2i32)>; +def KryoWrite_3cyc_XY_XY_168ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_168ln], + (instrs URECPEv4i32, URSQRTEv4i32)>; +def KryoWrite_1cyc_X_X_210ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_X_X_210ln], + (instregex "(UZP1|UZP2)(v4i32|v8i16|v16i8)")>; +def KryoWrite_1cyc_X_noRSV_206ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_X_noRSV_206ln], + (instregex "(UZP1|UZP2|ZIP1|ZIP2)(v2i32|v4i16|v8i8)")>; +def KryoWrite_1cyc_XY_noRSV_215ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_215ln], + (instregex "XTNv.*")>; +def KryoWrite_1cyc_X_X_209ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_X_X_209ln], + (instregex "ZIP1(v4i32|v8i16|v16i8)")>; diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedThunderX.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedThunderX.td new file mode 100644 index 000000000..fbbd3850d --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedThunderX.td @@ -0,0 +1,357 @@ +//==- AArch64SchedThunderX.td - Cavium ThunderX T8X Scheduling Definitions -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the ARM ThunderX T8X +// (T88, T81, T83) processors. +// Loosely based on Cortex-A53 which is somewhat similar. +// +//===----------------------------------------------------------------------===// + +// ===---------------------------------------------------------------------===// +// The following definitions describe the simpler per-operand machine model. +// This works with MachineScheduler. See llvm/MC/MCSchedule.h for details. + +// Cavium ThunderX T8X scheduling machine model. +def ThunderXT8XModel : SchedMachineModel { + let IssueWidth = 2; // 2 micro-ops dispatched per cycle. + let MicroOpBufferSize = 0; // ThunderX T88/T81/T83 are in-order. + let LoadLatency = 3; // Optimistic load latency. + let MispredictPenalty = 8; // Branch mispredict penalty. 
+ let PostRAScheduler = 1; // Use PostRA scheduler. + let CompleteModel = 1; + + list UnsupportedFeatures = [HasSVE]; + + // FIXME: Remove when all errors have been fixed. + let FullInstRWOverlapCheck = 0; +} + +// Modeling each pipeline with BufferSize == 0 since T8X is in-order. +def THXT8XUnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU +def THXT8XUnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC +def THXT8XUnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division +def THXT8XUnitLdSt : ProcResource<1> { let BufferSize = 0; } // Load/Store +def THXT8XUnitBr : ProcResource<1> { let BufferSize = 0; } // Branch +def THXT8XUnitFPALU : ProcResource<1> { let BufferSize = 0; } // FP ALU +def THXT8XUnitFPMDS : ProcResource<1> { let BufferSize = 0; } // FP Mul/Div/Sqrt + +//===----------------------------------------------------------------------===// +// Subtarget-specific SchedWrite types mapping the ProcResources and +// latencies. + +let SchedModel = ThunderXT8XModel in { + +// ALU +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 2; } +def : WriteRes { let Latency = 2; } +def : WriteRes { let Latency = 2; } +def : WriteRes { let Latency = 2; } + +// MAC +def : WriteRes { + let Latency = 4; + let ResourceCycles = [1]; +} + +def : WriteRes { + let Latency = 4; + let ResourceCycles = [1]; +} + +// Div +def : WriteRes { + let Latency = 12; + let ResourceCycles = [6]; +} + +def : WriteRes { + let Latency = 14; + let ResourceCycles = [8]; +} + +// Load +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 3; } + +// Vector Load +def : WriteRes { + let Latency = 8; + let ResourceCycles = [3]; +} + +def THXT8XWriteVLD1 : SchedWriteRes<[THXT8XUnitLdSt]> { + let Latency = 6; + let ResourceCycles = [1]; +} + +def THXT8XWriteVLD2 : SchedWriteRes<[THXT8XUnitLdSt]> { + let Latency = 11; + let ResourceCycles = [7]; +} + +def THXT8XWriteVLD3 : SchedWriteRes<[THXT8XUnitLdSt]> { + let Latency = 12; + let ResourceCycles = [8]; +} + +def THXT8XWriteVLD4 : SchedWriteRes<[THXT8XUnitLdSt]> { + let Latency = 13; + let ResourceCycles = [9]; +} + +def THXT8XWriteVLD5 : SchedWriteRes<[THXT8XUnitLdSt]> { + let Latency = 13; + let ResourceCycles = [9]; +} + +// Pre/Post Indexing +def : WriteRes { let Latency = 0; } + +// Store +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } + +// Vector Store +def : WriteRes; +def THXT8XWriteVST1 : SchedWriteRes<[THXT8XUnitLdSt]>; + +def THXT8XWriteVST2 : SchedWriteRes<[THXT8XUnitLdSt]> { + let Latency = 10; + let ResourceCycles = [9]; +} + +def THXT8XWriteVST3 : SchedWriteRes<[THXT8XUnitLdSt]> { + let Latency = 11; + let ResourceCycles = [10]; +} + +def : WriteRes { let Unsupported = 1; } + +// Branch +def : WriteRes; +def THXT8XWriteBR : SchedWriteRes<[THXT8XUnitBr]>; +def : WriteRes; +def THXT8XWriteBRR : SchedWriteRes<[THXT8XUnitBr]>; +def THXT8XWriteRET : SchedWriteRes<[THXT8XUnitALU]>; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// FP ALU +def : WriteRes { let Latency = 6; } +def : WriteRes { let Latency = 6; } +def : WriteRes { let Latency = 6; } +def : WriteRes { let Latency = 6; } +def : WriteRes { let Latency = 6; } +def : WriteRes { let Latency = 6; } + +// FP Mul, Div, Sqrt +def : WriteRes { let Latency = 6; } +def : WriteRes { + let Latency = 22; + let ResourceCycles = [19]; +} + +def THXT8XWriteFMAC : 
SchedWriteRes<[THXT8XUnitFPMDS]> { let Latency = 10; } + +def THXT8XWriteFDivSP : SchedWriteRes<[THXT8XUnitFPMDS]> { + let Latency = 12; + let ResourceCycles = [9]; +} + +def THXT8XWriteFDivDP : SchedWriteRes<[THXT8XUnitFPMDS]> { + let Latency = 22; + let ResourceCycles = [19]; +} + +def THXT8XWriteFSqrtSP : SchedWriteRes<[THXT8XUnitFPMDS]> { + let Latency = 17; + let ResourceCycles = [14]; +} + +def THXT8XWriteFSqrtDP : SchedWriteRes<[THXT8XUnitFPMDS]> { + let Latency = 31; + let ResourceCycles = [28]; +} + +//===----------------------------------------------------------------------===// +// Subtarget-specific SchedRead types. + +// No forwarding for these reads. +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +// FIXME: This needs more targeted benchmarking. +// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable +// operands are needed one cycle later if and only if they are to be +// shifted. Otherwise, they too are needed two cycles later. This same +// ReadAdvance applies to Extended registers as well, even though there is +// a separate SchedPredicate for them. +def : ReadAdvance; +def THXT8XReadShifted : SchedReadAdvance<1, [WriteImm, WriteI, + WriteISReg, WriteIEReg, WriteIS, + WriteID32, WriteID64, + WriteIM32, WriteIM64]>; +def THXT8XReadNotShifted : SchedReadAdvance<2, [WriteImm, WriteI, + WriteISReg, WriteIEReg, WriteIS, + WriteID32, WriteID64, + WriteIM32, WriteIM64]>; +def THXT8XReadISReg : SchedReadVariant<[ + SchedVar, + SchedVar]>; +def : SchedAlias; + +def THXT8XReadIEReg : SchedReadVariant<[ + SchedVar, + SchedVar]>; +def : SchedAlias; + +// MAC - Operands are generally needed one cycle later in the MAC pipe. +// Accumulator operands are needed two cycles later. +def : ReadAdvance; +def : ReadAdvance; + +// Div +def : ReadAdvance; + +//===----------------------------------------------------------------------===// +// Subtarget-specific InstRW. 
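+
+// Illustrative sketch, not part of the upstream file: every override in the
+// InstRW section below follows the same two-step shape. A SchedWriteRes def
+// binds a latency/resource profile to one of the THXT8X units, and an InstRW
+// def attaches that profile to the instructions matched by an instregex or
+// instrs list. The name THXT8XWriteExample and the 5-cycle latency here are
+// assumptions chosen purely for illustration:
+//
+//   def THXT8XWriteExample : SchedWriteRes<[THXT8XUnitLdSt]> {
+//     let Latency = 5;          // assumed value, illustration only
+//     let ResourceCycles = [1]; // occupies the load/store pipe for one cycle
+//   }
+//   def : InstRW<[THXT8XWriteExample], (instregex "^SomeOpcode$")>;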
+ +//--- +// Branch +//--- +def : InstRW<[THXT8XWriteBR], (instregex "^B$")>; +def : InstRW<[THXT8XWriteBR], (instregex "^BL$")>; +def : InstRW<[THXT8XWriteBR], (instregex "^B..$")>; +def : InstRW<[THXT8XWriteBR], (instregex "^CBNZ")>; +def : InstRW<[THXT8XWriteBR], (instregex "^CBZ")>; +def : InstRW<[THXT8XWriteBR], (instregex "^TBNZ")>; +def : InstRW<[THXT8XWriteBR], (instregex "^TBZ")>; +def : InstRW<[THXT8XWriteBRR], (instregex "^BR$")>; +def : InstRW<[THXT8XWriteBRR], (instregex "^BLR$")>; + +//--- +// Ret +//--- +def : InstRW<[THXT8XWriteRET], (instregex "^RET$")>; + +//--- +// Miscellaneous +//--- +def : InstRW<[WriteI], (instrs COPY)>; + +//--- +// Vector Loads +//--- +def : InstRW<[THXT8XWriteVLD1], (instregex "LD1i(8|16|32|64)$")>; +def : InstRW<[THXT8XWriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>; +def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[THXT8XWriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +def : InstRW<[THXT8XWriteVLD1], (instregex "LD2i(8|16|32|64)$")>; +def : InstRW<[THXT8XWriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>; +def : InstRW<[THXT8XWriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD2i(8|16|32|64)(_POST)?$")>; +def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>; +def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>; +def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>; + +def : InstRW<[THXT8XWriteVLD2], (instregex "LD3i(8|16|32|64)$")>; +def : InstRW<[THXT8XWriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVLD4], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[THXT8XWriteVLD3], (instregex "LD3Threev(2d)$")>; +def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>; +def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[THXT8XWriteVLD3, WriteAdr], (instregex "LD3Threev(2d)_POST$")>; + +def : InstRW<[THXT8XWriteVLD2], (instregex "LD4i(8|16|32|64)$")>; +def : InstRW<[THXT8XWriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVLD5], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[THXT8XWriteVLD4], (instregex "LD4Fourv(2d)$")>; +def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>; +def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : 
InstRW<[THXT8XWriteVLD5, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>; + +//--- +// Vector Stores +//--- +def : InstRW<[THXT8XWriteVST1], (instregex "ST1i(8|16|32|64)$")>; +def : InstRW<[THXT8XWriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVST2], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>; +def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +def : InstRW<[THXT8XWriteVST1], (instregex "ST2i(8|16|32|64)$")>; +def : InstRW<[THXT8XWriteVST1], (instregex "ST2Twov(8b|4h|2s)$")>; +def : InstRW<[THXT8XWriteVST2], (instregex "ST2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>; +def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[THXT8XWriteVST2], (instregex "ST3i(8|16|32|64)$")>; +def : InstRW<[THXT8XWriteVST3], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[THXT8XWriteVST2], (instregex "ST3Threev(2d)$")>; +def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>; +def : InstRW<[THXT8XWriteVST3, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST3Threev(2d)_POST$")>; + +def : InstRW<[THXT8XWriteVST2], (instregex "ST4i(8|16|32|64)$")>; +def : InstRW<[THXT8XWriteVST3], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[THXT8XWriteVST2], (instregex "ST4Fourv(2d)$")>; +def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>; +def : InstRW<[THXT8XWriteVST3, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>; + +//--- +// Floating Point MAC, DIV, SQRT +//--- +def : InstRW<[THXT8XWriteFMAC], (instregex "^FN?M(ADD|SUB).*")>; +def : InstRW<[THXT8XWriteFMAC], (instregex "^FML(A|S).*")>; +def : InstRW<[THXT8XWriteFDivSP], (instrs FDIVSrr)>; +def : InstRW<[THXT8XWriteFDivDP], (instrs FDIVDrr)>; +def : InstRW<[THXT8XWriteFDivSP], (instregex "^FDIVv.*32$")>; +def : InstRW<[THXT8XWriteFDivDP], (instregex "^FDIVv.*64$")>; +def : InstRW<[THXT8XWriteFSqrtSP], (instregex "^.*SQRT.*32$")>; +def : InstRW<[THXT8XWriteFSqrtDP], (instregex "^.*SQRT.*64$")>; + +} diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedThunderX2T99.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedThunderX2T99.td new file mode 100644 index 000000000..bee3392b6 --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedThunderX2T99.td @@ -0,0 +1,1880 @@ +//=- AArch64SchedThunderX2T99.td - Cavium ThunderX T99 ---*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// 
License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the scheduling model for Cavium ThunderX2T99 +// processors. +// Based on Broadcom Vulcan. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// 2. Pipeline Description. + +def ThunderX2T99Model : SchedMachineModel { + let IssueWidth = 4; // 4 micro-ops dispatched at a time. + let MicroOpBufferSize = 180; // 180 entries in micro-op re-order buffer. + let LoadLatency = 4; // Optimistic load latency. + let MispredictPenalty = 12; // Extra cycles for mispredicted branch. + // Determined via a mix of micro-arch details and experimentation. + let LoopMicroOpBufferSize = 128; + let PostRAScheduler = 1; // Using PostRA sched. + let CompleteModel = 1; + + list<Predicate> UnsupportedFeatures = [HasSVE]; + + // FIXME: Remove when all errors have been fixed. + let FullInstRWOverlapCheck = 0; +} + +let SchedModel = ThunderX2T99Model in { + +// Define the issue ports. + +// Port 0: ALU, FP/SIMD. +def THX2T99P0 : ProcResource<1>; + +// Port 1: ALU, FP/SIMD, integer mul/div. +def THX2T99P1 : ProcResource<1>; + +// Port 2: ALU, Branch. +def THX2T99P2 : ProcResource<1>; + +// Port 3: Store data. +def THX2T99P3 : ProcResource<1>; + +// Port 4: Load/store. +def THX2T99P4 : ProcResource<1>; + +// Port 5: Load/store. +def THX2T99P5 : ProcResource<1>; + +// Define groups for the functional units on each issue port. Each group +// created will be used by a WriteRes later on. +// +// NOTE: Some groups only contain one member. This is a way to create names for +// the various functional units that share a single issue port. For example, +// "THX2T99I1" for ALU ops on port 1 and "THX2T99F1" for FP ops on port 1. + +// Integer divide and multiply micro-ops only on port 1. +def THX2T99I1 : ProcResGroup<[THX2T99P1]>; + +// Branch micro-ops only on port 2. +def THX2T99I2 : ProcResGroup<[THX2T99P2]>; + +// ALU micro-ops on ports 0, 1, and 2. +def THX2T99I012 : ProcResGroup<[THX2T99P0, THX2T99P1, THX2T99P2]>; + +// Crypto FP/SIMD micro-ops only on port 1. +def THX2T99F1 : ProcResGroup<[THX2T99P1]>; + +// FP/SIMD micro-ops on ports 0 and 1. +def THX2T99F01 : ProcResGroup<[THX2T99P0, THX2T99P1]>; + +// Store data micro-ops only on port 3. +def THX2T99SD : ProcResGroup<[THX2T99P3]>; + +// Load/store micro-ops on ports 4 and 5. +def THX2T99LS01 : ProcResGroup<[THX2T99P4, THX2T99P5]>; + +// 60 entry unified scheduler. +def THX2T99Any : ProcResGroup<[THX2T99P0, THX2T99P1, THX2T99P2, + THX2T99P3, THX2T99P4, THX2T99P5]> { + let BufferSize = 60; +} + +// Define commonly used write types for InstRW specializations. +// All definitions follow the format: THX2T99Write_<NumCycles>Cyc_<Resources>. + +// 3 cycles on I1. +def THX2T99Write_3Cyc_I1 : SchedWriteRes<[THX2T99I1]> { + let Latency = 3; + let NumMicroOps = 2; +} + +// 1 cycle on I2. +def THX2T99Write_1Cyc_I2 : SchedWriteRes<[THX2T99I2]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// 4 cycles on I1. +def THX2T99Write_4Cyc_I1 : SchedWriteRes<[THX2T99I1]> { + let Latency = 4; + let NumMicroOps = 2; +} + +// 23 cycles on I1. +def THX2T99Write_23Cyc_I1 : SchedWriteRes<[THX2T99I1]> { + let Latency = 23; + let ResourceCycles = [13, 23]; + let NumMicroOps = 4; +} + +// 39 cycles on I1. 
+def THX2T99Write_39Cyc_I1 : SchedWriteRes<[THX2T99I1]> { + let Latency = 39; + let ResourceCycles = [13, 39]; + let NumMicroOps = 4; +} + +// 1 cycle on I0, I1, or I2. +def THX2T99Write_1Cyc_I012 : SchedWriteRes<[THX2T99I012]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// 2 cycles on I0, I1, or I2. +def THX2T99Write_2Cyc_I012 : SchedWriteRes<[THX2T99I012]> { + let Latency = 2; + let NumMicroOps = 2; +} + +// 4 cycles on I0, I1, or I2. +def THX2T99Write_4Cyc_I012 : SchedWriteRes<[THX2T99I012]> { + let Latency = 2; + let NumMicroOps = 3; +} + +// 5 cycles on I0, I1, or I2. +def THX2T99Write_5Cyc_I012 : SchedWriteRes<[THX2T99I012]> { + let Latency = 2; + let NumMicroOps = 3; +} + +// 5 cycles on F1. +def THX2T99Write_5Cyc_F1 : SchedWriteRes<[THX2T99F1]> { + let Latency = 5; + let NumMicroOps = 2; +} + +// 7 cycles on F1. +def THX2T99Write_7Cyc_F1 : SchedWriteRes<[THX2T99F1]> { + let Latency = 7; + let NumMicroOps = 2; +} + +// 4 cycles on F0 or F1. +def THX2T99Write_4Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 4; + let NumMicroOps = 2; +} + +// 5 cycles on F0 or F1. +def THX2T99Write_5Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 5; + let NumMicroOps = 2; +} + +// 6 cycles on F0 or F1. +def THX2T99Write_6Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 6; + let NumMicroOps = 3; +} + +// 7 cycles on F0 or F1. +def THX2T99Write_7Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 7; + let NumMicroOps = 3; +} + +// 8 cycles on F0 or F1. +def THX2T99Write_8Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 8; + let NumMicroOps = 3; +} + +// 10 cycles on F0 or F1. +def THX2T99Write_10Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 10; + let NumMicroOps = 3; +} + +// 16 cycles on F0 or F1. +def THX2T99Write_16Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 16; + let NumMicroOps = 3; + let ResourceCycles = [8]; +} + +// 23 cycles on F0 or F1. +def THX2T99Write_23Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 23; + let NumMicroOps = 3; + let ResourceCycles = [11]; +} + +// 1 cycles on LS0 or LS1. +def THX2T99Write_1Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { + let Latency = 0; +} + +// 1 cycles on LS0 or LS1 and I0, I1, or I2. +def THX2T99Write_1Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 0; + let NumMicroOps = 2; +} + +// 1 cycles on LS0 or LS1 and 2 of I0, I1, or I2. +def THX2T99Write_1Cyc_LS01_I012_I012 : + SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> { + let Latency = 0; + let NumMicroOps = 3; +} + +// 2 cycles on LS0 or LS1. +def THX2T99Write_2Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// 4 cycles on LS0 or LS1. +def THX2T99Write_4Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { + let Latency = 4; + let NumMicroOps = 4; +} + +// 5 cycles on LS0 or LS1. +def THX2T99Write_5Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { + let Latency = 5; + let NumMicroOps = 3; +} + +// 6 cycles on LS0 or LS1. +def THX2T99Write_6Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { + let Latency = 6; + let NumMicroOps = 3; +} + +// 4 cycles on LS0 or LS1 and I0, I1, or I2. +def THX2T99Write_4Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 4; + let NumMicroOps = 3; +} + +// 4 cycles on LS0 or LS1 and 2 of I0, I1, or I2. +def THX2T99Write_4Cyc_LS01_I012_I012 : + SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> { + let Latency = 4; + let NumMicroOps = 3; +} + +// 5 cycles on LS0 or LS1 and I0, I1, or I2. 
+def THX2T99Write_5Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 5; + let NumMicroOps = 3; +} + +// 5 cycles on LS0 or LS1 and 2 of I0, I1, or I2. +def THX2T99Write_5Cyc_LS01_I012_I012 : + SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> { + let Latency = 5; + let NumMicroOps = 3; +} + +// 6 cycles on LS0 or LS1 and I0, I1, or I2. +def THX2T99Write_6Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 6; + let NumMicroOps = 4; +} + +// 6 cycles on LS0 or LS1 and 2 of I0, I1, or I2. +def THX2T99Write_6Cyc_LS01_I012_I012 : + SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> { + let Latency = 6; + let NumMicroOps = 3; +} + +// 1 cycles on LS0 or LS1 and F0 or F1. +def THX2T99Write_1Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// 5 cycles on LS0 or LS1 and F0 or F1. +def THX2T99Write_5Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { + let Latency = 5; + let NumMicroOps = 3; +} + +// 6 cycles on LS0 or LS1 and F0 or F1. +def THX2T99Write_6Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { + let Latency = 6; + let NumMicroOps = 3; +} + +// 7 cycles on LS0 or LS1 and F0 or F1. +def THX2T99Write_7Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { + let Latency = 7; + let NumMicroOps = 3; +} + +// 8 cycles on LS0 or LS1 and F0 or F1. +def THX2T99Write_8Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { + let Latency = 8; + let NumMicroOps = 3; +} + +// 8 cycles on LS0 or LS1 and I0, I1, or I2. +def THX2T99Write_8Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 8; + let NumMicroOps = 4; +} + +// 12 cycles on LS0 or LS1 and I0, I1, or I2. +def THX2T99Write_12Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 12; + let NumMicroOps = 6; +} + +// 16 cycles on LS0 or LS1 and I0, I1, or I2. +def THX2T99Write_16Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 16; + let NumMicroOps = 8; +} + +// 24 cycles on LS0 or LS1 and I0, I1, or I2. +def THX2T99Write_24Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 24; + let NumMicroOps = 12; +} + +// 32 cycles on LS0 or LS1 and I0, I1, or I2. +def THX2T99Write_32Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 32; + let NumMicroOps = 16; +} + +// Define commonly used read types. + +// No forwarding is provided for these types. +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +//===----------------------------------------------------------------------===// +// 3. Instruction Tables. 
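+
+// Illustrative note, not part of the upstream file: the write types defined in
+// the pipeline description above follow the naming pattern
+// THX2T99Write_<latency>Cyc_<resources>; THX2T99Write_4Cyc_LS01_I012, for
+// instance, is a 4-cycle write occupying one load/store port (LS0/LS1) plus
+// one integer port (I0/I1/I2). The instruction tables below bind AArch64
+// opcodes to those write types via InstRW, in the general shape shown here
+// (this LDRXroX mapping is one that actually appears later in this file):
+//
+//   def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRXroX)>;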
+ +//--- +// 3.1 Branch Instructions +//--- + +// Branch, immed +// Branch and link, immed +// Compare and branch +def : WriteRes { + let Latency = 1; + let NumMicroOps = 2; +} + +// Branch, register +// Branch and link, register != LR +// Branch and link, register = LR +def : WriteRes { + let Latency = 1; + let NumMicroOps = 2; +} + +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } + +def : WriteRes { + let Latency = 4; + let NumMicroOps = 2; +} + +//--- +// Branch +//--- +def : InstRW<[THX2T99Write_1Cyc_I2], (instrs B, BL, BR, BLR)>; +def : InstRW<[THX2T99Write_1Cyc_I2], (instrs RET)>; +def : InstRW<[THX2T99Write_1Cyc_I2], (instregex "^B..$")>; +def : InstRW<[THX2T99Write_1Cyc_I2], + (instregex "^CBZ", "^CBNZ", "^TBZ", "^TBNZ")>; + +//--- +// 3.2 Arithmetic and Logical Instructions +// 3.3 Move and Shift Instructions +//--- + + +// ALU, basic +// Conditional compare +// Conditional select +// Address generation +def : WriteRes { + let Latency = 1; + let ResourceCycles = [1]; + let NumMicroOps = 2; +} + +def : InstRW<[WriteI], + (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", + "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", + "ADC(W|X)r", + "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", + "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", + "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", + "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r", + "SBCS(W|X)r", "CCMN(W|X)(i|r)", + "CCMP(W|X)(i|r)", "CSEL(W|X)r", + "CSINC(W|X)r", "CSINV(W|X)r", + "CSNEG(W|X)r")>; + +def : InstRW<[WriteI], (instrs COPY)>; + +// ALU, extend and/or shift +def : WriteRes { + let Latency = 2; + let ResourceCycles = [2]; + let NumMicroOps = 2; +} + +def : InstRW<[WriteISReg], + (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", + "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", + "ADC(W|X)r", + "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", + "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", + "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", + "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r", + "SBCS(W|X)r", "CCMN(W|X)(i|r)", + "CCMP(W|X)(i|r)", "CSEL(W|X)r", + "CSINC(W|X)r", "CSINV(W|X)r", + "CSNEG(W|X)r")>; + +def : WriteRes { + let Latency = 1; + let ResourceCycles = [1]; + let NumMicroOps = 2; +} + +def : InstRW<[WriteIEReg], + (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", + "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", + "ADC(W|X)r", + "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", + "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", + "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", + "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r", + "SBCS(W|X)r", "CCMN(W|X)(i|r)", + "CCMP(W|X)(i|r)", "CSEL(W|X)r", + "CSINC(W|X)r", "CSINV(W|X)r", + "CSNEG(W|X)r")>; + +// Move immed +def : WriteRes { + let Latency = 1; + let NumMicroOps = 2; +} + +def : InstRW<[THX2T99Write_1Cyc_I012], + (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>; + +def : InstRW<[THX2T99Write_1Cyc_I012], + (instrs ASRVWr, ASRVXr, LSLVWr, LSLVXr, RORVWr, RORVXr)>; + +// Variable shift +def : WriteRes { + let Latency = 1; + let NumMicroOps = 2; +} + +//--- +// 3.4 Divide and Multiply Instructions +//--- + +// Divide, W-form +// Latency range of 13-23/13-39. 
+def : WriteRes { + let Latency = 39; + let ResourceCycles = [39]; + let NumMicroOps = 4; +} + +// Divide, X-form +def : WriteRes { + let Latency = 23; + let ResourceCycles = [23]; + let NumMicroOps = 4; +} + +// Multiply accumulate, W-form +def : WriteRes { + let Latency = 5; + let NumMicroOps = 3; +} + +// Multiply accumulate, X-form +def : WriteRes { + let Latency = 5; + let NumMicroOps = 3; +} + +//def : InstRW<[WriteIM32, ReadIM, ReadIM, ReadIMA, THX2T99Write_5Cyc_I012], +// (instrs MADDWrrr, MSUBWrrr)>; +def : InstRW<[WriteIM32], (instrs MADDWrrr, MSUBWrrr)>; +def : InstRW<[WriteIM32], (instrs MADDXrrr, MSUBXrrr)>; +def : InstRW<[THX2T99Write_5Cyc_I012], + (instregex "(S|U)(MADDL|MSUBL)rrr")>; + +def : InstRW<[WriteID32], (instrs SDIVWr, UDIVWr)>; +def : InstRW<[WriteID64], (instrs SDIVXr, UDIVXr)>; + +// Bitfield extract, two reg +def : WriteRes { + let Latency = 1; + let NumMicroOps = 2; +} + +// Multiply high +def : InstRW<[THX2T99Write_4Cyc_I1], (instrs SMULHrr, UMULHrr)>; + +// Miscellaneous Data-Processing Instructions +// Bitfield extract +def : InstRW<[THX2T99Write_1Cyc_I012], (instrs EXTRWrri, EXTRXrri)>; + +// Bitifield move - basic +def : InstRW<[THX2T99Write_1Cyc_I012], + (instrs SBFMWri, SBFMXri, UBFMWri, UBFMXri)>; + +// Bitfield move, insert +def : InstRW<[THX2T99Write_1Cyc_I012], (instregex "^BFM")>; +def : InstRW<[THX2T99Write_1Cyc_I012], (instregex "(S|U)?BFM.*")>; + +// Count leading +def : InstRW<[THX2T99Write_3Cyc_I1], (instregex "^CLS(W|X)r$", + "^CLZ(W|X)r$")>; + +// Reverse bits +def : InstRW<[THX2T99Write_1Cyc_I012], (instrs RBITWr, RBITXr)>; + +// Cryptography Extensions +def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AES[DE]")>; +def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AESI?MC")>; +def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^PMULL")>; +def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA1SU0")>; +def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA1(H|SU1)")>; +def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA1[CMP]")>; +def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA256SU0")>; +def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA256(H|H2|SU1)")>; + +// CRC Instructions +// def : InstRW<[THX2T99Write_4Cyc_I1], (instregex "^CRC32", "^CRC32C")>; +def : InstRW<[THX2T99Write_4Cyc_I1], + (instrs CRC32Brr, CRC32Hrr, CRC32Wrr, CRC32Xrr)>; + +def : InstRW<[THX2T99Write_4Cyc_I1], + (instrs CRC32CBrr, CRC32CHrr, CRC32CWrr, CRC32CXrr)>; + +// Reverse bits/bytes +// NOTE: Handled by WriteI. + +//--- +// 3.6 Load Instructions +// 3.10 FP Load Instructions +//--- + +// Load register, literal +// Load register, unscaled immed +// Load register, immed unprivileged +// Load register, unsigned immed +def : WriteRes { + let Latency = 4; + let NumMicroOps = 4; +} + +// Load register, immed post-index +// NOTE: Handled by WriteLD, WriteI. +// Load register, immed pre-index +// NOTE: Handled by WriteLD, WriteAdr. +def : WriteRes { + let Latency = 1; + let NumMicroOps = 2; +} + +// Load pair, immed offset, normal +// Load pair, immed offset, signed words, base != SP +// Load pair, immed offset signed words, base = SP +// LDP only breaks into *one* LS micro-op. Thus +// the resources are handled by WriteLD. 
+def : WriteRes { + let Latency = 5; + let NumMicroOps = 5; +} + +// Load register offset, basic +// Load register, register offset, scale by 4/8 +// Load register, register offset, scale by 2 +// Load register offset, extend +// Load register, register offset, extend, scale by 4/8 +// Load register, register offset, extend, scale by 2 +def THX2T99WriteLDIdx : SchedWriteVariant<[ + SchedVar, + SchedVar]>; +def : SchedAlias; + +def THX2T99ReadAdrBase : SchedReadVariant<[ + SchedVar, + SchedVar]>; +def : SchedAlias; + +// Load pair, immed pre-index, normal +// Load pair, immed pre-index, signed words +// Load pair, immed post-index, normal +// Load pair, immed post-index, signed words +// NOTE: Handled by WriteLD, WriteLDHi, WriteAdr. + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPDi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPQi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPSi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPWi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPXi)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPDi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPQi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPSi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPSWi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPWi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPXi)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRBui)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRDui)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRHui)>; +def : InstRW<[THX2T99Write_5Cyc_LS01], (instrs LDRQui)>; +def : InstRW<[THX2T99Write_5Cyc_LS01], (instrs LDRSui)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRDl)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRQl)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRWl)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRXl)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRBi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRHi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRWi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRXi)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSBWi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSBXi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSHWi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSHXi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSWi)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPDpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPQpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPSpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPWpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPWpre)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRBpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRDpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRHpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRQpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRSpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRWpre)>; 
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRXpre)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBWpre)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBXpre)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBWpost)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBXpost)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHWpre)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHXpre)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHWpost)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHXpost)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRBBpre)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRBBpost)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRHHpre)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRHHpost)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPDpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPQpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPSpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPWpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPXpost)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRBpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRDpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRHpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRQpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRSpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRWpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRXpost)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPDpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPQpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPSpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPWpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPXpre)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRBpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRDpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRHpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRQpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRSpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRWpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRXpre)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPDpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPQpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPSpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPWpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPXpost)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRBpost)>; +def : 
InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRDpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRHpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRQpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRSpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRWpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRXpost)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRBroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRDroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHHroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRQroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHWroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHXroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRWroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRXroW)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRBroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRDroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHHroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRQroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHWroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHXroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRWroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRXroX)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRBroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRBroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRDroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRHroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRHHroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRQroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRSroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRSHWroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRSHXroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRWroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRXroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRBroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRDroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRHroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRHHroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRQroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRSroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRSHWroX)>; +def : 
InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRSHXroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRWroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRXroX)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURBi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURBBi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURDi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURHi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURHHi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURQi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURXi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSBWi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSBXi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSHWi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSHXi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSWi)>; + +//--- +// Prefetch +//--- +def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMl)>; +def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFUMi)>; +def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMui)>; +def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMroW)>; +def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMroX)>; + +//-- +// 3.7 Store Instructions +// 3.11 FP Store Instructions +//-- + +// Store register, unscaled immed +// Store register, immed unprivileged +// Store register, unsigned immed +def : WriteRes { + let Latency = 1; + let NumMicroOps = 2; +} + +// Store register, immed post-index +// NOTE: Handled by WriteAdr, WriteST, ReadAdrBase + +// Store register, immed pre-index +// NOTE: Handled by WriteAdr, WriteST + +// Store register, register offset, basic +// Store register, register offset, scaled by 4/8 +// Store register, register offset, scaled by 2 +// Store register, register offset, extend +// Store register, register offset, extend, scale by 4/8 +// Store register, register offset, extend, scale by 1 +def : WriteRes { + let Latency = 1; + let NumMicroOps = 3; +} + +// Store pair, immed offset, W-form +// Store pair, immed offset, X-form +def : WriteRes { + let Latency = 1; + let NumMicroOps = 2; +} + +// Store pair, immed post-index, W-form +// Store pair, immed post-index, X-form +// Store pair, immed pre-index, W-form +// Store pair, immed pre-index, X-form +// NOTE: Handled by WriteAdr, WriteSTP. 
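
The WriteRes overrides in the store block above appear with their angle-bracket parameter lists dropped; in full TableGen, each override names the SchedWrite type it binds (here WriteST, WriteSTIdx and WriteSTP, going by the surrounding comments) and the processor resources it occupies. A minimal sketch of the expected shape, with the resource group assumed for illustration rather than taken from this patch:

    // Sketch only: the bound write type and resource group are assumptions.
    def : WriteRes<WriteST, [THX2T99LS01]> {
      let Latency = 1;
      let NumMicroOps = 2;
    }

The NOTE lines mean the pre/post-index and paired store forms need no dedicated resource definition of their own: they are modelled by composing WriteAdr with WriteST or WriteSTP (plus ReadAdrBase for the post-index base register).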
+ +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURBi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURBBi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURDi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURHi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURHHi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURQi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURSi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURWi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURXi)>; + +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRBi)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRHi)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRWi)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRXi)>; + +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPDi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPQi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPXi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPWi)>; + +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPDi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPQi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPXi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPWi)>; + +def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRBui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRBui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRDui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRDui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRHui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRHui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRQui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRQui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRXui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRXui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRWui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRWui)>; + +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STPDpre, STPDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STPDpre, STPDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STPDpre, STPDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STPDpre, STPDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STPQpre, STPQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STPQpre, STPQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STPQpre, STPQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STPQpre, STPQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STPSpre, STPSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STPSpre, STPSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STPSpre, STPSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STPSpre, STPSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STPWpre, STPWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STPWpre, STPWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STPWpre, STPWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs 
STPWpre, STPWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STPXpre, STPXpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STPXpre, STPXpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STPXpre, STPXpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STPXpre, STPXpost)>; + +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRBpre, STRBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRBpre, STRBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRBpre, STRBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRBpre, STRBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRBBpre, STRBBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRBBpre, STRBBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRBBpre, STRBBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRBBpre, STRBBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRDpre, STRDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRDpre, STRDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRDpre, STRDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRDpre, STRDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRHpre, STRHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRHpre, STRHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRHpre, STRHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRHpre, STRHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRHHpre, STRHHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRHHpre, STRHHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRHHpre, STRHHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRHHpre, STRHHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRQpre, STRQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRQpre, STRQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRQpre, STRQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRQpre, STRQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRSpre, STRSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRSpre, STRSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRSpre, STRSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRSpre, STRSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRWpre, STRWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRWpre, STRWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRWpre, STRWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRWpre, STRWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRXpre, STRXpost)>; +def : 
InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRXpre, STRXpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRXpre, STRXpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRXpre, STRXpost)>; + +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRBroW, STRBroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRBroW, STRBroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRBBroW, STRBBroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRBBroW, STRBBroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRDroW, STRDroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRDroW, STRDroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRHroW, STRHroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRHroW, STRHroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRHHroW, STRHHroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRHHroW, STRHHroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRQroW, STRQroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRQroW, STRQroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRSroW, STRSroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRSroW, STRSroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRWroW, STRWroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRWroW, STRWroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRXroW, STRXroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRXroW, STRXroX)>; + +//--- +// 3.8 FP Data Processing Instructions +//--- + +// FP absolute value +// FP min/max +// FP negate +def : WriteRes { + let Latency = 5; + let NumMicroOps = 2; +} + +// FP arithmetic +def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADD", "^FSUB")>; + +// FP compare +def : WriteRes { + let Latency = 5; + let NumMicroOps = 2; +} + +// FP Mul, Div, Sqrt +def : WriteRes { + let Latency = 22; + let ResourceCycles = [19]; +} + +def THX2T99XWriteFDiv : SchedWriteRes<[THX2T99F01]> { + let Latency = 16; + let ResourceCycles = [8]; + let NumMicroOps = 4; +} + +def THX2T99XWriteFDivSP : SchedWriteRes<[THX2T99F01]> { + let Latency = 16; + let ResourceCycles = [8]; + let NumMicroOps = 4; +} + +def THX2T99XWriteFDivDP : SchedWriteRes<[THX2T99F01]> { + let Latency = 23; + let ResourceCycles = [12]; + let NumMicroOps = 4; +} + +def THX2T99XWriteFSqrtSP : SchedWriteRes<[THX2T99F01]> { + let Latency = 16; + let ResourceCycles = [8]; + let NumMicroOps = 4; +} + +def THX2T99XWriteFSqrtDP : SchedWriteRes<[THX2T99F01]> { + let Latency = 23; + let ResourceCycles = [12]; + let NumMicroOps = 4; +} + +// FP divide, S-form +// FP square root, S-form +def : InstRW<[THX2T99XWriteFDivSP], (instrs FDIVSrr)>; +def : InstRW<[THX2T99XWriteFSqrtSP], (instrs FSQRTSr)>; +def : InstRW<[THX2T99XWriteFDivSP], (instregex "^FDIVv.*32$")>; +def : InstRW<[THX2T99XWriteFSqrtSP], (instregex "^.*SQRT.*32$")>; +def : InstRW<[THX2T99Write_16Cyc_F01], 
(instregex "^FDIVSrr", "^FSQRTSr")>; + +// FP divide, D-form +// FP square root, D-form +def : InstRW<[THX2T99XWriteFDivDP], (instrs FDIVDrr)>; +def : InstRW<[THX2T99XWriteFSqrtDP], (instrs FSQRTDr)>; +def : InstRW<[THX2T99XWriteFDivDP], (instregex "^FDIVv.*64$")>; +def : InstRW<[THX2T99XWriteFSqrtDP], (instregex "^.*SQRT.*64$")>; +def : InstRW<[THX2T99Write_23Cyc_F01], (instregex "^FDIVDrr", "^FSQRTDr")>; + +// FP multiply +// FP multiply accumulate +def : WriteRes { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 3; +} + +def THX2T99XWriteFMul : SchedWriteRes<[THX2T99F01]> { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 3; +} + +def THX2T99XWriteFMulAcc : SchedWriteRes<[THX2T99F01]> { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 3; +} + +def : InstRW<[THX2T99XWriteFMul], (instregex "^FMUL", "^FNMUL")>; +def : InstRW<[THX2T99XWriteFMulAcc], + (instregex "^FMADD", "^FMSUB", "^FNMADD", "^FNMSUB")>; + +// FP round to integral +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>; + +// FP select +def : InstRW<[THX2T99Write_4Cyc_F01], (instregex "^FCSEL")>; + +//--- +// 3.9 FP Miscellaneous Instructions +//--- + +// FP convert, from vec to vec reg +// FP convert, from gen to vec reg +// FP convert, from vec to gen reg +def : WriteRes { + let Latency = 7; + let NumMicroOps = 3; +} + +// FP move, immed +// FP move, register +def : WriteRes { + let Latency = 4; + let NumMicroOps = 2; +} + +// FP transfer, from gen to vec reg +// FP transfer, from vec to gen reg +def : WriteRes { + let Latency = 4; + let NumMicroOps = 2; +} + +def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>; + +//--- +// 3.12 ASIMD Integer Instructions +//--- + +// ASIMD absolute diff, D-form +// ASIMD absolute diff, Q-form +// ASIMD absolute diff accum, D-form +// ASIMD absolute diff accum, Q-form +// ASIMD absolute diff accum long +// ASIMD absolute diff long +// ASIMD arith, basic +// ASIMD arith, complex +// ASIMD compare +// ASIMD logical (AND, BIC, EOR) +// ASIMD max/min, basic +// ASIMD max/min, reduce, 4H/4S +// ASIMD max/min, reduce, 8B/8H +// ASIMD max/min, reduce, 16B +// ASIMD multiply, D-form +// ASIMD multiply, Q-form +// ASIMD multiply accumulate long +// ASIMD multiply accumulate saturating long +// ASIMD multiply long +// ASIMD pairwise add and accumulate +// ASIMD shift accumulate +// ASIMD shift by immed, basic +// ASIMD shift by immed and insert, basic, D-form +// ASIMD shift by immed and insert, basic, Q-form +// ASIMD shift by immed, complex +// ASIMD shift by register, basic, D-form +// ASIMD shift by register, basic, Q-form +// ASIMD shift by register, complex, D-form +// ASIMD shift by register, complex, Q-form +def : WriteRes { + let Latency = 7; + let NumMicroOps = 4; + let ResourceCycles = [4]; +} + +// ASIMD arith, reduce, 4H/4S +// ASIMD arith, reduce, 8B/8H +// ASIMD arith, reduce, 16B + +// ASIMD logical (MVN (alias for NOT), ORN, ORR) +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>; + +// ASIMD arith, reduce +def : InstRW<[THX2T99Write_10Cyc_F01], + (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>; + +// ASIMD polynomial (8x8) multiply long +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^(S|U|SQD)MULL")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>; +def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^PMULL(v8i8|v16i8)")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex 
"^PMULL(v1i64|v2i64)")>; + +// ASIMD absolute diff accum, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>; +// ASIMD absolute diff accum, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>; +// ASIMD absolute diff accum long +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU]ABAL")>; +// ASIMD arith, reduce, 4H/4S +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>; +// ASIMD arith, reduce, 8B +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>; +// ASIMD arith, reduce, 16B/16H +def : InstRW<[THX2T99Write_10Cyc_F01], + (instregex "^[SU]?ADDL?Vv16i8v$")>; +// ASIMD max/min, reduce, 4H/4S +def : InstRW<[THX2T99Write_10Cyc_F01], + (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>; +// ASIMD max/min, reduce, 8B/8H +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>; +// ASIMD max/min, reduce, 16B/16H +def : InstRW<[THX2T99Write_10Cyc_F01], + (instregex "^[SU](MIN|MAX)Vv16i8v$")>; +// ASIMD multiply, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^(P?MUL|SQR?DMULH)" # + "(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)" # + "(_indexed)?$")>; +// ASIMD multiply, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^(P?MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>; +// ASIMD multiply accumulate, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>; +// ASIMD multiply accumulate, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>; +// ASIMD shift accumulate +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "SRSRAv","SSRAv","URSRAv","USRAv")>; + +// ASIMD shift by immed, basic +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "RSHRNv","SHRNv", "SQRSHRNv","SQRSHRUNv", + "SQSHRNv","SQSHRUNv", "UQRSHRNv", + "UQSHRNv","SQXTNv","SQXTUNv","UQXTNv")>; +// ASIMD shift by immed, complex +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^[SU]?(Q|R){1,2}SHR")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SQSHLU")>; +// ASIMD shift by register, basic, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>; +// ASIMD shift by register, complex, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU][QR]{1,2}SHL" # + "(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>; +// ASIMD shift by register, complex, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>; + +// ASIMD Arithmetic +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "(ADD|SUB)HNv.*")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "(RADD|RSUB)HNv.*")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^SQADD", "^SQNEG", "^SQSUB", "^SRHADD", + "^SUQADD", "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>; +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|" # + "(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>; +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SADALP","^UADALP")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex 
"^SADDLPv","^UADDLPv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SADDLV","^UADDLV")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^ADDVv","^SMAXVv","^UMAXVv","^SMINVv","^UMINVv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^SABAv","^UABAv","^SABALv","^UABALv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^SQADDv","^SQSUBv","^UQADDv","^UQSUBv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SUQADDv","^USQADDv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^ADDHNv","^RADDHNv", "^RSUBHNv", + "^SQABS", "^SQADD", "^SQNEG", "^SQSUB", + "^SRHADD", "^SUBHNv", "^SUQADD", + "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^CMEQv","^CMGEv","^CMGTv", + "^CMLEv","^CMLTv", "^CMHIv","^CMHSv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^SMAXv","^SMINv","^UMAXv","^UMINv", + "^SMAXPv","^SMINPv","^UMAXPv","^UMINPv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^SABDv","^UABDv", "^SABDLv","^UABDLv")>; + +//--- +// 3.13 ASIMD Floating-point Instructions +//--- + +// ASIMD FP absolute value +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FABSv")>; + +// ASIMD FP arith, normal, D-form +// ASIMD FP arith, normal, Q-form +def : InstRW<[THX2T99Write_6Cyc_F01], + (instregex "^FABDv", "^FADDv", "^FSUBv")>; + +// ASIMD FP arith,pairwise, D-form +// ASIMD FP arith, pairwise, Q-form +def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADDPv")>; + +// ASIMD FP compare, D-form +// ASIMD FP compare, Q-form +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FACGEv", "^FACGTv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv", + "^FCMGTv", "^FCMLEv", + "^FCMLTv")>; + +// ASIMD FP round, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^FRINT[AIMNPXZ](v2f32)")>; +// ASIMD FP round, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>; + +// ASIMD FP convert, long +// ASIMD FP convert, narrow +// ASIMD FP convert, other, D-form +// ASIMD FP convert, other, Q-form +// NOTE: Handled by WriteV. 
+ +// ASIMD FP convert, long and narrow +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^FCVT(L|N|XN)v")>; +// ASIMD FP convert, other, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>; +// ASIMD FP convert, other, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>; + +// ASIMD FP divide, D-form, F32 +def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv2f32)>; +def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "FDIVv2f32")>; + +// ASIMD FP divide, Q-form, F32 +def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv4f32)>; +def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "FDIVv4f32")>; + +// ASIMD FP divide, Q-form, F64 +def : InstRW<[THX2T99Write_23Cyc_F01], (instrs FDIVv2f64)>; +def : InstRW<[THX2T99Write_23Cyc_F01], (instregex "FDIVv2f64")>; + +// ASIMD FP max/min, normal, D-form +// ASIMD FP max/min, normal, Q-form +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXv", "^FMAXNMv", + "^FMINv", "^FMINNMv")>; + +// ASIMD FP max/min, pairwise, D-form +// ASIMD FP max/min, pairwise, Q-form +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXPv", "^FMAXNMPv", + "^FMINPv", "^FMINNMPv")>; + +// ASIMD FP max/min, reduce +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXVv", "^FMAXNMVv", + "^FMINVv", "^FMINNMVv")>; + +// ASIMD FP multiply, D-form, FZ +// ASIMD FP multiply, D-form, no FZ +// ASIMD FP multiply, Q-form, FZ +// ASIMD FP multiply, Q-form, no FZ +def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMULv", "^FMULXv")>; +def : InstRW<[THX2T99Write_6Cyc_F01], + (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>; +def : InstRW<[THX2T99Write_6Cyc_F01], + (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>; + +// ASIMD FP multiply accumulate, Dform, FZ +// ASIMD FP multiply accumulate, Dform, no FZ +// ASIMD FP multiply accumulate, Qform, FZ +// ASIMD FP multiply accumulate, Qform, no FZ +def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMLAv", "^FMLSv")>; +def : InstRW<[THX2T99Write_6Cyc_F01], + (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>; +def : InstRW<[THX2T99Write_6Cyc_F01], + (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>; + +// ASIMD FP negate +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FNEGv")>; + +//-- +// 3.14 ASIMD Miscellaneous Instructions +//-- + +// ASIMD bit reverse +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^RBITv")>; + +// ASIMD bitwise insert, D-form +// ASIMD bitwise insert, Q-form +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^BIFv", "^BITv", "^BSLv")>; + +// ASIMD count, D-form +// ASIMD count, Q-form +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^CLSv", "^CLZv", "^CNTv")>; + +// ASIMD duplicate, gen reg +// ASIMD duplicate, element +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^CPY")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv.+gpr")>; + +// ASIMD extract +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^EXTv")>; + +// ASIMD extract narrow +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^XTNv")>; + +// ASIMD extract narrow, saturating +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^SQXTNv", "^SQXTUNv", "^UQXTNv")>; + +// ASIMD insert, element to element +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>; + +// ASIMD transfer, element to gen reg +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^[SU]MOVv")>; + +// ASIMD move, integer immed +def : InstRW<[THX2T99Write_5Cyc_F01], 
(instregex "^MOVIv")>; + +// ASIMD move, FP immed +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMOVv")>; + +// ASIMD table lookup, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8One")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Two")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Three")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Four")>; + +// ASIMD table lookup, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8One")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Two")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Three")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Four")>; + +// ASIMD transpose +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^TRN1", "^TRN2")>; + +// ASIMD unzip/zip +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>; + +// ASIMD reciprocal estimate, D-form +// ASIMD reciprocal estimate, Q-form +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^FRECPEv", "^FRECPXv", "^URECPEv", + "^FRSQRTEv", "^URSQRTEv")>; + +// ASIMD reciprocal step, D-form, FZ +// ASIMD reciprocal step, D-form, no FZ +// ASIMD reciprocal step, Q-form, FZ +// ASIMD reciprocal step, Q-form, no FZ +def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FRECPSv", "^FRSQRTSv")>; + +// ASIMD reverse +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^REV16v", "^REV32v", "^REV64v")>; + +// ASIMD table lookup, D-form +// ASIMD table lookup, Q-form +def : InstRW<[THX2T99Write_8Cyc_F01], (instregex "^TBLv", "^TBXv")>; + +// ASIMD transfer, element to word or word +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^[SU]MOVv")>; + +// ASIMD transfer, element to gen reg +def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "(S|U)MOVv.*")>; + +// ASIMD transfer gen reg to element +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>; + +// ASIMD transpose +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^TRN1v", "^TRN2v", + "^UZP1v", "^UZP2v")>; + +// ASIMD unzip/zip +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^ZIP1v", "^ZIP2v")>; + +//-- +// 3.15 ASIMD Load Instructions +//-- + +// ASIMD load, 1 element, multiple, 1 reg, D-form +// ASIMD load, 1 element, multiple, 1 reg, Q-form +def : InstRW<[THX2T99Write_4Cyc_LS01], + (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr], + (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 1 element, multiple, 2 reg, D-form +// ASIMD load, 1 element, multiple, 2 reg, Q-form +def : InstRW<[THX2T99Write_4Cyc_LS01], + (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr], + (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 1 element, multiple, 3 reg, D-form +// ASIMD load, 1 element, multiple, 3 reg, Q-form +def : InstRW<[THX2T99Write_5Cyc_LS01], + (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_5Cyc_LS01, WriteAdr], + (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 1 element, multiple, 4 reg, D-form +// ASIMD load, 1 element, multiple, 4 reg, Q-form +def : InstRW<[THX2T99Write_6Cyc_LS01], + (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_6Cyc_LS01, WriteAdr], + (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 1 element, one lane, B/H/S +// ASIMD load, 1 element, one lane, D +def : 
InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD1i(8|16|32|64)$")>; +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], + (instregex "^LD1i(8|16|32|64)_POST$")>; + +// ASIMD load, 1 element, all lanes, D-form, B/H/S +// ASIMD load, 1 element, all lanes, D-form, D +// ASIMD load, 1 element, all lanes, Q-form +def : InstRW<[THX2T99Write_5Cyc_LS01_F01], + (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], + (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 2 element, multiple, D-form, B/H/S +// ASIMD load, 2 element, multiple, Q-form, D +def : InstRW<[THX2T99Write_5Cyc_LS01_F01], + (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], + (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 2 element, one lane, B/H +// ASIMD load, 2 element, one lane, S +// ASIMD load, 2 element, one lane, D +def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD2i(8|16|32|64)$")>; +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], + (instregex "^LD2i(8|16|32|64)_POST$")>; + +// ASIMD load, 2 element, all lanes, D-form, B/H/S +// ASIMD load, 2 element, all lanes, D-form, D +// ASIMD load, 2 element, all lanes, Q-form +def : InstRW<[THX2T99Write_5Cyc_LS01_F01], + (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], + (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 3 element, multiple, D-form, B/H/S +// ASIMD load, 3 element, multiple, Q-form, B/H/S +// ASIMD load, 3 element, multiple, Q-form, D +def : InstRW<[THX2T99Write_8Cyc_LS01_F01], + (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr], + (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 3 element, one lone, B/H +// ASIMD load, 3 element, one lane, S +// ASIMD load, 3 element, one lane, D +def : InstRW<[THX2T99Write_7Cyc_LS01_F01], (instregex "^LD3i(8|16|32|64)$")>; +def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr], + (instregex "^LD3i(8|16|32|64)_POST$")>; + +// ASIMD load, 3 element, all lanes, D-form, B/H/S +// ASIMD load, 3 element, all lanes, D-form, D +// ASIMD load, 3 element, all lanes, Q-form, B/H/S +// ASIMD load, 3 element, all lanes, Q-form, D +def : InstRW<[THX2T99Write_7Cyc_LS01_F01], + (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr], + (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 4 element, multiple, D-form, B/H/S +// ASIMD load, 4 element, multiple, Q-form, B/H/S +// ASIMD load, 4 element, multiple, Q-form, D +def : InstRW<[THX2T99Write_8Cyc_LS01_F01], + (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr], + (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 4 element, one lane, B/H +// ASIMD load, 4 element, one lane, S +// ASIMD load, 4 element, one lane, D +def : InstRW<[THX2T99Write_6Cyc_LS01_F01], (instregex "^LD4i(8|16|32|64)$")>; +def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr], + (instregex "^LD4i(8|16|32|64)_POST$")>; + +// ASIMD load, 4 element, all lanes, D-form, B/H/S +// ASIMD load, 4 element, all lanes, D-form, D +// ASIMD load, 4 element, all lanes, Q-form, B/H/S +// ASIMD load, 4 element, all lanes, Q-form, D +def : InstRW<[THX2T99Write_6Cyc_LS01_F01], + (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr], + 
(instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +//-- +// 3.16 ASIMD Store Instructions +//-- + +// ASIMD store, 1 element, multiple, 1 reg, D-form +// ASIMD store, 1 element, multiple, 1 reg, Q-form +def : InstRW<[THX2T99Write_1Cyc_LS01], + (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], + (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 1 element, multiple, 2 reg, D-form +// ASIMD store, 1 element, multiple, 2 reg, Q-form +def : InstRW<[THX2T99Write_1Cyc_LS01], + (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], + (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 1 element, multiple, 3 reg, D-form +// ASIMD store, 1 element, multiple, 3 reg, Q-form +def : InstRW<[THX2T99Write_1Cyc_LS01], + (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], + (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 1 element, multiple, 4 reg, D-form +// ASIMD store, 1 element, multiple, 4 reg, Q-form +def : InstRW<[THX2T99Write_1Cyc_LS01], + (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], + (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 1 element, one lane, B/H/S +// ASIMD store, 1 element, one lane, D +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], + (instregex "^ST1i(8|16|32|64)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], + (instregex "^ST1i(8|16|32|64)_POST$")>; + +// ASIMD store, 2 element, multiple, D-form, B/H/S +// ASIMD store, 2 element, multiple, Q-form, B/H/S +// ASIMD store, 2 element, multiple, Q-form, D +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], + (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], + (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 2 element, one lane, B/H/S +// ASIMD store, 2 element, one lane, D +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], + (instregex "^ST2i(8|16|32|64)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], + (instregex "^ST2i(8|16|32|64)_POST$")>; + +// ASIMD store, 3 element, multiple, D-form, B/H/S +// ASIMD store, 3 element, multiple, Q-form, B/H/S +// ASIMD store, 3 element, multiple, Q-form, D +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], + (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], + (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 3 element, one lane, B/H +// ASIMD store, 3 element, one lane, S +// ASIMD store, 3 element, one lane, D +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST3i(8|16|32|64)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], + (instregex "^ST3i(8|16|32|64)_POST$")>; + +// ASIMD store, 4 element, multiple, D-form, B/H/S +// ASIMD store, 4 element, multiple, Q-form, B/H/S +// ASIMD store, 4 element, multiple, Q-form, D +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], + (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], + (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 4 element, one lane, B/H +// ASIMD store, 4 element, one lane, S +// ASIMD store, 4 element, one lane, D +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST4i(8|16|32|64)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], + (instregex "^ST4i(8|16|32|64)_POST$")>; + 
+// V8.1a Atomics (LSE) +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs CASB, CASH, CASW, CASX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs CASAB, CASAH, CASAW, CASAX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs CASLB, CASLH, CASLW, CASLX)>; + +def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic], + (instrs CASALB, CASALH, CASALW, CASALX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs LDLARB, LDLARH, LDLARW, LDLARX)>; + +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs LDADDB, LDADDH, LDADDW, LDADDX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs LDADDAB, LDADDAH, LDADDAW, LDADDAX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs LDADDLB, LDADDLH, LDADDLW, LDADDLX)>; + +def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic], + (instrs LDADDALB, LDADDALH, LDADDALW, LDADDALX)>; + +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs LDCLRB, LDCLRH, LDCLRW, LDCLRX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs LDCLRAB, LDCLRAH, LDCLRAW, LDCLRAX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs LDCLRLB, LDCLRLH, LDCLRLW, LDCLRLX)>; + +def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic], + (instrs LDCLRALB, LDCLRALH, LDCLRALW, LDCLRALX)>; + +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs LDEORB, LDEORH, LDEORW, LDEORX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs LDEORAB, LDEORAH, LDEORAW, LDEORAX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs LDEORLB, LDEORLH, LDEORLW, LDEORLX)>; + +def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic], + (instrs LDEORALB, LDEORALH, LDEORALW, LDEORALX)>; + +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs LDSETB, LDSETH, LDSETW, LDSETX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs LDSETAB, LDSETAH, LDSETAW, LDSETAX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs LDSETLB, LDSETLH, LDSETLW, LDSETLX)>; + +def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic], + (instrs LDSETALB, LDSETALH, LDSETALW, LDSETALX)>; + +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs LDSMAXB, LDSMAXH, LDSMAXW, LDSMAXX, + LDSMAXAB, LDSMAXAH, LDSMAXAW, LDSMAXAX, + LDSMAXLB, LDSMAXLH, LDSMAXLW, LDSMAXLX, + LDSMAXALB, LDSMAXALH, LDSMAXALW, LDSMAXALX)>; + +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs LDSMINB, LDSMINH, LDSMINW, LDSMINX, + LDSMINAB, LDSMINAH, LDSMINAW, LDSMINAX, + LDSMINLB, LDSMINLH, LDSMINLW, LDSMINLX, + LDSMINALB, LDSMINALH, LDSMINALW, LDSMINALX)>; + +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs LDUMAXB, LDUMAXH, LDUMAXW, LDUMAXX, + LDUMAXAB, LDUMAXAH, LDUMAXAW, LDUMAXAX, + LDUMAXLB, LDUMAXLH, LDUMAXLW, LDUMAXLX, + LDUMAXALB, LDUMAXALH, LDUMAXALW, LDUMAXALX)>; + +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs LDUMINB, LDUMINH, LDUMINW, LDUMINX, + LDUMINAB, LDUMINAH, LDUMINAW, LDUMINAX, + LDUMINLB, LDUMINLH, LDUMINLW, LDUMINLX, + LDUMINALB, LDUMINALH, LDUMINALW, LDUMINALX)>; + +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs SWPB, SWPH, SWPW, SWPX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs SWPAB, SWPAH, SWPAW, SWPAX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs SWPLB, SWPLH, SWPLW, SWPLX)>; + +def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic], + (instrs SWPALB, SWPALH, SWPALW, SWPALX)>; + +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs STLLRB, 
STLLRH, STLLRW, STLLRX)>;
+
+} // SchedModel = ThunderX2T99Model
+
diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64Schedule.td b/capstone/suite/synctools/tablegen/AArch64/AArch64Schedule.td
new file mode 100644
index 000000000..ce81f48ac
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/AArch64/AArch64Schedule.td
@@ -0,0 +1,106 @@
+//==-- AArch64Schedule.td - AArch64 Scheduling Definitions -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Define TII for use in SchedVariant Predicates.
+// const MachineInstr *MI and const TargetSchedModel *SchedModel
+// are defined by default.
+def : PredicateProlog<[{
+  const AArch64InstrInfo *TII =
+      static_cast<const AArch64InstrInfo *>(SchedModel->getInstrInfo());
+  (void)TII;
+}]>;
+
+// AArch64 Scheduler Definitions
+
+def WriteImm : SchedWrite; // MOVN, MOVZ
+// TODO: Provide variants for MOV32/64imm Pseudos that dynamically
+// select the correct sequence of WriteImms.
+
+def WriteI : SchedWrite; // ALU
+def WriteISReg : SchedWrite; // ALU of Shifted-Reg
+def WriteIEReg : SchedWrite; // ALU of Extended-Reg
+def ReadI : SchedRead; // ALU
+def ReadISReg : SchedRead; // ALU of Shifted-Reg
+def ReadIEReg : SchedRead; // ALU of Extended-Reg
+def WriteExtr : SchedWrite; // EXTR shifts a reg pair
+def ReadExtrHi : SchedRead; // Read the high reg of the EXTR pair
+def WriteIS : SchedWrite; // Shift/Scale
+def WriteID32 : SchedWrite; // 32-bit Divide
+def WriteID64 : SchedWrite; // 64-bit Divide
+def ReadID : SchedRead; // 32/64-bit Divide
+def WriteIM32 : SchedWrite; // 32-bit Multiply
+def WriteIM64 : SchedWrite; // 64-bit Multiply
+def ReadIM : SchedRead; // 32/64-bit Multiply
+def ReadIMA : SchedRead; // 32/64-bit Multiply Accumulate
+def WriteBr : SchedWrite; // Branch
+def WriteBrReg : SchedWrite; // Indirect Branch
+
+def WriteLD : SchedWrite; // Load from base addr plus immediate offset
+def WriteST : SchedWrite; // Store to base addr plus immediate offset
+def WriteSTP : SchedWrite; // Store a register pair.
+def WriteAdr : SchedWrite; // Address pre/post increment.
+
+def WriteLDIdx : SchedWrite; // Load from a register index (maybe scaled).
+def WriteSTIdx : SchedWrite; // Store to a register index (maybe scaled).
+def ReadAdrBase : SchedRead; // Read the base register of a reg-offset LD/ST.
+
+// Predicate for determining when a shiftable register is shifted.
+def RegShiftedPred : SchedPredicate<[{TII->hasShiftedReg(*MI)}]>;
+
+// Predicate for determining when an extendable register is extended.
+def RegExtendedPred : SchedPredicate<[{TII->hasExtendedReg(*MI)}]>;
+
+// ScaledIdxPred is true if a WriteLDIdx operand will be
+// scaled. Subtargets can use this to dynamically select resources and
+// latency for WriteLDIdx and ReadAdrBase.
+def ScaledIdxPred : SchedPredicate<[{TII->isScaledAddr(*MI)}]>;
+
+// Serialized two-level address load.
+// EXAMPLE: LOADGot
+def WriteLDAdr : WriteSequence<[WriteAdr, WriteLD]>;
+
+// Serialized two-level address lookup.
+// EXAMPLE: MOVaddr...
+def WriteAdrAdr : WriteSequence<[WriteAdr, WriteAdr]>;
+
+// The second register of a load-pair.
+// LDP,LDPSW,LDNP,LDXP,LDAXP
+def WriteLDHi : SchedWrite;
+
+// Store-exclusive is a store followed by a dependent load.
+def WriteSTX : WriteSequence<[WriteST, WriteLD]>;
+
+def WriteSys : SchedWrite; // Long, variable latency system ops.
+def WriteBarrier : SchedWrite; // Memory barrier.
+def WriteHint : SchedWrite; // Hint instruction.
+
+def WriteF : SchedWrite; // General floating-point ops.
+def WriteFCmp : SchedWrite; // Floating-point compare.
+def WriteFCvt : SchedWrite; // Float conversion.
+def WriteFCopy : SchedWrite; // Float-int register copy.
+def WriteFImm : SchedWrite; // Floating-point immediate.
+def WriteFMul : SchedWrite; // Floating-point multiply.
+def WriteFDiv : SchedWrite; // Floating-point division.
+
+def WriteV : SchedWrite; // Vector ops.
+def WriteVLD : SchedWrite; // Vector loads.
+def WriteVST : SchedWrite; // Vector stores.
+
+def WriteAtomic : SchedWrite; // Atomic memory operations (CAS, Swap, LDOP)
+
+// Read the unwritten lanes of the VLD's destination registers.
+def ReadVLD : SchedRead;
+
+// Sequential vector load and shuffle.
+def WriteVLDShuffle : WriteSequence<[WriteVLD, WriteV]>;
+def WriteVLDPairShuffle : WriteSequence<[WriteVLD, WriteV, WriteV]>;
+
+// Store a shuffled vector.
+def WriteVSTShuffle : WriteSequence<[WriteV, WriteVST]>;
+def WriteVSTPairShuffle : WriteSequence<[WriteV, WriteV, WriteVST]>;
diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SystemOperands.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SystemOperands.td
new file mode 100644
index 000000000..dbc4deaf3
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SystemOperands.td
@@ -0,0 +1,1332 @@
+//===- AArch64SystemOperands.td ----------------------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the symbolic operands permitted for various kinds of
+// AArch64 system instruction.
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/TableGen/SearchableTable.td"
+
+//===----------------------------------------------------------------------===//
+// AT (address translate) instruction options.
+//===----------------------------------------------------------------------===//
+
+class AT<string name, bits<3> op1, bits<4> crn, bits<4> crm,
+         bits<3> op2> : SearchableTable {
+  let SearchableFields = ["Name", "Encoding"];
+  let EnumValueField = "Encoding";
+
+  string Name = name;
+  bits<14> Encoding;
+  let Encoding{13-11} = op1;
+  let Encoding{10-7} = crn;
+  let Encoding{6-3} = crm;
+  let Encoding{2-0} = op2;
+  code Requires = [{ {} }];
+}
+
+def : AT<"S1E1R", 0b000, 0b0111, 0b1000, 0b000>;
+def : AT<"S1E2R", 0b100, 0b0111, 0b1000, 0b000>;
+def : AT<"S1E3R", 0b110, 0b0111, 0b1000, 0b000>;
+def : AT<"S1E1W", 0b000, 0b0111, 0b1000, 0b001>;
+def : AT<"S1E2W", 0b100, 0b0111, 0b1000, 0b001>;
+def : AT<"S1E3W", 0b110, 0b0111, 0b1000, 0b001>;
+def : AT<"S1E0R", 0b000, 0b0111, 0b1000, 0b010>;
+def : AT<"S1E0W", 0b000, 0b0111, 0b1000, 0b011>;
+def : AT<"S12E1R", 0b100, 0b0111, 0b1000, 0b100>;
+def : AT<"S12E1W", 0b100, 0b0111, 0b1000, 0b101>;
+def : AT<"S12E0R", 0b100, 0b0111, 0b1000, 0b110>;
+def : AT<"S12E0W", 0b100, 0b0111, 0b1000, 0b111>;
+
+let Requires = [{ {AArch64::HasV8_2aOps} }] in {
+def : AT<"S1E1RP", 0b000, 0b0111, 0b1001, 0b000>;
+def : AT<"S1E1WP", 0b000, 0b0111, 0b1001, 0b001>;
+}
+
+//===----------------------------------------------------------------------===//
+// DMB/DSB (data barrier) instruction options.
+//===----------------------------------------------------------------------===//
+
+class DB<string name, bits<4> encoding> : SearchableTable {
+  let SearchableFields = ["Name", "Encoding"];
+  let EnumValueField = "Encoding";
+
+  string Name = name;
+  bits<4> Encoding = encoding;
+}
+
+def : DB<"oshld", 0x1>;
+def : DB<"oshst", 0x2>;
+def : DB<"osh", 0x3>;
+def : DB<"nshld", 0x5>;
+def : DB<"nshst", 0x6>;
+def : DB<"nsh", 0x7>;
+def : DB<"ishld", 0x9>;
+def : DB<"ishst", 0xa>;
+def : DB<"ish", 0xb>;
+def : DB<"ld", 0xd>;
+def : DB<"st", 0xe>;
+def : DB<"sy", 0xf>;
+
+//===----------------------------------------------------------------------===//
+// DC (data cache maintenance) instruction options.
+//===----------------------------------------------------------------------===//
+
+class DC<string name, bits<3> op1, bits<4> crn, bits<4> crm,
+         bits<3> op2> : SearchableTable {
+  let SearchableFields = ["Name", "Encoding"];
+  let EnumValueField = "Encoding";
+
+  string Name = name;
+  bits<14> Encoding;
+  let Encoding{13-11} = op1;
+  let Encoding{10-7} = crn;
+  let Encoding{6-3} = crm;
+  let Encoding{2-0} = op2;
+  code Requires = [{ {} }];
+}
+
+def : DC<"ZVA", 0b011, 0b0111, 0b0100, 0b001>;
+def : DC<"IVAC", 0b000, 0b0111, 0b0110, 0b001>;
+def : DC<"ISW", 0b000, 0b0111, 0b0110, 0b010>;
+def : DC<"CVAC", 0b011, 0b0111, 0b1010, 0b001>;
+def : DC<"CSW", 0b000, 0b0111, 0b1010, 0b010>;
+def : DC<"CVAU", 0b011, 0b0111, 0b1011, 0b001>;
+def : DC<"CIVAC", 0b011, 0b0111, 0b1110, 0b001>;
+def : DC<"CISW", 0b000, 0b0111, 0b1110, 0b010>;
+
+let Requires = [{ {AArch64::HasV8_2aOps} }] in
+def : DC<"CVAP", 0b011, 0b0111, 0b1100, 0b001>;
+
+//===----------------------------------------------------------------------===//
+// IC (instruction cache maintenance) instruction options.
+//===----------------------------------------------------------------------===//
+
+class IC<string name, bits<3> op1, bits<4> crn, bits<4> crm, bits<3> op2,
+         bit needsreg> : SearchableTable {
+  let SearchableFields = ["Name", "Encoding"];
+  let EnumValueField = "Encoding";
+
+  string Name = name;
+  bits<14> Encoding;
+  let Encoding{13-11} = op1;
+  let Encoding{10-7} = crn;
+  let Encoding{6-3} = crm;
+  let Encoding{2-0} = op2;
+  bit NeedsReg = needsreg;
+}
+
+def : IC<"IALLUIS", 0b000, 0b0111, 0b0001, 0b000, 0>;
+def : IC<"IALLU", 0b000, 0b0111, 0b0101, 0b000, 0>;
+def : IC<"IVAU", 0b011, 0b0111, 0b0101, 0b001, 1>;
+
+//===----------------------------------------------------------------------===//
+// ISB (instruction-fetch barrier) instruction options.
+//===----------------------------------------------------------------------===//
+
+class ISB<string name, bits<4> encoding> : SearchableTable {
+  let SearchableFields = ["Name", "Encoding"];
+  let EnumValueField = "Encoding";
+
+  string Name = name;
+  bits<4> Encoding;
+  let Encoding = encoding;
+}
+
+def : ISB<"sy", 0xf>;
+
+//===----------------------------------------------------------------------===//
+// TSB (Trace synchronization barrier) instruction options.
+//===----------------------------------------------------------------------===//
+
+class TSB<string name, bits<4> encoding> : SearchableTable {
+  let SearchableFields = ["Name", "Encoding"];
+  let EnumValueField = "Encoding";
+
+  string Name = name;
+  bits<4> Encoding;
+  let Encoding = encoding;
+
+  code Requires = [{ {AArch64::HasV8_4aOps} }];
+}
+
+def : TSB<"csync", 0>;
+
+//===----------------------------------------------------------------------===//
+// PRFM (prefetch) instruction options.
+//===----------------------------------------------------------------------===//
+
+class PRFM<string name, bits<5> encoding> : SearchableTable {
+  let SearchableFields = ["Name", "Encoding"];
+  let EnumValueField = "Encoding";
+
+  string Name = name;
+  bits<5> Encoding;
+  let Encoding = encoding;
+}
+
+def : PRFM<"pldl1keep", 0x00>;
+def : PRFM<"pldl1strm", 0x01>;
+def : PRFM<"pldl2keep", 0x02>;
+def : PRFM<"pldl2strm", 0x03>;
+def : PRFM<"pldl3keep", 0x04>;
+def : PRFM<"pldl3strm", 0x05>;
+def : PRFM<"plil1keep", 0x08>;
+def : PRFM<"plil1strm", 0x09>;
+def : PRFM<"plil2keep", 0x0a>;
+def : PRFM<"plil2strm", 0x0b>;
+def : PRFM<"plil3keep", 0x0c>;
+def : PRFM<"plil3strm", 0x0d>;
+def : PRFM<"pstl1keep", 0x10>;
+def : PRFM<"pstl1strm", 0x11>;
+def : PRFM<"pstl2keep", 0x12>;
+def : PRFM<"pstl2strm", 0x13>;
+def : PRFM<"pstl3keep", 0x14>;
+def : PRFM<"pstl3strm", 0x15>;
+
+//===----------------------------------------------------------------------===//
+// SVE Prefetch instruction options.
+//===----------------------------------------------------------------------===//
+
+class SVEPRFM<string name, bits<4> encoding> : SearchableTable {
+  let SearchableFields = ["Name", "Encoding"];
+  let EnumValueField = "Encoding";
+
+  string Name = name;
+  bits<4> Encoding;
+  let Encoding = encoding;
+  code Requires = [{ {} }];
+}
+
+let Requires = [{ {AArch64::FeatureSVE} }] in {
+def : SVEPRFM<"pldl1keep", 0x00>;
+def : SVEPRFM<"pldl1strm", 0x01>;
+def : SVEPRFM<"pldl2keep", 0x02>;
+def : SVEPRFM<"pldl2strm", 0x03>;
+def : SVEPRFM<"pldl3keep", 0x04>;
+def : SVEPRFM<"pldl3strm", 0x05>;
+def : SVEPRFM<"pstl1keep", 0x08>;
+def : SVEPRFM<"pstl1strm", 0x09>;
+def : SVEPRFM<"pstl2keep", 0x0a>;
+def : SVEPRFM<"pstl2strm", 0x0b>;
+def : SVEPRFM<"pstl3keep", 0x0c>;
+def : SVEPRFM<"pstl3strm", 0x0d>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Predicate patterns
+//===----------------------------------------------------------------------===//
+
+class SVEPREDPAT<string name, bits<5> encoding> : SearchableTable {
+  let SearchableFields = ["Name", "Encoding"];
+  let EnumValueField = "Encoding";
+
+  string Name = name;
+  bits<5> Encoding;
+  let Encoding = encoding;
+}
+
+def : SVEPREDPAT<"pow2", 0x00>;
+def : SVEPREDPAT<"vl1", 0x01>;
+def : SVEPREDPAT<"vl2", 0x02>;
+def : SVEPREDPAT<"vl3", 0x03>;
+def : SVEPREDPAT<"vl4", 0x04>;
+def : SVEPREDPAT<"vl5", 0x05>;
+def : SVEPREDPAT<"vl6", 0x06>;
+def : SVEPREDPAT<"vl7", 0x07>;
+def : SVEPREDPAT<"vl8", 0x08>;
+def : SVEPREDPAT<"vl16", 0x09>;
+def : SVEPREDPAT<"vl32", 0x0a>;
+def : SVEPREDPAT<"vl64", 0x0b>;
+def : SVEPREDPAT<"vl128", 0x0c>;
+def : SVEPREDPAT<"vl256", 0x0d>;
+def : SVEPREDPAT<"mul4", 0x1d>;
+def : SVEPREDPAT<"mul3", 0x1e>;
+def : SVEPREDPAT<"all", 0x1f>;
+
+//===----------------------------------------------------------------------===//
+// Exact FP Immediates.
+//
+// These definitions are used to create a lookup table with FP Immediates that
+// is used for a few instructions that only accept a limited set of exact FP
+// immediate values.
+//===----------------------------------------------------------------------===//
+// Exact FP Immediates.
+//
+// These definitions are used to create a lookup table with FP Immediates that
+// is used for a few instructions that only accept a limited set of exact FP
+// immediate values.
+//===----------------------------------------------------------------------===//
+class ExactFPImm<string name, string repr, bits<4> enum > : SearchableTable {
+  let SearchableFields = ["Enum", "Repr"];
+  let EnumValueField = "Enum";
+
+  string Name = name;
+  bits<4> Enum = enum;
+  string Repr = repr;
+}
+
+def : ExactFPImm<"zero", "0.0", 0x0>;
+def : ExactFPImm<"half", "0.5", 0x1>;
+def : ExactFPImm<"one", "1.0", 0x2>;
+def : ExactFPImm<"two", "2.0", 0x3>;
+
+//===----------------------------------------------------------------------===//
+// PState instruction options.
+//===----------------------------------------------------------------------===//
+
+class PState<string name, bits<5> encoding> : SearchableTable {
+  let SearchableFields = ["Name", "Encoding"];
+  let EnumValueField = "Encoding";
+
+  string Name = name;
+  bits<5> Encoding;
+  let Encoding = encoding;
+  code Requires = [{ {} }];
+}
+
+def : PState<"SPSel", 0b00101>;
+def : PState<"DAIFSet", 0b11110>;
+def : PState<"DAIFClr", 0b11111>;
+// v8.1a "Privileged Access Never" extension-specific PStates
+let Requires = [{ {AArch64::HasV8_1aOps} }] in
+def : PState<"PAN", 0b00100>;
+// v8.2a "User Access Override" extension-specific PStates
+let Requires = [{ {AArch64::HasV8_2aOps} }] in
+def : PState<"UAO", 0b00011>;
+// v8.4a timing insensitivity of data processing instructions
+let Requires = [{ {AArch64::HasV8_4aOps} }] in
+def : PState<"DIT", 0b11010>;
+
+//===----------------------------------------------------------------------===//
+// PSB instruction options.
+//===----------------------------------------------------------------------===//
+
+class PSB<string name, bits<5> encoding> : SearchableTable {
+  let SearchableFields = ["Name", "Encoding"];
+  let EnumValueField = "Encoding";
+
+  string Name = name;
+  bits<5> Encoding;
+  let Encoding = encoding;
+}
+
+def : PSB<"csync", 0x11>;
+
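Each SearchableTable class in this file ultimately becomes a flat name/encoding table that is searched by either field; for Capstone, the synctools scripts in this directory post-process the TableGen output into C. Purely to illustrate the shape of such a table, with a struct and function name of our own rather than the generated ones, the PState entries above might be looked up like this:

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* Hypothetical row shape for a name<->encoding operand table. */
    typedef struct {
        const char *name;
        uint16_t enc;
    } SysAliasEntry;

    /* Values taken from the PState defs above. */
    static const SysAliasEntry pstate_aliases[] = {
        { "SPSel", 0x05 }, { "DAIFSet", 0x1e }, { "DAIFClr", 0x1f },
    };

    /* Linear search by name; real generated tables may instead be sorted
     * and binary-searched. */
    static const SysAliasEntry *pstate_by_name(const char *name)
    {
        size_t n = sizeof(pstate_aliases) / sizeof(pstate_aliases[0]);
        for (size_t i = 0; i < n; i++)
            if (strcmp(pstate_aliases[i].name, name) == 0)
                return &pstate_aliases[i];
        return NULL;
    }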
+//===----------------------------------------------------------------------===//
+// TLBI (translation lookaside buffer invalidate) instruction options.
+//===----------------------------------------------------------------------===//
+
+class TLBI<string name, bits<3> op1, bits<4> crn, bits<4> crm,
+           bits<3> op2, bit needsreg = 1> : SearchableTable {
+  let SearchableFields = ["Name", "Encoding"];
+  let EnumValueField = "Encoding";
+
+  string Name = name;
+  bits<14> Encoding;
+  let Encoding{13-11} = op1;
+  let Encoding{10-7} = crn;
+  let Encoding{6-3} = crm;
+  let Encoding{2-0} = op2;
+  bit NeedsReg = needsreg;
+  code Requires = [{ {} }];
+}
+
+def : TLBI<"IPAS2E1IS", 0b100, 0b1000, 0b0000, 0b001>;
+def : TLBI<"IPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b101>;
+def : TLBI<"VMALLE1IS", 0b000, 0b1000, 0b0011, 0b000, 0>;
+def : TLBI<"ALLE2IS", 0b100, 0b1000, 0b0011, 0b000, 0>;
+def : TLBI<"ALLE3IS", 0b110, 0b1000, 0b0011, 0b000, 0>;
+def : TLBI<"VAE1IS", 0b000, 0b1000, 0b0011, 0b001>;
+def : TLBI<"VAE2IS", 0b100, 0b1000, 0b0011, 0b001>;
+def : TLBI<"VAE3IS", 0b110, 0b1000, 0b0011, 0b001>;
+def : TLBI<"ASIDE1IS", 0b000, 0b1000, 0b0011, 0b010>;
+def : TLBI<"VAAE1IS", 0b000, 0b1000, 0b0011, 0b011>;
+def : TLBI<"ALLE1IS", 0b100, 0b1000, 0b0011, 0b100, 0>;
+def : TLBI<"VALE1IS", 0b000, 0b1000, 0b0011, 0b101>;
+def : TLBI<"VALE2IS", 0b100, 0b1000, 0b0011, 0b101>;
+def : TLBI<"VALE3IS", 0b110, 0b1000, 0b0011, 0b101>;
+def : TLBI<"VMALLS12E1IS", 0b100, 0b1000, 0b0011, 0b110, 0>;
+def : TLBI<"VAALE1IS", 0b000, 0b1000, 0b0011, 0b111>;
+def : TLBI<"IPAS2E1", 0b100, 0b1000, 0b0100, 0b001>;
+def : TLBI<"IPAS2LE1", 0b100, 0b1000, 0b0100, 0b101>;
+def : TLBI<"VMALLE1", 0b000, 0b1000, 0b0111, 0b000, 0>;
+def : TLBI<"ALLE2", 0b100, 0b1000, 0b0111, 0b000, 0>;
+def : TLBI<"ALLE3", 0b110, 0b1000, 0b0111, 0b000, 0>;
+def : TLBI<"VAE1", 0b000, 0b1000, 0b0111, 0b001>;
+def : TLBI<"VAE2", 0b100, 0b1000, 0b0111, 0b001>;
+def : TLBI<"VAE3", 0b110, 0b1000, 0b0111, 0b001>;
+def : TLBI<"ASIDE1", 0b000, 0b1000, 0b0111, 0b010>;
+def : TLBI<"VAAE1", 0b000, 0b1000, 0b0111, 0b011>;
+def : TLBI<"ALLE1", 0b100, 0b1000, 0b0111, 0b100, 0>;
+def : TLBI<"VALE1", 0b000, 0b1000, 0b0111, 0b101>;
+def : TLBI<"VALE2", 0b100, 0b1000, 0b0111, 0b101>;
+def : TLBI<"VALE3", 0b110, 0b1000, 0b0111, 0b101>;
+def : TLBI<"VMALLS12E1", 0b100, 0b1000, 0b0111, 0b110, 0>;
+def : TLBI<"VAALE1", 0b000, 0b1000, 0b0111, 0b111>;
+
+// Armv8.4-A Outer Sharable TLB Maintenance instructions:
+let Requires = [{ {AArch64::HasV8_4aOps} }] in {
+// op1 CRn CRm op2
+def : TLBI<"VMALLE1OS", 0b000, 0b1000, 0b0001, 0b000, 0>;
+def : TLBI<"VAE1OS", 0b000, 0b1000, 0b0001, 0b001>;
+def : TLBI<"ASIDE1OS", 0b000, 0b1000, 0b0001, 0b010>;
+def : TLBI<"VAAE1OS", 0b000, 0b1000, 0b0001, 0b011>;
+def : TLBI<"VALE1OS", 0b000, 0b1000, 0b0001, 0b101>;
+def : TLBI<"VAALE1OS", 0b000, 0b1000, 0b0001, 0b111>;
+def : TLBI<"IPAS2E1OS", 0b100, 0b1000, 0b0100, 0b000>;
+def : TLBI<"IPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b100>;
+def : TLBI<"VAE2OS", 0b100, 0b1000, 0b0001, 0b001>;
+def : TLBI<"VALE2OS", 0b100, 0b1000, 0b0001, 0b101>;
+def : TLBI<"VMALLS12E1OS", 0b100, 0b1000, 0b0001, 0b110, 0>;
+def : TLBI<"VAE3OS", 0b110, 0b1000, 0b0001, 0b001>;
+def : TLBI<"VALE3OS", 0b110, 0b1000, 0b0001, 0b101>;
+def : TLBI<"ALLE2OS", 0b100, 0b1000, 0b0001, 0b000, 0>;
+def : TLBI<"ALLE1OS", 0b100, 0b1000, 0b0001, 0b100, 0>;
+def : TLBI<"ALLE3OS", 0b110, 0b1000, 0b0001, 0b000, 0>;
+
+// Armv8.4-A TLB Range Maintenance instructions:
+// op1 CRn CRm op2
+def : TLBI<"RVAE1", 0b000, 0b1000, 0b0110, 0b001>;
+def : TLBI<"RVAAE1", 0b000, 0b1000, 0b0110, 0b011>;
+def : TLBI<"RVALE1", 0b000, 0b1000, 0b0110, 0b101>;
+def : TLBI<"RVAALE1", 0b000, 0b1000, 0b0110, 0b111>;
+def : TLBI<"RVAE1IS", 0b000, 0b1000, 0b0010, 0b001>;
+def : TLBI<"RVAAE1IS", 0b000, 0b1000, 0b0010, 0b011>;
+def : TLBI<"RVALE1IS", 0b000, 0b1000, 0b0010, 0b101>;
+def : TLBI<"RVAALE1IS", 0b000, 0b1000, 0b0010, 0b111>;
+def : TLBI<"RVAE1OS", 0b000, 0b1000, 0b0101, 0b001>;
+def : TLBI<"RVAAE1OS", 0b000, 0b1000, 0b0101, 0b011>;
+def : TLBI<"RVALE1OS", 0b000, 0b1000, 0b0101, 0b101>;
+def : TLBI<"RVAALE1OS", 0b000, 0b1000, 0b0101, 0b111>;
+def : TLBI<"RIPAS2E1IS", 0b100, 0b1000, 0b0000, 0b010>;
+def : TLBI<"RIPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b110>;
+def : TLBI<"RIPAS2E1", 0b100, 0b1000, 0b0100, 0b010>;
+def : TLBI<"RIPAS2LE1", 0b100, 0b1000, 0b0100, 0b110>;
+def : TLBI<"RIPAS2E1OS", 0b100, 0b1000, 0b0100, 0b011>;
+def : TLBI<"RIPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b111>;
+def : TLBI<"RVAE2", 0b100, 0b1000, 0b0110, 0b001>;
+def : TLBI<"RVALE2", 0b100, 0b1000, 0b0110, 0b101>;
+def : TLBI<"RVAE2IS", 0b100, 0b1000, 0b0010, 0b001>;
+def : TLBI<"RVALE2IS", 0b100, 0b1000, 0b0010, 0b101>;
+def : TLBI<"RVAE2OS", 0b100, 0b1000, 0b0101, 0b001>;
+def : TLBI<"RVALE2OS", 0b100, 0b1000, 0b0101, 0b101>;
+def : TLBI<"RVAE3", 0b110, 0b1000, 0b0110, 0b001>;
+def : TLBI<"RVALE3", 0b110, 0b1000, 0b0110, 0b101>;
+def : TLBI<"RVAE3IS", 0b110, 0b1000, 0b0010, 0b001>;
+def : TLBI<"RVALE3IS", 0b110, 0b1000, 0b0010, 0b101>;
+def : TLBI<"RVAE3OS", 0b110, 0b1000, 0b0101, 0b001>;
+def : TLBI<"RVALE3OS", 0b110, 0b1000, 0b0101, 0b101>;
+}
+
+//===----------------------------------------------------------------------===//
+// MRS/MSR (system register read/write) instruction options.
+//===----------------------------------------------------------------------===//
+
+class SysReg<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm,
+             bits<3> op2> : SearchableTable {
+  let SearchableFields = ["Name", "Encoding"];
+  let EnumValueField = "Encoding";
+
+  string Name = name;
+  bits<16> Encoding;
+  let Encoding{15-14} = op0;
+  let Encoding{13-11} = op1;
+  let Encoding{10-7} = crn;
+  let Encoding{6-3} = crm;
+  let Encoding{2-0} = op2;
+  bit Readable = ?;
+  bit Writeable = ?;
+  code Requires = [{ {} }];
+}
+
+class RWSysReg<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm,
+               bits<3> op2>
+  : SysReg<name, op0, op1, crn, crm, op2> {
+  let Readable = 1;
+  let Writeable = 1;
+}
+
+class ROSysReg<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm,
+               bits<3> op2>
+  : SysReg<name, op0, op1, crn, crm, op2> {
+  let Readable = 1;
+  let Writeable = 0;
+}
+
+class WOSysReg<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm,
+               bits<3> op2>
+  : SysReg<name, op0, op1, crn, crm, op2> {
+  let Readable = 0;
+  let Writeable = 1;
+}
+
+//===----------------------
+// Read-only regs
+//===----------------------
+
+// Op0 Op1 CRn CRm Op2
+def : ROSysReg<"MDCCSR_EL0", 0b10, 0b011, 0b0000, 0b0001, 0b000>;
+def : ROSysReg<"DBGDTRRX_EL0", 0b10, 0b011, 0b0000, 0b0101, 0b000>;
+def : ROSysReg<"MDRAR_EL1", 0b10, 0b000, 0b0001, 0b0000, 0b000>;
+def : ROSysReg<"OSLSR_EL1", 0b10, 0b000, 0b0001, 0b0001, 0b100>;
+def : ROSysReg<"DBGAUTHSTATUS_EL1", 0b10, 0b000, 0b0111, 0b1110, 0b110>;
+def : ROSysReg<"PMCEID0_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b110>;
+def : ROSysReg<"PMCEID1_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b111>;
+def : ROSysReg<"MIDR_EL1", 0b11, 0b000, 0b0000, 0b0000, 0b000>;
+def : ROSysReg<"CCSIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b000>;
+def : ROSysReg<"CCSIDR2_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b010> {
+  let Requires = [{ {AArch64::HasV8_3aOps} }];
+}
+def : ROSysReg<"CLIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b001>;
+def : ROSysReg<"CTR_EL0", 0b11, 0b011, 0b0000, 0b0000, 0b001>;
+def : ROSysReg<"MPIDR_EL1", 0b11, 0b000, 0b0000, 0b0000, 0b101>;
+def : ROSysReg<"REVIDR_EL1", 0b11, 0b000, 0b0000,
0b0000, 0b110>; +def : ROSysReg<"AIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b111>; +def : ROSysReg<"DCZID_EL0", 0b11, 0b011, 0b0000, 0b0000, 0b111>; +def : ROSysReg<"ID_PFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b000>; +def : ROSysReg<"ID_PFR1_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b001>; +def : ROSysReg<"ID_DFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b010>; +def : ROSysReg<"ID_AFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b011>; +def : ROSysReg<"ID_MMFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b100>; +def : ROSysReg<"ID_MMFR1_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b101>; +def : ROSysReg<"ID_MMFR2_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b110>; +def : ROSysReg<"ID_MMFR3_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b111>; +def : ROSysReg<"ID_ISAR0_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b000>; +def : ROSysReg<"ID_ISAR1_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b001>; +def : ROSysReg<"ID_ISAR2_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b010>; +def : ROSysReg<"ID_ISAR3_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b011>; +def : ROSysReg<"ID_ISAR4_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b100>; +def : ROSysReg<"ID_ISAR5_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b101>; +def : ROSysReg<"ID_ISAR6_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b111> { + let Requires = [{ {AArch64::HasV8_2aOps} }]; +} +def : ROSysReg<"ID_AA64PFR0_EL1", 0b11, 0b000, 0b0000, 0b0100, 0b000>; +def : ROSysReg<"ID_AA64PFR1_EL1", 0b11, 0b000, 0b0000, 0b0100, 0b001>; +def : ROSysReg<"ID_AA64DFR0_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b000>; +def : ROSysReg<"ID_AA64DFR1_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b001>; +def : ROSysReg<"ID_AA64AFR0_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b100>; +def : ROSysReg<"ID_AA64AFR1_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b101>; +def : ROSysReg<"ID_AA64ISAR0_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b000>; +def : ROSysReg<"ID_AA64ISAR1_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b001>; +def : ROSysReg<"ID_AA64MMFR0_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b000>; +def : ROSysReg<"ID_AA64MMFR1_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b001>; +def : ROSysReg<"ID_AA64MMFR2_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b010> { + let Requires = [{ {AArch64::HasV8_2aOps} }]; +} +def : ROSysReg<"MVFR0_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b000>; +def : ROSysReg<"MVFR1_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b001>; +def : ROSysReg<"MVFR2_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b010>; +def : ROSysReg<"RVBAR_EL1", 0b11, 0b000, 0b1100, 0b0000, 0b001>; +def : ROSysReg<"RVBAR_EL2", 0b11, 0b100, 0b1100, 0b0000, 0b001>; +def : ROSysReg<"RVBAR_EL3", 0b11, 0b110, 0b1100, 0b0000, 0b001>; +def : ROSysReg<"ISR_EL1", 0b11, 0b000, 0b1100, 0b0001, 0b000>; +def : ROSysReg<"CNTPCT_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b001>; +def : ROSysReg<"CNTVCT_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b010>; +def : ROSysReg<"ID_MMFR4_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b110>; + +// Trace registers +// Op0 Op1 CRn CRm Op2 +def : ROSysReg<"TRCSTATR", 0b10, 0b001, 0b0000, 0b0011, 0b000>; +def : ROSysReg<"TRCIDR8", 0b10, 0b001, 0b0000, 0b0000, 0b110>; +def : ROSysReg<"TRCIDR9", 0b10, 0b001, 0b0000, 0b0001, 0b110>; +def : ROSysReg<"TRCIDR10", 0b10, 0b001, 0b0000, 0b0010, 0b110>; +def : ROSysReg<"TRCIDR11", 0b10, 0b001, 0b0000, 0b0011, 0b110>; +def : ROSysReg<"TRCIDR12", 0b10, 0b001, 0b0000, 0b0100, 0b110>; +def : ROSysReg<"TRCIDR13", 0b10, 0b001, 0b0000, 0b0101, 0b110>; +def : ROSysReg<"TRCIDR0", 0b10, 0b001, 0b0000, 0b1000, 0b111>; +def : ROSysReg<"TRCIDR1", 0b10, 0b001, 0b0000, 0b1001, 0b111>; +def : ROSysReg<"TRCIDR2", 0b10, 0b001, 0b0000, 0b1010, 0b111>; +def : ROSysReg<"TRCIDR3", 0b10, 0b001, 0b0000, 0b1011, 0b111>; +def : ROSysReg<"TRCIDR4", 0b10, 0b001, 0b0000, 0b1100, 
0b111>; +def : ROSysReg<"TRCIDR5", 0b10, 0b001, 0b0000, 0b1101, 0b111>; +def : ROSysReg<"TRCIDR6", 0b10, 0b001, 0b0000, 0b1110, 0b111>; +def : ROSysReg<"TRCIDR7", 0b10, 0b001, 0b0000, 0b1111, 0b111>; +def : ROSysReg<"TRCOSLSR", 0b10, 0b001, 0b0001, 0b0001, 0b100>; +def : ROSysReg<"TRCPDSR", 0b10, 0b001, 0b0001, 0b0101, 0b100>; +def : ROSysReg<"TRCDEVAFF0", 0b10, 0b001, 0b0111, 0b1010, 0b110>; +def : ROSysReg<"TRCDEVAFF1", 0b10, 0b001, 0b0111, 0b1011, 0b110>; +def : ROSysReg<"TRCLSR", 0b10, 0b001, 0b0111, 0b1101, 0b110>; +def : ROSysReg<"TRCAUTHSTATUS", 0b10, 0b001, 0b0111, 0b1110, 0b110>; +def : ROSysReg<"TRCDEVARCH", 0b10, 0b001, 0b0111, 0b1111, 0b110>; +def : ROSysReg<"TRCDEVID", 0b10, 0b001, 0b0111, 0b0010, 0b111>; +def : ROSysReg<"TRCDEVTYPE", 0b10, 0b001, 0b0111, 0b0011, 0b111>; +def : ROSysReg<"TRCPIDR4", 0b10, 0b001, 0b0111, 0b0100, 0b111>; +def : ROSysReg<"TRCPIDR5", 0b10, 0b001, 0b0111, 0b0101, 0b111>; +def : ROSysReg<"TRCPIDR6", 0b10, 0b001, 0b0111, 0b0110, 0b111>; +def : ROSysReg<"TRCPIDR7", 0b10, 0b001, 0b0111, 0b0111, 0b111>; +def : ROSysReg<"TRCPIDR0", 0b10, 0b001, 0b0111, 0b1000, 0b111>; +def : ROSysReg<"TRCPIDR1", 0b10, 0b001, 0b0111, 0b1001, 0b111>; +def : ROSysReg<"TRCPIDR2", 0b10, 0b001, 0b0111, 0b1010, 0b111>; +def : ROSysReg<"TRCPIDR3", 0b10, 0b001, 0b0111, 0b1011, 0b111>; +def : ROSysReg<"TRCCIDR0", 0b10, 0b001, 0b0111, 0b1100, 0b111>; +def : ROSysReg<"TRCCIDR1", 0b10, 0b001, 0b0111, 0b1101, 0b111>; +def : ROSysReg<"TRCCIDR2", 0b10, 0b001, 0b0111, 0b1110, 0b111>; +def : ROSysReg<"TRCCIDR3", 0b10, 0b001, 0b0111, 0b1111, 0b111>; + +// GICv3 registers +// Op0 Op1 CRn CRm Op2 +def : ROSysReg<"ICC_IAR1_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b000>; +def : ROSysReg<"ICC_IAR0_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b000>; +def : ROSysReg<"ICC_HPPIR1_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b010>; +def : ROSysReg<"ICC_HPPIR0_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b010>; +def : ROSysReg<"ICC_RPR_EL1", 0b11, 0b000, 0b1100, 0b1011, 0b011>; +def : ROSysReg<"ICH_VTR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b001>; +def : ROSysReg<"ICH_EISR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b011>; +def : ROSysReg<"ICH_ELRSR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b101>; + +// SVE control registers +// Op0 Op1 CRn CRm Op2 +let Requires = [{ {AArch64::FeatureSVE} }] in { +def : ROSysReg<"ID_AA64ZFR0_EL1", 0b11, 0b000, 0b0000, 0b0100, 0b100>; +} + +// v8.1a "Limited Ordering Regions" extension-specific system register +// Op0 Op1 CRn CRm Op2 +let Requires = [{ {AArch64::HasV8_1aOps} }] in +def : ROSysReg<"LORID_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b111>; + +// v8.2a "RAS extension" registers +// Op0 Op1 CRn CRm Op2 +let Requires = [{ {AArch64::FeatureRAS} }] in { +def : ROSysReg<"ERRIDR_EL1", 0b11, 0b000, 0b0101, 0b0011, 0b000>; +def : ROSysReg<"ERXFR_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b000>; +} + +//===---------------------- +// Write-only regs +//===---------------------- + +// Op0 Op1 CRn CRm Op2 +def : WOSysReg<"DBGDTRTX_EL0", 0b10, 0b011, 0b0000, 0b0101, 0b000>; +def : WOSysReg<"OSLAR_EL1", 0b10, 0b000, 0b0001, 0b0000, 0b100>; +def : WOSysReg<"PMSWINC_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b100>; + +// Trace Registers +// Op0 Op1 CRn CRm Op2 +def : WOSysReg<"TRCOSLAR", 0b10, 0b001, 0b0001, 0b0000, 0b100>; +def : WOSysReg<"TRCLAR", 0b10, 0b001, 0b0111, 0b1100, 0b110>; + +// GICv3 registers +// Op0 Op1 CRn CRm Op2 +def : WOSysReg<"ICC_EOIR1_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b001>; +def : WOSysReg<"ICC_EOIR0_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b001>; +def : WOSysReg<"ICC_DIR_EL1", 0b11, 0b000, 0b1100, 0b1011, 0b001>; +def : 
WOSysReg<"ICC_SGI1R_EL1", 0b11, 0b000, 0b1100, 0b1011, 0b101>; +def : WOSysReg<"ICC_ASGI1R_EL1", 0b11, 0b000, 0b1100, 0b1011, 0b110>; +def : WOSysReg<"ICC_SGI0R_EL1", 0b11, 0b000, 0b1100, 0b1011, 0b111>; + +//===---------------------- +// Read-write regs +//===---------------------- + +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"OSDTRRX_EL1", 0b10, 0b000, 0b0000, 0b0000, 0b010>; +def : RWSysReg<"OSDTRTX_EL1", 0b10, 0b000, 0b0000, 0b0011, 0b010>; +def : RWSysReg<"TEECR32_EL1", 0b10, 0b010, 0b0000, 0b0000, 0b000>; +def : RWSysReg<"MDCCINT_EL1", 0b10, 0b000, 0b0000, 0b0010, 0b000>; +def : RWSysReg<"MDSCR_EL1", 0b10, 0b000, 0b0000, 0b0010, 0b010>; +def : RWSysReg<"DBGDTR_EL0", 0b10, 0b011, 0b0000, 0b0100, 0b000>; +def : RWSysReg<"OSECCR_EL1", 0b10, 0b000, 0b0000, 0b0110, 0b010>; +def : RWSysReg<"DBGVCR32_EL2", 0b10, 0b100, 0b0000, 0b0111, 0b000>; +def : RWSysReg<"DBGBVR0_EL1", 0b10, 0b000, 0b0000, 0b0000, 0b100>; +def : RWSysReg<"DBGBVR1_EL1", 0b10, 0b000, 0b0000, 0b0001, 0b100>; +def : RWSysReg<"DBGBVR2_EL1", 0b10, 0b000, 0b0000, 0b0010, 0b100>; +def : RWSysReg<"DBGBVR3_EL1", 0b10, 0b000, 0b0000, 0b0011, 0b100>; +def : RWSysReg<"DBGBVR4_EL1", 0b10, 0b000, 0b0000, 0b0100, 0b100>; +def : RWSysReg<"DBGBVR5_EL1", 0b10, 0b000, 0b0000, 0b0101, 0b100>; +def : RWSysReg<"DBGBVR6_EL1", 0b10, 0b000, 0b0000, 0b0110, 0b100>; +def : RWSysReg<"DBGBVR7_EL1", 0b10, 0b000, 0b0000, 0b0111, 0b100>; +def : RWSysReg<"DBGBVR8_EL1", 0b10, 0b000, 0b0000, 0b1000, 0b100>; +def : RWSysReg<"DBGBVR9_EL1", 0b10, 0b000, 0b0000, 0b1001, 0b100>; +def : RWSysReg<"DBGBVR10_EL1", 0b10, 0b000, 0b0000, 0b1010, 0b100>; +def : RWSysReg<"DBGBVR11_EL1", 0b10, 0b000, 0b0000, 0b1011, 0b100>; +def : RWSysReg<"DBGBVR12_EL1", 0b10, 0b000, 0b0000, 0b1100, 0b100>; +def : RWSysReg<"DBGBVR13_EL1", 0b10, 0b000, 0b0000, 0b1101, 0b100>; +def : RWSysReg<"DBGBVR14_EL1", 0b10, 0b000, 0b0000, 0b1110, 0b100>; +def : RWSysReg<"DBGBVR15_EL1", 0b10, 0b000, 0b0000, 0b1111, 0b100>; +def : RWSysReg<"DBGBCR0_EL1", 0b10, 0b000, 0b0000, 0b0000, 0b101>; +def : RWSysReg<"DBGBCR1_EL1", 0b10, 0b000, 0b0000, 0b0001, 0b101>; +def : RWSysReg<"DBGBCR2_EL1", 0b10, 0b000, 0b0000, 0b0010, 0b101>; +def : RWSysReg<"DBGBCR3_EL1", 0b10, 0b000, 0b0000, 0b0011, 0b101>; +def : RWSysReg<"DBGBCR4_EL1", 0b10, 0b000, 0b0000, 0b0100, 0b101>; +def : RWSysReg<"DBGBCR5_EL1", 0b10, 0b000, 0b0000, 0b0101, 0b101>; +def : RWSysReg<"DBGBCR6_EL1", 0b10, 0b000, 0b0000, 0b0110, 0b101>; +def : RWSysReg<"DBGBCR7_EL1", 0b10, 0b000, 0b0000, 0b0111, 0b101>; +def : RWSysReg<"DBGBCR8_EL1", 0b10, 0b000, 0b0000, 0b1000, 0b101>; +def : RWSysReg<"DBGBCR9_EL1", 0b10, 0b000, 0b0000, 0b1001, 0b101>; +def : RWSysReg<"DBGBCR10_EL1", 0b10, 0b000, 0b0000, 0b1010, 0b101>; +def : RWSysReg<"DBGBCR11_EL1", 0b10, 0b000, 0b0000, 0b1011, 0b101>; +def : RWSysReg<"DBGBCR12_EL1", 0b10, 0b000, 0b0000, 0b1100, 0b101>; +def : RWSysReg<"DBGBCR13_EL1", 0b10, 0b000, 0b0000, 0b1101, 0b101>; +def : RWSysReg<"DBGBCR14_EL1", 0b10, 0b000, 0b0000, 0b1110, 0b101>; +def : RWSysReg<"DBGBCR15_EL1", 0b10, 0b000, 0b0000, 0b1111, 0b101>; +def : RWSysReg<"DBGWVR0_EL1", 0b10, 0b000, 0b0000, 0b0000, 0b110>; +def : RWSysReg<"DBGWVR1_EL1", 0b10, 0b000, 0b0000, 0b0001, 0b110>; +def : RWSysReg<"DBGWVR2_EL1", 0b10, 0b000, 0b0000, 0b0010, 0b110>; +def : RWSysReg<"DBGWVR3_EL1", 0b10, 0b000, 0b0000, 0b0011, 0b110>; +def : RWSysReg<"DBGWVR4_EL1", 0b10, 0b000, 0b0000, 0b0100, 0b110>; +def : RWSysReg<"DBGWVR5_EL1", 0b10, 0b000, 0b0000, 0b0101, 0b110>; +def : RWSysReg<"DBGWVR6_EL1", 0b10, 0b000, 0b0000, 0b0110, 0b110>; +def : RWSysReg<"DBGWVR7_EL1", 0b10, 
0b000, 0b0000, 0b0111, 0b110>; +def : RWSysReg<"DBGWVR8_EL1", 0b10, 0b000, 0b0000, 0b1000, 0b110>; +def : RWSysReg<"DBGWVR9_EL1", 0b10, 0b000, 0b0000, 0b1001, 0b110>; +def : RWSysReg<"DBGWVR10_EL1", 0b10, 0b000, 0b0000, 0b1010, 0b110>; +def : RWSysReg<"DBGWVR11_EL1", 0b10, 0b000, 0b0000, 0b1011, 0b110>; +def : RWSysReg<"DBGWVR12_EL1", 0b10, 0b000, 0b0000, 0b1100, 0b110>; +def : RWSysReg<"DBGWVR13_EL1", 0b10, 0b000, 0b0000, 0b1101, 0b110>; +def : RWSysReg<"DBGWVR14_EL1", 0b10, 0b000, 0b0000, 0b1110, 0b110>; +def : RWSysReg<"DBGWVR15_EL1", 0b10, 0b000, 0b0000, 0b1111, 0b110>; +def : RWSysReg<"DBGWCR0_EL1", 0b10, 0b000, 0b0000, 0b0000, 0b111>; +def : RWSysReg<"DBGWCR1_EL1", 0b10, 0b000, 0b0000, 0b0001, 0b111>; +def : RWSysReg<"DBGWCR2_EL1", 0b10, 0b000, 0b0000, 0b0010, 0b111>; +def : RWSysReg<"DBGWCR3_EL1", 0b10, 0b000, 0b0000, 0b0011, 0b111>; +def : RWSysReg<"DBGWCR4_EL1", 0b10, 0b000, 0b0000, 0b0100, 0b111>; +def : RWSysReg<"DBGWCR5_EL1", 0b10, 0b000, 0b0000, 0b0101, 0b111>; +def : RWSysReg<"DBGWCR6_EL1", 0b10, 0b000, 0b0000, 0b0110, 0b111>; +def : RWSysReg<"DBGWCR7_EL1", 0b10, 0b000, 0b0000, 0b0111, 0b111>; +def : RWSysReg<"DBGWCR8_EL1", 0b10, 0b000, 0b0000, 0b1000, 0b111>; +def : RWSysReg<"DBGWCR9_EL1", 0b10, 0b000, 0b0000, 0b1001, 0b111>; +def : RWSysReg<"DBGWCR10_EL1", 0b10, 0b000, 0b0000, 0b1010, 0b111>; +def : RWSysReg<"DBGWCR11_EL1", 0b10, 0b000, 0b0000, 0b1011, 0b111>; +def : RWSysReg<"DBGWCR12_EL1", 0b10, 0b000, 0b0000, 0b1100, 0b111>; +def : RWSysReg<"DBGWCR13_EL1", 0b10, 0b000, 0b0000, 0b1101, 0b111>; +def : RWSysReg<"DBGWCR14_EL1", 0b10, 0b000, 0b0000, 0b1110, 0b111>; +def : RWSysReg<"DBGWCR15_EL1", 0b10, 0b000, 0b0000, 0b1111, 0b111>; +def : RWSysReg<"TEEHBR32_EL1", 0b10, 0b010, 0b0001, 0b0000, 0b000>; +def : RWSysReg<"OSDLR_EL1", 0b10, 0b000, 0b0001, 0b0011, 0b100>; +def : RWSysReg<"DBGPRCR_EL1", 0b10, 0b000, 0b0001, 0b0100, 0b100>; +def : RWSysReg<"DBGCLAIMSET_EL1", 0b10, 0b000, 0b0111, 0b1000, 0b110>; +def : RWSysReg<"DBGCLAIMCLR_EL1", 0b10, 0b000, 0b0111, 0b1001, 0b110>; +def : RWSysReg<"CSSELR_EL1", 0b11, 0b010, 0b0000, 0b0000, 0b000>; +def : RWSysReg<"VPIDR_EL2", 0b11, 0b100, 0b0000, 0b0000, 0b000>; +def : RWSysReg<"VMPIDR_EL2", 0b11, 0b100, 0b0000, 0b0000, 0b101>; +def : RWSysReg<"CPACR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b010>; +def : RWSysReg<"SCTLR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b000>; +def : RWSysReg<"SCTLR_EL2", 0b11, 0b100, 0b0001, 0b0000, 0b000>; +def : RWSysReg<"SCTLR_EL3", 0b11, 0b110, 0b0001, 0b0000, 0b000>; +def : RWSysReg<"ACTLR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b001>; +def : RWSysReg<"ACTLR_EL2", 0b11, 0b100, 0b0001, 0b0000, 0b001>; +def : RWSysReg<"ACTLR_EL3", 0b11, 0b110, 0b0001, 0b0000, 0b001>; +def : RWSysReg<"HCR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b000>; +def : RWSysReg<"SCR_EL3", 0b11, 0b110, 0b0001, 0b0001, 0b000>; +def : RWSysReg<"MDCR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b001>; +def : RWSysReg<"SDER32_EL3", 0b11, 0b110, 0b0001, 0b0001, 0b001>; +def : RWSysReg<"CPTR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b010>; +def : RWSysReg<"CPTR_EL3", 0b11, 0b110, 0b0001, 0b0001, 0b010>; +def : RWSysReg<"HSTR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b011>; +def : RWSysReg<"HACR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b111>; +def : RWSysReg<"MDCR_EL3", 0b11, 0b110, 0b0001, 0b0011, 0b001>; +def : RWSysReg<"TTBR0_EL1", 0b11, 0b000, 0b0010, 0b0000, 0b000>; +def : RWSysReg<"TTBR0_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b000>; +def : RWSysReg<"TTBR0_EL3", 0b11, 0b110, 0b0010, 0b0000, 0b000>; +def : RWSysReg<"TTBR1_EL1", 0b11, 0b000, 0b0010, 0b0000, 0b001>; +def : 
RWSysReg<"TCR_EL1", 0b11, 0b000, 0b0010, 0b0000, 0b010>; +def : RWSysReg<"TCR_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b010>; +def : RWSysReg<"TCR_EL3", 0b11, 0b110, 0b0010, 0b0000, 0b010>; +def : RWSysReg<"VTTBR_EL2", 0b11, 0b100, 0b0010, 0b0001, 0b000>; +def : RWSysReg<"VTCR_EL2", 0b11, 0b100, 0b0010, 0b0001, 0b010>; +def : RWSysReg<"DACR32_EL2", 0b11, 0b100, 0b0011, 0b0000, 0b000>; +def : RWSysReg<"SPSR_EL1", 0b11, 0b000, 0b0100, 0b0000, 0b000>; +def : RWSysReg<"SPSR_EL2", 0b11, 0b100, 0b0100, 0b0000, 0b000>; +def : RWSysReg<"SPSR_EL3", 0b11, 0b110, 0b0100, 0b0000, 0b000>; +def : RWSysReg<"ELR_EL1", 0b11, 0b000, 0b0100, 0b0000, 0b001>; +def : RWSysReg<"ELR_EL2", 0b11, 0b100, 0b0100, 0b0000, 0b001>; +def : RWSysReg<"ELR_EL3", 0b11, 0b110, 0b0100, 0b0000, 0b001>; +def : RWSysReg<"SP_EL0", 0b11, 0b000, 0b0100, 0b0001, 0b000>; +def : RWSysReg<"SP_EL1", 0b11, 0b100, 0b0100, 0b0001, 0b000>; +def : RWSysReg<"SP_EL2", 0b11, 0b110, 0b0100, 0b0001, 0b000>; +def : RWSysReg<"SPSel", 0b11, 0b000, 0b0100, 0b0010, 0b000>; +def : RWSysReg<"NZCV", 0b11, 0b011, 0b0100, 0b0010, 0b000>; +def : RWSysReg<"DAIF", 0b11, 0b011, 0b0100, 0b0010, 0b001>; +def : RWSysReg<"CurrentEL", 0b11, 0b000, 0b0100, 0b0010, 0b010>; +def : RWSysReg<"SPSR_irq", 0b11, 0b100, 0b0100, 0b0011, 0b000>; +def : RWSysReg<"SPSR_abt", 0b11, 0b100, 0b0100, 0b0011, 0b001>; +def : RWSysReg<"SPSR_und", 0b11, 0b100, 0b0100, 0b0011, 0b010>; +def : RWSysReg<"SPSR_fiq", 0b11, 0b100, 0b0100, 0b0011, 0b011>; +def : RWSysReg<"FPCR", 0b11, 0b011, 0b0100, 0b0100, 0b000>; +def : RWSysReg<"FPSR", 0b11, 0b011, 0b0100, 0b0100, 0b001>; +def : RWSysReg<"DSPSR_EL0", 0b11, 0b011, 0b0100, 0b0101, 0b000>; +def : RWSysReg<"DLR_EL0", 0b11, 0b011, 0b0100, 0b0101, 0b001>; +def : RWSysReg<"IFSR32_EL2", 0b11, 0b100, 0b0101, 0b0000, 0b001>; +def : RWSysReg<"AFSR0_EL1", 0b11, 0b000, 0b0101, 0b0001, 0b000>; +def : RWSysReg<"AFSR0_EL2", 0b11, 0b100, 0b0101, 0b0001, 0b000>; +def : RWSysReg<"AFSR0_EL3", 0b11, 0b110, 0b0101, 0b0001, 0b000>; +def : RWSysReg<"AFSR1_EL1", 0b11, 0b000, 0b0101, 0b0001, 0b001>; +def : RWSysReg<"AFSR1_EL2", 0b11, 0b100, 0b0101, 0b0001, 0b001>; +def : RWSysReg<"AFSR1_EL3", 0b11, 0b110, 0b0101, 0b0001, 0b001>; +def : RWSysReg<"ESR_EL1", 0b11, 0b000, 0b0101, 0b0010, 0b000>; +def : RWSysReg<"ESR_EL2", 0b11, 0b100, 0b0101, 0b0010, 0b000>; +def : RWSysReg<"ESR_EL3", 0b11, 0b110, 0b0101, 0b0010, 0b000>; +def : RWSysReg<"FPEXC32_EL2", 0b11, 0b100, 0b0101, 0b0011, 0b000>; +def : RWSysReg<"FAR_EL1", 0b11, 0b000, 0b0110, 0b0000, 0b000>; +def : RWSysReg<"FAR_EL2", 0b11, 0b100, 0b0110, 0b0000, 0b000>; +def : RWSysReg<"FAR_EL3", 0b11, 0b110, 0b0110, 0b0000, 0b000>; +def : RWSysReg<"HPFAR_EL2", 0b11, 0b100, 0b0110, 0b0000, 0b100>; +def : RWSysReg<"PAR_EL1", 0b11, 0b000, 0b0111, 0b0100, 0b000>; +def : RWSysReg<"PMCR_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b000>; +def : RWSysReg<"PMCNTENSET_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b001>; +def : RWSysReg<"PMCNTENCLR_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b010>; +def : RWSysReg<"PMOVSCLR_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b011>; +def : RWSysReg<"PMSELR_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b101>; +def : RWSysReg<"PMCCNTR_EL0", 0b11, 0b011, 0b1001, 0b1101, 0b000>; +def : RWSysReg<"PMXEVTYPER_EL0", 0b11, 0b011, 0b1001, 0b1101, 0b001>; +def : RWSysReg<"PMXEVCNTR_EL0", 0b11, 0b011, 0b1001, 0b1101, 0b010>; +def : RWSysReg<"PMUSERENR_EL0", 0b11, 0b011, 0b1001, 0b1110, 0b000>; +def : RWSysReg<"PMINTENSET_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b001>; +def : RWSysReg<"PMINTENCLR_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b010>; +def : 
RWSysReg<"PMOVSSET_EL0", 0b11, 0b011, 0b1001, 0b1110, 0b011>; +def : RWSysReg<"MAIR_EL1", 0b11, 0b000, 0b1010, 0b0010, 0b000>; +def : RWSysReg<"MAIR_EL2", 0b11, 0b100, 0b1010, 0b0010, 0b000>; +def : RWSysReg<"MAIR_EL3", 0b11, 0b110, 0b1010, 0b0010, 0b000>; +def : RWSysReg<"AMAIR_EL1", 0b11, 0b000, 0b1010, 0b0011, 0b000>; +def : RWSysReg<"AMAIR_EL2", 0b11, 0b100, 0b1010, 0b0011, 0b000>; +def : RWSysReg<"AMAIR_EL3", 0b11, 0b110, 0b1010, 0b0011, 0b000>; +def : RWSysReg<"VBAR_EL1", 0b11, 0b000, 0b1100, 0b0000, 0b000>; +def : RWSysReg<"VBAR_EL2", 0b11, 0b100, 0b1100, 0b0000, 0b000>; +def : RWSysReg<"VBAR_EL3", 0b11, 0b110, 0b1100, 0b0000, 0b000>; +def : RWSysReg<"RMR_EL1", 0b11, 0b000, 0b1100, 0b0000, 0b010>; +def : RWSysReg<"RMR_EL2", 0b11, 0b100, 0b1100, 0b0000, 0b010>; +def : RWSysReg<"RMR_EL3", 0b11, 0b110, 0b1100, 0b0000, 0b010>; +def : RWSysReg<"CONTEXTIDR_EL1", 0b11, 0b000, 0b1101, 0b0000, 0b001>; +def : RWSysReg<"TPIDR_EL0", 0b11, 0b011, 0b1101, 0b0000, 0b010>; +def : RWSysReg<"TPIDR_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b010>; +def : RWSysReg<"TPIDR_EL3", 0b11, 0b110, 0b1101, 0b0000, 0b010>; +def : RWSysReg<"TPIDRRO_EL0", 0b11, 0b011, 0b1101, 0b0000, 0b011>; +def : RWSysReg<"TPIDR_EL1", 0b11, 0b000, 0b1101, 0b0000, 0b100>; +def : RWSysReg<"CNTFRQ_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b000>; +def : RWSysReg<"CNTVOFF_EL2", 0b11, 0b100, 0b1110, 0b0000, 0b011>; +def : RWSysReg<"CNTKCTL_EL1", 0b11, 0b000, 0b1110, 0b0001, 0b000>; +def : RWSysReg<"CNTHCTL_EL2", 0b11, 0b100, 0b1110, 0b0001, 0b000>; +def : RWSysReg<"CNTP_TVAL_EL0", 0b11, 0b011, 0b1110, 0b0010, 0b000>; +def : RWSysReg<"CNTHP_TVAL_EL2", 0b11, 0b100, 0b1110, 0b0010, 0b000>; +def : RWSysReg<"CNTPS_TVAL_EL1", 0b11, 0b111, 0b1110, 0b0010, 0b000>; +def : RWSysReg<"CNTP_CTL_EL0", 0b11, 0b011, 0b1110, 0b0010, 0b001>; +def : RWSysReg<"CNTHP_CTL_EL2", 0b11, 0b100, 0b1110, 0b0010, 0b001>; +def : RWSysReg<"CNTPS_CTL_EL1", 0b11, 0b111, 0b1110, 0b0010, 0b001>; +def : RWSysReg<"CNTP_CVAL_EL0", 0b11, 0b011, 0b1110, 0b0010, 0b010>; +def : RWSysReg<"CNTHP_CVAL_EL2", 0b11, 0b100, 0b1110, 0b0010, 0b010>; +def : RWSysReg<"CNTPS_CVAL_EL1", 0b11, 0b111, 0b1110, 0b0010, 0b010>; +def : RWSysReg<"CNTV_TVAL_EL0", 0b11, 0b011, 0b1110, 0b0011, 0b000>; +def : RWSysReg<"CNTV_CTL_EL0", 0b11, 0b011, 0b1110, 0b0011, 0b001>; +def : RWSysReg<"CNTV_CVAL_EL0", 0b11, 0b011, 0b1110, 0b0011, 0b010>; +def : RWSysReg<"PMEVCNTR0_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b000>; +def : RWSysReg<"PMEVCNTR1_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b001>; +def : RWSysReg<"PMEVCNTR2_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b010>; +def : RWSysReg<"PMEVCNTR3_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b011>; +def : RWSysReg<"PMEVCNTR4_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b100>; +def : RWSysReg<"PMEVCNTR5_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b101>; +def : RWSysReg<"PMEVCNTR6_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b110>; +def : RWSysReg<"PMEVCNTR7_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b111>; +def : RWSysReg<"PMEVCNTR8_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b000>; +def : RWSysReg<"PMEVCNTR9_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b001>; +def : RWSysReg<"PMEVCNTR10_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b010>; +def : RWSysReg<"PMEVCNTR11_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b011>; +def : RWSysReg<"PMEVCNTR12_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b100>; +def : RWSysReg<"PMEVCNTR13_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b101>; +def : RWSysReg<"PMEVCNTR14_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b110>; +def : RWSysReg<"PMEVCNTR15_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b111>; +def : RWSysReg<"PMEVCNTR16_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b000>; 
+def : RWSysReg<"PMEVCNTR17_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b001>; +def : RWSysReg<"PMEVCNTR18_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b010>; +def : RWSysReg<"PMEVCNTR19_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b011>; +def : RWSysReg<"PMEVCNTR20_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b100>; +def : RWSysReg<"PMEVCNTR21_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b101>; +def : RWSysReg<"PMEVCNTR22_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b110>; +def : RWSysReg<"PMEVCNTR23_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b111>; +def : RWSysReg<"PMEVCNTR24_EL0", 0b11, 0b011, 0b1110, 0b1011, 0b000>; +def : RWSysReg<"PMEVCNTR25_EL0", 0b11, 0b011, 0b1110, 0b1011, 0b001>; +def : RWSysReg<"PMEVCNTR26_EL0", 0b11, 0b011, 0b1110, 0b1011, 0b010>; +def : RWSysReg<"PMEVCNTR27_EL0", 0b11, 0b011, 0b1110, 0b1011, 0b011>; +def : RWSysReg<"PMEVCNTR28_EL0", 0b11, 0b011, 0b1110, 0b1011, 0b100>; +def : RWSysReg<"PMEVCNTR29_EL0", 0b11, 0b011, 0b1110, 0b1011, 0b101>; +def : RWSysReg<"PMEVCNTR30_EL0", 0b11, 0b011, 0b1110, 0b1011, 0b110>; +def : RWSysReg<"PMCCFILTR_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b111>; +def : RWSysReg<"PMEVTYPER0_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b000>; +def : RWSysReg<"PMEVTYPER1_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b001>; +def : RWSysReg<"PMEVTYPER2_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b010>; +def : RWSysReg<"PMEVTYPER3_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b011>; +def : RWSysReg<"PMEVTYPER4_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b100>; +def : RWSysReg<"PMEVTYPER5_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b101>; +def : RWSysReg<"PMEVTYPER6_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b110>; +def : RWSysReg<"PMEVTYPER7_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b111>; +def : RWSysReg<"PMEVTYPER8_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b000>; +def : RWSysReg<"PMEVTYPER9_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b001>; +def : RWSysReg<"PMEVTYPER10_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b010>; +def : RWSysReg<"PMEVTYPER11_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b011>; +def : RWSysReg<"PMEVTYPER12_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b100>; +def : RWSysReg<"PMEVTYPER13_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b101>; +def : RWSysReg<"PMEVTYPER14_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b110>; +def : RWSysReg<"PMEVTYPER15_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b111>; +def : RWSysReg<"PMEVTYPER16_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b000>; +def : RWSysReg<"PMEVTYPER17_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b001>; +def : RWSysReg<"PMEVTYPER18_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b010>; +def : RWSysReg<"PMEVTYPER19_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b011>; +def : RWSysReg<"PMEVTYPER20_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b100>; +def : RWSysReg<"PMEVTYPER21_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b101>; +def : RWSysReg<"PMEVTYPER22_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b110>; +def : RWSysReg<"PMEVTYPER23_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b111>; +def : RWSysReg<"PMEVTYPER24_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b000>; +def : RWSysReg<"PMEVTYPER25_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b001>; +def : RWSysReg<"PMEVTYPER26_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b010>; +def : RWSysReg<"PMEVTYPER27_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b011>; +def : RWSysReg<"PMEVTYPER28_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b100>; +def : RWSysReg<"PMEVTYPER29_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b101>; +def : RWSysReg<"PMEVTYPER30_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b110>; + +// Trace registers +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"TRCPRGCTLR", 0b10, 0b001, 0b0000, 0b0001, 0b000>; +def : RWSysReg<"TRCPROCSELR", 0b10, 0b001, 0b0000, 0b0010, 0b000>; +def : RWSysReg<"TRCCONFIGR", 0b10, 0b001, 0b0000, 0b0100, 0b000>; +def : 
RWSysReg<"TRCAUXCTLR", 0b10, 0b001, 0b0000, 0b0110, 0b000>; +def : RWSysReg<"TRCEVENTCTL0R", 0b10, 0b001, 0b0000, 0b1000, 0b000>; +def : RWSysReg<"TRCEVENTCTL1R", 0b10, 0b001, 0b0000, 0b1001, 0b000>; +def : RWSysReg<"TRCSTALLCTLR", 0b10, 0b001, 0b0000, 0b1011, 0b000>; +def : RWSysReg<"TRCTSCTLR", 0b10, 0b001, 0b0000, 0b1100, 0b000>; +def : RWSysReg<"TRCSYNCPR", 0b10, 0b001, 0b0000, 0b1101, 0b000>; +def : RWSysReg<"TRCCCCTLR", 0b10, 0b001, 0b0000, 0b1110, 0b000>; +def : RWSysReg<"TRCBBCTLR", 0b10, 0b001, 0b0000, 0b1111, 0b000>; +def : RWSysReg<"TRCTRACEIDR", 0b10, 0b001, 0b0000, 0b0000, 0b001>; +def : RWSysReg<"TRCQCTLR", 0b10, 0b001, 0b0000, 0b0001, 0b001>; +def : RWSysReg<"TRCVICTLR", 0b10, 0b001, 0b0000, 0b0000, 0b010>; +def : RWSysReg<"TRCVIIECTLR", 0b10, 0b001, 0b0000, 0b0001, 0b010>; +def : RWSysReg<"TRCVISSCTLR", 0b10, 0b001, 0b0000, 0b0010, 0b010>; +def : RWSysReg<"TRCVIPCSSCTLR", 0b10, 0b001, 0b0000, 0b0011, 0b010>; +def : RWSysReg<"TRCVDCTLR", 0b10, 0b001, 0b0000, 0b1000, 0b010>; +def : RWSysReg<"TRCVDSACCTLR", 0b10, 0b001, 0b0000, 0b1001, 0b010>; +def : RWSysReg<"TRCVDARCCTLR", 0b10, 0b001, 0b0000, 0b1010, 0b010>; +def : RWSysReg<"TRCSEQEVR0", 0b10, 0b001, 0b0000, 0b0000, 0b100>; +def : RWSysReg<"TRCSEQEVR1", 0b10, 0b001, 0b0000, 0b0001, 0b100>; +def : RWSysReg<"TRCSEQEVR2", 0b10, 0b001, 0b0000, 0b0010, 0b100>; +def : RWSysReg<"TRCSEQRSTEVR", 0b10, 0b001, 0b0000, 0b0110, 0b100>; +def : RWSysReg<"TRCSEQSTR", 0b10, 0b001, 0b0000, 0b0111, 0b100>; +def : RWSysReg<"TRCEXTINSELR", 0b10, 0b001, 0b0000, 0b1000, 0b100>; +def : RWSysReg<"TRCCNTRLDVR0", 0b10, 0b001, 0b0000, 0b0000, 0b101>; +def : RWSysReg<"TRCCNTRLDVR1", 0b10, 0b001, 0b0000, 0b0001, 0b101>; +def : RWSysReg<"TRCCNTRLDVR2", 0b10, 0b001, 0b0000, 0b0010, 0b101>; +def : RWSysReg<"TRCCNTRLDVR3", 0b10, 0b001, 0b0000, 0b0011, 0b101>; +def : RWSysReg<"TRCCNTCTLR0", 0b10, 0b001, 0b0000, 0b0100, 0b101>; +def : RWSysReg<"TRCCNTCTLR1", 0b10, 0b001, 0b0000, 0b0101, 0b101>; +def : RWSysReg<"TRCCNTCTLR2", 0b10, 0b001, 0b0000, 0b0110, 0b101>; +def : RWSysReg<"TRCCNTCTLR3", 0b10, 0b001, 0b0000, 0b0111, 0b101>; +def : RWSysReg<"TRCCNTVR0", 0b10, 0b001, 0b0000, 0b1000, 0b101>; +def : RWSysReg<"TRCCNTVR1", 0b10, 0b001, 0b0000, 0b1001, 0b101>; +def : RWSysReg<"TRCCNTVR2", 0b10, 0b001, 0b0000, 0b1010, 0b101>; +def : RWSysReg<"TRCCNTVR3", 0b10, 0b001, 0b0000, 0b1011, 0b101>; +def : RWSysReg<"TRCIMSPEC0", 0b10, 0b001, 0b0000, 0b0000, 0b111>; +def : RWSysReg<"TRCIMSPEC1", 0b10, 0b001, 0b0000, 0b0001, 0b111>; +def : RWSysReg<"TRCIMSPEC2", 0b10, 0b001, 0b0000, 0b0010, 0b111>; +def : RWSysReg<"TRCIMSPEC3", 0b10, 0b001, 0b0000, 0b0011, 0b111>; +def : RWSysReg<"TRCIMSPEC4", 0b10, 0b001, 0b0000, 0b0100, 0b111>; +def : RWSysReg<"TRCIMSPEC5", 0b10, 0b001, 0b0000, 0b0101, 0b111>; +def : RWSysReg<"TRCIMSPEC6", 0b10, 0b001, 0b0000, 0b0110, 0b111>; +def : RWSysReg<"TRCIMSPEC7", 0b10, 0b001, 0b0000, 0b0111, 0b111>; +def : RWSysReg<"TRCRSCTLR2", 0b10, 0b001, 0b0001, 0b0010, 0b000>; +def : RWSysReg<"TRCRSCTLR3", 0b10, 0b001, 0b0001, 0b0011, 0b000>; +def : RWSysReg<"TRCRSCTLR4", 0b10, 0b001, 0b0001, 0b0100, 0b000>; +def : RWSysReg<"TRCRSCTLR5", 0b10, 0b001, 0b0001, 0b0101, 0b000>; +def : RWSysReg<"TRCRSCTLR6", 0b10, 0b001, 0b0001, 0b0110, 0b000>; +def : RWSysReg<"TRCRSCTLR7", 0b10, 0b001, 0b0001, 0b0111, 0b000>; +def : RWSysReg<"TRCRSCTLR8", 0b10, 0b001, 0b0001, 0b1000, 0b000>; +def : RWSysReg<"TRCRSCTLR9", 0b10, 0b001, 0b0001, 0b1001, 0b000>; +def : RWSysReg<"TRCRSCTLR10", 0b10, 0b001, 0b0001, 0b1010, 0b000>; +def : RWSysReg<"TRCRSCTLR11", 0b10, 0b001, 0b0001, 
0b1011, 0b000>; +def : RWSysReg<"TRCRSCTLR12", 0b10, 0b001, 0b0001, 0b1100, 0b000>; +def : RWSysReg<"TRCRSCTLR13", 0b10, 0b001, 0b0001, 0b1101, 0b000>; +def : RWSysReg<"TRCRSCTLR14", 0b10, 0b001, 0b0001, 0b1110, 0b000>; +def : RWSysReg<"TRCRSCTLR15", 0b10, 0b001, 0b0001, 0b1111, 0b000>; +def : RWSysReg<"TRCRSCTLR16", 0b10, 0b001, 0b0001, 0b0000, 0b001>; +def : RWSysReg<"TRCRSCTLR17", 0b10, 0b001, 0b0001, 0b0001, 0b001>; +def : RWSysReg<"TRCRSCTLR18", 0b10, 0b001, 0b0001, 0b0010, 0b001>; +def : RWSysReg<"TRCRSCTLR19", 0b10, 0b001, 0b0001, 0b0011, 0b001>; +def : RWSysReg<"TRCRSCTLR20", 0b10, 0b001, 0b0001, 0b0100, 0b001>; +def : RWSysReg<"TRCRSCTLR21", 0b10, 0b001, 0b0001, 0b0101, 0b001>; +def : RWSysReg<"TRCRSCTLR22", 0b10, 0b001, 0b0001, 0b0110, 0b001>; +def : RWSysReg<"TRCRSCTLR23", 0b10, 0b001, 0b0001, 0b0111, 0b001>; +def : RWSysReg<"TRCRSCTLR24", 0b10, 0b001, 0b0001, 0b1000, 0b001>; +def : RWSysReg<"TRCRSCTLR25", 0b10, 0b001, 0b0001, 0b1001, 0b001>; +def : RWSysReg<"TRCRSCTLR26", 0b10, 0b001, 0b0001, 0b1010, 0b001>; +def : RWSysReg<"TRCRSCTLR27", 0b10, 0b001, 0b0001, 0b1011, 0b001>; +def : RWSysReg<"TRCRSCTLR28", 0b10, 0b001, 0b0001, 0b1100, 0b001>; +def : RWSysReg<"TRCRSCTLR29", 0b10, 0b001, 0b0001, 0b1101, 0b001>; +def : RWSysReg<"TRCRSCTLR30", 0b10, 0b001, 0b0001, 0b1110, 0b001>; +def : RWSysReg<"TRCRSCTLR31", 0b10, 0b001, 0b0001, 0b1111, 0b001>; +def : RWSysReg<"TRCSSCCR0", 0b10, 0b001, 0b0001, 0b0000, 0b010>; +def : RWSysReg<"TRCSSCCR1", 0b10, 0b001, 0b0001, 0b0001, 0b010>; +def : RWSysReg<"TRCSSCCR2", 0b10, 0b001, 0b0001, 0b0010, 0b010>; +def : RWSysReg<"TRCSSCCR3", 0b10, 0b001, 0b0001, 0b0011, 0b010>; +def : RWSysReg<"TRCSSCCR4", 0b10, 0b001, 0b0001, 0b0100, 0b010>; +def : RWSysReg<"TRCSSCCR5", 0b10, 0b001, 0b0001, 0b0101, 0b010>; +def : RWSysReg<"TRCSSCCR6", 0b10, 0b001, 0b0001, 0b0110, 0b010>; +def : RWSysReg<"TRCSSCCR7", 0b10, 0b001, 0b0001, 0b0111, 0b010>; +def : RWSysReg<"TRCSSCSR0", 0b10, 0b001, 0b0001, 0b1000, 0b010>; +def : RWSysReg<"TRCSSCSR1", 0b10, 0b001, 0b0001, 0b1001, 0b010>; +def : RWSysReg<"TRCSSCSR2", 0b10, 0b001, 0b0001, 0b1010, 0b010>; +def : RWSysReg<"TRCSSCSR3", 0b10, 0b001, 0b0001, 0b1011, 0b010>; +def : RWSysReg<"TRCSSCSR4", 0b10, 0b001, 0b0001, 0b1100, 0b010>; +def : RWSysReg<"TRCSSCSR5", 0b10, 0b001, 0b0001, 0b1101, 0b010>; +def : RWSysReg<"TRCSSCSR6", 0b10, 0b001, 0b0001, 0b1110, 0b010>; +def : RWSysReg<"TRCSSCSR7", 0b10, 0b001, 0b0001, 0b1111, 0b010>; +def : RWSysReg<"TRCSSPCICR0", 0b10, 0b001, 0b0001, 0b0000, 0b011>; +def : RWSysReg<"TRCSSPCICR1", 0b10, 0b001, 0b0001, 0b0001, 0b011>; +def : RWSysReg<"TRCSSPCICR2", 0b10, 0b001, 0b0001, 0b0010, 0b011>; +def : RWSysReg<"TRCSSPCICR3", 0b10, 0b001, 0b0001, 0b0011, 0b011>; +def : RWSysReg<"TRCSSPCICR4", 0b10, 0b001, 0b0001, 0b0100, 0b011>; +def : RWSysReg<"TRCSSPCICR5", 0b10, 0b001, 0b0001, 0b0101, 0b011>; +def : RWSysReg<"TRCSSPCICR6", 0b10, 0b001, 0b0001, 0b0110, 0b011>; +def : RWSysReg<"TRCSSPCICR7", 0b10, 0b001, 0b0001, 0b0111, 0b011>; +def : RWSysReg<"TRCPDCR", 0b10, 0b001, 0b0001, 0b0100, 0b100>; +def : RWSysReg<"TRCACVR0", 0b10, 0b001, 0b0010, 0b0000, 0b000>; +def : RWSysReg<"TRCACVR1", 0b10, 0b001, 0b0010, 0b0010, 0b000>; +def : RWSysReg<"TRCACVR2", 0b10, 0b001, 0b0010, 0b0100, 0b000>; +def : RWSysReg<"TRCACVR3", 0b10, 0b001, 0b0010, 0b0110, 0b000>; +def : RWSysReg<"TRCACVR4", 0b10, 0b001, 0b0010, 0b1000, 0b000>; +def : RWSysReg<"TRCACVR5", 0b10, 0b001, 0b0010, 0b1010, 0b000>; +def : RWSysReg<"TRCACVR6", 0b10, 0b001, 0b0010, 0b1100, 0b000>; +def : RWSysReg<"TRCACVR7", 0b10, 0b001, 0b0010, 0b1110, 
0b000>; +def : RWSysReg<"TRCACVR8", 0b10, 0b001, 0b0010, 0b0000, 0b001>; +def : RWSysReg<"TRCACVR9", 0b10, 0b001, 0b0010, 0b0010, 0b001>; +def : RWSysReg<"TRCACVR10", 0b10, 0b001, 0b0010, 0b0100, 0b001>; +def : RWSysReg<"TRCACVR11", 0b10, 0b001, 0b0010, 0b0110, 0b001>; +def : RWSysReg<"TRCACVR12", 0b10, 0b001, 0b0010, 0b1000, 0b001>; +def : RWSysReg<"TRCACVR13", 0b10, 0b001, 0b0010, 0b1010, 0b001>; +def : RWSysReg<"TRCACVR14", 0b10, 0b001, 0b0010, 0b1100, 0b001>; +def : RWSysReg<"TRCACVR15", 0b10, 0b001, 0b0010, 0b1110, 0b001>; +def : RWSysReg<"TRCACATR0", 0b10, 0b001, 0b0010, 0b0000, 0b010>; +def : RWSysReg<"TRCACATR1", 0b10, 0b001, 0b0010, 0b0010, 0b010>; +def : RWSysReg<"TRCACATR2", 0b10, 0b001, 0b0010, 0b0100, 0b010>; +def : RWSysReg<"TRCACATR3", 0b10, 0b001, 0b0010, 0b0110, 0b010>; +def : RWSysReg<"TRCACATR4", 0b10, 0b001, 0b0010, 0b1000, 0b010>; +def : RWSysReg<"TRCACATR5", 0b10, 0b001, 0b0010, 0b1010, 0b010>; +def : RWSysReg<"TRCACATR6", 0b10, 0b001, 0b0010, 0b1100, 0b010>; +def : RWSysReg<"TRCACATR7", 0b10, 0b001, 0b0010, 0b1110, 0b010>; +def : RWSysReg<"TRCACATR8", 0b10, 0b001, 0b0010, 0b0000, 0b011>; +def : RWSysReg<"TRCACATR9", 0b10, 0b001, 0b0010, 0b0010, 0b011>; +def : RWSysReg<"TRCACATR10", 0b10, 0b001, 0b0010, 0b0100, 0b011>; +def : RWSysReg<"TRCACATR11", 0b10, 0b001, 0b0010, 0b0110, 0b011>; +def : RWSysReg<"TRCACATR12", 0b10, 0b001, 0b0010, 0b1000, 0b011>; +def : RWSysReg<"TRCACATR13", 0b10, 0b001, 0b0010, 0b1010, 0b011>; +def : RWSysReg<"TRCACATR14", 0b10, 0b001, 0b0010, 0b1100, 0b011>; +def : RWSysReg<"TRCACATR15", 0b10, 0b001, 0b0010, 0b1110, 0b011>; +def : RWSysReg<"TRCDVCVR0", 0b10, 0b001, 0b0010, 0b0000, 0b100>; +def : RWSysReg<"TRCDVCVR1", 0b10, 0b001, 0b0010, 0b0100, 0b100>; +def : RWSysReg<"TRCDVCVR2", 0b10, 0b001, 0b0010, 0b1000, 0b100>; +def : RWSysReg<"TRCDVCVR3", 0b10, 0b001, 0b0010, 0b1100, 0b100>; +def : RWSysReg<"TRCDVCVR4", 0b10, 0b001, 0b0010, 0b0000, 0b101>; +def : RWSysReg<"TRCDVCVR5", 0b10, 0b001, 0b0010, 0b0100, 0b101>; +def : RWSysReg<"TRCDVCVR6", 0b10, 0b001, 0b0010, 0b1000, 0b101>; +def : RWSysReg<"TRCDVCVR7", 0b10, 0b001, 0b0010, 0b1100, 0b101>; +def : RWSysReg<"TRCDVCMR0", 0b10, 0b001, 0b0010, 0b0000, 0b110>; +def : RWSysReg<"TRCDVCMR1", 0b10, 0b001, 0b0010, 0b0100, 0b110>; +def : RWSysReg<"TRCDVCMR2", 0b10, 0b001, 0b0010, 0b1000, 0b110>; +def : RWSysReg<"TRCDVCMR3", 0b10, 0b001, 0b0010, 0b1100, 0b110>; +def : RWSysReg<"TRCDVCMR4", 0b10, 0b001, 0b0010, 0b0000, 0b111>; +def : RWSysReg<"TRCDVCMR5", 0b10, 0b001, 0b0010, 0b0100, 0b111>; +def : RWSysReg<"TRCDVCMR6", 0b10, 0b001, 0b0010, 0b1000, 0b111>; +def : RWSysReg<"TRCDVCMR7", 0b10, 0b001, 0b0010, 0b1100, 0b111>; +def : RWSysReg<"TRCCIDCVR0", 0b10, 0b001, 0b0011, 0b0000, 0b000>; +def : RWSysReg<"TRCCIDCVR1", 0b10, 0b001, 0b0011, 0b0010, 0b000>; +def : RWSysReg<"TRCCIDCVR2", 0b10, 0b001, 0b0011, 0b0100, 0b000>; +def : RWSysReg<"TRCCIDCVR3", 0b10, 0b001, 0b0011, 0b0110, 0b000>; +def : RWSysReg<"TRCCIDCVR4", 0b10, 0b001, 0b0011, 0b1000, 0b000>; +def : RWSysReg<"TRCCIDCVR5", 0b10, 0b001, 0b0011, 0b1010, 0b000>; +def : RWSysReg<"TRCCIDCVR6", 0b10, 0b001, 0b0011, 0b1100, 0b000>; +def : RWSysReg<"TRCCIDCVR7", 0b10, 0b001, 0b0011, 0b1110, 0b000>; +def : RWSysReg<"TRCVMIDCVR0", 0b10, 0b001, 0b0011, 0b0000, 0b001>; +def : RWSysReg<"TRCVMIDCVR1", 0b10, 0b001, 0b0011, 0b0010, 0b001>; +def : RWSysReg<"TRCVMIDCVR2", 0b10, 0b001, 0b0011, 0b0100, 0b001>; +def : RWSysReg<"TRCVMIDCVR3", 0b10, 0b001, 0b0011, 0b0110, 0b001>; +def : RWSysReg<"TRCVMIDCVR4", 0b10, 0b001, 0b0011, 0b1000, 0b001>; +def : 
RWSysReg<"TRCVMIDCVR5", 0b10, 0b001, 0b0011, 0b1010, 0b001>; +def : RWSysReg<"TRCVMIDCVR6", 0b10, 0b001, 0b0011, 0b1100, 0b001>; +def : RWSysReg<"TRCVMIDCVR7", 0b10, 0b001, 0b0011, 0b1110, 0b001>; +def : RWSysReg<"TRCCIDCCTLR0", 0b10, 0b001, 0b0011, 0b0000, 0b010>; +def : RWSysReg<"TRCCIDCCTLR1", 0b10, 0b001, 0b0011, 0b0001, 0b010>; +def : RWSysReg<"TRCVMIDCCTLR0", 0b10, 0b001, 0b0011, 0b0010, 0b010>; +def : RWSysReg<"TRCVMIDCCTLR1", 0b10, 0b001, 0b0011, 0b0011, 0b010>; +def : RWSysReg<"TRCITCTRL", 0b10, 0b001, 0b0111, 0b0000, 0b100>; +def : RWSysReg<"TRCCLAIMSET", 0b10, 0b001, 0b0111, 0b1000, 0b110>; +def : RWSysReg<"TRCCLAIMCLR", 0b10, 0b001, 0b0111, 0b1001, 0b110>; + +// GICv3 registers +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"ICC_BPR1_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b011>; +def : RWSysReg<"ICC_BPR0_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b011>; +def : RWSysReg<"ICC_PMR_EL1", 0b11, 0b000, 0b0100, 0b0110, 0b000>; +def : RWSysReg<"ICC_CTLR_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b100>; +def : RWSysReg<"ICC_CTLR_EL3", 0b11, 0b110, 0b1100, 0b1100, 0b100>; +def : RWSysReg<"ICC_SRE_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b101>; +def : RWSysReg<"ICC_SRE_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b101>; +def : RWSysReg<"ICC_SRE_EL3", 0b11, 0b110, 0b1100, 0b1100, 0b101>; +def : RWSysReg<"ICC_IGRPEN0_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b110>; +def : RWSysReg<"ICC_IGRPEN1_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b111>; +def : RWSysReg<"ICC_IGRPEN1_EL3", 0b11, 0b110, 0b1100, 0b1100, 0b111>; +def : RWSysReg<"ICC_SEIEN_EL1", 0b11, 0b000, 0b1100, 0b1101, 0b000>; +def : RWSysReg<"ICC_AP0R0_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b100>; +def : RWSysReg<"ICC_AP0R1_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b101>; +def : RWSysReg<"ICC_AP0R2_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b110>; +def : RWSysReg<"ICC_AP0R3_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b111>; +def : RWSysReg<"ICC_AP1R0_EL1", 0b11, 0b000, 0b1100, 0b1001, 0b000>; +def : RWSysReg<"ICC_AP1R1_EL1", 0b11, 0b000, 0b1100, 0b1001, 0b001>; +def : RWSysReg<"ICC_AP1R2_EL1", 0b11, 0b000, 0b1100, 0b1001, 0b010>; +def : RWSysReg<"ICC_AP1R3_EL1", 0b11, 0b000, 0b1100, 0b1001, 0b011>; +def : RWSysReg<"ICH_AP0R0_EL2", 0b11, 0b100, 0b1100, 0b1000, 0b000>; +def : RWSysReg<"ICH_AP0R1_EL2", 0b11, 0b100, 0b1100, 0b1000, 0b001>; +def : RWSysReg<"ICH_AP0R2_EL2", 0b11, 0b100, 0b1100, 0b1000, 0b010>; +def : RWSysReg<"ICH_AP0R3_EL2", 0b11, 0b100, 0b1100, 0b1000, 0b011>; +def : RWSysReg<"ICH_AP1R0_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b000>; +def : RWSysReg<"ICH_AP1R1_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b001>; +def : RWSysReg<"ICH_AP1R2_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b010>; +def : RWSysReg<"ICH_AP1R3_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b011>; +def : RWSysReg<"ICH_HCR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b000>; +def : RWSysReg<"ICH_MISR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b010>; +def : RWSysReg<"ICH_VMCR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b111>; +def : RWSysReg<"ICH_VSEIR_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b100>; +def : RWSysReg<"ICH_LR0_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b000>; +def : RWSysReg<"ICH_LR1_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b001>; +def : RWSysReg<"ICH_LR2_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b010>; +def : RWSysReg<"ICH_LR3_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b011>; +def : RWSysReg<"ICH_LR4_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b100>; +def : RWSysReg<"ICH_LR5_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b101>; +def : RWSysReg<"ICH_LR6_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b110>; +def : RWSysReg<"ICH_LR7_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b111>; +def : RWSysReg<"ICH_LR8_EL2", 0b11, 0b100, 0b1100, 
0b1101, 0b000>; +def : RWSysReg<"ICH_LR9_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b001>; +def : RWSysReg<"ICH_LR10_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b010>; +def : RWSysReg<"ICH_LR11_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b011>; +def : RWSysReg<"ICH_LR12_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b100>; +def : RWSysReg<"ICH_LR13_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b101>; +def : RWSysReg<"ICH_LR14_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b110>; +def : RWSysReg<"ICH_LR15_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b111>; + +// v8.1a "Privileged Access Never" extension-specific system registers +let Requires = [{ {AArch64::HasV8_1aOps} }] in +def : RWSysReg<"PAN", 0b11, 0b000, 0b0100, 0b0010, 0b011>; + +// v8.1a "Limited Ordering Regions" extension-specific system registers +// Op0 Op1 CRn CRm Op2 +let Requires = [{ {AArch64::HasV8_1aOps} }] in { +def : RWSysReg<"LORSA_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b000>; +def : RWSysReg<"LOREA_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b001>; +def : RWSysReg<"LORN_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b010>; +def : RWSysReg<"LORC_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b011>; +} + +// v8.1a "Virtualization hos extensions" system registers +// Op0 Op1 CRn CRm Op2 +let Requires = [{ {AArch64::HasV8_1aOps} }] in { +def : RWSysReg<"TTBR1_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b001>; +def : RWSysReg<"CONTEXTIDR_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b001>; +def : RWSysReg<"CNTHV_TVAL_EL2", 0b11, 0b100, 0b1110, 0b0011, 0b000>; +def : RWSysReg<"CNTHV_CVAL_EL2", 0b11, 0b100, 0b1110, 0b0011, 0b010>; +def : RWSysReg<"CNTHV_CTL_EL2", 0b11, 0b100, 0b1110, 0b0011, 0b001>; +def : RWSysReg<"SCTLR_EL12", 0b11, 0b101, 0b0001, 0b0000, 0b000>; +def : RWSysReg<"CPACR_EL12", 0b11, 0b101, 0b0001, 0b0000, 0b010>; +def : RWSysReg<"TTBR0_EL12", 0b11, 0b101, 0b0010, 0b0000, 0b000>; +def : RWSysReg<"TTBR1_EL12", 0b11, 0b101, 0b0010, 0b0000, 0b001>; +def : RWSysReg<"TCR_EL12", 0b11, 0b101, 0b0010, 0b0000, 0b010>; +def : RWSysReg<"AFSR0_EL12", 0b11, 0b101, 0b0101, 0b0001, 0b000>; +def : RWSysReg<"AFSR1_EL12", 0b11, 0b101, 0b0101, 0b0001, 0b001>; +def : RWSysReg<"ESR_EL12", 0b11, 0b101, 0b0101, 0b0010, 0b000>; +def : RWSysReg<"FAR_EL12", 0b11, 0b101, 0b0110, 0b0000, 0b000>; +def : RWSysReg<"MAIR_EL12", 0b11, 0b101, 0b1010, 0b0010, 0b000>; +def : RWSysReg<"AMAIR_EL12", 0b11, 0b101, 0b1010, 0b0011, 0b000>; +def : RWSysReg<"VBAR_EL12", 0b11, 0b101, 0b1100, 0b0000, 0b000>; +def : RWSysReg<"CONTEXTIDR_EL12", 0b11, 0b101, 0b1101, 0b0000, 0b001>; +def : RWSysReg<"CNTKCTL_EL12", 0b11, 0b101, 0b1110, 0b0001, 0b000>; +def : RWSysReg<"CNTP_TVAL_EL02", 0b11, 0b101, 0b1110, 0b0010, 0b000>; +def : RWSysReg<"CNTP_CTL_EL02", 0b11, 0b101, 0b1110, 0b0010, 0b001>; +def : RWSysReg<"CNTP_CVAL_EL02", 0b11, 0b101, 0b1110, 0b0010, 0b010>; +def : RWSysReg<"CNTV_TVAL_EL02", 0b11, 0b101, 0b1110, 0b0011, 0b000>; +def : RWSysReg<"CNTV_CTL_EL02", 0b11, 0b101, 0b1110, 0b0011, 0b001>; +def : RWSysReg<"CNTV_CVAL_EL02", 0b11, 0b101, 0b1110, 0b0011, 0b010>; +def : RWSysReg<"SPSR_EL12", 0b11, 0b101, 0b0100, 0b0000, 0b000>; +def : RWSysReg<"ELR_EL12", 0b11, 0b101, 0b0100, 0b0000, 0b001>; +} +// v8.2a registers +// Op0 Op1 CRn CRm Op2 +let Requires = [{ {AArch64::HasV8_2aOps} }] in +def : RWSysReg<"UAO", 0b11, 0b000, 0b0100, 0b0010, 0b100>; + +// v8.2a "Statistical Profiling extension" registers +// Op0 Op1 CRn CRm Op2 +let Requires = [{ {AArch64::FeatureSPE} }] in { +def : RWSysReg<"PMBLIMITR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b000>; +def : RWSysReg<"PMBPTR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b001>; +def : RWSysReg<"PMBSR_EL1", 0b11, 0b000, 0b1001, 0b1010, 
0b011>; +def : RWSysReg<"PMBIDR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b111>; +def : RWSysReg<"PMSCR_EL2", 0b11, 0b100, 0b1001, 0b1001, 0b000>; +def : RWSysReg<"PMSCR_EL12", 0b11, 0b101, 0b1001, 0b1001, 0b000>; +def : RWSysReg<"PMSCR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b000>; +def : RWSysReg<"PMSICR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b010>; +def : RWSysReg<"PMSIRR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b011>; +def : RWSysReg<"PMSFCR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b100>; +def : RWSysReg<"PMSEVFR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b101>; +def : RWSysReg<"PMSLATFR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b110>; +def : RWSysReg<"PMSIDR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b111>; +} + +// v8.2a "RAS extension" registers +// Op0 Op1 CRn CRm Op2 +let Requires = [{ {AArch64::FeatureRAS} }] in { +def : RWSysReg<"ERRSELR_EL1", 0b11, 0b000, 0b0101, 0b0011, 0b001>; +def : RWSysReg<"ERXCTLR_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b001>; +def : RWSysReg<"ERXSTATUS_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b010>; +def : RWSysReg<"ERXADDR_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b011>; +def : RWSysReg<"ERXMISC0_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b000>; +def : RWSysReg<"ERXMISC1_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b001>; +def : RWSysReg<"DISR_EL1", 0b11, 0b000, 0b1100, 0b0001, 0b001>; +def : RWSysReg<"VDISR_EL2", 0b11, 0b100, 0b1100, 0b0001, 0b001>; +def : RWSysReg<"VSESR_EL2", 0b11, 0b100, 0b0101, 0b0010, 0b011>; +} + +// v8.3a "Pointer authentication extension" registers +// Op0 Op1 CRn CRm Op2 +let Requires = [{ {AArch64::HasV8_3aOps} }] in { +def : RWSysReg<"APIAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b000>; +def : RWSysReg<"APIAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b001>; +def : RWSysReg<"APIBKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b010>; +def : RWSysReg<"APIBKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b011>; +def : RWSysReg<"APDAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0010, 0b000>; +def : RWSysReg<"APDAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0010, 0b001>; +def : RWSysReg<"APDBKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0010, 0b010>; +def : RWSysReg<"APDBKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0010, 0b011>; +def : RWSysReg<"APGAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0011, 0b000>; +def : RWSysReg<"APGAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0011, 0b001>; +} + +let Requires = [{ {AArch64::HasV8_4aOps} }] in { + +// v8.4a "Virtualization secure second stage translation" registers +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"VSTCR_EL2" , 0b11, 0b100, 0b0010, 0b0110, 0b010>; +def : RWSysReg<"VSTTBR_EL2", 0b11, 0b100, 0b0010, 0b0110, 0b000>; + +// v8.4a "Virtualization timer" registers +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"CNTHVS_TVAL_EL2", 0b11, 0b100, 0b1110, 0b0100, 0b000>; +def : RWSysReg<"CNTHVS_CVAL_EL2", 0b11, 0b100, 0b1110, 0b0100, 0b010>; +def : RWSysReg<"CNTHVS_CTL_EL2", 0b11, 0b100, 0b1110, 0b0100, 0b001>; +def : RWSysReg<"CNTHPS_TVAL_EL2", 0b11, 0b100, 0b1110, 0b0101, 0b000>; +def : RWSysReg<"CNTHPS_CVAL_EL2", 0b11, 0b100, 0b1110, 0b0101, 0b010>; +def : RWSysReg<"CNTHPS_CTL_EL2", 0b11, 0b100, 0b1110, 0b0101, 0b001>; + +// v8.4a "Virtualization debug state" registers +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"SDER32_EL2", 0b11, 0b100, 0b0001, 0b0011, 0b001>; + +// v8.4a RAS registers +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"ERXPFGCTL_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b101>; +def : RWSysReg<"ERXPFGCDN_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b110>; +def : RWSysReg<"ERXTS_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b111>; +def : RWSysReg<"ERXMISC2_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b010>; +def : RWSysReg<"ERXMISC3_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b011>; 
+def : ROSysReg<"ERXPFGF_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b100>; + +// v8.4a MPAM registers +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"MPAM0_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b001>; +def : RWSysReg<"MPAM1_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b000>; +def : RWSysReg<"MPAM2_EL2", 0b11, 0b100, 0b1010, 0b0101, 0b000>; +def : RWSysReg<"MPAM3_EL3", 0b11, 0b110, 0b1010, 0b0101, 0b000>; +def : RWSysReg<"MPAM1_EL12", 0b11, 0b101, 0b1010, 0b0101, 0b000>; +def : RWSysReg<"MPAMHCR_EL2", 0b11, 0b100, 0b1010, 0b0100, 0b000>; +def : RWSysReg<"MPAMVPMV_EL2", 0b11, 0b100, 0b1010, 0b0100, 0b001>; +def : RWSysReg<"MPAMVPM0_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b000>; +def : RWSysReg<"MPAMVPM1_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b001>; +def : RWSysReg<"MPAMVPM2_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b010>; +def : RWSysReg<"MPAMVPM3_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b011>; +def : RWSysReg<"MPAMVPM4_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b100>; +def : RWSysReg<"MPAMVPM5_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b101>; +def : RWSysReg<"MPAMVPM6_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b110>; +def : RWSysReg<"MPAMVPM7_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b111>; +def : ROSysReg<"MPAMIDR_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b100>; + +// v8.4a Activitiy monitor registers +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"AMCR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b000>; +def : ROSysReg<"AMCFGR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b001>; +def : ROSysReg<"AMCGCR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b010>; +def : RWSysReg<"AMUSERENR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b011>; +def : RWSysReg<"AMCNTENCLR0_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b100>; +def : RWSysReg<"AMCNTENSET0_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b101>; +def : RWSysReg<"AMEVCNTR00_EL0", 0b11, 0b011, 0b1101, 0b0100, 0b000>; +def : RWSysReg<"AMEVCNTR01_EL0", 0b11, 0b011, 0b1101, 0b0100, 0b001>; +def : RWSysReg<"AMEVCNTR02_EL0", 0b11, 0b011, 0b1101, 0b0100, 0b010>; +def : RWSysReg<"AMEVCNTR03_EL0", 0b11, 0b011, 0b1101, 0b0100, 0b011>; +def : ROSysReg<"AMEVTYPER00_EL0", 0b11, 0b011, 0b1101, 0b0110, 0b000>; +def : ROSysReg<"AMEVTYPER01_EL0", 0b11, 0b011, 0b1101, 0b0110, 0b001>; +def : ROSysReg<"AMEVTYPER02_EL0", 0b11, 0b011, 0b1101, 0b0110, 0b010>; +def : ROSysReg<"AMEVTYPER03_EL0", 0b11, 0b011, 0b1101, 0b0110, 0b011>; +def : RWSysReg<"AMCNTENCLR1_EL0", 0b11, 0b011, 0b1101, 0b0011, 0b000>; +def : RWSysReg<"AMCNTENSET1_EL0", 0b11, 0b011, 0b1101, 0b0011, 0b001>; +def : RWSysReg<"AMEVCNTR10_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b000>; +def : RWSysReg<"AMEVCNTR11_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b001>; +def : RWSysReg<"AMEVCNTR12_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b010>; +def : RWSysReg<"AMEVCNTR13_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b011>; +def : RWSysReg<"AMEVCNTR14_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b100>; +def : RWSysReg<"AMEVCNTR15_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b101>; +def : RWSysReg<"AMEVCNTR16_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b110>; +def : RWSysReg<"AMEVCNTR17_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b111>; +def : RWSysReg<"AMEVCNTR18_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b000>; +def : RWSysReg<"AMEVCNTR19_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b001>; +def : RWSysReg<"AMEVCNTR110_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b010>; +def : RWSysReg<"AMEVCNTR111_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b011>; +def : RWSysReg<"AMEVCNTR112_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b100>; +def : RWSysReg<"AMEVCNTR113_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b101>; +def : RWSysReg<"AMEVCNTR114_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b110>; +def : RWSysReg<"AMEVCNTR115_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b111>; +def : 
RWSysReg<"AMEVTYPER10_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b000>; +def : RWSysReg<"AMEVTYPER11_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b001>; +def : RWSysReg<"AMEVTYPER12_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b010>; +def : RWSysReg<"AMEVTYPER13_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b011>; +def : RWSysReg<"AMEVTYPER14_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b100>; +def : RWSysReg<"AMEVTYPER15_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b101>; +def : RWSysReg<"AMEVTYPER16_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b110>; +def : RWSysReg<"AMEVTYPER17_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b111>; +def : RWSysReg<"AMEVTYPER18_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b000>; +def : RWSysReg<"AMEVTYPER19_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b001>; +def : RWSysReg<"AMEVTYPER110_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b010>; +def : RWSysReg<"AMEVTYPER111_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b011>; +def : RWSysReg<"AMEVTYPER112_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b100>; +def : RWSysReg<"AMEVTYPER113_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b101>; +def : RWSysReg<"AMEVTYPER114_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b110>; +def : RWSysReg<"AMEVTYPER115_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b111>; + +// v8.4a Trace Extension registers +// +// Please note that the 8.4 spec also defines these registers: +// TRCIDR1, ID_DFR0_EL1, ID_AA64DFR0_EL1, MDSCR_EL1, MDCR_EL2, and MDCR_EL3, +// but they are already defined above. +// +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"TRFCR_EL1", 0b11, 0b000, 0b0001, 0b0010, 0b001>; +def : RWSysReg<"TRFCR_EL2", 0b11, 0b100, 0b0001, 0b0010, 0b001>; +def : RWSysReg<"TRFCR_EL12", 0b11, 0b101, 0b0001, 0b0010, 0b001>; + +// v8.4a Timining insensitivity of data processing instructions +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"DIT", 0b11, 0b011, 0b0100, 0b0010, 0b101>; + +// v8.4a Enhanced Support for Nested Virtualization +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"VNCR_EL2", 0b11, 0b100, 0b0010, 0b0010, 0b000>; + +} // HasV8_4aOps + +// SVE control registers +// Op0 Op1 CRn CRm Op2 +let Requires = [{ {AArch64::FeatureSVE} }] in { +def : RWSysReg<"ZCR_EL1", 0b11, 0b000, 0b0001, 0b0010, 0b000>; +def : RWSysReg<"ZCR_EL2", 0b11, 0b100, 0b0001, 0b0010, 0b000>; +def : RWSysReg<"ZCR_EL3", 0b11, 0b110, 0b0001, 0b0010, 0b000>; +def : RWSysReg<"ZCR_EL12", 0b11, 0b101, 0b0001, 0b0010, 0b000>; +} + +// Cyclone specific system registers +// Op0 Op1 CRn CRm Op2 +let Requires = [{ {AArch64::ProcCyclone} }] in +def : RWSysReg<"CPM_IOACC_CTL_EL3", 0b11, 0b111, 0b1111, 0b0010, 0b000>; diff --git a/capstone/suite/synctools/tablegen/AArch64/SVEInstrFormats.td b/capstone/suite/synctools/tablegen/AArch64/SVEInstrFormats.td new file mode 100644 index 000000000..7a8dd8bc5 --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/SVEInstrFormats.td @@ -0,0 +1,4456 @@ +//=-- SVEInstrFormats.td - AArch64 SVE Instruction classes -*- tablegen -*--=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// AArch64 Scalable Vector Extension (SVE) Instruction Class Definitions. 
+// +//===----------------------------------------------------------------------===// + +def SVEPatternOperand : AsmOperandClass { + let Name = "SVEPattern"; + let ParserMethod = "tryParseSVEPattern"; + let PredicateMethod = "isSVEPattern"; + let RenderMethod = "addImmOperands"; + let DiagnosticType = "InvalidSVEPattern"; +} + +def sve_pred_enum : Operand, ImmLeaf { + + let PrintMethod = "printSVEPattern"; + let ParserMatchClass = SVEPatternOperand; +} + +def SVEPrefetchOperand : AsmOperandClass { + let Name = "SVEPrefetch"; + let ParserMethod = "tryParsePrefetch"; + let PredicateMethod = "isPrefetch"; + let RenderMethod = "addPrefetchOperands"; +} + +def sve_prfop : Operand, ImmLeaf { + let PrintMethod = "printPrefetchOp"; + let ParserMatchClass = SVEPrefetchOperand; +} + +class SVELogicalImmOperand : AsmOperandClass { + let Name = "SVELogicalImm" # Width; + let DiagnosticType = "LogicalSecondSource"; + let PredicateMethod = "isLogicalImm"; + let RenderMethod = "addLogicalImmOperands"; +} + +def sve_logical_imm8 : Operand { + let ParserMatchClass = SVELogicalImmOperand<8>; + let PrintMethod = "printLogicalImm"; + + let MCOperandPredicate = [{ + if (!MCOp.isImm()) + return false; + int64_t Val = AArch64_AM::decodeLogicalImmediate(MCOp.getImm(), 64); + return AArch64_AM::isSVEMaskOfIdenticalElements(Val); + }]; +} + +def sve_logical_imm16 : Operand { + let ParserMatchClass = SVELogicalImmOperand<16>; + let PrintMethod = "printLogicalImm"; + + let MCOperandPredicate = [{ + if (!MCOp.isImm()) + return false; + int64_t Val = AArch64_AM::decodeLogicalImmediate(MCOp.getImm(), 64); + return AArch64_AM::isSVEMaskOfIdenticalElements(Val); + }]; +} + +def sve_logical_imm32 : Operand { + let ParserMatchClass = SVELogicalImmOperand<32>; + let PrintMethod = "printLogicalImm"; + + let MCOperandPredicate = [{ + if (!MCOp.isImm()) + return false; + int64_t Val = AArch64_AM::decodeLogicalImmediate(MCOp.getImm(), 64); + return AArch64_AM::isSVEMaskOfIdenticalElements(Val); + }]; +} + +class SVEPreferredLogicalImmOperand : AsmOperandClass { + let Name = "SVEPreferredLogicalImm" # Width; + let PredicateMethod = "isSVEPreferredLogicalImm"; + let RenderMethod = "addLogicalImmOperands"; +} + +def sve_preferred_logical_imm16 : Operand { + let ParserMatchClass = SVEPreferredLogicalImmOperand<16>; + let PrintMethod = "printSVELogicalImm"; + + let MCOperandPredicate = [{ + if (!MCOp.isImm()) + return false; + int64_t Val = AArch64_AM::decodeLogicalImmediate(MCOp.getImm(), 64); + return AArch64_AM::isSVEMaskOfIdenticalElements(Val) && + AArch64_AM::isSVEMoveMaskPreferredLogicalImmediate(Val); + }]; +} + +def sve_preferred_logical_imm32 : Operand { + let ParserMatchClass = SVEPreferredLogicalImmOperand<32>; + let PrintMethod = "printSVELogicalImm"; + + let MCOperandPredicate = [{ + if (!MCOp.isImm()) + return false; + int64_t Val = AArch64_AM::decodeLogicalImmediate(MCOp.getImm(), 64); + return AArch64_AM::isSVEMaskOfIdenticalElements(Val) && + AArch64_AM::isSVEMoveMaskPreferredLogicalImmediate(Val); + }]; +} + +def sve_preferred_logical_imm64 : Operand { + let ParserMatchClass = SVEPreferredLogicalImmOperand<64>; + let PrintMethod = "printSVELogicalImm"; + + let MCOperandPredicate = [{ + if (!MCOp.isImm()) + return false; + int64_t Val = AArch64_AM::decodeLogicalImmediate(MCOp.getImm(), 64); + return AArch64_AM::isSVEMaskOfIdenticalElements(Val) && + AArch64_AM::isSVEMoveMaskPreferredLogicalImmediate(Val); + }]; +} + +class SVELogicalImmNotOperand : AsmOperandClass { + let Name = "SVELogicalImm" # Width # "Not"; + 
let DiagnosticType = "LogicalSecondSource"; + let PredicateMethod = "isLogicalImm"; + let RenderMethod = "addLogicalImmNotOperands"; +} + +def sve_logical_imm8_not : Operand { + let ParserMatchClass = SVELogicalImmNotOperand<8>; +} + +def sve_logical_imm16_not : Operand { + let ParserMatchClass = SVELogicalImmNotOperand<16>; +} + +def sve_logical_imm32_not : Operand { + let ParserMatchClass = SVELogicalImmNotOperand<32>; +} + +class SVEShiftedImmOperand + : AsmOperandClass { + let Name = "SVE" # Infix # "Imm" # ElementWidth; + let DiagnosticType = "Invalid" # Name; + let RenderMethod = "addImmWithOptionalShiftOperands<8>"; + let ParserMethod = "tryParseImmWithOptionalShift"; + let PredicateMethod = Predicate; +} + +def SVECpyImmOperand8 : SVEShiftedImmOperand<8, "Cpy", "isSVECpyImm">; +def SVECpyImmOperand16 : SVEShiftedImmOperand<16, "Cpy", "isSVECpyImm">; +def SVECpyImmOperand32 : SVEShiftedImmOperand<32, "Cpy", "isSVECpyImm">; +def SVECpyImmOperand64 : SVEShiftedImmOperand<64, "Cpy", "isSVECpyImm">; + +def SVEAddSubImmOperand8 : SVEShiftedImmOperand<8, "AddSub", "isSVEAddSubImm">; +def SVEAddSubImmOperand16 : SVEShiftedImmOperand<16, "AddSub", "isSVEAddSubImm">; +def SVEAddSubImmOperand32 : SVEShiftedImmOperand<32, "AddSub", "isSVEAddSubImm">; +def SVEAddSubImmOperand64 : SVEShiftedImmOperand<64, "AddSub", "isSVEAddSubImm">; + +class imm8_opt_lsl + : Operand, ImmLeaf { + let EncoderMethod = "getImm8OptLsl"; + let DecoderMethod = "DecodeImm8OptLsl<" # ElementWidth # ">"; + let PrintMethod = "printImm8OptLsl<" # printType # ">"; + let ParserMatchClass = OpndClass; + let MIOperandInfo = (ops i32imm, i32imm); +} + +def cpy_imm8_opt_lsl_i8 : imm8_opt_lsl<8, "int8_t", SVECpyImmOperand8, [{ + return AArch64_AM::isSVECpyImm(Imm); +}]>; +def cpy_imm8_opt_lsl_i16 : imm8_opt_lsl<16, "int16_t", SVECpyImmOperand16, [{ + return AArch64_AM::isSVECpyImm(Imm); +}]>; +def cpy_imm8_opt_lsl_i32 : imm8_opt_lsl<32, "int32_t", SVECpyImmOperand32, [{ + return AArch64_AM::isSVECpyImm(Imm); +}]>; +def cpy_imm8_opt_lsl_i64 : imm8_opt_lsl<64, "int64_t", SVECpyImmOperand64, [{ + return AArch64_AM::isSVECpyImm(Imm); +}]>; + +def addsub_imm8_opt_lsl_i8 : imm8_opt_lsl<8, "uint8_t", SVEAddSubImmOperand8, [{ + return AArch64_AM::isSVEAddSubImm(Imm); +}]>; +def addsub_imm8_opt_lsl_i16 : imm8_opt_lsl<16, "uint16_t", SVEAddSubImmOperand16, [{ + return AArch64_AM::isSVEAddSubImm(Imm); +}]>; +def addsub_imm8_opt_lsl_i32 : imm8_opt_lsl<32, "uint32_t", SVEAddSubImmOperand32, [{ + return AArch64_AM::isSVEAddSubImm(Imm); +}]>; +def addsub_imm8_opt_lsl_i64 : imm8_opt_lsl<64, "uint64_t", SVEAddSubImmOperand64, [{ + return AArch64_AM::isSVEAddSubImm(Imm); +}]>; + +class SVEExactFPImm : AsmOperandClass { + let Name = "SVEExactFPImmOperand" # Suffix; + let DiagnosticType = "Invalid" # Name; + let ParserMethod = "tryParseFPImm"; + let PredicateMethod = "isExactFPImm<" # ValA # ", " # ValB # ">"; + let RenderMethod = "addExactFPImmOperands<" # ValA # ", " # ValB # ">"; +} + +class SVEExactFPImmOperand : Operand { + let PrintMethod = "printExactFPImm<" # ValA # ", " # ValB # ">"; + let ParserMatchClass = SVEExactFPImm; +} + +def sve_fpimm_half_one + : SVEExactFPImmOperand<"HalfOne", "AArch64ExactFPImm::half", + "AArch64ExactFPImm::one">; +def sve_fpimm_half_two + : SVEExactFPImmOperand<"HalfTwo", "AArch64ExactFPImm::half", + "AArch64ExactFPImm::two">; +def sve_fpimm_zero_one + : SVEExactFPImmOperand<"ZeroOne", "AArch64ExactFPImm::zero", + "AArch64ExactFPImm::one">; + +def sve_incdec_imm : Operand, ImmLeaf 0) && (((uint32_t)Imm) < 17); 
+}]> { + let ParserMatchClass = Imm1_16Operand; + let EncoderMethod = "getSVEIncDecImm"; + let DecoderMethod = "DecodeSVEIncDecImm"; +} + +//===----------------------------------------------------------------------===// +// SVE PTrue - These are used extensively throughout the pattern matching so +// it's important we define them first. +//===----------------------------------------------------------------------===// + +class sve_int_ptrue sz8_64, bits<3> opc, string asm, PPRRegOp pprty> +: I<(outs pprty:$Pd), (ins sve_pred_enum:$pattern), + asm, "\t$Pd, $pattern", + "", + []>, Sched<[]> { + bits<4> Pd; + bits<5> pattern; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = sz8_64; + let Inst{21-19} = 0b011; + let Inst{18-17} = opc{2-1}; + let Inst{16} = opc{0}; + let Inst{15-10} = 0b111000; + let Inst{9-5} = pattern; + let Inst{4} = 0b0; + let Inst{3-0} = Pd; + + let Defs = !if(!eq (opc{0}, 1), [NZCV], []); +} + +multiclass sve_int_ptrue opc, string asm> { + def _B : sve_int_ptrue<0b00, opc, asm, PPR8>; + def _H : sve_int_ptrue<0b01, opc, asm, PPR16>; + def _S : sve_int_ptrue<0b10, opc, asm, PPR32>; + def _D : sve_int_ptrue<0b11, opc, asm, PPR64>; + + def : InstAlias(NAME # _B) PPR8:$Pd, 0b11111), 1>; + def : InstAlias(NAME # _H) PPR16:$Pd, 0b11111), 1>; + def : InstAlias(NAME # _S) PPR32:$Pd, 0b11111), 1>; + def : InstAlias(NAME # _D) PPR64:$Pd, 0b11111), 1>; +} + +let Predicates = [HasSVE] in { + defm PTRUE : sve_int_ptrue<0b000, "ptrue">; + defm PTRUES : sve_int_ptrue<0b001, "ptrues">; +} + + +//===----------------------------------------------------------------------===// +// SVE Predicate Misc Group +//===----------------------------------------------------------------------===// + +class sve_int_pfalse opc, string asm> +: I<(outs PPR8:$Pd), (ins), + asm, "\t$Pd", + "", + []>, Sched<[]> { + bits<4> Pd; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = opc{5-4}; + let Inst{21-19} = 0b011; + let Inst{18-16} = opc{3-1}; + let Inst{15-10} = 0b111001; + let Inst{9} = opc{0}; + let Inst{8-4} = 0b00000; + let Inst{3-0} = Pd; +} + +class sve_int_ptest opc, string asm> +: I<(outs), (ins PPRAny:$Pg, PPR8:$Pn), + asm, "\t$Pg, $Pn", + "", + []>, Sched<[]> { + bits<4> Pg; + bits<4> Pn; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = opc{5-4}; + let Inst{21-19} = 0b010; + let Inst{18-16} = opc{3-1}; + let Inst{15-14} = 0b11; + let Inst{13-10} = Pg; + let Inst{9} = opc{0}; + let Inst{8-5} = Pn; + let Inst{4-0} = 0b00000; + + let Defs = [NZCV]; +} + +class sve_int_pfirst_next sz8_64, bits<5> opc, string asm, + PPRRegOp pprty> +: I<(outs pprty:$Pdn), (ins PPRAny:$Pg, pprty:$_Pdn), + asm, "\t$Pdn, $Pg, $_Pdn", + "", + []>, Sched<[]> { + bits<4> Pdn; + bits<4> Pg; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = sz8_64; + let Inst{21-19} = 0b011; + let Inst{18-16} = opc{4-2}; + let Inst{15-11} = 0b11000; + let Inst{10-9} = opc{1-0}; + let Inst{8-5} = Pg; + let Inst{4} = 0; + let Inst{3-0} = Pdn; + + let Constraints = "$Pdn = $_Pdn"; + let Defs = [NZCV]; +} + +multiclass sve_int_pfirst opc, string asm> { + def : sve_int_pfirst_next<0b01, opc, asm, PPR8>; +} + +multiclass sve_int_pnext opc, string asm> { + def _B : sve_int_pfirst_next<0b00, opc, asm, PPR8>; + def _H : sve_int_pfirst_next<0b01, opc, asm, PPR16>; + def _S : sve_int_pfirst_next<0b10, opc, asm, PPR32>; + def _D : sve_int_pfirst_next<0b11, opc, asm, PPR64>; +} + +//===----------------------------------------------------------------------===// +// SVE Predicate Count Group 
+//===----------------------------------------------------------------------===// + +class sve_int_count_r sz8_64, bits<5> opc, string asm, + RegisterOperand dty, PPRRegOp pprty, RegisterOperand sty> +: I<(outs dty:$Rdn), (ins pprty:$Pg, sty:$_Rdn), + asm, "\t$Rdn, $Pg", + "", + []>, Sched<[]> { + bits<5> Rdn; + bits<4> Pg; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = sz8_64; + let Inst{21-19} = 0b101; + let Inst{18-16} = opc{4-2}; + let Inst{15-11} = 0b10001; + let Inst{10-9} = opc{1-0}; + let Inst{8-5} = Pg; + let Inst{4-0} = Rdn; + + // Signed 32bit forms require their GPR operand printed. + let AsmString = !if(!eq(opc{4,2-0}, 0b0000), + !strconcat(asm, "\t$Rdn, $Pg, $_Rdn"), + !strconcat(asm, "\t$Rdn, $Pg")); + let Constraints = "$Rdn = $_Rdn"; +} + +multiclass sve_int_count_r_s32 opc, string asm> { + def _B : sve_int_count_r<0b00, opc, asm, GPR64z, PPR8, GPR64as32>; + def _H : sve_int_count_r<0b01, opc, asm, GPR64z, PPR16, GPR64as32>; + def _S : sve_int_count_r<0b10, opc, asm, GPR64z, PPR32, GPR64as32>; + def _D : sve_int_count_r<0b11, opc, asm, GPR64z, PPR64, GPR64as32>; +} + +multiclass sve_int_count_r_u32 opc, string asm> { + def _B : sve_int_count_r<0b00, opc, asm, GPR32z, PPR8, GPR32z>; + def _H : sve_int_count_r<0b01, opc, asm, GPR32z, PPR16, GPR32z>; + def _S : sve_int_count_r<0b10, opc, asm, GPR32z, PPR32, GPR32z>; + def _D : sve_int_count_r<0b11, opc, asm, GPR32z, PPR64, GPR32z>; +} + +multiclass sve_int_count_r_x64 opc, string asm> { + def _B : sve_int_count_r<0b00, opc, asm, GPR64z, PPR8, GPR64z>; + def _H : sve_int_count_r<0b01, opc, asm, GPR64z, PPR16, GPR64z>; + def _S : sve_int_count_r<0b10, opc, asm, GPR64z, PPR32, GPR64z>; + def _D : sve_int_count_r<0b11, opc, asm, GPR64z, PPR64, GPR64z>; +} + +class sve_int_count_v sz8_64, bits<5> opc, string asm, + ZPRRegOp zprty> +: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, PPRAny:$Pg), + asm, "\t$Zdn, $Pg", + "", + []>, Sched<[]> { + bits<4> Pg; + bits<5> Zdn; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = sz8_64; + let Inst{21-19} = 0b101; + let Inst{18-16} = opc{4-2}; + let Inst{15-11} = 0b10000; + let Inst{10-9} = opc{1-0}; + let Inst{8-5} = Pg; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve_int_count_v opc, string asm> { + def _H : sve_int_count_v<0b01, opc, asm, ZPR16>; + def _S : sve_int_count_v<0b10, opc, asm, ZPR32>; + def _D : sve_int_count_v<0b11, opc, asm, ZPR64>; +} + +class sve_int_pcount_pred sz8_64, bits<4> opc, string asm, + PPRRegOp pprty> +: I<(outs GPR64:$Rd), (ins PPRAny:$Pg, pprty:$Pn), + asm, "\t$Rd, $Pg, $Pn", + "", + []>, Sched<[]> { + bits<4> Pg; + bits<4> Pn; + bits<5> Rd; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = sz8_64; + let Inst{21-19} = 0b100; + let Inst{18-16} = opc{3-1}; + let Inst{15-14} = 0b10; + let Inst{13-10} = Pg; + let Inst{9} = opc{0}; + let Inst{8-5} = Pn; + let Inst{4-0} = Rd; +} + +multiclass sve_int_pcount_pred opc, string asm> { + def _B : sve_int_pcount_pred<0b00, opc, asm, PPR8>; + def _H : sve_int_pcount_pred<0b01, opc, asm, PPR16>; + def _S : sve_int_pcount_pred<0b10, opc, asm, PPR32>; + def _D : sve_int_pcount_pred<0b11, opc, asm, PPR64>; +} + +//===----------------------------------------------------------------------===// +// SVE Element Count Group +//===----------------------------------------------------------------------===// + +class sve_int_count opc, string asm> +: I<(outs GPR64:$Rd), (ins sve_pred_enum:$pattern, sve_incdec_imm:$imm4), + asm, 
"\t$Rd, $pattern, mul $imm4", + "", + []>, Sched<[]> { + bits<5> Rd; + bits<4> imm4; + bits<5> pattern; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = opc{2-1}; + let Inst{21-20} = 0b10; + let Inst{19-16} = imm4; + let Inst{15-11} = 0b11100; + let Inst{10} = opc{0}; + let Inst{9-5} = pattern; + let Inst{4-0} = Rd; +} + +multiclass sve_int_count opc, string asm> { + def NAME : sve_int_count; + + def : InstAlias(NAME) GPR64:$Rd, sve_pred_enum:$pattern, 1), 1>; + def : InstAlias(NAME) GPR64:$Rd, 0b11111, 1), 2>; +} + +class sve_int_countvlv opc, string asm, ZPRRegOp zprty> +: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, sve_pred_enum:$pattern, sve_incdec_imm:$imm4), + asm, "\t$Zdn, $pattern, mul $imm4", + "", + []>, Sched<[]> { + bits<5> Zdn; + bits<5> pattern; + bits<4> imm4; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = opc{4-3}; + let Inst{21} = 0b1; + let Inst{20} = opc{2}; + let Inst{19-16} = imm4; + let Inst{15-12} = 0b1100; + let Inst{11-10} = opc{1-0}; + let Inst{9-5} = pattern; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve_int_countvlv opc, string asm, ZPRRegOp zprty> { + def NAME : sve_int_countvlv; + + def : InstAlias(NAME) zprty:$Zdn, sve_pred_enum:$pattern, 1), 1>; + def : InstAlias(NAME) zprty:$Zdn, 0b11111, 1), 2>; +} + +class sve_int_pred_pattern_a opc, string asm> +: I<(outs GPR64:$Rdn), (ins GPR64:$_Rdn, sve_pred_enum:$pattern, sve_incdec_imm:$imm4), + asm, "\t$Rdn, $pattern, mul $imm4", + "", + []>, Sched<[]> { + bits<5> Rdn; + bits<5> pattern; + bits<4> imm4; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = opc{2-1}; + let Inst{21-20} = 0b11; + let Inst{19-16} = imm4; + let Inst{15-11} = 0b11100; + let Inst{10} = opc{0}; + let Inst{9-5} = pattern; + let Inst{4-0} = Rdn; + + let Constraints = "$Rdn = $_Rdn"; +} + +multiclass sve_int_pred_pattern_a opc, string asm> { + def NAME : sve_int_pred_pattern_a; + + def : InstAlias(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, 1), 1>; + def : InstAlias(NAME) GPR64:$Rdn, 0b11111, 1), 2>; +} + +class sve_int_pred_pattern_b opc, string asm, RegisterOperand dt, + RegisterOperand st> +: I<(outs dt:$Rdn), (ins st:$_Rdn, sve_pred_enum:$pattern, sve_incdec_imm:$imm4), + asm, "\t$Rdn, $pattern, mul $imm4", + "", + []>, Sched<[]> { + bits<5> Rdn; + bits<5> pattern; + bits<4> imm4; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = opc{4-3}; + let Inst{21} = 0b1; + let Inst{20} = opc{2}; + let Inst{19-16} = imm4; + let Inst{15-12} = 0b1111; + let Inst{11-10} = opc{1-0}; + let Inst{9-5} = pattern; + let Inst{4-0} = Rdn; + + // Signed 32bit forms require their GPR operand printed. 
+ let AsmString = !if(!eq(opc{2,0}, 0b00), + !strconcat(asm, "\t$Rdn, $_Rdn, $pattern, mul $imm4"), + !strconcat(asm, "\t$Rdn, $pattern, mul $imm4")); + + let Constraints = "$Rdn = $_Rdn"; +} + +multiclass sve_int_pred_pattern_b_s32 opc, string asm> { + def NAME : sve_int_pred_pattern_b; + + def : InstAlias(NAME) GPR64z:$Rd, GPR64as32:$Rn, sve_pred_enum:$pattern, 1), 1>; + def : InstAlias(NAME) GPR64z:$Rd, GPR64as32:$Rn, 0b11111, 1), 2>; +} + +multiclass sve_int_pred_pattern_b_u32 opc, string asm> { + def NAME : sve_int_pred_pattern_b; + + def : InstAlias(NAME) GPR32z:$Rdn, sve_pred_enum:$pattern, 1), 1>; + def : InstAlias(NAME) GPR32z:$Rdn, 0b11111, 1), 2>; +} + +multiclass sve_int_pred_pattern_b_x64 opc, string asm> { + def NAME : sve_int_pred_pattern_b; + + def : InstAlias(NAME) GPR64z:$Rdn, sve_pred_enum:$pattern, 1), 1>; + def : InstAlias(NAME) GPR64z:$Rdn, 0b11111, 1), 2>; +} + + +//===----------------------------------------------------------------------===// +// SVE Permute - Cross Lane Group +//===----------------------------------------------------------------------===// + +class sve_int_perm_dup_r sz8_64, string asm, ZPRRegOp zprty, + RegisterClass srcRegType> +: I<(outs zprty:$Zd), (ins srcRegType:$Rn), + asm, "\t$Zd, $Rn", + "", + []>, Sched<[]> { + bits<5> Rn; + bits<5> Zd; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-10} = 0b100000001110; + let Inst{9-5} = Rn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_perm_dup_r { + def _B : sve_int_perm_dup_r<0b00, asm, ZPR8, GPR32sp>; + def _H : sve_int_perm_dup_r<0b01, asm, ZPR16, GPR32sp>; + def _S : sve_int_perm_dup_r<0b10, asm, ZPR32, GPR32sp>; + def _D : sve_int_perm_dup_r<0b11, asm, ZPR64, GPR64sp>; + + def : InstAlias<"mov $Zd, $Rn", + (!cast(NAME # _B) ZPR8:$Zd, GPR32sp:$Rn), 1>; + def : InstAlias<"mov $Zd, $Rn", + (!cast(NAME # _H) ZPR16:$Zd, GPR32sp:$Rn), 1>; + def : InstAlias<"mov $Zd, $Rn", + (!cast(NAME # _S) ZPR32:$Zd, GPR32sp:$Rn), 1>; + def : InstAlias<"mov $Zd, $Rn", + (!cast(NAME # _D) ZPR64:$Zd, GPR64sp:$Rn), 1>; +} + +class sve_int_perm_dup_i tsz, Operand immtype, string asm, + ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins zprty:$Zn, immtype:$idx), + asm, "\t$Zd, $Zn$idx", + "", + []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + bits<7> idx; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = {?,?}; // imm3h + let Inst{21} = 0b1; + let Inst{20-16} = tsz; + let Inst{15-10} = 0b001000; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_perm_dup_i { + def _B : sve_int_perm_dup_i<{?,?,?,?,1}, sve_elm_idx_extdup_b, asm, ZPR8> { + let Inst{23-22} = idx{5-4}; + let Inst{20-17} = idx{3-0}; + } + def _H : sve_int_perm_dup_i<{?,?,?,1,0}, sve_elm_idx_extdup_h, asm, ZPR16> { + let Inst{23-22} = idx{4-3}; + let Inst{20-18} = idx{2-0}; + } + def _S : sve_int_perm_dup_i<{?,?,1,0,0}, sve_elm_idx_extdup_s, asm, ZPR32> { + let Inst{23-22} = idx{3-2}; + let Inst{20-19} = idx{1-0}; + } + def _D : sve_int_perm_dup_i<{?,1,0,0,0}, sve_elm_idx_extdup_d, asm, ZPR64> { + let Inst{23-22} = idx{2-1}; + let Inst{20} = idx{0}; + } + def _Q : sve_int_perm_dup_i<{1,0,0,0,0}, sve_elm_idx_extdup_q, asm, ZPR128> { + let Inst{23-22} = idx{1-0}; + } + + def : InstAlias<"mov $Zd, $Zn$idx", + (!cast(NAME # _B) ZPR8:$Zd, ZPR8:$Zn, sve_elm_idx_extdup_b:$idx), 1>; + def : InstAlias<"mov $Zd, $Zn$idx", + (!cast(NAME # _H) ZPR16:$Zd, ZPR16:$Zn, sve_elm_idx_extdup_h:$idx), 1>; + def : InstAlias<"mov $Zd, $Zn$idx", + (!cast(NAME # _S) ZPR32:$Zd, ZPR32:$Zn, sve_elm_idx_extdup_s:$idx), 1>; + def : InstAlias<"mov $Zd, 
$Zn$idx", + (!cast(NAME # _D) ZPR64:$Zd, ZPR64:$Zn, sve_elm_idx_extdup_d:$idx), 1>; + def : InstAlias<"mov $Zd, $Zn$idx", + (!cast(NAME # _Q) ZPR128:$Zd, ZPR128:$Zn, sve_elm_idx_extdup_q:$idx), 1>; + def : InstAlias<"mov $Zd, $Bn", + (!cast(NAME # _B) ZPR8:$Zd, FPR8asZPR:$Bn, 0), 2>; + def : InstAlias<"mov $Zd, $Hn", + (!cast(NAME # _H) ZPR16:$Zd, FPR16asZPR:$Hn, 0), 2>; + def : InstAlias<"mov $Zd, $Sn", + (!cast(NAME # _S) ZPR32:$Zd, FPR32asZPR:$Sn, 0), 2>; + def : InstAlias<"mov $Zd, $Dn", + (!cast(NAME # _D) ZPR64:$Zd, FPR64asZPR:$Dn, 0), 2>; + def : InstAlias<"mov $Zd, $Qn", + (!cast(NAME # _Q) ZPR128:$Zd, FPR128asZPR:$Qn, 0), 2>; +} + +class sve_int_perm_tbl sz8_64, string asm, ZPRRegOp zprty, + RegisterOperand VecList> +: I<(outs zprty:$Zd), (ins VecList:$Zn, zprty:$Zm), + asm, "\t$Zd, $Zn, $Zm", + "", + []>, Sched<[]> { + bits<5> Zd; + bits<5> Zm; + bits<5> Zn; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{15-10} = 0b001100; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_perm_tbl { + def _B : sve_int_perm_tbl<0b00, asm, ZPR8, Z_b>; + def _H : sve_int_perm_tbl<0b01, asm, ZPR16, Z_h>; + def _S : sve_int_perm_tbl<0b10, asm, ZPR32, Z_s>; + def _D : sve_int_perm_tbl<0b11, asm, ZPR64, Z_d>; + + def : InstAlias(NAME # _B) ZPR8:$Zd, ZPR8:$Zn, ZPR8:$Zm), 0>; + def : InstAlias(NAME # _H) ZPR16:$Zd, ZPR16:$Zn, ZPR16:$Zm), 0>; + def : InstAlias(NAME # _S) ZPR32:$Zd, ZPR32:$Zn, ZPR32:$Zm), 0>; + def : InstAlias(NAME # _D) ZPR64:$Zd, ZPR64:$Zn, ZPR64:$Zm), 0>; +} + +class sve_int_perm_reverse_z sz8_64, string asm, ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins zprty:$Zn), + asm, "\t$Zd, $Zn", + "", + []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-10} = 0b111000001110; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_perm_reverse_z { + def _B : sve_int_perm_reverse_z<0b00, asm, ZPR8>; + def _H : sve_int_perm_reverse_z<0b01, asm, ZPR16>; + def _S : sve_int_perm_reverse_z<0b10, asm, ZPR32>; + def _D : sve_int_perm_reverse_z<0b11, asm, ZPR64>; +} + +class sve_int_perm_reverse_p sz8_64, string asm, PPRRegOp pprty> +: I<(outs pprty:$Pd), (ins pprty:$Pn), + asm, "\t$Pd, $Pn", + "", + []>, Sched<[]> { + bits<4> Pd; + bits<4> Pn; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-9} = 0b1101000100000; + let Inst{8-5} = Pn; + let Inst{4} = 0b0; + let Inst{3-0} = Pd; +} + +multiclass sve_int_perm_reverse_p { + def _B : sve_int_perm_reverse_p<0b00, asm, PPR8>; + def _H : sve_int_perm_reverse_p<0b01, asm, PPR16>; + def _S : sve_int_perm_reverse_p<0b10, asm, PPR32>; + def _D : sve_int_perm_reverse_p<0b11, asm, PPR64>; +} + +class sve_int_perm_unpk sz16_64, bits<2> opc, string asm, + ZPRRegOp zprty1, ZPRRegOp zprty2> +: I<(outs zprty1:$Zd), (ins zprty2:$Zn), + asm, "\t$Zd, $Zn", + "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz16_64; + let Inst{21-18} = 0b1100; + let Inst{17-16} = opc; + let Inst{15-10} = 0b001110; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_perm_unpk opc, string asm> { + def _H : sve_int_perm_unpk<0b01, opc, asm, ZPR16, ZPR8>; + def _S : sve_int_perm_unpk<0b10, opc, asm, ZPR32, ZPR16>; + def _D : sve_int_perm_unpk<0b11, opc, asm, ZPR64, ZPR32>; +} + +class sve_int_perm_insrs sz8_64, string asm, ZPRRegOp zprty, + RegisterClass srcRegType> +: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, srcRegType:$Rm), + asm, "\t$Zdn, 
$Rm", + "", + []>, Sched<[]> { + bits<5> Rm; + bits<5> Zdn; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-10} = 0b100100001110; + let Inst{9-5} = Rm; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve_int_perm_insrs { + def _B : sve_int_perm_insrs<0b00, asm, ZPR8, GPR32>; + def _H : sve_int_perm_insrs<0b01, asm, ZPR16, GPR32>; + def _S : sve_int_perm_insrs<0b10, asm, ZPR32, GPR32>; + def _D : sve_int_perm_insrs<0b11, asm, ZPR64, GPR64>; +} + +class sve_int_perm_insrv sz8_64, string asm, ZPRRegOp zprty, + RegisterClass srcRegType> +: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, srcRegType:$Vm), + asm, "\t$Zdn, $Vm", + "", + []>, Sched<[]> { + bits<5> Vm; + bits<5> Zdn; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-10} = 0b110100001110; + let Inst{9-5} = Vm; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve_int_perm_insrv { + def _B : sve_int_perm_insrv<0b00, asm, ZPR8, FPR8>; + def _H : sve_int_perm_insrv<0b01, asm, ZPR16, FPR16>; + def _S : sve_int_perm_insrv<0b10, asm, ZPR32, FPR32>; + def _D : sve_int_perm_insrv<0b11, asm, ZPR64, FPR64>; +} + +//===----------------------------------------------------------------------===// +// SVE Permute - Extract Group +//===----------------------------------------------------------------------===// + +class sve_int_perm_extract_i +: I<(outs ZPR8:$Zdn), (ins ZPR8:$_Zdn, ZPR8:$Zm, imm0_255:$imm8), + asm, "\t$Zdn, $_Zdn, $Zm, $imm8", + "", []>, Sched<[]> { + bits<5> Zdn; + bits<5> Zm; + bits<8> imm8; + let Inst{31-21} = 0b00000101001; + let Inst{20-16} = imm8{7-3}; + let Inst{15-13} = 0b000; + let Inst{12-10} = imm8{2-0}; + let Inst{9-5} = Zm; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +//===----------------------------------------------------------------------===// +// SVE Vector Select Group +//===----------------------------------------------------------------------===// + +class sve_int_sel_vvv sz8_64, string asm, ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins PPRAny:$Pg, zprty:$Zn, zprty:$Zm), + asm, "\t$Zd, $Pg, $Zn, $Zm", + "", + []>, Sched<[]> { + bits<4> Pg; + bits<5> Zd; + bits<5> Zm; + bits<5> Zn; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{15-14} = 0b11; + let Inst{13-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_sel_vvv { + def _B : sve_int_sel_vvv<0b00, asm, ZPR8>; + def _H : sve_int_sel_vvv<0b01, asm, ZPR16>; + def _S : sve_int_sel_vvv<0b10, asm, ZPR32>; + def _D : sve_int_sel_vvv<0b11, asm, ZPR64>; + + def : InstAlias<"mov $Zd, $Pg/m, $Zn", + (!cast(NAME # _B) ZPR8:$Zd, PPRAny:$Pg, ZPR8:$Zn, ZPR8:$Zd), 1>; + def : InstAlias<"mov $Zd, $Pg/m, $Zn", + (!cast(NAME # _H) ZPR16:$Zd, PPRAny:$Pg, ZPR16:$Zn, ZPR16:$Zd), 1>; + def : InstAlias<"mov $Zd, $Pg/m, $Zn", + (!cast(NAME # _S) ZPR32:$Zd, PPRAny:$Pg, ZPR32:$Zn, ZPR32:$Zd), 1>; + def : InstAlias<"mov $Zd, $Pg/m, $Zn", + (!cast(NAME # _D) ZPR64:$Zd, PPRAny:$Pg, ZPR64:$Zn, ZPR64:$Zd), 1>; +} + + +//===----------------------------------------------------------------------===// +// SVE Predicate Logical Operations Group +//===----------------------------------------------------------------------===// + +class sve_int_pred_log opc, 
string asm> +: I<(outs PPR8:$Pd), (ins PPRAny:$Pg, PPR8:$Pn, PPR8:$Pm), + asm, "\t$Pd, $Pg/z, $Pn, $Pm", + "", + []>, Sched<[]> { + bits<4> Pd; + bits<4> Pg; + bits<4> Pm; + bits<4> Pn; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = opc{3-2}; + let Inst{21-20} = 0b00; + let Inst{19-16} = Pm; + let Inst{15-14} = 0b01; + let Inst{13-10} = Pg; + let Inst{9} = opc{1}; + let Inst{8-5} = Pn; + let Inst{4} = opc{0}; + let Inst{3-0} = Pd; + + // SEL has no predication qualifier. + let AsmString = !if(!eq(opc, 0b0011), + !strconcat(asm, "\t$Pd, $Pg, $Pn, $Pm"), + !strconcat(asm, "\t$Pd, $Pg/z, $Pn, $Pm")); + + let Defs = !if(!eq (opc{2}, 1), [NZCV], []); +} + + +//===----------------------------------------------------------------------===// +// SVE Logical Mask Immediate Group +//===----------------------------------------------------------------------===// + +class sve_int_log_imm opc, string asm> +: I<(outs ZPR64:$Zdn), (ins ZPR64:$_Zdn, logical_imm64:$imms13), + asm, "\t$Zdn, $_Zdn, $imms13", + "", []>, Sched<[]> { + bits<5> Zdn; + bits<13> imms13; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = opc; + let Inst{21-18} = 0b0000; + let Inst{17-5} = imms13; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DecoderMethod = "DecodeSVELogicalImmInstruction"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve_int_log_imm opc, string asm, string alias> { + def NAME : sve_int_log_imm; + + def : InstAlias(NAME) ZPR8:$Zdn, sve_logical_imm8:$imm), 4>; + def : InstAlias(NAME) ZPR16:$Zdn, sve_logical_imm16:$imm), 3>; + def : InstAlias(NAME) ZPR32:$Zdn, sve_logical_imm32:$imm), 2>; + + def : InstAlias(NAME) ZPR8:$Zdn, sve_logical_imm8_not:$imm), 0>; + def : InstAlias(NAME) ZPR16:$Zdn, sve_logical_imm16_not:$imm), 0>; + def : InstAlias(NAME) ZPR32:$Zdn, sve_logical_imm32_not:$imm), 0>; + def : InstAlias(NAME) ZPR64:$Zdn, logical_imm64_not:$imm), 0>; +} + +class sve_int_dup_mask_imm +: I<(outs ZPR64:$Zd), (ins logical_imm64:$imms), + asm, "\t$Zd, $imms", + "", + []>, Sched<[]> { + bits<5> Zd; + bits<13> imms; + let Inst{31-18} = 0b00000101110000; + let Inst{17-5} = imms; + let Inst{4-0} = Zd; + + let isReMaterializable = 1; + let DecoderMethod = "DecodeSVELogicalImmInstruction"; +} + +multiclass sve_int_dup_mask_imm { + def NAME : sve_int_dup_mask_imm; + + def : InstAlias<"dupm $Zd, $imm", + (!cast(NAME) ZPR8:$Zd, sve_logical_imm8:$imm), 4>; + def : InstAlias<"dupm $Zd, $imm", + (!cast(NAME) ZPR16:$Zd, sve_logical_imm16:$imm), 3>; + def : InstAlias<"dupm $Zd, $imm", + (!cast(NAME) ZPR32:$Zd, sve_logical_imm32:$imm), 2>; + + // All Zd.b forms have a CPY/DUP equivalent, hence no byte alias here. + def : InstAlias<"mov $Zd, $imm", + (!cast(NAME) ZPR16:$Zd, sve_preferred_logical_imm16:$imm), 7>; + def : InstAlias<"mov $Zd, $imm", + (!cast(NAME) ZPR32:$Zd, sve_preferred_logical_imm32:$imm), 6>; + def : InstAlias<"mov $Zd, $imm", + (!cast(NAME) ZPR64:$Zd, sve_preferred_logical_imm64:$imm), 5>; +} + +//===----------------------------------------------------------------------===// +// SVE Integer Arithmetic - Unpredicated Group. 
+//===----------------------------------------------------------------------===// + +class sve_int_bin_cons_arit_0 sz8_64, bits<3> opc, string asm, + ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm), + asm, "\t$Zd, $Zn, $Zm", + "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Zm; + bits<5> Zn; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{15-13} = 0b000; + let Inst{12-10} = opc; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_bin_cons_arit_0 opc, string asm> { + def _B : sve_int_bin_cons_arit_0<0b00, opc, asm, ZPR8>; + def _H : sve_int_bin_cons_arit_0<0b01, opc, asm, ZPR16>; + def _S : sve_int_bin_cons_arit_0<0b10, opc, asm, ZPR32>; + def _D : sve_int_bin_cons_arit_0<0b11, opc, asm, ZPR64>; +} + +//===----------------------------------------------------------------------===// +// SVE Floating Point Arithmetic - Predicated Group +//===----------------------------------------------------------------------===// + +class sve_fp_2op_i_p_zds sz, bits<3> opc, string asm, + ZPRRegOp zprty, + Operand imm_ty> +: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, imm_ty:$i1), + asm, "\t$Zdn, $Pg/m, $_Zdn, $i1", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zdn; + bit i1; + let Inst{31-24} = 0b01100101; + let Inst{23-22} = sz; + let Inst{21-19} = 0b011; + let Inst{18-16} = opc; + let Inst{15-13} = 0b100; + let Inst{12-10} = Pg; + let Inst{9-6} = 0b0000; + let Inst{5} = i1; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_fp_2op_i_p_zds opc, string asm, Operand imm_ty> { + def _H : sve_fp_2op_i_p_zds<0b01, opc, asm, ZPR16, imm_ty>; + def _S : sve_fp_2op_i_p_zds<0b10, opc, asm, ZPR32, imm_ty>; + def _D : sve_fp_2op_i_p_zds<0b11, opc, asm, ZPR64, imm_ty>; +} + +class sve_fp_2op_p_zds sz, bits<4> opc, string asm, + ZPRRegOp zprty> +: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm), + asm, "\t$Zdn, $Pg/m, $_Zdn, $Zm", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zdn; + bits<5> Zm; + let Inst{31-24} = 0b01100101; + let Inst{23-22} = sz; + let Inst{21-20} = 0b00; + let Inst{19-16} = opc; + let Inst{15-13} = 0b100; + let Inst{12-10} = Pg; + let Inst{9-5} = Zm; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_fp_2op_p_zds opc, string asm> { + def _H : sve_fp_2op_p_zds<0b01, opc, asm, ZPR16>; + def _S : sve_fp_2op_p_zds<0b10, opc, asm, ZPR32>; + def _D : sve_fp_2op_p_zds<0b11, opc, asm, ZPR64>; +} + +class sve_fp_ftmad sz, string asm, ZPRRegOp zprty> +: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, imm0_7:$imm3), + asm, "\t$Zdn, $_Zdn, $Zm, $imm3", + "", + []>, Sched<[]> { + bits<5> Zdn; + bits<5> Zm; + bits<3> imm3; + let Inst{31-24} = 0b01100101; + let Inst{23-22} = sz; + let Inst{21-19} = 0b010; + let Inst{18-16} = imm3; + let Inst{15-10} = 0b100000; + let Inst{9-5} = Zm; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve_fp_ftmad { + def _H : sve_fp_ftmad<0b01, asm, ZPR16>; + def _S : sve_fp_ftmad<0b10, asm, ZPR32>; + def _D : sve_fp_ftmad<0b11, asm, ZPR64>; +} + + +//===----------------------------------------------------------------------===// +// SVE Floating Point Arithmetic - Unpredicated Group 
+//===----------------------------------------------------------------------===// + +class sve_fp_3op_u_zd sz, bits<3> opc, string asm, + ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm), + asm, "\t$Zd, $Zn, $Zm", + "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Zm; + bits<5> Zn; + let Inst{31-24} = 0b01100101; + let Inst{23-22} = sz; + let Inst{21} = 0b0; + let Inst{20-16} = Zm; + let Inst{15-13} = 0b000; + let Inst{12-10} = opc; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve_fp_3op_u_zd opc, string asm> { + def _H : sve_fp_3op_u_zd<0b01, opc, asm, ZPR16>; + def _S : sve_fp_3op_u_zd<0b10, opc, asm, ZPR32>; + def _D : sve_fp_3op_u_zd<0b11, opc, asm, ZPR64>; +} + +//===----------------------------------------------------------------------===// +// SVE Floating Point Fused Multiply-Add Group +//===----------------------------------------------------------------------===// + +class sve_fp_3op_p_zds_a sz, bits<2> opc, string asm, ZPRRegOp zprty> +: I<(outs zprty:$Zda), (ins PPR3bAny:$Pg, zprty:$_Zda, zprty:$Zn, zprty:$Zm), + asm, "\t$Zda, $Pg/m, $Zn, $Zm", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zda; + bits<5> Zm; + bits<5> Zn; + let Inst{31-24} = 0b01100101; + let Inst{23-22} = sz; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{15} = 0b0; + let Inst{14-13} = opc; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Zda; + + let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_fp_3op_p_zds_a opc, string asm> { + def _H : sve_fp_3op_p_zds_a<0b01, opc, asm, ZPR16>; + def _S : sve_fp_3op_p_zds_a<0b10, opc, asm, ZPR32>; + def _D : sve_fp_3op_p_zds_a<0b11, opc, asm, ZPR64>; +} + +class sve_fp_3op_p_zds_b sz, bits<2> opc, string asm, + ZPRRegOp zprty> +: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm, zprty:$Za), + asm, "\t$Zdn, $Pg/m, $Zm, $Za", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Za; + bits<5> Zdn; + bits<5> Zm; + let Inst{31-24} = 0b01100101; + let Inst{23-22} = sz; + let Inst{21} = 0b1; + let Inst{20-16} = Za; + let Inst{15} = 0b1; + let Inst{14-13} = opc; + let Inst{12-10} = Pg; + let Inst{9-5} = Zm; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_fp_3op_p_zds_b opc, string asm> { + def _H : sve_fp_3op_p_zds_b<0b01, opc, asm, ZPR16>; + def _S : sve_fp_3op_p_zds_b<0b10, opc, asm, ZPR32>; + def _D : sve_fp_3op_p_zds_b<0b11, opc, asm, ZPR64>; +} + +//===----------------------------------------------------------------------===// +// SVE Floating Point Multiply-Add - Indexed Group +//===----------------------------------------------------------------------===// + +class sve_fp_fma_by_indexed_elem sz, bit opc, string asm, + ZPRRegOp zprty1, + ZPRRegOp zprty2, Operand itype> +: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty1:$Zn, zprty2:$Zm, itype:$iop), + asm, "\t$Zda, $Zn, $Zm$iop", "", []>, Sched<[]> { + bits<5> Zda; + bits<5> Zn; + let Inst{31-24} = 0b01100100; + let Inst{23-22} = sz; + let Inst{21} = 0b1; + let Inst{15-11} = 0; + let Inst{10} = opc; + let Inst{9-5} = Zn; + let Inst{4-0} = Zda; + + let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve_fp_fma_by_indexed_elem { + def _H : sve_fp_fma_by_indexed_elem<{0, ?}, opc, asm, ZPR16, ZPR3b16, VectorIndexH> { + bits<3> Zm; + bits<3> iop; + let Inst{22} = iop{2}; + let Inst{20-19} = 
iop{1-0}; + let Inst{18-16} = Zm; + } + def _S : sve_fp_fma_by_indexed_elem<0b10, opc, asm, ZPR32, ZPR3b32, VectorIndexS> { + bits<3> Zm; + bits<2> iop; + let Inst{20-19} = iop; + let Inst{18-16} = Zm; + } + def _D : sve_fp_fma_by_indexed_elem<0b11, opc, asm, ZPR64, ZPR4b64, VectorIndexD> { + bits<4> Zm; + bit iop; + let Inst{20} = iop; + let Inst{19-16} = Zm; + } +} + + +//===----------------------------------------------------------------------===// +// SVE Floating Point Multiply - Indexed Group +//===----------------------------------------------------------------------===// + +class sve_fp_fmul_by_indexed_elem sz, string asm, ZPRRegOp zprty, + ZPRRegOp zprty2, Operand itype> +: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty2:$Zm, itype:$iop), + asm, "\t$Zd, $Zn, $Zm$iop", "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + let Inst{31-24} = 0b01100100; + let Inst{23-22} = sz; + let Inst{21} = 0b1; + let Inst{15-10} = 0b001000; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve_fp_fmul_by_indexed_elem { + def _H : sve_fp_fmul_by_indexed_elem<{0, ?}, asm, ZPR16, ZPR3b16, VectorIndexH> { + bits<3> Zm; + bits<3> iop; + let Inst{22} = iop{2}; + let Inst{20-19} = iop{1-0}; + let Inst{18-16} = Zm; + } + def _S : sve_fp_fmul_by_indexed_elem<0b10, asm, ZPR32, ZPR3b32, VectorIndexS> { + bits<3> Zm; + bits<2> iop; + let Inst{20-19} = iop; + let Inst{18-16} = Zm; + } + def _D : sve_fp_fmul_by_indexed_elem<0b11, asm, ZPR64, ZPR4b64, VectorIndexD> { + bits<4> Zm; + bit iop; + let Inst{20} = iop; + let Inst{19-16} = Zm; + } +} + +//===----------------------------------------------------------------------===// +// SVE Floating Point Complex Multiply-Add Group +//===----------------------------------------------------------------------===// + +class sve_fp_fcmla sz, string asm, ZPRRegOp zprty> +: I<(outs zprty:$Zda), (ins PPR3bAny:$Pg, zprty:$_Zda, zprty:$Zn, zprty:$Zm, + complexrotateop:$imm), + asm, "\t$Zda, $Pg/m, $Zn, $Zm, $imm", + "", []>, Sched<[]> { + bits<5> Zda; + bits<3> Pg; + bits<5> Zn; + bits<5> Zm; + bits<2> imm; + let Inst{31-24} = 0b01100100; + let Inst{23-22} = sz; + let Inst{21} = 0; + let Inst{20-16} = Zm; + let Inst{15} = 0; + let Inst{14-13} = imm; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Zda; + + let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_fp_fcmla { + def _H : sve_fp_fcmla<0b01, asm, ZPR16>; + def _S : sve_fp_fcmla<0b10, asm, ZPR32>; + def _D : sve_fp_fcmla<0b11, asm, ZPR64>; +} + +//===----------------------------------------------------------------------===// +// SVE Floating Point Complex Multiply-Add - Indexed Group +//===----------------------------------------------------------------------===// + +class sve_fp_fcmla_by_indexed_elem sz, string asm, + ZPRRegOp zprty, + ZPRRegOp zprty2, Operand itype> +: I<(outs zprty:$Zda), (ins zprty:$_Zda, zprty:$Zn, zprty2:$Zm, itype:$iop, + complexrotateop:$imm), + asm, "\t$Zda, $Zn, $Zm$iop, $imm", + "", []>, Sched<[]> { + bits<5> Zda; + bits<5> Zn; + bits<2> imm; + let Inst{31-24} = 0b01100100; + let Inst{23-22} = sz; + let Inst{21} = 0b1; + let Inst{15-12} = 0b0001; + let Inst{11-10} = imm; + let Inst{9-5} = Zn; + let Inst{4-0} = Zda; + + let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve_fp_fcmla_by_indexed_elem { + def _H : sve_fp_fcmla_by_indexed_elem<0b10, asm, ZPR16, ZPR3b16, VectorIndexS> { + bits<3> Zm; + bits<2> iop; + let 
Inst{20-19} = iop; + let Inst{18-16} = Zm; + } + def _S : sve_fp_fcmla_by_indexed_elem<0b11, asm, ZPR32, ZPR4b32, VectorIndexD> { + bits<4> Zm; + bits<1> iop; + let Inst{20} = iop; + let Inst{19-16} = Zm; + } +} + +//===----------------------------------------------------------------------===// +// SVE Floating Point Complex Addition Group +//===----------------------------------------------------------------------===// + +class sve_fp_fcadd sz, string asm, ZPRRegOp zprty> +: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm, + complexrotateopodd:$imm), + asm, "\t$Zdn, $Pg/m, $_Zdn, $Zm, $imm", + "", + []>, Sched<[]> { + bits<5> Zdn; + bits<5> Zm; + bits<3> Pg; + bit imm; + let Inst{31-24} = 0b01100100; + let Inst{23-22} = sz; + let Inst{21-17} = 0; + let Inst{16} = imm; + let Inst{15-13} = 0b100; + let Inst{12-10} = Pg; + let Inst{9-5} = Zm; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_fp_fcadd { + def _H : sve_fp_fcadd<0b01, asm, ZPR16>; + def _S : sve_fp_fcadd<0b10, asm, ZPR32>; + def _D : sve_fp_fcadd<0b11, asm, ZPR64>; +} + +//===----------------------------------------------------------------------===// +// SVE Stack Allocation Group +//===----------------------------------------------------------------------===// + +class sve_int_arith_vl +: I<(outs GPR64sp:$Rd), (ins GPR64sp:$Rn, simm6_32b:$imm6), + asm, "\t$Rd, $Rn, $imm6", + "", + []>, Sched<[]> { + bits<5> Rd; + bits<5> Rn; + bits<6> imm6; + let Inst{31-23} = 0b000001000; + let Inst{22} = opc; + let Inst{21} = 0b1; + let Inst{20-16} = Rn; + let Inst{15-11} = 0b01010; + let Inst{10-5} = imm6; + let Inst{4-0} = Rd; +} + +class sve_int_read_vl_a opc2, string asm> +: I<(outs GPR64:$Rd), (ins simm6_32b:$imm6), + asm, "\t$Rd, $imm6", + "", + []>, Sched<[]> { + bits<5> Rd; + bits<6> imm6; + let Inst{31-23} = 0b000001001; + let Inst{22} = op; + let Inst{21} = 0b1; + let Inst{20-16} = opc2{4-0}; + let Inst{15-11} = 0b01010; + let Inst{10-5} = imm6; + let Inst{4-0} = Rd; +} + +//===----------------------------------------------------------------------===// +// SVE Permute - In Lane Group +//===----------------------------------------------------------------------===// + +class sve_int_perm_bin_perm_zz opc, bits<2> sz8_64, string asm, + ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm), + asm, "\t$Zd, $Zn, $Zm", + "", + []>, Sched<[]> { + bits<5> Zd; + bits<5> Zm; + bits<5> Zn; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{15-13} = 0b011; + let Inst{12-10} = opc; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_perm_bin_perm_zz opc, string asm> { + def _B : sve_int_perm_bin_perm_zz; + def _H : sve_int_perm_bin_perm_zz; + def _S : sve_int_perm_bin_perm_zz; + def _D : sve_int_perm_bin_perm_zz; +} + +//===----------------------------------------------------------------------===// +// SVE Floating Point Unary Operations Group +//===----------------------------------------------------------------------===// + +class sve_fp_2op_p_zd opc, string asm, RegisterOperand i_zprtype, + RegisterOperand o_zprtype, ElementSizeEnum size> +: I<(outs o_zprtype:$Zd), (ins i_zprtype:$_Zd, PPR3bAny:$Pg, i_zprtype:$Zn), + asm, "\t$Zd, $Pg/m, $Zn", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zd; + bits<5> Zn; + let Inst{31-24} = 0b01100101; + let Inst{23-22} = opc{6-5}; + let Inst{21} = 0b0; + let Inst{20-16} = opc{4-0}; + let 
Inst{15-13} = 0b101; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = Destructive; + let ElementSize = size; +} + +multiclass sve_fp_2op_p_zd_HSD opc, string asm> { + def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>; + def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>; + def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>; +} + +//===----------------------------------------------------------------------===// +// SVE Floating Point Unary Operations - Unpredicated Group +//===----------------------------------------------------------------------===// + +class sve_fp_2op_u_zd sz, bits<3> opc, string asm, + ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins zprty:$Zn), + asm, "\t$Zd, $Zn", + "", + []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + let Inst{31-24} = 0b01100101; + let Inst{23-22} = sz; + let Inst{21-19} = 0b001; + let Inst{18-16} = opc; + let Inst{15-10} = 0b001100; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve_fp_2op_u_zd opc, string asm> { + def _H : sve_fp_2op_u_zd<0b01, opc, asm, ZPR16>; + def _S : sve_fp_2op_u_zd<0b10, opc, asm, ZPR32>; + def _D : sve_fp_2op_u_zd<0b11, opc, asm, ZPR64>; +} + +//===----------------------------------------------------------------------===// +// SVE Integer Arithmetic - Binary Predicated Group +//===----------------------------------------------------------------------===// + +class sve_int_bin_pred_arit_log sz8_64, bits<2> fmt, bits<3> opc, + string asm, ZPRRegOp zprty> +: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm), + asm, "\t$Zdn, $Pg/m, $_Zdn, $Zm", "", []>, Sched<[]> { + bits<3> Pg; + bits<5> Zdn; + bits<5> Zm; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b0; + let Inst{20-19} = fmt; + let Inst{18-16} = opc; + let Inst{15-13} = 0b000; + let Inst{12-10} = Pg; + let Inst{9-5} = Zm; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_int_bin_pred_log opc, string asm> { + def _B : sve_int_bin_pred_arit_log<0b00, 0b11, opc, asm, ZPR8>; + def _H : sve_int_bin_pred_arit_log<0b01, 0b11, opc, asm, ZPR16>; + def _S : sve_int_bin_pred_arit_log<0b10, 0b11, opc, asm, ZPR32>; + def _D : sve_int_bin_pred_arit_log<0b11, 0b11, opc, asm, ZPR64>; +} + +multiclass sve_int_bin_pred_arit_0 opc, string asm> { + def _B : sve_int_bin_pred_arit_log<0b00, 0b00, opc, asm, ZPR8>; + def _H : sve_int_bin_pred_arit_log<0b01, 0b00, opc, asm, ZPR16>; + def _S : sve_int_bin_pred_arit_log<0b10, 0b00, opc, asm, ZPR32>; + def _D : sve_int_bin_pred_arit_log<0b11, 0b00, opc, asm, ZPR64>; +} + +multiclass sve_int_bin_pred_arit_1 opc, string asm> { + def _B : sve_int_bin_pred_arit_log<0b00, 0b01, opc, asm, ZPR8>; + def _H : sve_int_bin_pred_arit_log<0b01, 0b01, opc, asm, ZPR16>; + def _S : sve_int_bin_pred_arit_log<0b10, 0b01, opc, asm, ZPR32>; + def _D : sve_int_bin_pred_arit_log<0b11, 0b01, opc, asm, ZPR64>; +} + +multiclass sve_int_bin_pred_arit_2 opc, string asm> { + def _B : sve_int_bin_pred_arit_log<0b00, 0b10, opc, asm, ZPR8>; + def _H : sve_int_bin_pred_arit_log<0b01, 0b10, opc, asm, ZPR16>; + def _S : sve_int_bin_pred_arit_log<0b10, 0b10, opc, asm, ZPR32>; + def _D : sve_int_bin_pred_arit_log<0b11, 0b10, opc, asm, ZPR64>; +} + +// Special case for divides which are not defined for 8b/16b elements. 
+multiclass sve_int_bin_pred_arit_2_div opc, string asm> { + def _S : sve_int_bin_pred_arit_log<0b10, 0b10, opc, asm, ZPR32>; + def _D : sve_int_bin_pred_arit_log<0b11, 0b10, opc, asm, ZPR64>; +} + +//===----------------------------------------------------------------------===// +// SVE Integer Multiply-Add Group +//===----------------------------------------------------------------------===// + +class sve_int_mladdsub_vvv_pred sz8_64, bits<1> opc, string asm, + ZPRRegOp zprty> +: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm, zprty:$Za), + asm, "\t$Zdn, $Pg/m, $Zm, $Za", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zdn; + bits<5> Za; + bits<5> Zm; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b0; + let Inst{20-16} = Zm; + let Inst{15-14} = 0b11; + let Inst{13} = opc; + let Inst{12-10} = Pg; + let Inst{9-5} = Za; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_int_mladdsub_vvv_pred opc, string asm> { + def _B : sve_int_mladdsub_vvv_pred<0b00, opc, asm, ZPR8>; + def _H : sve_int_mladdsub_vvv_pred<0b01, opc, asm, ZPR16>; + def _S : sve_int_mladdsub_vvv_pred<0b10, opc, asm, ZPR32>; + def _D : sve_int_mladdsub_vvv_pred<0b11, opc, asm, ZPR64>; +} + +class sve_int_mlas_vvv_pred sz8_64, bits<1> opc, string asm, + ZPRRegOp zprty> +: I<(outs zprty:$Zda), (ins PPR3bAny:$Pg, zprty:$_Zda, zprty:$Zn, zprty:$Zm), + asm, "\t$Zda, $Pg/m, $Zn, $Zm", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zda; + bits<5> Zm; + bits<5> Zn; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b0; + let Inst{20-16} = Zm; + let Inst{15-14} = 0b01; + let Inst{13} = opc; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Zda; + + let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_int_mlas_vvv_pred opc, string asm> { + def _B : sve_int_mlas_vvv_pred<0b00, opc, asm, ZPR8>; + def _H : sve_int_mlas_vvv_pred<0b01, opc, asm, ZPR16>; + def _S : sve_int_mlas_vvv_pred<0b10, opc, asm, ZPR32>; + def _D : sve_int_mlas_vvv_pred<0b11, opc, asm, ZPR64>; +} + +//===----------------------------------------------------------------------===// +// SVE Integer Dot Product Group +//===----------------------------------------------------------------------===// + +class sve_intx_dot +: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty2:$Zm), asm, + "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> { + bits<5> Zda; + bits<5> Zn; + bits<5> Zm; + let Inst{31-23} = 0b010001001; + let Inst{22} = sz; + let Inst{21} = 0; + let Inst{20-16} = Zm; + let Inst{15-11} = 0; + let Inst{10} = U; + let Inst{9-5} = Zn; + let Inst{4-0} = Zda; + + let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = Destructive; + let ElementSize = zprty1.ElementSize; +} + +multiclass sve_intx_dot { + def _S : sve_intx_dot<0b0, opc, asm, ZPR32, ZPR8>; + def _D : sve_intx_dot<0b1, opc, asm, ZPR64, ZPR16>; +} + +//===----------------------------------------------------------------------===// +// SVE Integer Dot Product Group - Indexed Group +//===----------------------------------------------------------------------===// + +class sve_intx_dot_by_indexed_elem +: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty3:$Zm, itype:$iop), + asm, "\t$Zda, $Zn, $Zm$iop", + "", []>, Sched<[]> { + bits<5> Zda; + bits<5> Zn; + let Inst{31-23} = 0b010001001; + let Inst{22} = sz; + let Inst{21} = 
0b1; + let Inst{15-11} = 0; + let Inst{10} = U; + let Inst{9-5} = Zn; + let Inst{4-0} = Zda; + + let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve_intx_dot_by_indexed_elem { + def _S : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS> { + bits<2> iop; + bits<3> Zm; + let Inst{20-19} = iop; + let Inst{18-16} = Zm; + } + def _D : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD> { + bits<1> iop; + bits<4> Zm; + let Inst{20} = iop; + let Inst{19-16} = Zm; + } +} + +//===----------------------------------------------------------------------===// +// SVE Integer Arithmetic - Unary Predicated Group +//===----------------------------------------------------------------------===// + +class sve_int_un_pred_arit sz8_64, bits<4> opc, + string asm, ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins zprty:$_Zd, PPR3bAny:$Pg, zprty:$Zn), + asm, "\t$Zd, $Pg/m, $Zn", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zd; + bits<5> Zn; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = sz8_64; + let Inst{21-20} = 0b01; + let Inst{19} = opc{0}; + let Inst{18-16} = opc{3-1}; + let Inst{15-13} = 0b101; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_int_un_pred_arit_0 opc, string asm> { + def _B : sve_int_un_pred_arit<0b00, { opc, 0b0 }, asm, ZPR8>; + def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>; + def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>; + def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>; +} + +multiclass sve_int_un_pred_arit_0_h opc, string asm> { + def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>; + def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>; + def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>; +} + +multiclass sve_int_un_pred_arit_0_w opc, string asm> { + def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>; + def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>; +} + +multiclass sve_int_un_pred_arit_0_d opc, string asm> { + def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>; +} + +multiclass sve_int_un_pred_arit_1 opc, string asm> { + def _B : sve_int_un_pred_arit<0b00, { opc, 0b1 }, asm, ZPR8>; + def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>; + def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>; + def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>; +} + +multiclass sve_int_un_pred_arit_1_fp opc, string asm> { + def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>; + def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>; + def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>; +} + +//===----------------------------------------------------------------------===// +// SVE Integer Wide Immediate - Unpredicated Group +//===----------------------------------------------------------------------===// +class sve_int_dup_imm sz8_64, string asm, + ZPRRegOp zprty, Operand immtype> +: I<(outs zprty:$Zd), (ins immtype:$imm), + asm, "\t$Zd, $imm", + "", + []>, Sched<[]> { + bits<5> Zd; + bits<9> imm; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = sz8_64; + let Inst{21-14} = 0b11100011; + let Inst{13} = imm{8}; // sh + let Inst{12-5} = imm{7-0}; // imm8 + let Inst{4-0} = Zd; + + let isReMaterializable = 1; +} + +multiclass sve_int_dup_imm { + 
def _B : sve_int_dup_imm<0b00, asm, ZPR8, cpy_imm8_opt_lsl_i8>; + def _H : sve_int_dup_imm<0b01, asm, ZPR16, cpy_imm8_opt_lsl_i16>; + def _S : sve_int_dup_imm<0b10, asm, ZPR32, cpy_imm8_opt_lsl_i32>; + def _D : sve_int_dup_imm<0b11, asm, ZPR64, cpy_imm8_opt_lsl_i64>; + + def : InstAlias<"mov $Zd, $imm", + (!cast(NAME # _B) ZPR8:$Zd, cpy_imm8_opt_lsl_i8:$imm), 1>; + def : InstAlias<"mov $Zd, $imm", + (!cast(NAME # _H) ZPR16:$Zd, cpy_imm8_opt_lsl_i16:$imm), 1>; + def : InstAlias<"mov $Zd, $imm", + (!cast(NAME # _S) ZPR32:$Zd, cpy_imm8_opt_lsl_i32:$imm), 1>; + def : InstAlias<"mov $Zd, $imm", + (!cast(NAME # _D) ZPR64:$Zd, cpy_imm8_opt_lsl_i64:$imm), 1>; + + def : InstAlias<"fmov $Zd, #0.0", + (!cast(NAME # _H) ZPR16:$Zd, 0, 0), 1>; + def : InstAlias<"fmov $Zd, #0.0", + (!cast(NAME # _S) ZPR32:$Zd, 0, 0), 1>; + def : InstAlias<"fmov $Zd, #0.0", + (!cast(NAME # _D) ZPR64:$Zd, 0, 0), 1>; +} + +class sve_int_dup_fpimm sz8_64, Operand fpimmtype, + string asm, ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins fpimmtype:$imm8), + asm, "\t$Zd, $imm8", + "", + []>, Sched<[]> { + bits<5> Zd; + bits<8> imm8; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = sz8_64; + let Inst{21-14} = 0b11100111; + let Inst{13} = 0b0; + let Inst{12-5} = imm8; + let Inst{4-0} = Zd; + + let isReMaterializable = 1; +} + +multiclass sve_int_dup_fpimm { + def _H : sve_int_dup_fpimm<0b01, fpimm16, asm, ZPR16>; + def _S : sve_int_dup_fpimm<0b10, fpimm32, asm, ZPR32>; + def _D : sve_int_dup_fpimm<0b11, fpimm64, asm, ZPR64>; + + def : InstAlias<"fmov $Zd, $imm8", + (!cast(NAME # _H) ZPR16:$Zd, fpimm16:$imm8), 1>; + def : InstAlias<"fmov $Zd, $imm8", + (!cast(NAME # _S) ZPR32:$Zd, fpimm32:$imm8), 1>; + def : InstAlias<"fmov $Zd, $imm8", + (!cast(NAME # _D) ZPR64:$Zd, fpimm64:$imm8), 1>; +} + +class sve_int_arith_imm0 sz8_64, bits<3> opc, string asm, + ZPRRegOp zprty, Operand immtype> +: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, immtype:$imm), + asm, "\t$Zdn, $_Zdn, $imm", + "", + []>, Sched<[]> { + bits<5> Zdn; + bits<9> imm; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = sz8_64; + let Inst{21-19} = 0b100; + let Inst{18-16} = opc; + let Inst{15-14} = 0b11; + let Inst{13} = imm{8}; // sh + let Inst{12-5} = imm{7-0}; // imm8 + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve_int_arith_imm0 opc, string asm> { + def _B : sve_int_arith_imm0<0b00, opc, asm, ZPR8, addsub_imm8_opt_lsl_i8>; + def _H : sve_int_arith_imm0<0b01, opc, asm, ZPR16, addsub_imm8_opt_lsl_i16>; + def _S : sve_int_arith_imm0<0b10, opc, asm, ZPR32, addsub_imm8_opt_lsl_i32>; + def _D : sve_int_arith_imm0<0b11, opc, asm, ZPR64, addsub_imm8_opt_lsl_i64>; +} + +class sve_int_arith_imm sz8_64, bits<6> opc, string asm, + ZPRRegOp zprty, Operand immtype> +: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, immtype:$imm), + asm, "\t$Zdn, $_Zdn, $imm", + "", + []>, Sched<[]> { + bits<5> Zdn; + bits<8> imm; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = sz8_64; + let Inst{21-16} = opc; + let Inst{15-13} = 0b110; + let Inst{12-5} = imm; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve_int_arith_imm1 opc, string asm, Operand immtype> { + def _B : sve_int_arith_imm<0b00, { 0b1010, opc }, asm, ZPR8, immtype>; + def _H : sve_int_arith_imm<0b01, { 0b1010, opc }, asm, ZPR16, immtype>; + def _S : sve_int_arith_imm<0b10, { 0b1010, opc }, asm, ZPR32, immtype>; + def _D : 
sve_int_arith_imm<0b11, { 0b1010, opc }, asm, ZPR64, immtype>; +} + +multiclass sve_int_arith_imm2 { + def _B : sve_int_arith_imm<0b00, 0b110000, asm, ZPR8, simm8>; + def _H : sve_int_arith_imm<0b01, 0b110000, asm, ZPR16, simm8>; + def _S : sve_int_arith_imm<0b10, 0b110000, asm, ZPR32, simm8>; + def _D : sve_int_arith_imm<0b11, 0b110000, asm, ZPR64, simm8>; +} + +//===----------------------------------------------------------------------===// +// SVE Bitwise Logical - Unpredicated Group +//===----------------------------------------------------------------------===// + +class sve_int_bin_cons_log opc, string asm> +: I<(outs ZPR64:$Zd), (ins ZPR64:$Zn, ZPR64:$Zm), + asm, "\t$Zd, $Zn, $Zm", + "", + []>, Sched<[]> { + bits<5> Zd; + bits<5> Zm; + bits<5> Zn; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = opc{1-0}; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{15-10} = 0b001100; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + + +//===----------------------------------------------------------------------===// +// SVE Integer Wide Immediate - Predicated Group +//===----------------------------------------------------------------------===// + +class sve_int_dup_fpimm_pred sz, Operand fpimmtype, + string asm, ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins zprty:$_Zd, PPRAny:$Pg, fpimmtype:$imm8), + asm, "\t$Zd, $Pg/m, $imm8", + "", + []>, Sched<[]> { + bits<4> Pg; + bits<5> Zd; + bits<8> imm8; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz; + let Inst{21-20} = 0b01; + let Inst{19-16} = Pg; + let Inst{15-13} = 0b110; + let Inst{12-5} = imm8; + let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_int_dup_fpimm_pred { + def _H : sve_int_dup_fpimm_pred<0b01, fpimm16, asm, ZPR16>; + def _S : sve_int_dup_fpimm_pred<0b10, fpimm32, asm, ZPR32>; + def _D : sve_int_dup_fpimm_pred<0b11, fpimm64, asm, ZPR64>; + + def : InstAlias<"fmov $Zd, $Pg/m, $imm8", + (!cast(NAME # _H) ZPR16:$Zd, PPRAny:$Pg, fpimm16:$imm8), 1>; + def : InstAlias<"fmov $Zd, $Pg/m, $imm8", + (!cast(NAME # _S) ZPR32:$Zd, PPRAny:$Pg, fpimm32:$imm8), 1>; + def : InstAlias<"fmov $Zd, $Pg/m, $imm8", + (!cast(NAME # _D) ZPR64:$Zd, PPRAny:$Pg, fpimm64:$imm8), 1>; +} + +class sve_int_dup_imm_pred sz8_64, bit m, string asm, + ZPRRegOp zprty, string pred_qual, dag iops> +: I<(outs zprty:$Zd), iops, + asm, "\t$Zd, $Pg"#pred_qual#", $imm", + "", []>, Sched<[]> { + bits<5> Zd; + bits<4> Pg; + bits<9> imm; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-20} = 0b01; + let Inst{19-16} = Pg; + let Inst{15} = 0b0; + let Inst{14} = m; + let Inst{13} = imm{8}; // sh + let Inst{12-5} = imm{7-0}; // imm8 + let Inst{4-0} = Zd; + + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_int_dup_imm_pred_merge { + let Constraints = "$Zd = $_Zd" in { + def _B : sve_int_dup_imm_pred<0b00, 1, asm, ZPR8, "/m", (ins ZPR8:$_Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i8:$imm)>; + def _H : sve_int_dup_imm_pred<0b01, 1, asm, ZPR16, "/m", (ins ZPR16:$_Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i16:$imm)>; + def _S : sve_int_dup_imm_pred<0b10, 1, asm, ZPR32, "/m", (ins ZPR32:$_Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i32:$imm)>; + def _D : sve_int_dup_imm_pred<0b11, 1, asm, ZPR64, "/m", (ins ZPR64:$_Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i64:$imm)>; + } + + def : InstAlias<"mov $Zd, $Pg/m, $imm", + (!cast(NAME # _B) ZPR8:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i8:$imm), 1>; + def : InstAlias<"mov $Zd, $Pg/m, $imm", + 
(!cast(NAME # _H) ZPR16:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i16:$imm), 1>; + def : InstAlias<"mov $Zd, $Pg/m, $imm", + (!cast(NAME # _S) ZPR32:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i32:$imm), 1>; + def : InstAlias<"mov $Zd, $Pg/m, $imm", + (!cast(NAME # _D) ZPR64:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i64:$imm), 1>; + + def : InstAlias<"fmov $Zd, $Pg/m, #0.0", + (!cast(NAME # _H) ZPR16:$Zd, PPRAny:$Pg, 0, 0), 0>; + def : InstAlias<"fmov $Zd, $Pg/m, #0.0", + (!cast(NAME # _S) ZPR32:$Zd, PPRAny:$Pg, 0, 0), 0>; + def : InstAlias<"fmov $Zd, $Pg/m, #0.0", + (!cast(NAME # _D) ZPR64:$Zd, PPRAny:$Pg, 0, 0), 0>; +} + +multiclass sve_int_dup_imm_pred_zero { + def _B : sve_int_dup_imm_pred<0b00, 0, asm, ZPR8, "/z", (ins PPRAny:$Pg, cpy_imm8_opt_lsl_i8:$imm)>; + def _H : sve_int_dup_imm_pred<0b01, 0, asm, ZPR16, "/z", (ins PPRAny:$Pg, cpy_imm8_opt_lsl_i16:$imm)>; + def _S : sve_int_dup_imm_pred<0b10, 0, asm, ZPR32, "/z", (ins PPRAny:$Pg, cpy_imm8_opt_lsl_i32:$imm)>; + def _D : sve_int_dup_imm_pred<0b11, 0, asm, ZPR64, "/z", (ins PPRAny:$Pg, cpy_imm8_opt_lsl_i64:$imm)>; + + def : InstAlias<"mov $Zd, $Pg/z, $imm", + (!cast(NAME # _B) ZPR8:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i8:$imm), 1>; + def : InstAlias<"mov $Zd, $Pg/z, $imm", + (!cast(NAME # _H) ZPR16:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i16:$imm), 1>; + def : InstAlias<"mov $Zd, $Pg/z, $imm", + (!cast(NAME # _S) ZPR32:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i32:$imm), 1>; + def : InstAlias<"mov $Zd, $Pg/z, $imm", + (!cast(NAME # _D) ZPR64:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i64:$imm), 1>; +} + +//===----------------------------------------------------------------------===// +// SVE Integer Compare - Vectors Group +//===----------------------------------------------------------------------===// + +class sve_int_cmp sz8_64, bits<3> opc, string asm, + PPRRegOp pprty, ZPRRegOp zprty1, ZPRRegOp zprty2> +: I<(outs pprty:$Pd), (ins PPR3bAny:$Pg, zprty1:$Zn, zprty2:$Zm), + asm, "\t$Pd, $Pg/z, $Zn, $Zm", + "", + []>, Sched<[]> { + bits<4> Pd; + bits<3> Pg; + bits<5> Zm; + bits<5> Zn; + let Inst{31-24} = 0b00100100; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b0; + let Inst{20-16} = Zm; + let Inst{15} = opc{2}; + let Inst{14} = cmp_1; + let Inst{13} = opc{1}; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4} = opc{0}; + let Inst{3-0} = Pd; + + let Defs = [NZCV]; +} + +multiclass sve_int_cmp_0 opc, string asm> { + def _B : sve_int_cmp<0b0, 0b00, opc, asm, PPR8, ZPR8, ZPR8>; + def _H : sve_int_cmp<0b0, 0b01, opc, asm, PPR16, ZPR16, ZPR16>; + def _S : sve_int_cmp<0b0, 0b10, opc, asm, PPR32, ZPR32, ZPR32>; + def _D : sve_int_cmp<0b0, 0b11, opc, asm, PPR64, ZPR64, ZPR64>; +} + +multiclass sve_int_cmp_0_wide opc, string asm> { + def _B : sve_int_cmp<0b0, 0b00, opc, asm, PPR8, ZPR8, ZPR64>; + def _H : sve_int_cmp<0b0, 0b01, opc, asm, PPR16, ZPR16, ZPR64>; + def _S : sve_int_cmp<0b0, 0b10, opc, asm, PPR32, ZPR32, ZPR64>; +} + +multiclass sve_int_cmp_1_wide opc, string asm> { + def _B : sve_int_cmp<0b1, 0b00, opc, asm, PPR8, ZPR8, ZPR64>; + def _H : sve_int_cmp<0b1, 0b01, opc, asm, PPR16, ZPR16, ZPR64>; + def _S : sve_int_cmp<0b1, 0b10, opc, asm, PPR32, ZPR32, ZPR64>; +} + + +//===----------------------------------------------------------------------===// +// SVE Integer Compare - Signed Immediate Group +//===----------------------------------------------------------------------===// + +class sve_int_scmp_vi sz8_64, bits<3> opc, string asm, PPRRegOp pprty, + ZPRRegOp zprty, + Operand immtype> +: I<(outs pprty:$Pd), (ins PPR3bAny:$Pg, zprty:$Zn, immtype:$imm5), + asm, "\t$Pd, 
$Pg/z, $Zn, $imm5", + "", + []>, Sched<[]> { + bits<4> Pd; + bits<3> Pg; + bits<5> Zn; + bits<5> imm5; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b0; + let Inst{20-16} = imm5; + let Inst{15} = opc{2}; + let Inst{14} = 0b0; + let Inst{13} = opc{1}; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4} = opc{0}; + let Inst{3-0} = Pd; + + let Defs = [NZCV]; +} + +multiclass sve_int_scmp_vi opc, string asm> { + def _B : sve_int_scmp_vi<0b00, opc, asm, PPR8, ZPR8, simm5_32b>; + def _H : sve_int_scmp_vi<0b01, opc, asm, PPR16, ZPR16, simm5_32b>; + def _S : sve_int_scmp_vi<0b10, opc, asm, PPR32, ZPR32, simm5_32b>; + def _D : sve_int_scmp_vi<0b11, opc, asm, PPR64, ZPR64, simm5_64b>; +} + + +//===----------------------------------------------------------------------===// +// SVE Integer Compare - Unsigned Immediate Group +//===----------------------------------------------------------------------===// + +class sve_int_ucmp_vi sz8_64, bits<2> opc, string asm, PPRRegOp pprty, + ZPRRegOp zprty, Operand immtype> +: I<(outs pprty:$Pd), (ins PPR3bAny:$Pg, zprty:$Zn, immtype:$imm7), + asm, "\t$Pd, $Pg/z, $Zn, $imm7", + "", + []>, Sched<[]> { + bits<4> Pd; + bits<3> Pg; + bits<5> Zn; + bits<7> imm7; + let Inst{31-24} = 0b00100100; + let Inst{23-22} = sz8_64; + let Inst{21} = 1; + let Inst{20-14} = imm7; + let Inst{13} = opc{1}; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4} = opc{0}; + let Inst{3-0} = Pd; + + let Defs = [NZCV]; +} + +multiclass sve_int_ucmp_vi opc, string asm> { + def _B : sve_int_ucmp_vi<0b00, opc, asm, PPR8, ZPR8, imm0_127>; + def _H : sve_int_ucmp_vi<0b01, opc, asm, PPR16, ZPR16, imm0_127>; + def _S : sve_int_ucmp_vi<0b10, opc, asm, PPR32, ZPR32, imm0_127>; + def _D : sve_int_ucmp_vi<0b11, opc, asm, PPR64, ZPR64, imm0_127>; +} + + +//===----------------------------------------------------------------------===// +// SVE Integer Compare - Scalars Group +//===----------------------------------------------------------------------===// + +class sve_int_cterm +: I<(outs), (ins rt:$Rn, rt:$Rm), + asm, "\t$Rn, $Rm", + "", + []>, Sched<[]> { + bits<5> Rm; + bits<5> Rn; + let Inst{31-23} = 0b001001011; + let Inst{22} = sz; + let Inst{21} = 0b1; + let Inst{20-16} = Rm; + let Inst{15-10} = 0b001000; + let Inst{9-5} = Rn; + let Inst{4} = opc; + let Inst{3-0} = 0b0000; + + let Defs = [NZCV]; +} + +class sve_int_while_rr sz8_64, bits<4> opc, string asm, + RegisterClass gprty, PPRRegOp pprty> +: I<(outs pprty:$Pd), (ins gprty:$Rn, gprty:$Rm), + asm, "\t$Pd, $Rn, $Rm", + "", []>, Sched<[]> { + bits<4> Pd; + bits<5> Rm; + bits<5> Rn; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b1; + let Inst{20-16} = Rm; + let Inst{15-13} = 0b000; + let Inst{12-10} = opc{3-1}; + let Inst{9-5} = Rn; + let Inst{4} = opc{0}; + let Inst{3-0} = Pd; + + let Defs = [NZCV]; +} + +multiclass sve_int_while4_rr opc, string asm> { + def _B : sve_int_while_rr<0b00, { 0, opc }, asm, GPR32, PPR8>; + def _H : sve_int_while_rr<0b01, { 0, opc }, asm, GPR32, PPR16>; + def _S : sve_int_while_rr<0b10, { 0, opc }, asm, GPR32, PPR32>; + def _D : sve_int_while_rr<0b11, { 0, opc }, asm, GPR32, PPR64>; +} + +multiclass sve_int_while8_rr opc, string asm> { + def _B : sve_int_while_rr<0b00, { 1, opc }, asm, GPR64, PPR8>; + def _H : sve_int_while_rr<0b01, { 1, opc }, asm, GPR64, PPR16>; + def _S : sve_int_while_rr<0b10, { 1, opc }, asm, GPR64, PPR32>; + def _D : sve_int_while_rr<0b11, { 1, opc }, asm, GPR64, PPR64>; +} + + 
+//===----------------------------------------------------------------------===// +// SVE Floating Point Fast Reduction Group +//===----------------------------------------------------------------------===// + +class sve_fp_fast_red sz, bits<3> opc, string asm, + ZPRRegOp zprty, RegisterClass dstRegClass> +: I<(outs dstRegClass:$Vd), (ins PPR3bAny:$Pg, zprty:$Zn), + asm, "\t$Vd, $Pg, $Zn", + "", + []>, Sched<[]> { + bits<5> Zn; + bits<5> Vd; + bits<3> Pg; + let Inst{31-24} = 0b01100101; + let Inst{23-22} = sz; + let Inst{21-19} = 0b000; + let Inst{18-16} = opc; + let Inst{15-13} = 0b001; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Vd; +} + +multiclass sve_fp_fast_red opc, string asm> { + def _H : sve_fp_fast_red<0b01, opc, asm, ZPR16, FPR16>; + def _S : sve_fp_fast_red<0b10, opc, asm, ZPR32, FPR32>; + def _D : sve_fp_fast_red<0b11, opc, asm, ZPR64, FPR64>; +} + + +//===----------------------------------------------------------------------===// +// SVE Floating Point Accumulating Reduction Group +//===----------------------------------------------------------------------===// + +class sve_fp_2op_p_vd sz, bits<3> opc, string asm, + ZPRRegOp zprty, RegisterClass dstRegClass> +: I<(outs dstRegClass:$Vdn), (ins PPR3bAny:$Pg, dstRegClass:$_Vdn, zprty:$Zm), + asm, "\t$Vdn, $Pg, $_Vdn, $Zm", + "", + []>, + Sched<[]> { + bits<3> Pg; + bits<5> Vdn; + bits<5> Zm; + let Inst{31-24} = 0b01100101; + let Inst{23-22} = sz; + let Inst{21-19} = 0b011; + let Inst{18-16} = opc; + let Inst{15-13} = 0b001; + let Inst{12-10} = Pg; + let Inst{9-5} = Zm; + let Inst{4-0} = Vdn; + + let Constraints = "$Vdn = $_Vdn"; +} + +multiclass sve_fp_2op_p_vd opc, string asm> { + def _H : sve_fp_2op_p_vd<0b01, opc, asm, ZPR16, FPR16>; + def _S : sve_fp_2op_p_vd<0b10, opc, asm, ZPR32, FPR32>; + def _D : sve_fp_2op_p_vd<0b11, opc, asm, ZPR64, FPR64>; +} + +//===----------------------------------------------------------------------===// +// SVE Floating Point Compare - Vectors Group +//===----------------------------------------------------------------------===// + +class sve_fp_3op_p_pd sz, bits<3> opc, string asm, PPRRegOp pprty, + ZPRRegOp zprty> +: I<(outs pprty:$Pd), (ins PPR3bAny:$Pg, zprty:$Zn, zprty:$Zm), + asm, "\t$Pd, $Pg/z, $Zn, $Zm", + "", + []>, Sched<[]> { + bits<4> Pd; + bits<3> Pg; + bits<5> Zm; + bits<5> Zn; + let Inst{31-24} = 0b01100101; + let Inst{23-22} = sz; + let Inst{21} = 0b0; + let Inst{20-16} = Zm; + let Inst{15} = opc{2}; + let Inst{14} = 0b1; + let Inst{13} = opc{1}; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4} = opc{0}; + let Inst{3-0} = Pd; +} + +multiclass sve_fp_3op_p_pd opc, string asm> { + def _H : sve_fp_3op_p_pd<0b01, opc, asm, PPR16, ZPR16>; + def _S : sve_fp_3op_p_pd<0b10, opc, asm, PPR32, ZPR32>; + def _D : sve_fp_3op_p_pd<0b11, opc, asm, PPR64, ZPR64>; +} + + +//===----------------------------------------------------------------------===// +// SVE Floating Point Compare - with Zero Group +//===----------------------------------------------------------------------===// + +class sve_fp_2op_p_pd sz, bits<3> opc, string asm, PPRRegOp pprty, + ZPRRegOp zprty> +: I<(outs pprty:$Pd), (ins PPR3bAny:$Pg, zprty:$Zn), + asm, "\t$Pd, $Pg/z, $Zn, #0.0", + "", + []>, Sched<[]> { + bits<4> Pd; + bits<3> Pg; + bits<5> Zn; + let Inst{31-24} = 0b01100101; + let Inst{23-22} = sz; + let Inst{21-18} = 0b0100; + let Inst{17-16} = opc{2-1}; + let Inst{15-13} = 0b001; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4} = opc{0}; + let Inst{3-0} = Pd; +} + +multiclass 
sve_fp_2op_p_pd opc, string asm> { + def _H : sve_fp_2op_p_pd<0b01, opc, asm, PPR16, ZPR16>; + def _S : sve_fp_2op_p_pd<0b10, opc, asm, PPR32, ZPR32>; + def _D : sve_fp_2op_p_pd<0b11, opc, asm, PPR64, ZPR64>; +} + + +//===----------------------------------------------------------------------===// +//SVE Index Generation Group +//===----------------------------------------------------------------------===// + +class sve_int_index_ii sz8_64, string asm, ZPRRegOp zprty, + Operand imm_ty> +: I<(outs zprty:$Zd), (ins imm_ty:$imm5, imm_ty:$imm5b), + asm, "\t$Zd, $imm5, $imm5b", + "", []>, Sched<[]> { + bits<5> Zd; + bits<5> imm5; + bits<5> imm5b; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b1; + let Inst{20-16} = imm5b; + let Inst{15-10} = 0b010000; + let Inst{9-5} = imm5; + let Inst{4-0} = Zd; +} + +multiclass sve_int_index_ii { + def _B : sve_int_index_ii<0b00, asm, ZPR8, simm5_32b>; + def _H : sve_int_index_ii<0b01, asm, ZPR16, simm5_32b>; + def _S : sve_int_index_ii<0b10, asm, ZPR32, simm5_32b>; + def _D : sve_int_index_ii<0b11, asm, ZPR64, simm5_64b>; +} + +class sve_int_index_ir sz8_64, string asm, ZPRRegOp zprty, + RegisterClass srcRegType, Operand imm_ty> +: I<(outs zprty:$Zd), (ins imm_ty:$imm5, srcRegType:$Rm), + asm, "\t$Zd, $imm5, $Rm", + "", []>, Sched<[]> { + bits<5> Rm; + bits<5> Zd; + bits<5> imm5; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b1; + let Inst{20-16} = Rm; + let Inst{15-10} = 0b010010; + let Inst{9-5} = imm5; + let Inst{4-0} = Zd; +} + +multiclass sve_int_index_ir { + def _B : sve_int_index_ir<0b00, asm, ZPR8, GPR32, simm5_32b>; + def _H : sve_int_index_ir<0b01, asm, ZPR16, GPR32, simm5_32b>; + def _S : sve_int_index_ir<0b10, asm, ZPR32, GPR32, simm5_32b>; + def _D : sve_int_index_ir<0b11, asm, ZPR64, GPR64, simm5_64b>; +} + +class sve_int_index_ri sz8_64, string asm, ZPRRegOp zprty, + RegisterClass srcRegType, Operand imm_ty> +: I<(outs zprty:$Zd), (ins srcRegType:$Rn, imm_ty:$imm5), + asm, "\t$Zd, $Rn, $imm5", + "", []>, Sched<[]> { + bits<5> Rn; + bits<5> Zd; + bits<5> imm5; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b1; + let Inst{20-16} = imm5; + let Inst{15-10} = 0b010001; + let Inst{9-5} = Rn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_index_ri { + def _B : sve_int_index_ri<0b00, asm, ZPR8, GPR32, simm5_32b>; + def _H : sve_int_index_ri<0b01, asm, ZPR16, GPR32, simm5_32b>; + def _S : sve_int_index_ri<0b10, asm, ZPR32, GPR32, simm5_32b>; + def _D : sve_int_index_ri<0b11, asm, ZPR64, GPR64, simm5_64b>; +} + +class sve_int_index_rr sz8_64, string asm, ZPRRegOp zprty, + RegisterClass srcRegType> +: I<(outs zprty:$Zd), (ins srcRegType:$Rn, srcRegType:$Rm), + asm, "\t$Zd, $Rn, $Rm", + "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Rm; + bits<5> Rn; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b1; + let Inst{20-16} = Rm; + let Inst{15-10} = 0b010011; + let Inst{9-5} = Rn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_index_rr { + def _B : sve_int_index_rr<0b00, asm, ZPR8, GPR32>; + def _H : sve_int_index_rr<0b01, asm, ZPR16, GPR32>; + def _S : sve_int_index_rr<0b10, asm, ZPR32, GPR32>; + def _D : sve_int_index_rr<0b11, asm, ZPR64, GPR64>; +} +// +//===----------------------------------------------------------------------===// +// SVE Bitwise Shift - Predicated Group +//===----------------------------------------------------------------------===// +class sve_int_bin_pred_shift_imm tsz8_64, bits<3> opc, string asm, + ZPRRegOp zprty, 
Operand immtype, + ElementSizeEnum size> +: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, immtype:$imm), + asm, "\t$Zdn, $Pg/m, $_Zdn, $imm", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zdn; + bits<6> imm; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = tsz8_64{3-2}; + let Inst{21-19} = 0b000; + let Inst{18-16} = opc; + let Inst{15-13} = 0b100; + let Inst{12-10} = Pg; + let Inst{9-8} = tsz8_64{1-0}; + let Inst{7-5} = imm{2-0}; // imm3 + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = size; +} + +multiclass sve_int_bin_pred_shift_imm_left opc, string asm> { + def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8, + ElementSizeB>; + def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16, + ElementSizeH> { + let Inst{8} = imm{3}; + } + def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32, + ElementSizeS> { + let Inst{9-8} = imm{4-3}; + } + def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64, + ElementSizeD> { + let Inst{22} = imm{5}; + let Inst{9-8} = imm{4-3}; + } +} + +multiclass sve_int_bin_pred_shift_imm_right opc, string asm> { + def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8, + ElementSizeB>; + def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16, + ElementSizeH> { + let Inst{8} = imm{3}; + } + def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32, + ElementSizeS> { + let Inst{9-8} = imm{4-3}; + } + def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64, + ElementSizeD> { + let Inst{22} = imm{5}; + let Inst{9-8} = imm{4-3}; + } +} + +class sve_int_bin_pred_shift sz8_64, bit wide, bits<3> opc, + string asm, ZPRRegOp zprty, ZPRRegOp zprty2> +: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty2:$Zm), + asm, "\t$Zdn, $Pg/m, $_Zdn, $Zm", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zdn; + bits<5> Zm; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = sz8_64; + let Inst{21-20} = 0b01; + let Inst{19} = wide; + let Inst{18-16} = opc; + let Inst{15-13} = 0b100; + let Inst{12-10} = Pg; + let Inst{9-5} = Zm; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_int_bin_pred_shift opc, string asm> { + def _B : sve_int_bin_pred_shift<0b00, 0b0, opc, asm, ZPR8, ZPR8>; + def _H : sve_int_bin_pred_shift<0b01, 0b0, opc, asm, ZPR16, ZPR16>; + def _S : sve_int_bin_pred_shift<0b10, 0b0, opc, asm, ZPR32, ZPR32>; + def _D : sve_int_bin_pred_shift<0b11, 0b0, opc, asm, ZPR64, ZPR64>; +} + +multiclass sve_int_bin_pred_shift_wide opc, string asm> { + def _B : sve_int_bin_pred_shift<0b00, 0b1, opc, asm, ZPR8, ZPR64>; + def _H : sve_int_bin_pred_shift<0b01, 0b1, opc, asm, ZPR16, ZPR64>; + def _S : sve_int_bin_pred_shift<0b10, 0b1, opc, asm, ZPR32, ZPR64>; +} + +//===----------------------------------------------------------------------===// +// SVE Shift - Unpredicated Group +//===----------------------------------------------------------------------===// + +class sve_int_bin_cons_shift_wide sz8_64, bits<2> opc, string asm, + ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins zprty:$Zn, ZPR64:$Zm), + asm, "\t$Zd, $Zn, $Zm", + "", + []>, Sched<[]> { + bits<5> Zd; + bits<5> Zm; + bits<5> Zn; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{15-12} = 0b1000; + let 
Inst{11-10} = opc; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_bin_cons_shift_wide opc, string asm> { + def _B : sve_int_bin_cons_shift_wide<0b00, opc, asm, ZPR8>; + def _H : sve_int_bin_cons_shift_wide<0b01, opc, asm, ZPR16>; + def _S : sve_int_bin_cons_shift_wide<0b10, opc, asm, ZPR32>; +} + +class sve_int_bin_cons_shift_imm tsz8_64, bits<2> opc, string asm, + ZPRRegOp zprty, Operand immtype> +: I<(outs zprty:$Zd), (ins zprty:$Zn, immtype:$imm), + asm, "\t$Zd, $Zn, $imm", + "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + bits<6> imm; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = tsz8_64{3-2}; + let Inst{21} = 0b1; + let Inst{20-19} = tsz8_64{1-0}; + let Inst{18-16} = imm{2-0}; // imm3 + let Inst{15-12} = 0b1001; + let Inst{11-10} = opc; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_bin_cons_shift_imm_left opc, string asm> { + def _B : sve_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>; + def _H : sve_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> { + let Inst{19} = imm{3}; + } + def _S : sve_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> { + let Inst{20-19} = imm{4-3}; + } + def _D : sve_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> { + let Inst{22} = imm{5}; + let Inst{20-19} = imm{4-3}; + } +} + +multiclass sve_int_bin_cons_shift_imm_right opc, string asm> { + def _B : sve_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; + def _H : sve_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { + let Inst{19} = imm{3}; + } + def _S : sve_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> { + let Inst{20-19} = imm{4-3}; + } + def _D : sve_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> { + let Inst{22} = imm{5}; + let Inst{20-19} = imm{4-3}; + } +} +//===----------------------------------------------------------------------===// +// SVE Memory - Store Group +//===----------------------------------------------------------------------===// + +class sve_mem_cst_si msz, bits<2> esz, string asm, + RegisterOperand VecList> +: I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), + asm, "\t$Zt, $Pg, [$Rn, $imm4, mul vl]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rn; + bits<5> Zt; + bits<4> imm4; + let Inst{31-25} = 0b1110010; + let Inst{24-23} = msz; + let Inst{22-21} = esz; + let Inst{20} = 0; + let Inst{19-16} = imm4; + let Inst{15-13} = 0b111; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayStore = 1; +} + +multiclass sve_mem_cst_si msz, bits<2> esz, string asm, + RegisterOperand listty, ZPRRegOp zprty> +{ + def NAME : sve_mem_cst_si; + + def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), 0>; + def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>; + def : InstAlias(NAME) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; +} + +class sve_mem_est_si sz, bits<2> nregs, RegisterOperand VecList, + string asm, Operand immtype> +: I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, immtype:$imm4), + asm, "\t$Zt, $Pg, [$Rn, $imm4, mul vl]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rn; + bits<5> Zt; + bits<4> imm4; + let Inst{31-25} = 0b1110010; + let Inst{24-23} = sz; + let Inst{22-21} = nregs; + let Inst{20} = 1; + let Inst{19-16} = imm4; + let Inst{15-13} = 0b111; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayStore = 1; +} + +multiclass sve_mem_est_si sz, bits<2> nregs, RegisterOperand VecList, 
+ string asm, Operand immtype> { + def NAME : sve_mem_est_si; + + def : InstAlias(NAME) VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; +} + +class sve_mem_est_ss sz, bits<2> nregs, RegisterOperand VecList, + string asm, RegisterOperand gprty> +: I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), + asm, "\t$Zt, $Pg, [$Rn, $Rm]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rm; + bits<5> Rn; + bits<5> Zt; + let Inst{31-25} = 0b1110010; + let Inst{24-23} = sz; + let Inst{22-21} = nregs; + let Inst{20-16} = Rm; + let Inst{15-13} = 0b011; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayStore = 1; +} + +class sve_mem_cst_ss_base dtype, string asm, + RegisterOperand listty, RegisterOperand gprty> +: I<(outs), (ins listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), + asm, "\t$Zt, $Pg, [$Rn, $Rm]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rm; + bits<5> Rn; + bits<5> Zt; + let Inst{31-25} = 0b1110010; + let Inst{24-21} = dtype; + let Inst{20-16} = Rm; + let Inst{15-13} = 0b010; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayStore = 1; +} + +multiclass sve_mem_cst_ss dtype, string asm, + RegisterOperand listty, ZPRRegOp zprty, + RegisterOperand gprty> { + def NAME : sve_mem_cst_ss_base; + + def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>; +} + +class sve_mem_cstnt_si msz, string asm, RegisterOperand VecList> +: I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), + asm, "\t$Zt, $Pg, [$Rn, $imm4, mul vl]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rn; + bits<5> Zt; + bits<4> imm4; + let Inst{31-25} = 0b1110010; + let Inst{24-23} = msz; + let Inst{22-20} = 0b001; + let Inst{19-16} = imm4; + let Inst{15-13} = 0b111; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayStore = 1; +} + +multiclass sve_mem_cstnt_si msz, string asm, RegisterOperand listty, + ZPRRegOp zprty> { + def NAME : sve_mem_cstnt_si; + + def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>; + def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), 0>; + def : InstAlias(NAME) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; +} + +class sve_mem_cstnt_ss_base msz, string asm, RegisterOperand listty, + RegisterOperand gprty> +: I<(outs), (ins listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), + asm, "\t$Zt, $Pg, [$Rn, $Rm]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rm; + bits<5> Rn; + bits<5> Zt; + let Inst{31-25} = 0b1110010; + let Inst{24-23} = msz; + let Inst{22-21} = 0b00; + let Inst{20-16} = Rm; + let Inst{15-13} = 0b011; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayStore = 1; +} + +multiclass sve_mem_cstnt_ss msz, string asm, RegisterOperand listty, + ZPRRegOp zprty, RegisterOperand gprty> { + def NAME : sve_mem_cstnt_ss_base; + + def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>; +} + +class sve_mem_sst_sv opc, bit xs, bit scaled, string asm, + RegisterOperand VecList, RegisterOperand zprext> +: I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), + asm, "\t$Zt, $Pg, [$Rn, $Zm]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rn; + bits<5> Zm; + bits<5> Zt; + let Inst{31-25} = 0b1110010; + let Inst{24-22} = opc; + let Inst{21} = scaled; + let Inst{20-16} = Zm; + let Inst{15} = 0b1; + let Inst{14} = xs; + let Inst{13} = 0; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayStore = 1; +} + +multiclass sve_mem_sst_sv_32_scaled 
opc, string asm, + RegisterOperand listty, + ZPRRegOp zprty, + RegisterOperand sxtw_opnd, + RegisterOperand uxtw_opnd > { + def _UXTW_SCALED : sve_mem_sst_sv; + def _SXTW_SCALED : sve_mem_sst_sv; + + def : InstAlias(NAME # _UXTW_SCALED) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>; + def : InstAlias(NAME # _SXTW_SCALED) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>; +} + +multiclass sve_mem_sst_sv_32_unscaled opc, string asm, + RegisterOperand listty, + ZPRRegOp zprty, + RegisterOperand sxtw_opnd, + RegisterOperand uxtw_opnd> { + def _UXTW : sve_mem_sst_sv; + def _SXTW : sve_mem_sst_sv; + + def : InstAlias(NAME # _UXTW) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>; + def : InstAlias(NAME # _SXTW) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>; +} + +class sve_mem_sst_sv2 msz, bit scaled, string asm, + RegisterOperand zprext> +: I<(outs), (ins Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), + asm, "\t$Zt, $Pg, [$Rn, $Zm]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rn; + bits<5> Zm; + bits<5> Zt; + let Inst{31-25} = 0b1110010; + let Inst{24-23} = msz; + let Inst{22} = 0b0; + let Inst{21} = scaled; + let Inst{20-16} = Zm; + let Inst{15-13} = 0b101; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayStore = 1; +} + +multiclass sve_mem_sst_sv_64_scaled msz, string asm, + RegisterOperand zprext> { + def "" : sve_mem_sst_sv2; + + def : InstAlias(NAME) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), 0>; + +} + +multiclass sve_mem_sst_sv_64_unscaled msz, string asm> { + def "" : sve_mem_sst_sv2; + + def : InstAlias(NAME) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), 0>; +} + +class sve_mem_sst_vi opc, string asm, ZPRRegOp zprty, + RegisterOperand VecList, Operand imm_ty> +: I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, zprty:$Zn, imm_ty:$imm5), + asm, "\t$Zt, $Pg, [$Zn, $imm5]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> imm5; + bits<5> Zn; + bits<5> Zt; + let Inst{31-25} = 0b1110010; + let Inst{24-23} = opc{2-1}; + let Inst{22} = 0b1; + let Inst{21} = opc{0}; + let Inst{20-16} = imm5; + let Inst{15-13} = 0b101; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Zt; + + let mayStore = 1; +} + +multiclass sve_mem_sst_vi_ptrs opc, string asm, RegisterOperand listty, + ZPRRegOp zprty, Operand imm_ty> { + def _IMM : sve_mem_sst_vi; + + def : InstAlias(NAME # _IMM) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, 0), 0>; + def : InstAlias(NAME # _IMM) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, imm_ty:$imm5), 0>; + def : InstAlias(NAME # _IMM) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, 0), 1>; +} + +class sve_mem_z_spill +: I<(outs), (ins ZPRAny:$Zt, GPR64sp:$Rn, simm9:$imm9), + asm, "\t$Zt, [$Rn, $imm9, mul vl]", + "", + []>, Sched<[]> { + bits<5> Rn; + bits<5> Zt; + bits<9> imm9; + let Inst{31-22} = 0b1110010110; + let Inst{21-16} = imm9{8-3}; + let Inst{15-13} = 0b010; + let Inst{12-10} = imm9{2-0}; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayStore = 1; +} + +multiclass sve_mem_z_spill { + def NAME : sve_mem_z_spill; + + def : InstAlias(NAME) ZPRAny:$Zt, GPR64sp:$Rn, 0), 1>; +} + +class sve_mem_p_spill +: I<(outs), (ins PPRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), + asm, "\t$Pt, [$Rn, $imm9, mul vl]", + "", + []>, Sched<[]> { + bits<4> Pt; + bits<5> Rn; + bits<9> imm9; + let Inst{31-22} = 0b1110010110; + let Inst{21-16} = imm9{8-3}; + let Inst{15-13} = 0b000; + let Inst{12-10} = imm9{2-0}; + let Inst{9-5} = Rn; + let Inst{4} = 0b0; + let Inst{3-0} = Pt; + + let mayStore = 1; +} + +multiclass sve_mem_p_spill { + 
def NAME : sve_mem_p_spill; + + def : InstAlias(NAME) PPRAny:$Pt, GPR64sp:$Rn, 0), 1>; +} + +//===----------------------------------------------------------------------===// +// SVE Permute - Predicates Group +//===----------------------------------------------------------------------===// + +class sve_int_perm_bin_perm_pp opc, bits<2> sz8_64, string asm, + PPRRegOp pprty> +: I<(outs pprty:$Pd), (ins pprty:$Pn, pprty:$Pm), + asm, "\t$Pd, $Pn, $Pm", + "", + []>, Sched<[]> { + bits<4> Pd; + bits<4> Pm; + bits<4> Pn; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-20} = 0b10; + let Inst{19-16} = Pm; + let Inst{15-13} = 0b010; + let Inst{12-10} = opc; + let Inst{9} = 0b0; + let Inst{8-5} = Pn; + let Inst{4} = 0b0; + let Inst{3-0} = Pd; +} + +multiclass sve_int_perm_bin_perm_pp opc, string asm> { + def _B : sve_int_perm_bin_perm_pp; + def _H : sve_int_perm_bin_perm_pp; + def _S : sve_int_perm_bin_perm_pp; + def _D : sve_int_perm_bin_perm_pp; +} + +class sve_int_perm_punpk +: I<(outs PPR16:$Pd), (ins PPR8:$Pn), + asm, "\t$Pd, $Pn", + "", + []>, Sched<[]> { + bits<4> Pd; + bits<4> Pn; + let Inst{31-17} = 0b000001010011000; + let Inst{16} = opc; + let Inst{15-9} = 0b0100000; + let Inst{8-5} = Pn; + let Inst{4} = 0b0; + let Inst{3-0} = Pd; +} + +class sve_int_rdffr_pred +: I<(outs PPR8:$Pd), (ins PPRAny:$Pg), + asm, "\t$Pd, $Pg/z", + "", + []>, Sched<[]> { + bits<4> Pd; + bits<4> Pg; + let Inst{31-23} = 0b001001010; + let Inst{22} = s; + let Inst{21-9} = 0b0110001111000; + let Inst{8-5} = Pg; + let Inst{4} = 0; + let Inst{3-0} = Pd; + + let Defs = !if(!eq (s, 1), [NZCV], []); + let Uses = [FFR]; +} + +class sve_int_rdffr_unpred : I< + (outs PPR8:$Pd), (ins), + asm, "\t$Pd", + "", + []>, Sched<[]> { + bits<4> Pd; + let Inst{31-4} = 0b0010010100011001111100000000; + let Inst{3-0} = Pd; + + let Uses = [FFR]; +} + +class sve_int_wrffr +: I<(outs), (ins PPR8:$Pn), + asm, "\t$Pn", + "", + []>, Sched<[]> { + bits<4> Pn; + let Inst{31-9} = 0b00100101001010001001000; + let Inst{8-5} = Pn; + let Inst{4-0} = 0b00000; + + let hasSideEffects = 1; + let Defs = [FFR]; +} + +class sve_int_setffr +: I<(outs), (ins), + asm, "", + "", + []>, Sched<[]> { + let Inst{31-0} = 0b00100101001011001001000000000000; + + let hasSideEffects = 1; + let Defs = [FFR]; +} + +//===----------------------------------------------------------------------===// +// SVE Permute Vector - Predicated Group +//===----------------------------------------------------------------------===// + +class sve_int_perm_clast_rz sz8_64, bit ab, string asm, + ZPRRegOp zprty, RegisterClass rt> +: I<(outs rt:$Rdn), (ins PPR3bAny:$Pg, rt:$_Rdn, zprty:$Zm), + asm, "\t$Rdn, $Pg, $_Rdn, $Zm", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rdn; + bits<5> Zm; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-17} = 0b11000; + let Inst{16} = ab; + let Inst{15-13} = 0b101; + let Inst{12-10} = Pg; + let Inst{9-5} = Zm; + let Inst{4-0} = Rdn; + + let Constraints = "$Rdn = $_Rdn"; +} + +multiclass sve_int_perm_clast_rz { + def _B : sve_int_perm_clast_rz<0b00, ab, asm, ZPR8, GPR32>; + def _H : sve_int_perm_clast_rz<0b01, ab, asm, ZPR16, GPR32>; + def _S : sve_int_perm_clast_rz<0b10, ab, asm, ZPR32, GPR32>; + def _D : sve_int_perm_clast_rz<0b11, ab, asm, ZPR64, GPR64>; +} + +class sve_int_perm_clast_vz sz8_64, bit ab, string asm, + ZPRRegOp zprty, RegisterClass rt> +: I<(outs rt:$Vdn), (ins PPR3bAny:$Pg, rt:$_Vdn, zprty:$Zm), + asm, "\t$Vdn, $Pg, $_Vdn, $Zm", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Vdn; + 
bits<5> Zm; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-17} = 0b10101; + let Inst{16} = ab; + let Inst{15-13} = 0b100; + let Inst{12-10} = Pg; + let Inst{9-5} = Zm; + let Inst{4-0} = Vdn; + + let Constraints = "$Vdn = $_Vdn"; +} + +multiclass sve_int_perm_clast_vz { + def _B : sve_int_perm_clast_vz<0b00, ab, asm, ZPR8, FPR8>; + def _H : sve_int_perm_clast_vz<0b01, ab, asm, ZPR16, FPR16>; + def _S : sve_int_perm_clast_vz<0b10, ab, asm, ZPR32, FPR32>; + def _D : sve_int_perm_clast_vz<0b11, ab, asm, ZPR64, FPR64>; +} + +class sve_int_perm_clast_zz sz8_64, bit ab, string asm, + ZPRRegOp zprty> +: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm), + asm, "\t$Zdn, $Pg, $_Zdn, $Zm", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zdn; + bits<5> Zm; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-17} = 0b10100; + let Inst{16} = ab; + let Inst{15-13} = 0b100; + let Inst{12-10} = Pg; + let Inst{9-5} = Zm; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve_int_perm_clast_zz { + def _B : sve_int_perm_clast_zz<0b00, ab, asm, ZPR8>; + def _H : sve_int_perm_clast_zz<0b01, ab, asm, ZPR16>; + def _S : sve_int_perm_clast_zz<0b10, ab, asm, ZPR32>; + def _D : sve_int_perm_clast_zz<0b11, ab, asm, ZPR64>; +} + +class sve_int_perm_last_r sz8_64, bit ab, string asm, + ZPRRegOp zprty, RegisterClass resultRegType> +: I<(outs resultRegType:$Rd), (ins PPR3bAny:$Pg, zprty:$Zn), + asm, "\t$Rd, $Pg, $Zn", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rd; + bits<5> Zn; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-17} = 0b10000; + let Inst{16} = ab; + let Inst{15-13} = 0b101; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Rd; +} + +multiclass sve_int_perm_last_r { + def _B : sve_int_perm_last_r<0b00, ab, asm, ZPR8, GPR32>; + def _H : sve_int_perm_last_r<0b01, ab, asm, ZPR16, GPR32>; + def _S : sve_int_perm_last_r<0b10, ab, asm, ZPR32, GPR32>; + def _D : sve_int_perm_last_r<0b11, ab, asm, ZPR64, GPR64>; +} + +class sve_int_perm_last_v sz8_64, bit ab, string asm, + ZPRRegOp zprty, RegisterClass dstRegtype> +: I<(outs dstRegtype:$Vd), (ins PPR3bAny:$Pg, zprty:$Zn), + asm, "\t$Vd, $Pg, $Zn", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Vd; + bits<5> Zn; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-17} = 0b10001; + let Inst{16} = ab; + let Inst{15-13} = 0b100; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Vd; +} + +multiclass sve_int_perm_last_v { + def _B : sve_int_perm_last_v<0b00, ab, asm, ZPR8, FPR8>; + def _H : sve_int_perm_last_v<0b01, ab, asm, ZPR16, FPR16>; + def _S : sve_int_perm_last_v<0b10, ab, asm, ZPR32, FPR32>; + def _D : sve_int_perm_last_v<0b11, ab, asm, ZPR64, FPR64>; +} + +class sve_int_perm_splice sz8_64, string asm, ZPRRegOp zprty> +: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm), + asm, "\t$Zdn, $Pg, $_Zdn, $Zm", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zdn; + bits<5> Zm; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-13} = 0b101100100; + let Inst{12-10} = Pg; + let Inst{9-5} = Zm; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve_int_perm_splice { + def _B : sve_int_perm_splice<0b00, asm, ZPR8>; + def _H : sve_int_perm_splice<0b01, asm, ZPR16>; + def _S : 
sve_int_perm_splice<0b10, asm, ZPR32>; + def _D : sve_int_perm_splice<0b11, asm, ZPR64>; +} + +class sve_int_perm_rev sz8_64, bits<2> opc, string asm, + ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins zprty:$_Zd, PPR3bAny:$Pg, zprty:$Zn), + asm, "\t$Zd, $Pg/m, $Zn", + "", + []>, Sched<[]> { + bits<5> Zd; + bits<3> Pg; + bits<5> Zn; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-18} = 0b1001; + let Inst{17-16} = opc; + let Inst{15-13} = 0b100; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_int_perm_rev_rbit { + def _B : sve_int_perm_rev<0b00, 0b11, asm, ZPR8>; + def _H : sve_int_perm_rev<0b01, 0b11, asm, ZPR16>; + def _S : sve_int_perm_rev<0b10, 0b11, asm, ZPR32>; + def _D : sve_int_perm_rev<0b11, 0b11, asm, ZPR64>; +} + +multiclass sve_int_perm_rev_revb { + def _H : sve_int_perm_rev<0b01, 0b00, asm, ZPR16>; + def _S : sve_int_perm_rev<0b10, 0b00, asm, ZPR32>; + def _D : sve_int_perm_rev<0b11, 0b00, asm, ZPR64>; +} + +multiclass sve_int_perm_rev_revh { + def _S : sve_int_perm_rev<0b10, 0b01, asm, ZPR32>; + def _D : sve_int_perm_rev<0b11, 0b01, asm, ZPR64>; +} + +multiclass sve_int_perm_rev_revw { + def _D : sve_int_perm_rev<0b11, 0b10, asm, ZPR64>; +} + +class sve_int_perm_cpy_r sz8_64, string asm, ZPRRegOp zprty, + RegisterClass srcRegType> +: I<(outs zprty:$Zd), (ins zprty:$_Zd, PPR3bAny:$Pg, srcRegType:$Rn), + asm, "\t$Zd, $Pg/m, $Rn", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rn; + bits<5> Zd; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-13} = 0b101000101; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_int_perm_cpy_r { + def _B : sve_int_perm_cpy_r<0b00, asm, ZPR8, GPR32sp>; + def _H : sve_int_perm_cpy_r<0b01, asm, ZPR16, GPR32sp>; + def _S : sve_int_perm_cpy_r<0b10, asm, ZPR32, GPR32sp>; + def _D : sve_int_perm_cpy_r<0b11, asm, ZPR64, GPR64sp>; + + def : InstAlias<"mov $Zd, $Pg/m, $Rn", + (!cast(NAME # _B) ZPR8:$Zd, PPR3bAny:$Pg, GPR32sp:$Rn), 1>; + def : InstAlias<"mov $Zd, $Pg/m, $Rn", + (!cast(NAME # _H) ZPR16:$Zd, PPR3bAny:$Pg, GPR32sp:$Rn), 1>; + def : InstAlias<"mov $Zd, $Pg/m, $Rn", + (!cast(NAME # _S) ZPR32:$Zd, PPR3bAny:$Pg, GPR32sp:$Rn), 1>; + def : InstAlias<"mov $Zd, $Pg/m, $Rn", + (!cast(NAME # _D) ZPR64:$Zd, PPR3bAny:$Pg, GPR64sp:$Rn), 1>; +} + +class sve_int_perm_cpy_v sz8_64, string asm, ZPRRegOp zprty, + RegisterClass srcRegtype> +: I<(outs zprty:$Zd), (ins zprty:$_Zd, PPR3bAny:$Pg, srcRegtype:$Vn), + asm, "\t$Zd, $Pg/m, $Vn", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Vn; + bits<5> Zd; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-13} = 0b100000100; + let Inst{12-10} = Pg; + let Inst{9-5} = Vn; + let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_int_perm_cpy_v { + def _B : sve_int_perm_cpy_v<0b00, asm, ZPR8, FPR8>; + def _H : sve_int_perm_cpy_v<0b01, asm, ZPR16, FPR16>; + def _S : sve_int_perm_cpy_v<0b10, asm, ZPR32, FPR32>; + def _D : sve_int_perm_cpy_v<0b11, asm, ZPR64, FPR64>; + + def : InstAlias<"mov $Zd, $Pg/m, $Vn", + (!cast(NAME # _B) ZPR8:$Zd, PPR3bAny:$Pg, FPR8:$Vn), 1>; + def : InstAlias<"mov $Zd, $Pg/m, $Vn", + (!cast(NAME # _H) ZPR16:$Zd, 
PPR3bAny:$Pg, FPR16:$Vn), 1>; + def : InstAlias<"mov $Zd, $Pg/m, $Vn", + (!cast(NAME # _S) ZPR32:$Zd, PPR3bAny:$Pg, FPR32:$Vn), 1>; + def : InstAlias<"mov $Zd, $Pg/m, $Vn", + (!cast(NAME # _D) ZPR64:$Zd, PPR3bAny:$Pg, FPR64:$Vn), 1>; +} + +class sve_int_perm_compact +: I<(outs zprty:$Zd), (ins PPR3bAny:$Pg, zprty:$Zn), + asm, "\t$Zd, $Pg, $Zn", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zd; + bits<5> Zn; + let Inst{31-23} = 0b000001011; + let Inst{22} = sz; + let Inst{21-13} = 0b100001100; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_perm_compact { + def _S : sve_int_perm_compact<0b0, asm, ZPR32>; + def _D : sve_int_perm_compact<0b1, asm, ZPR64>; +} + + +//===----------------------------------------------------------------------===// +// SVE Memory - Contiguous Load Group +//===----------------------------------------------------------------------===// + +class sve_mem_cld_si_base dtype, bit nf, string asm, + RegisterOperand VecList> +: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), + asm, "\t$Zt, $Pg/z, [$Rn, $imm4, mul vl]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rn; + bits<5> Zt; + bits<4> imm4; + let Inst{31-25} = 0b1010010; + let Inst{24-21} = dtype; + let Inst{20} = nf; + let Inst{19-16} = imm4; + let Inst{15-13} = 0b101; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayLoad = 1; + let Uses = !if(!eq(nf, 1), [FFR], []); + let Defs = !if(!eq(nf, 1), [FFR], []); +} + +multiclass sve_mem_cld_si_base dtype, bit nf, string asm, + RegisterOperand listty, ZPRRegOp zprty> { + def _REAL : sve_mem_cld_si_base; + + def : InstAlias(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>; + def : InstAlias(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), 0>; + def : InstAlias(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; +} + +multiclass sve_mem_cld_si dtype, string asm, RegisterOperand listty, + ZPRRegOp zprty> +: sve_mem_cld_si_base; + +class sve_mem_cldnt_si_base msz, string asm, RegisterOperand VecList> +: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), + asm, "\t$Zt, $Pg/z, [$Rn, $imm4, mul vl]", + "", + []>, Sched<[]> { + bits<5> Zt; + bits<3> Pg; + bits<5> Rn; + bits<4> imm4; + let Inst{31-25} = 0b1010010; + let Inst{24-23} = msz; + let Inst{22-20} = 0b000; + let Inst{19-16} = imm4; + let Inst{15-13} = 0b111; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayLoad = 1; +} + +multiclass sve_mem_cldnt_si msz, string asm, RegisterOperand listty, + ZPRRegOp zprty> { + def NAME : sve_mem_cldnt_si_base; + + def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>; + def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), 0>; + def : InstAlias(NAME) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; +} + +class sve_mem_cldnt_ss_base msz, string asm, RegisterOperand VecList, + RegisterOperand gprty> +: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), + asm, "\t$Zt, $Pg/z, [$Rn, $Rm]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rm; + bits<5> Rn; + bits<5> Zt; + let Inst{31-25} = 0b1010010; + let Inst{24-23} = msz; + let Inst{22-21} = 0b00; + let Inst{20-16} = Rm; + let Inst{15-13} = 0b110; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayLoad = 1; +} + +multiclass sve_mem_cldnt_ss msz, string asm, RegisterOperand listty, + ZPRRegOp zprty, RegisterOperand gprty> { + def NAME : sve_mem_cldnt_ss_base; + + def : 
InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>; +} + +class sve_mem_ldqr_si sz, string asm, RegisterOperand VecList> +: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, simm4s16:$imm4), + asm, "\t$Zt, $Pg/z, [$Rn, $imm4]", "", []>, Sched<[]> { + bits<5> Zt; + bits<5> Rn; + bits<3> Pg; + bits<4> imm4; + let Inst{31-25} = 0b1010010; + let Inst{24-23} = sz; + let Inst{22-20} = 0; + let Inst{19-16} = imm4; + let Inst{15-13} = 0b001; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayLoad = 1; +} + +multiclass sve_mem_ldqr_si sz, string asm, RegisterOperand listty, + ZPRRegOp zprty> { + def NAME : sve_mem_ldqr_si; + def : InstAlias(NAME) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; + def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>; + def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s16:$imm4), 0>; +} + +class sve_mem_ldqr_ss sz, string asm, RegisterOperand VecList, + RegisterOperand gprty> +: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), + asm, "\t$Zt, $Pg/z, [$Rn, $Rm]", "", []>, Sched<[]> { + bits<5> Zt; + bits<3> Pg; + bits<5> Rn; + bits<5> Rm; + let Inst{31-25} = 0b1010010; + let Inst{24-23} = sz; + let Inst{22-21} = 0; + let Inst{20-16} = Rm; + let Inst{15-13} = 0; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayLoad = 1; +} + +multiclass sve_mem_ldqr_ss sz, string asm, RegisterOperand listty, + ZPRRegOp zprty, RegisterOperand gprty> { + def NAME : sve_mem_ldqr_ss; + + def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>; +} + +class sve_mem_ld_dup dtypeh, bits<2> dtypel, string asm, + RegisterOperand VecList, Operand immtype> +: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, immtype:$imm6), + asm, "\t$Zt, $Pg/z, [$Rn, $imm6]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rn; + bits<5> Zt; + bits<6> imm6; + let Inst{31-25} = 0b1000010; + let Inst{24-23} = dtypeh; + let Inst{22} = 1; + let Inst{21-16} = imm6; + let Inst{15} = 0b1; + let Inst{14-13} = dtypel; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayLoad = 1; +} + +multiclass sve_mem_ld_dup dtypeh, bits<2> dtypel, string asm, + RegisterOperand zlistty, ZPRRegOp zprty, Operand immtype> { + def NAME : sve_mem_ld_dup; + + def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>; + def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, immtype:$imm6), 0>; + def : InstAlias(NAME) zlistty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; +} + +class sve_mem_cld_ss_base dtype, bit ff, dag iops, string asm, + RegisterOperand VecList> +: I<(outs VecList:$Zt), iops, + asm, "\t$Zt, $Pg/z, [$Rn, $Rm]", + "", + []>, Sched<[]> { + bits<5> Zt; + bits<3> Pg; + bits<5> Rm; + bits<5> Rn; + let Inst{31-25} = 0b1010010; + let Inst{24-21} = dtype; + let Inst{20-16} = Rm; + let Inst{15-14} = 0b01; + let Inst{13} = ff; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayLoad = 1; + let Uses = !if(!eq(ff, 1), [FFR], []); + let Defs = !if(!eq(ff, 1), [FFR], []); +} + +multiclass sve_mem_cld_ss dtype, string asm, RegisterOperand listty, + ZPRRegOp zprty, RegisterOperand gprty> { + def "" : sve_mem_cld_ss_base; + + def : InstAlias(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>; +} + +multiclass sve_mem_cldff_ss dtype, string asm, RegisterOperand listty, + ZPRRegOp zprty, RegisterOperand gprty> { + def _REAL : sve_mem_cld_ss_base; + + def : InstAlias(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 
0>; + + def : InstAlias(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 1>; + + def : InstAlias(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 0>; +} + +multiclass sve_mem_cldnf_si dtype, string asm, RegisterOperand listty, + ZPRRegOp zprty> +: sve_mem_cld_si_base; + +class sve_mem_eld_si sz, bits<2> nregs, RegisterOperand VecList, + string asm, Operand immtype> +: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, immtype:$imm4), + asm, "\t$Zt, $Pg/z, [$Rn, $imm4, mul vl]", + "", + []>, Sched<[]> { + bits<5> Zt; + bits<3> Pg; + bits<5> Rn; + bits<4> imm4; + let Inst{31-25} = 0b1010010; + let Inst{24-23} = sz; + let Inst{22-21} = nregs; + let Inst{20} = 0; + let Inst{19-16} = imm4; + let Inst{15-13} = 0b111; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayLoad = 1; +} + +multiclass sve_mem_eld_si sz, bits<2> nregs, RegisterOperand VecList, + string asm, Operand immtype> { + def NAME : sve_mem_eld_si; + + def : InstAlias(NAME) VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; +} + +class sve_mem_eld_ss sz, bits<2> nregs, RegisterOperand VecList, + string asm, RegisterOperand gprty> +: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), + asm, "\t$Zt, $Pg/z, [$Rn, $Rm]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rm; + bits<5> Rn; + bits<5> Zt; + let Inst{31-25} = 0b1010010; + let Inst{24-23} = sz; + let Inst{22-21} = nregs; + let Inst{20-16} = Rm; + let Inst{15-13} = 0b110; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayLoad = 1; +} + +//===----------------------------------------------------------------------===// +// SVE Memory - 32-bit Gather and Unsized Contiguous Group +//===----------------------------------------------------------------------===// + +// bit xs is '1' if offsets are signed +// bit scaled is '1' if the offsets are scaled +class sve_mem_32b_gld_sv opc, bit xs, bit scaled, string asm, + RegisterOperand zprext> +: I<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), + asm, "\t$Zt, $Pg/z, [$Rn, $Zm]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rn; + bits<5> Zm; + bits<5> Zt; + let Inst{31-25} = 0b1000010; + let Inst{24-23} = opc{3-2}; + let Inst{22} = xs; + let Inst{21} = scaled; + let Inst{20-16} = Zm; + let Inst{15} = 0b0; + let Inst{14-13} = opc{1-0}; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayLoad = 1; + let Defs = !if(!eq(opc{0}, 1), [FFR], []); + let Uses = !if(!eq(opc{0}, 1), [FFR], []); +} + +multiclass sve_mem_32b_gld_sv_32_scaled opc, string asm, + RegisterOperand sxtw_opnd, + RegisterOperand uxtw_opnd> { + def _UXTW_SCALED_REAL : sve_mem_32b_gld_sv; + def _SXTW_SCALED_REAL : sve_mem_32b_gld_sv; + + def : InstAlias(NAME # _UXTW_SCALED_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>; + def : InstAlias(NAME # _SXTW_SCALED_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>; +} + +multiclass sve_mem_32b_gld_vs_32_unscaled opc, string asm, + RegisterOperand sxtw_opnd, + RegisterOperand uxtw_opnd> { + def _UXTW_REAL : sve_mem_32b_gld_sv; + def _SXTW_REAL : sve_mem_32b_gld_sv; + + def : InstAlias(NAME # _UXTW_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>; + def : InstAlias(NAME # _SXTW_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>; +} + + +class sve_mem_32b_gld_vi opc, string asm, Operand imm_ty> +: I<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5), + asm, "\t$Zt, $Pg/z, [$Zn, $imm5]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zn; + 
bits<5> Zt; + bits<5> imm5; + let Inst{31-25} = 0b1000010; + let Inst{24-23} = opc{3-2}; + let Inst{22-21} = 0b01; + let Inst{20-16} = imm5; + let Inst{15} = 0b1; + let Inst{14-13} = opc{1-0}; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Zt; + + let mayLoad = 1; + let Defs = !if(!eq(opc{0}, 1), [FFR], []); + let Uses = !if(!eq(opc{0}, 1), [FFR], []); +} + +multiclass sve_mem_32b_gld_vi_32_ptrs opc, string asm, Operand imm_ty> { + def _IMM_REAL : sve_mem_32b_gld_vi; + + def : InstAlias(NAME # _IMM_REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, 0), 0>; + def : InstAlias(NAME # _IMM_REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5), 0>; + def : InstAlias(NAME # _IMM_REAL) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, 0), 1>; +} + +class sve_mem_prfm_si msz, string asm> +: I<(outs), (ins sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, simm6s1:$imm6), + asm, "\t$prfop, $Pg, [$Rn, $imm6, mul vl]", + "", + []>, Sched<[]> { + bits<5> Rn; + bits<3> Pg; + bits<6> imm6; + bits<4> prfop; + let Inst{31-22} = 0b1000010111; + let Inst{21-16} = imm6; + let Inst{15} = 0b0; + let Inst{14-13} = msz; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4} = 0b0; + let Inst{3-0} = prfop; + + let hasSideEffects = 1; +} + +multiclass sve_mem_prfm_si msz, string asm> { + def NAME : sve_mem_prfm_si; + + def : InstAlias(NAME) sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; +} + +class sve_mem_prfm_ss opc, string asm, RegisterOperand gprty> +: I<(outs), (ins sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), + asm, "\t$prfop, $Pg, [$Rn, $Rm]", + "", + []>, Sched<[]> { + bits<5> Rm; + bits<5> Rn; + bits<3> Pg; + bits<4> prfop; + let Inst{31-25} = 0b1000010; + let Inst{24-23} = opc{2-1}; + let Inst{22-21} = 0b00; + let Inst{20-16} = Rm; + let Inst{15} = 0b1; + let Inst{14} = opc{0}; + let Inst{13} = 0b0; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4} = 0b0; + let Inst{3-0} = prfop; + + let hasSideEffects = 1; +} + +class sve_mem_32b_prfm_sv msz, bit xs, string asm, + RegisterOperand zprext> +: I<(outs), (ins sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), + asm, "\t$prfop, $Pg, [$Rn, $Zm]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rn; + bits<5> Zm; + bits<4> prfop; + let Inst{31-23} = 0b100001000; + let Inst{22} = xs; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{15} = 0b0; + let Inst{14-13} = msz; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4} = 0b0; + let Inst{3-0} = prfop; + + let hasSideEffects = 1; +} + +multiclass sve_mem_32b_prfm_sv_scaled msz, string asm, + RegisterOperand sxtw_opnd, + RegisterOperand uxtw_opnd> { + def _UXTW_SCALED : sve_mem_32b_prfm_sv; + def _SXTW_SCALED : sve_mem_32b_prfm_sv; +} + +class sve_mem_32b_prfm_vi msz, string asm, Operand imm_ty> +: I<(outs), (ins sve_prfop:$prfop, PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5), + asm, "\t$prfop, $Pg, [$Zn, $imm5]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zn; + bits<5> imm5; + bits<4> prfop; + let Inst{31-25} = 0b1000010; + let Inst{24-23} = msz; + let Inst{22-21} = 0b00; + let Inst{20-16} = imm5; + let Inst{15-13} = 0b111; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4} = 0b0; + let Inst{3-0} = prfop; +} + +multiclass sve_mem_32b_prfm_vi msz, string asm, Operand imm_ty> { + def NAME : sve_mem_32b_prfm_vi; + + def : InstAlias(NAME) sve_prfop:$prfop, PPR3bAny:$Pg, ZPR32:$Zn, 0), 1>; +} + +class sve_mem_z_fill +: I<(outs ZPRAny:$Zt), (ins GPR64sp:$Rn, simm9:$imm9), + asm, "\t$Zt, [$Rn, $imm9, mul vl]", + "", + []>, Sched<[]> { + bits<5> Rn; + bits<5> Zt; + 
bits<9> imm9; + let Inst{31-22} = 0b1000010110; + let Inst{21-16} = imm9{8-3}; + let Inst{15-13} = 0b010; + let Inst{12-10} = imm9{2-0}; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayLoad = 1; +} + +multiclass sve_mem_z_fill { + def NAME : sve_mem_z_fill; + + def : InstAlias(NAME) ZPRAny:$Zt, GPR64sp:$Rn, 0), 1>; +} + +class sve_mem_p_fill +: I<(outs PPRAny:$Pt), (ins GPR64sp:$Rn, simm9:$imm9), + asm, "\t$Pt, [$Rn, $imm9, mul vl]", + "", + []>, Sched<[]> { + bits<4> Pt; + bits<5> Rn; + bits<9> imm9; + let Inst{31-22} = 0b1000010110; + let Inst{21-16} = imm9{8-3}; + let Inst{15-13} = 0b000; + let Inst{12-10} = imm9{2-0}; + let Inst{9-5} = Rn; + let Inst{4} = 0b0; + let Inst{3-0} = Pt; + + let mayLoad = 1; +} + +multiclass sve_mem_p_fill { + def NAME : sve_mem_p_fill; + + def : InstAlias(NAME) PPRAny:$Pt, GPR64sp:$Rn, 0), 1>; +} + +//===----------------------------------------------------------------------===// +// SVE Memory - 64-bit Gather Group +//===----------------------------------------------------------------------===// + +// bit xs is '1' if offsets are signed +// bit scaled is '1' if the offsets are scaled +// bit lsl is '0' if the offsets are extended (uxtw/sxtw), '1' if shifted (lsl) +class sve_mem_64b_gld_sv opc, bit xs, bit scaled, bit lsl, string asm, + RegisterOperand zprext> +: I<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), + asm, "\t$Zt, $Pg/z, [$Rn, $Zm]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rn; + bits<5> Zm; + bits<5> Zt; + let Inst{31-25} = 0b1100010; + let Inst{24-23} = opc{3-2}; + let Inst{22} = xs; + let Inst{21} = scaled; + let Inst{20-16} = Zm; + let Inst{15} = lsl; + let Inst{14-13} = opc{1-0}; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayLoad = 1; + let Defs = !if(!eq(opc{0}, 1), [FFR], []); + let Uses = !if(!eq(opc{0}, 1), [FFR], []); +} + +multiclass sve_mem_64b_gld_sv_32_scaled opc, string asm, + RegisterOperand sxtw_opnd, + RegisterOperand uxtw_opnd> { + def _UXTW_SCALED_REAL : sve_mem_64b_gld_sv; + def _SXTW_SCALED_REAL : sve_mem_64b_gld_sv; + + def : InstAlias(NAME # _UXTW_SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>; + def : InstAlias(NAME # _SXTW_SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>; +} + +multiclass sve_mem_64b_gld_vs_32_unscaled opc, string asm, + RegisterOperand sxtw_opnd, + RegisterOperand uxtw_opnd> { + def _UXTW_REAL : sve_mem_64b_gld_sv; + def _SXTW_REAL : sve_mem_64b_gld_sv; + + def : InstAlias(NAME # _UXTW_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>; + def : InstAlias(NAME # _SXTW_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>; +} + +multiclass sve_mem_64b_gld_sv2_64_scaled opc, string asm, + RegisterOperand zprext> { + def _SCALED_REAL : sve_mem_64b_gld_sv; + + def : InstAlias(NAME # _SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), 0>; +} + +multiclass sve_mem_64b_gld_vs2_64_unscaled opc, string asm> { + def _REAL : sve_mem_64b_gld_sv; + + def : InstAlias(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), 0>; +} + +class sve_mem_64b_gld_vi opc, string asm, Operand imm_ty> +: I<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5), + asm, "\t$Zt, $Pg/z, [$Zn, $imm5]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zn; + bits<5> Zt; + bits<5> imm5; + let Inst{31-25} = 0b1100010; + let Inst{24-23} = opc{3-2}; + let Inst{22-21} = 0b01; + let Inst{20-16} = imm5; + let Inst{15} = 0b1; + let Inst{14-13} = opc{1-0}; + let Inst{12-10} = Pg; + let 
Inst{9-5} = Zn; + let Inst{4-0} = Zt; + + let mayLoad = 1; + let Defs = !if(!eq(opc{0}, 1), [FFR], []); + let Uses = !if(!eq(opc{0}, 1), [FFR], []); +} + +multiclass sve_mem_64b_gld_vi_64_ptrs opc, string asm, Operand imm_ty> { + def _IMM_REAL : sve_mem_64b_gld_vi; + + def : InstAlias(NAME # _IMM_REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 0>; + def : InstAlias(NAME # _IMM_REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5), 0>; + def : InstAlias(NAME # _IMM_REAL) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 1>; +} + +// bit lsl is '0' if the offsets are extended (uxtw/sxtw), '1' if shifted (lsl) +class sve_mem_64b_prfm_sv msz, bit xs, bit lsl, string asm, + RegisterOperand zprext> +: I<(outs), (ins sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), + asm, "\t$prfop, $Pg, [$Rn, $Zm]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rn; + bits<5> Zm; + bits<4> prfop; + let Inst{31-23} = 0b110001000; + let Inst{22} = xs; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{15} = lsl; + let Inst{14-13} = msz; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4} = 0b0; + let Inst{3-0} = prfop; + + let hasSideEffects = 1; +} + +multiclass sve_mem_64b_prfm_sv_ext_scaled msz, string asm, + RegisterOperand sxtw_opnd, + RegisterOperand uxtw_opnd> { + def _UXTW_SCALED : sve_mem_64b_prfm_sv; + def _SXTW_SCALED : sve_mem_64b_prfm_sv; +} + +multiclass sve_mem_64b_prfm_sv_lsl_scaled msz, string asm, + RegisterOperand zprext> { + def NAME : sve_mem_64b_prfm_sv; +} + + +class sve_mem_64b_prfm_vi msz, string asm, Operand imm_ty> +: I<(outs), (ins sve_prfop:$prfop, PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5), + asm, "\t$prfop, $Pg, [$Zn, $imm5]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zn; + bits<5> imm5; + bits<4> prfop; + let Inst{31-25} = 0b1100010; + let Inst{24-23} = msz; + let Inst{22-21} = 0b00; + let Inst{20-16} = imm5; + let Inst{15-13} = 0b111; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4} = 0b0; + let Inst{3-0} = prfop; + + let hasSideEffects = 1; +} + +multiclass sve_mem_64b_prfm_vi msz, string asm, Operand imm_ty> { + def NAME : sve_mem_64b_prfm_vi; + + def : InstAlias(NAME) sve_prfop:$prfop, PPR3bAny:$Pg, ZPR64:$Zn, 0), 1>; +} + + +//===----------------------------------------------------------------------===// +// SVE Compute Vector Address Group +//===----------------------------------------------------------------------===// + +class sve_int_bin_cons_misc_0_a opc, bits<2> msz, string asm, + ZPRRegOp zprty, RegisterOperand zprext> +: I<(outs zprty:$Zd), (ins zprty:$Zn, zprext:$Zm), + asm, "\t$Zd, [$Zn, $Zm]", + "", + []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + bits<5> Zm; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = opc; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{15-12} = 0b1010; + let Inst{11-10} = msz; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_bin_cons_misc_0_a_uxtw opc, string asm> { + def _0 : sve_int_bin_cons_misc_0_a; + def _1 : sve_int_bin_cons_misc_0_a; + def _2 : sve_int_bin_cons_misc_0_a; + def _3 : sve_int_bin_cons_misc_0_a; +} + +multiclass sve_int_bin_cons_misc_0_a_sxtw opc, string asm> { + def _0 : sve_int_bin_cons_misc_0_a; + def _1 : sve_int_bin_cons_misc_0_a; + def _2 : sve_int_bin_cons_misc_0_a; + def _3 : sve_int_bin_cons_misc_0_a; +} + +multiclass sve_int_bin_cons_misc_0_a_32_lsl opc, string asm> { + def _0 : sve_int_bin_cons_misc_0_a; + def _1 : sve_int_bin_cons_misc_0_a; + def _2 : sve_int_bin_cons_misc_0_a; + def _3 : sve_int_bin_cons_misc_0_a; +} + +multiclass 
sve_int_bin_cons_misc_0_a_64_lsl opc, string asm> { + def _0 : sve_int_bin_cons_misc_0_a; + def _1 : sve_int_bin_cons_misc_0_a; + def _2 : sve_int_bin_cons_misc_0_a; + def _3 : sve_int_bin_cons_misc_0_a; +} + + +//===----------------------------------------------------------------------===// +// SVE Integer Misc - Unpredicated Group +//===----------------------------------------------------------------------===// + +class sve_int_bin_cons_misc_0_b sz, string asm, ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm), + asm, "\t$Zd, $Zn, $Zm", + "", + []>, Sched<[]> { + bits<5> Zd; + bits<5> Zm; + bits<5> Zn; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = sz; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{15-10} = 0b101100; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_bin_cons_misc_0_b { + def _H : sve_int_bin_cons_misc_0_b<0b01, asm, ZPR16>; + def _S : sve_int_bin_cons_misc_0_b<0b10, asm, ZPR32>; + def _D : sve_int_bin_cons_misc_0_b<0b11, asm, ZPR64>; +} + +class sve_int_bin_cons_misc_0_c opc, string asm, ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins zprty:$Zn), + asm, "\t$Zd, $Zn", + "", + []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = opc{7-6}; + let Inst{21} = 0b1; + let Inst{20-16} = opc{5-1}; + let Inst{15-11} = 0b10111; + let Inst{10} = opc{0}; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +//===----------------------------------------------------------------------===// +// SVE Integer Reduction Group +//===----------------------------------------------------------------------===// + +class sve_int_reduce sz8_32, bits<2> fmt, bits<3> opc, string asm, + ZPRRegOp zprty, RegisterClass regtype> +: I<(outs regtype:$Vd), (ins PPR3bAny:$Pg, zprty:$Zn), + asm, "\t$Vd, $Pg, $Zn", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Vd; + bits<5> Zn; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = sz8_32; + let Inst{21} = 0b0; + let Inst{20-19} = fmt; + let Inst{18-16} = opc; + let Inst{15-13} = 0b001; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Vd; +} + +multiclass sve_int_reduce_0_saddv opc, string asm> { + def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64>; + def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64>; + def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64>; +} + +multiclass sve_int_reduce_0_uaddv opc, string asm> { + def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64>; + def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64>; + def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64>; + def _D : sve_int_reduce<0b11, 0b00, opc, asm, ZPR64, FPR64>; +} + +multiclass sve_int_reduce_1 opc, string asm> { + def _B : sve_int_reduce<0b00, 0b01, opc, asm, ZPR8, FPR8>; + def _H : sve_int_reduce<0b01, 0b01, opc, asm, ZPR16, FPR16>; + def _S : sve_int_reduce<0b10, 0b01, opc, asm, ZPR32, FPR32>; + def _D : sve_int_reduce<0b11, 0b01, opc, asm, ZPR64, FPR64>; +} + +multiclass sve_int_reduce_2 opc, string asm> { + def _B : sve_int_reduce<0b00, 0b11, opc, asm, ZPR8, FPR8>; + def _H : sve_int_reduce<0b01, 0b11, opc, asm, ZPR16, FPR16>; + def _S : sve_int_reduce<0b10, 0b11, opc, asm, ZPR32, FPR32>; + def _D : sve_int_reduce<0b11, 0b11, opc, asm, ZPR64, FPR64>; +} + +class sve_int_movprfx_pred sz8_32, bits<3> opc, string asm, + ZPRRegOp zprty, string pg_suffix, dag iops> +: I<(outs zprty:$Zd), iops, + asm, "\t$Zd, $Pg"#pg_suffix#", $Zn", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zd; + bits<5> Zn; + let Inst{31-24} = 0b00000100; + let 
Inst{23-22} = sz8_32; + let Inst{21-19} = 0b010; + let Inst{18-16} = opc; + let Inst{15-13} = 0b001; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; + + let ElementSize = zprty.ElementSize; +} + +multiclass sve_int_movprfx_pred_merge opc, string asm> { +let Constraints = "$Zd = $_Zd" in { + def _B : sve_int_movprfx_pred<0b00, opc, asm, ZPR8, "/m", + (ins ZPR8:$_Zd, PPR3bAny:$Pg, ZPR8:$Zn)>; + def _H : sve_int_movprfx_pred<0b01, opc, asm, ZPR16, "/m", + (ins ZPR16:$_Zd, PPR3bAny:$Pg, ZPR16:$Zn)>; + def _S : sve_int_movprfx_pred<0b10, opc, asm, ZPR32, "/m", + (ins ZPR32:$_Zd, PPR3bAny:$Pg, ZPR32:$Zn)>; + def _D : sve_int_movprfx_pred<0b11, opc, asm, ZPR64, "/m", + (ins ZPR64:$_Zd, PPR3bAny:$Pg, ZPR64:$Zn)>; +} +} + +multiclass sve_int_movprfx_pred_zero opc, string asm> { + def _B : sve_int_movprfx_pred<0b00, opc, asm, ZPR8, "/z", + (ins PPR3bAny:$Pg, ZPR8:$Zn)>; + def _H : sve_int_movprfx_pred<0b01, opc, asm, ZPR16, "/z", + (ins PPR3bAny:$Pg, ZPR16:$Zn)>; + def _S : sve_int_movprfx_pred<0b10, opc, asm, ZPR32, "/z", + (ins PPR3bAny:$Pg, ZPR32:$Zn)>; + def _D : sve_int_movprfx_pred<0b11, opc, asm, ZPR64, "/z", + (ins PPR3bAny:$Pg, ZPR64:$Zn)>; +} + +//===----------------------------------------------------------------------===// +// SVE Propagate Break Group +//===----------------------------------------------------------------------===// + +class sve_int_brkp opc, string asm> +: I<(outs PPR8:$Pd), (ins PPRAny:$Pg, PPR8:$Pn, PPR8:$Pm), + asm, "\t$Pd, $Pg/z, $Pn, $Pm", + "", + []>, Sched<[]> { + bits<4> Pd; + bits<4> Pg; + bits<4> Pm; + bits<4> Pn; + let Inst{31-24} = 0b00100101; + let Inst{23} = 0b0; + let Inst{22} = opc{1}; + let Inst{21-20} = 0b00; + let Inst{19-16} = Pm; + let Inst{15-14} = 0b11; + let Inst{13-10} = Pg; + let Inst{9} = 0b0; + let Inst{8-5} = Pn; + let Inst{4} = opc{0}; + let Inst{3-0} = Pd; + + let Defs = !if(!eq (opc{1}, 1), [NZCV], []); +} + + +//===----------------------------------------------------------------------===// +// SVE Partition Break Group +//===----------------------------------------------------------------------===// + +class sve_int_brkn +: I<(outs PPR8:$Pdm), (ins PPRAny:$Pg, PPR8:$Pn, PPR8:$_Pdm), + asm, "\t$Pdm, $Pg/z, $Pn, $_Pdm", + "", + []>, Sched<[]> { + bits<4> Pdm; + bits<4> Pg; + bits<4> Pn; + let Inst{31-23} = 0b001001010; + let Inst{22} = S; + let Inst{21-14} = 0b01100001; + let Inst{13-10} = Pg; + let Inst{9} = 0b0; + let Inst{8-5} = Pn; + let Inst{4} = 0b0; + let Inst{3-0} = Pdm; + + let Constraints = "$Pdm = $_Pdm"; + let Defs = !if(!eq (S, 0b1), [NZCV], []); +} + +class sve_int_break opc, string asm, string suffix, dag iops> +: I<(outs PPR8:$Pd), iops, + asm, "\t$Pd, $Pg"#suffix#", $Pn", + "", + []>, Sched<[]> { + bits<4> Pd; + bits<4> Pg; + bits<4> Pn; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = opc{2-1}; + let Inst{21-14} = 0b01000001; + let Inst{13-10} = Pg; + let Inst{9} = 0b0; + let Inst{8-5} = Pn; + let Inst{4} = opc{0}; + let Inst{3-0} = Pd; + + let Constraints = !if(!eq (opc{0}, 1), "$Pd = $_Pd", ""); + let Defs = !if(!eq (opc{1}, 1), [NZCV], []); + +} + +multiclass sve_int_break_m opc, string asm> { + def NAME : sve_int_break; +} + +multiclass sve_int_break_z opc, string asm> { + def NAME : sve_int_break; +} + diff --git a/capstone/suite/synctools/tablegen/ARM/ARM-digit.td b/capstone/suite/synctools/tablegen/ARM/ARM-digit.td new file mode 100644 index 000000000..d915f49be --- /dev/null +++ b/capstone/suite/synctools/tablegen/ARM/ARM-digit.td @@ -0,0 +1,1098 @@ +//===-- ARM.td - Describe the ARM Target 
Machine -----------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces which we are implementing +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// ARM Subtarget state. +// + +def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", + "true", "Thumb mode">; + +def ModeSoftFloat : SubtargetFeature<"soft-float","UseSoftFloat", + "true", "Use software floating " + "point features.">; + + +//===----------------------------------------------------------------------===// +// ARM Subtarget features. +// + +// Floating Point, HW Division and Neon Support +def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true", + "Enable VFP2 instructions">; + +def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true", + "Enable VFP3 instructions", + [FeatureVFP2]>; + +def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", + "Enable NEON instructions", + [FeatureVFP3]>; + +def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", + "Enable half-precision " + "floating point">; + +def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", + "Enable VFP4 instructions", + [FeatureVFP3, FeatureFP16]>; + +def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", + "true", "Enable ARMv8 FP", + [FeatureVFP4]>; + +def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", + "Enable full half-precision " + "floating point", + [FeatureFPARMv8]>; + +def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true", + "Floating point unit supports " + "single precision only">; + +def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true", + "Restrict FP to 16 double registers">; + +def FeatureHWDivThumb : SubtargetFeature<"hwdiv", + "HasHardwareDivideInThumb", "true", + "Enable divide instructions in Thumb">; + +def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm", + "HasHardwareDivideInARM", "true", + "Enable divide instructions in ARM mode">; + +// Atomic Support +def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true", + "Has data barrier (dmb/dsb) instructions">; + +def FeatureV7Clrex : SubtargetFeature<"v7clrex", "HasV7Clrex", "true", + "Has v7 clrex instruction">; + +def FeatureDFB : SubtargetFeature<"dfb", "HasFullDataBarrier", "true", + "Has full data barrier (dfb) instruction">; + +def FeatureAcquireRelease : SubtargetFeature<"acquire-release", + "HasAcquireRelease", "true", + "Has v8 acquire/release (lda/ldaex " + " etc) instructions">; + + +def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true", + "FP compare + branch is slow">; + +def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", + "Enable support for Performance " + "Monitor extensions">; + + +// TrustZone Security Extensions +def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true", + "Enable support for TrustZone " + "security extensions">; + +def Feature8MSecExt : SubtargetFeature<"8msecext", "Has8MSecExt", "true", + "Enable support for ARMv8-M " + "Security 
Extensions">; + +def FeatureSHA2 : SubtargetFeature<"sha2", "HasSHA2", "true", + "Enable SHA1 and SHA256 support", [FeatureNEON]>; + +def FeatureAES : SubtargetFeature<"aes", "HasAES", "true", + "Enable AES support", [FeatureNEON]>; + +def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", + "Enable support for " + "Cryptography extensions", + [FeatureNEON, FeatureSHA2, FeatureAES]>; + +def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", + "Enable support for CRC instructions">; + +def FeatureDotProd : SubtargetFeature<"dotprod", "HasDotProd", "true", + "Enable support for dot product instructions", + [FeatureNEON]>; + +// Not to be confused with FeatureHasRetAddrStack (return address stack) +def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", + "Enable Reliability, Availability " + "and Serviceability extensions">; + +// Fast computation of non-negative address offsets +def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true", + "Enable fast computation of " + "positive address offsets">; + +// Fast execution of AES crypto operations +def FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true", + "CPU fuses AES crypto operations">; + +// Fast execution of bottom and top halves of literal generation +def FeatureFuseLiterals : SubtargetFeature<"fuse-literals", "HasFuseLiterals", "true", + "CPU fuses literal generation operations">; + +// The way of reading thread pointer +def FeatureReadTp : SubtargetFeature<"read-tp-hard", "ReadTPHard", "true", + "Reading thread pointer from register">; + +// Cyclone can zero VFP registers in 0 cycles. +def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", + "Has zero-cycle zeroing instructions">; + +// Whether it is profitable to unpredicate certain instructions during if-conversion +def FeatureProfUnpredicate : SubtargetFeature<"prof-unpr", + "IsProfitableToUnpredicate", "true", + "Is profitable to unpredicate">; + +// Some targets (e.g. Swift) have microcoded VGETLNi32. +def FeatureSlowVGETLNi32 : SubtargetFeature<"slow-vgetlni32", + "HasSlowVGETLNi32", "true", + "Has slow VGETLNi32 - prefer VMOV">; + +// Some targets (e.g. Swift) have microcoded VDUP32. +def FeatureSlowVDUP32 : SubtargetFeature<"slow-vdup32", "HasSlowVDUP32", + "true", + "Has slow VDUP32 - prefer VMOV">; + +// Some targets (e.g. Cortex-A9) prefer VMOVSR to VMOVDRR even when using NEON +// for scalar FP, as this allows more effective execution domain optimization. +def FeaturePreferVMOVSR : SubtargetFeature<"prefer-vmovsr", "PreferVMOVSR", + "true", "Prefer VMOVSR">; + +// Swift has ISHST barriers compatible with Atomic Release semantics but weaker +// than ISH +def FeaturePrefISHSTBarrier : SubtargetFeature<"prefer-ishst", "PreferISHST", + "true", "Prefer ISHST barriers">; + +// Some targets (e.g. Cortex-A9) have muxed AGU and NEON/FPU. +def FeatureMuxedUnits : SubtargetFeature<"muxed-units", "HasMuxedUnits", + "true", + "Has muxed AGU and NEON/FPU">; + +// Whether VLDM/VSTM starting with odd register number need more microops +// than single VLDRS +def FeatureSlowOddRegister : SubtargetFeature<"slow-odd-reg", "SlowOddRegister", + "true", "VLDM/VSTM starting " + "with an odd register is slow">; + +// Some targets have a renaming dependency when loading into D subregisters. +def FeatureSlowLoadDSubreg : SubtargetFeature<"slow-load-D-subreg", + "SlowLoadDSubregister", "true", + "Loading into D subregs is slow">; + +// Some targets (e.g. Cortex-A15) never want VMOVS to be widened to VMOVD. 
+def FeatureDontWidenVMOVS : SubtargetFeature<"dont-widen-vmovs", + "DontWidenVMOVS", "true", + "Don't widen VMOVS to VMOVD">; + +// Some targets (e.g. Cortex-A15) prefer to avoid mixing operations on different +// VFP register widths. +def FeatureSplatVFPToNeon : SubtargetFeature<"splat-vfp-neon", + "SplatVFPToNeon", "true", + "Splat register from VFP to NEON", + [FeatureDontWidenVMOVS]>; + +// Whether or not it is profitable to expand VFP/NEON MLA/MLS instructions. +def FeatureExpandMLx : SubtargetFeature<"expand-fp-mlx", + "ExpandMLx", "true", + "Expand VFP/NEON MLA/MLS instructions">; + +// Some targets have special RAW hazards for VFP/NEON VMLA/VMLS. +def FeatureHasVMLxHazards : SubtargetFeature<"vmlx-hazards", "HasVMLxHazards", + "true", "Has VMLx hazards">; + +// Some targets (e.g. Cortex-A9) want to convert VMOVRS, VMOVSR and VMOVS from +// VFP to NEON, as an execution domain optimization. +def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs", + "UseNEONForFPMovs", "true", + "Convert VMOVSR, VMOVRS, " + "VMOVS to NEON">; + +// Some processors benefit from using NEON instructions for scalar +// single-precision FP operations. This affects instruction selection and should +// only be enabled if the handling of denormals is not important. +def FeatureNEONForFP : SubtargetFeature<"neonfp", + "UseNEONForSinglePrecisionFP", + "true", + "Use NEON for single precision FP">; + +// On some processors, VLDn instructions that access unaligned data take one +// extra cycle. Take that into account when computing operand latencies. +def FeatureCheckVLDnAlign : SubtargetFeature<"vldn-align", "CheckVLDnAlign", + "true", + "Check for VLDn unaligned access">; + +// Some processors have a nonpipelined VFP coprocessor. +def FeatureNonpipelinedVFP : SubtargetFeature<"nonpipelined-vfp", + "NonpipelinedVFP", "true", + "VFP instructions are not pipelined">; + +// Some processors have FP multiply-accumulate instructions that don't +// play nicely with other VFP / NEON instructions, and it's generally better +// to just not use them. +def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true", + "Disable VFP / NEON MAC instructions">; + +// Cortex-A8 / A9 Advanced SIMD has multiplier accumulator forwarding. +def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding", + "HasVMLxForwarding", "true", + "Has multiplier accumulator forwarding">; + +// Disable 32-bit to 16-bit narrowing for experimentation. +def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true", + "Prefer 32-bit Thumb instrs">; + +/// Some instructions update CPSR partially, which can add false dependency for +/// out-of-order implementation, e.g. Cortex-A9, unless each individual bit is +/// mapped to a separate physical register. Avoid partial CPSR update for these +/// processors. +def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr", + "AvoidCPSRPartialUpdate", "true", + "Avoid CPSR partial update for OOO execution">; + +/// Disable +1 predication cost for instructions updating CPSR. +/// Enabled for Cortex-A57. +def FeatureCheapPredicableCPSR : SubtargetFeature<"cheap-predicable-cpsr", + "CheapPredicableCPSRDef", + "true", + "Disable +1 predication cost for instructions updating CPSR">; + +def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop", + "AvoidMOVsShifterOperand", "true", + "Avoid movs instructions with " + "shifter operand">; + +// Some processors perform return stack prediction. 
CodeGen should avoid issue +// "normal" call instructions to callees which do not return. +def FeatureHasRetAddrStack : SubtargetFeature<"ret-addr-stack", + "HasRetAddrStack", "true", + "Has return address stack">; + +// Some processors have no branch predictor, which changes the expected cost of +// taking a branch which affects the choice of whether to use predicated +// instructions. +def FeatureHasNoBranchPredictor : SubtargetFeature<"no-branch-predictor", + "HasBranchPredictor", "false", + "Has no branch predictor">; + +/// DSP extension. +def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", + "Supports DSP instructions in " + "ARM and/or Thumb2">; + +// Multiprocessing extension. +def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", + "Supports Multiprocessing extension">; + +// Virtualization extension - requires HW divide (ARMv7-AR ARMARM - 4.4.8). +def FeatureVirtualization : SubtargetFeature<"virtualization", + "HasVirtualization", "true", + "Supports Virtualization extension", + [FeatureHWDivThumb, FeatureHWDivARM]>; + +// Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too. +// See ARMInstrInfo.td for details. +def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true", + "NaCl trap">; + +def FeatureStrictAlign : SubtargetFeature<"strict-align", + "StrictAlign", "true", + "Disallow all unaligned memory " + "access">; + +def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true", + "Generate calls via indirect call " + "instructions">; + +def FeatureExecuteOnly : SubtargetFeature<"execute-only", + "GenExecuteOnly", "true", + "Enable the generation of " + "execute only code.">; + +def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true", + "Reserve R9, making it unavailable" + " as GPR">; + +def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true", + "Don't use movt/movw pairs for " + "32-bit imms">; + +def FeatureNoNegativeImmediates + : SubtargetFeature<"no-neg-immediates", + "NegativeImmediates", "false", + "Convert immediates and instructions " + "to their negated or complemented " + "equivalent when the immediate does " + "not fit in the encoding.">; + +// Use the MachineScheduler for instruction scheduling for the subtarget. +def FeatureUseMISched: SubtargetFeature<"use-misched", "UseMISched", "true", + "Use the MachineScheduler">; + +def FeatureNoPostRASched : SubtargetFeature<"disable-postra-scheduler", + "DisablePostRAScheduler", "true", + "Don't schedule again after register allocation">; + +// Enable use of alias analysis during code generation +def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true", + "Use alias analysis during codegen">; + +//===----------------------------------------------------------------------===// +// ARM architecture class +// + +// A-series ISA +def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass", + "Is application profile ('A' series)">; + +// R-series ISA +def FeatureRClass : SubtargetFeature<"rclass", "ARMProcClass", "RClass", + "Is realtime profile ('R' series)">; + +// M-series ISA +def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass", + "Is microcontroller profile ('M' series)">; + + +def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true", + "Enable Thumb2 instructions">; + +def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", + "Does not support ARM mode execution">; + +//===----------------------------------------------------------------------===// +// ARM ISAa. 
+// + +def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true", + "Support ARM v4T instructions">; + +def HasV5TOps : SubtargetFeature<"v5t", "HasV5TOps", "true", + "Support ARM v5T instructions", + [HasV4TOps]>; + +def HasV5TEOps : SubtargetFeature<"v5te", "HasV5TEOps", "true", + "Support ARM v5TE, v5TEj, and " + "v5TExp instructions", + [HasV5TOps]>; + +def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true", + "Support ARM v6 instructions", + [HasV5TEOps]>; + +def HasV6MOps : SubtargetFeature<"v6m", "HasV6MOps", "true", + "Support ARM v6M instructions", + [HasV6Ops]>; + +def HasV8MBaselineOps : SubtargetFeature<"v8m", "HasV8MBaselineOps", "true", + "Support ARM v8M Baseline instructions", + [HasV6MOps]>; + +def HasV6KOps : SubtargetFeature<"v6k", "HasV6KOps", "true", + "Support ARM v6k instructions", + [HasV6Ops]>; + +def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true", + "Support ARM v6t2 instructions", + [HasV8MBaselineOps, HasV6KOps, FeatureThumb2]>; + +def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true", + "Support ARM v7 instructions", + [HasV6T2Ops, FeaturePerfMon, + FeatureV7Clrex]>; + +def HasV8MMainlineOps : + SubtargetFeature<"v8m.main", "HasV8MMainlineOps", "true", + "Support ARM v8M Mainline instructions", + [HasV7Ops]>; + +def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true", + "Support ARM v8 instructions", + [HasV7Ops, FeatureAcquireRelease]>; + +def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", + "Support ARM v8.1a instructions", + [HasV8Ops]>; + +def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true", + "Support ARM v8.2a instructions", + [HasV8_1aOps]>; + +def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true", + "Support ARM v8.3a instructions", + [HasV8_2aOps]>; + +def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true", + "Support ARM v8.4a instructions", + [HasV8_3aOps, FeatureDotProd]>; + +//===----------------------------------------------------------------------===// +// ARM Processor subtarget features. 
+// + +def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5", + "Cortex-A5 ARM processors", []>; +def ProcA7 : SubtargetFeature<"a7", "ARMProcFamily", "CortexA7", + "Cortex-A7 ARM processors", []>; +def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8", + "Cortex-A8 ARM processors", []>; +def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9", + "Cortex-A9 ARM processors", []>; +def ProcA12 : SubtargetFeature<"a12", "ARMProcFamily", "CortexA12", + "Cortex-A12 ARM processors", []>; +def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15", + "Cortex-A15 ARM processors", []>; +def ProcA17 : SubtargetFeature<"a17", "ARMProcFamily", "CortexA17", + "Cortex-A17 ARM processors", []>; +def ProcA32 : SubtargetFeature<"a32", "ARMProcFamily", "CortexA32", + "Cortex-A32 ARM processors", []>; +def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35", + "Cortex-A35 ARM processors", []>; +def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", + "Cortex-A53 ARM processors", []>; +def ProcA55 : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55", + "Cortex-A55 ARM processors", []>; +def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", + "Cortex-A57 ARM processors", []>; +def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72", + "Cortex-A72 ARM processors", []>; +def ProcA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73", + "Cortex-A73 ARM processors", []>; +def ProcA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75", + "Cortex-A75 ARM processors", []>; + +def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait", + "Qualcomm Krait processors", []>; +def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo", + "Qualcomm Kryo processors", []>; +def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift", + "Swift ARM processors", []>; + +def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1", + "Samsung Exynos-Mx processors", []>; + +def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4", + "Cortex-R4 ARM processors", []>; +def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5", + "Cortex-R5 ARM processors", []>; +def ProcR7 : SubtargetFeature<"r7", "ARMProcFamily", "CortexR7", + "Cortex-R7 ARM processors", []>; +def ProcR52 : SubtargetFeature<"r52", "ARMProcFamily", "CortexR52", + "Cortex-R52 ARM processors", []>; + +def ProcM3 : SubtargetFeature<"m3", "ARMProcFamily", "CortexM3", + "Cortex-M3 ARM processors", []>; + + +//===----------------------------------------------------------------------===// +// ARM Helper classes. 
+// + +class Architecture features> + : SubtargetFeature; + +class ProcNoItin Features> + : Processor; + + +//===----------------------------------------------------------------------===// +// ARM architectures +// + +def ARMv2 : Architecture<"armv2", "ARMv2", []>; + +def ARMv2a : Architecture<"armv2a", "ARMv2a", []>; + +def ARMv3 : Architecture<"armv3", "ARMv3", []>; + +def ARMv3m : Architecture<"armv3m", "ARMv3m", []>; + +def ARMv4 : Architecture<"armv4", "ARMv4", []>; + +def ARMv4t : Architecture<"armv4t", "ARMv4t", [HasV4TOps]>; + +def ARMv5t : Architecture<"armv5t", "ARMv5t", [HasV5TOps]>; + +def ARMv5te : Architecture<"armv5te", "ARMv5te", [HasV5TEOps]>; + +def ARMv5tej : Architecture<"armv5tej", "ARMv5tej", [HasV5TEOps]>; + +def ARMv6 : Architecture<"armv6", "ARMv6", [HasV6Ops, + FeatureDSP]>; + +def ARMv6t2 : Architecture<"armv6t2", "ARMv6t2", [HasV6T2Ops, + FeatureDSP]>; + +def ARMv6k : Architecture<"armv6k", "ARMv6k", [HasV6KOps]>; + +def ARMv6kz : Architecture<"armv6kz", "ARMv6kz", [HasV6KOps, + FeatureTrustZone]>; + +def ARMv6m : Architecture<"armv6-m", "ARMv6m", [HasV6MOps, + FeatureNoARM, + ModeThumb, + FeatureDB, + FeatureMClass, + FeatureStrictAlign]>; + +def ARMv6sm : Architecture<"armv6s-m", "ARMv6sm", [HasV6MOps, + FeatureNoARM, + ModeThumb, + FeatureDB, + FeatureMClass, + FeatureStrictAlign]>; + +def ARMv7a : Architecture<"armv7-a", "ARMv7a", [HasV7Ops, + FeatureNEON, + FeatureDB, + FeatureDSP, + FeatureAClass]>; + +def ARMv7ve : Architecture<"armv7ve", "ARMv7ve", [HasV7Ops, + FeatureNEON, + FeatureDB, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureAClass]>; + +def ARMv7r : Architecture<"armv7-r", "ARMv7r", [HasV7Ops, + FeatureDB, + FeatureDSP, + FeatureHWDivThumb, + FeatureRClass]>; + +def ARMv7m : Architecture<"armv7-m", "ARMv7m", [HasV7Ops, + FeatureThumb2, + FeatureNoARM, + ModeThumb, + FeatureDB, + FeatureHWDivThumb, + FeatureMClass]>; + +def ARMv7em : Architecture<"armv7e-m", "ARMv7em", [HasV7Ops, + FeatureThumb2, + FeatureNoARM, + ModeThumb, + FeatureDB, + FeatureHWDivThumb, + FeatureMClass, + FeatureDSP]>; + +def ARMv8a : Architecture<"armv8-a", "ARMv8a", [HasV8Ops, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC]>; + +def ARMv81a : Architecture<"armv8.1-a", "ARMv81a", [HasV8_1aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC]>; + +def ARMv82a : Architecture<"armv8.2-a", "ARMv82a", [HasV8_2aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC, + FeatureRAS]>; + +def ARMv83a : Architecture<"armv8.3-a", "ARMv83a", [HasV8_3aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC, + FeatureRAS]>; + +def ARMv84a : Architecture<"armv8.4-a", "ARMv84a", [HasV8_4aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC, + FeatureRAS, + FeatureDotProd]>; + +def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops, + FeatureRClass, + FeatureDB, + FeatureDFB, + FeatureDSP, + FeatureCRC, + FeatureMP, + FeatureVirtualization, + FeatureFPARMv8, + FeatureNEON]>; + +def 
ARMv8mBaseline : Architecture<"armv8-m.base", "ARMv8mBaseline", + [HasV8MBaselineOps, + FeatureNoARM, + ModeThumb, + FeatureDB, + FeatureHWDivThumb, + FeatureV7Clrex, + Feature8MSecExt, + FeatureAcquireRelease, + FeatureMClass, + FeatureStrictAlign]>; + +def ARMv8mMainline : Architecture<"armv8-m.main", "ARMv8mMainline", + [HasV8MMainlineOps, + FeatureNoARM, + ModeThumb, + FeatureDB, + FeatureHWDivThumb, + Feature8MSecExt, + FeatureAcquireRelease, + FeatureMClass]>; + +// Aliases +def IWMMXT : Architecture<"iwmmxt", "ARMv5te", [ARMv5te]>; +def IWMMXT2 : Architecture<"iwmmxt2", "ARMv5te", [ARMv5te]>; +def XScale : Architecture<"xscale", "ARMv5te", [ARMv5te]>; +def ARMv6j : Architecture<"armv6j", "ARMv7a", [ARMv6]>; +def ARMv7k : Architecture<"armv7k", "ARMv7a", [ARMv7a]>; +def ARMv7s : Architecture<"armv7s", "ARMv7a", [ARMv7a]>; + + +//===----------------------------------------------------------------------===// +// ARM schedules. +//===----------------------------------------------------------------------===// +// +include "ARMSchedule.td" + +//===----------------------------------------------------------------------===// +// ARM processors +// + +// Dummy CPU, used to target architectures +def : ProcessorModel<"generic", CortexA8Model, []>; + +// FIXME: Several processors below are not using their own scheduler +// model, but one of similar/previous processor. These should be fixed. + +def : ProcNoItin<"arm8", [ARMv4]>; +def : ProcNoItin<"arm810", [ARMv4]>; +def : ProcNoItin<"strongarm", [ARMv4]>; +def : ProcNoItin<"strongarm110", [ARMv4]>; +def : ProcNoItin<"strongarm1100", [ARMv4]>; +def : ProcNoItin<"strongarm1110", [ARMv4]>; + +def : ProcNoItin<"arm7tdmi", [ARMv4t]>; +def : ProcNoItin<"arm7tdmi-s", [ARMv4t]>; +def : ProcNoItin<"arm710t", [ARMv4t]>; +def : ProcNoItin<"arm720t", [ARMv4t]>; +def : ProcNoItin<"arm9", [ARMv4t]>; +def : ProcNoItin<"arm9tdmi", [ARMv4t]>; +def : ProcNoItin<"arm920", [ARMv4t]>; +def : ProcNoItin<"arm920t", [ARMv4t]>; +def : ProcNoItin<"arm922t", [ARMv4t]>; +def : ProcNoItin<"arm940t", [ARMv4t]>; +def : ProcNoItin<"ep9312", [ARMv4t]>; + +def : ProcNoItin<"arm10tdmi", [ARMv5t]>; +def : ProcNoItin<"arm1020t", [ARMv5t]>; + +def : ProcNoItin<"arm9e", [ARMv5te]>; +def : ProcNoItin<"arm926ej-s", [ARMv5te]>; +def : ProcNoItin<"arm946e-s", [ARMv5te]>; +def : ProcNoItin<"arm966e-s", [ARMv5te]>; +def : ProcNoItin<"arm968e-s", [ARMv5te]>; +def : ProcNoItin<"arm10e", [ARMv5te]>; +def : ProcNoItin<"arm1020e", [ARMv5te]>; +def : ProcNoItin<"arm1022e", [ARMv5te]>; +def : ProcNoItin<"xscale", [ARMv5te]>; +def : ProcNoItin<"iwmmxt", [ARMv5te]>; + +def : Processor<"arm1136j-s", ARMV6Itineraries, [ARMv6]>; +def : Processor<"arm1136jf-s", ARMV6Itineraries, [ARMv6, + FeatureVFP2, + FeatureHasSlowFPVMLx]>; + +def : Processor<"cortex-m0", ARMV6Itineraries, [ARMv6m]>; +def : Processor<"cortex-m0plus", ARMV6Itineraries, [ARMv6m]>; +def : Processor<"cortex-m1", ARMV6Itineraries, [ARMv6m]>; +def : Processor<"sc000", ARMV6Itineraries, [ARMv6m]>; + +def : Processor<"arm1176j-s", ARMV6Itineraries, [ARMv6kz]>; +def : Processor<"arm1176jz-s", ARMV6Itineraries, [ARMv6kz]>; +def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ARMv6kz, + FeatureVFP2, + FeatureHasSlowFPVMLx]>; + +def : Processor<"mpcorenovfp", ARMV6Itineraries, [ARMv6k]>; +def : Processor<"mpcore", ARMV6Itineraries, [ARMv6k, + FeatureVFP2, + FeatureHasSlowFPVMLx]>; + +def : Processor<"arm1156t2-s", ARMV6Itineraries, [ARMv6t2]>; +def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ARMv6t2, + FeatureVFP2, + FeatureHasSlowFPVMLx]>; + 
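+// Illustrative sketch (hypothetical core name, not part of the upstream file):
+// each CPU record in this list pairs an architecture record with per-core
+// tuning features. A core without scheduling data uses ProcNoItin, while one
+// with a machine model uses ProcessorModel:
+//
+//   def : ProcNoItin<"example-core", [ARMv7a, FeatureVFP4, FeatureMP]>;
+//   def : ProcessorModel<"example-core-sched", CortexA9Model,
+//                        [ARMv7a, ProcA9, FeatureVFP4, FeatureMP]>;
+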
+def : ProcessorModel<"cortex-a5", CortexA8Model, [ARMv7a, ProcA5, + FeatureHasRetAddrStack, + FeatureTrustZone, + FeatureSlowFPBrcc, + FeatureHasSlowFPVMLx, + FeatureVMLxForwarding, + FeatureMP, + FeatureVFP4]>; + +def : ProcessorModel<"cortex-a7", CortexA8Model, [ARMv7a, ProcA7, + FeatureHasRetAddrStack, + FeatureTrustZone, + FeatureSlowFPBrcc, + FeatureHasVMLxHazards, + FeatureHasSlowFPVMLx, + FeatureVMLxForwarding, + FeatureMP, + FeatureVFP4, + FeatureVirtualization]>; + +def : ProcessorModel<"cortex-a8", CortexA8Model, [ARMv7a, ProcA8, + FeatureHasRetAddrStack, + FeatureNonpipelinedVFP, + FeatureTrustZone, + FeatureSlowFPBrcc, + FeatureHasVMLxHazards, + FeatureHasSlowFPVMLx, + FeatureVMLxForwarding]>; + +def : ProcessorModel<"cortex-a9", CortexA9Model, [ARMv7a, ProcA9, + FeatureHasRetAddrStack, + FeatureTrustZone, + FeatureHasVMLxHazards, + FeatureVMLxForwarding, + FeatureFP16, + FeatureAvoidPartialCPSR, + FeatureExpandMLx, + FeaturePreferVMOVSR, + FeatureMuxedUnits, + FeatureNEONForFPMovs, + FeatureCheckVLDnAlign, + FeatureMP]>; + +def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12, + FeatureHasRetAddrStack, + FeatureTrustZone, + FeatureVMLxForwarding, + FeatureVFP4, + FeatureAvoidPartialCPSR, + FeatureVirtualization, + FeatureMP]>; + +def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15, + FeatureDontWidenVMOVS, + FeatureSplatVFPToNeon, + FeatureHasRetAddrStack, + FeatureMuxedUnits, + FeatureTrustZone, + FeatureVFP4, + FeatureMP, + FeatureCheckVLDnAlign, + FeatureAvoidPartialCPSR, + FeatureVirtualization]>; + +def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17, + FeatureHasRetAddrStack, + FeatureTrustZone, + FeatureMP, + FeatureVMLxForwarding, + FeatureVFP4, + FeatureAvoidPartialCPSR, + FeatureVirtualization]>; + +// FIXME: krait has currently the same features as A9 plus VFP4 and HWDiv +def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait, + FeatureHasRetAddrStack, + FeatureMuxedUnits, + FeatureCheckVLDnAlign, + FeatureVMLxForwarding, + FeatureFP16, + FeatureAvoidPartialCPSR, + FeatureVFP4, + FeatureHWDivThumb, + FeatureHWDivARM]>; + +def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift, + FeatureHasRetAddrStack, + FeatureNEONForFP, + FeatureVFP4, + FeatureMP, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureAvoidPartialCPSR, + FeatureAvoidMOVsShOp, + FeatureHasSlowFPVMLx, + FeatureHasVMLxHazards, + FeatureProfUnpredicate, + FeaturePrefISHSTBarrier, + FeatureSlowOddRegister, + FeatureSlowLoadDSubreg, + FeatureSlowVGETLNi32, + FeatureSlowVDUP32, + FeatureUseMISched, + FeatureNoPostRASched]>; + +def : ProcessorModel<"cortex-r4", CortexA8Model, [ARMv7r, ProcR4, + FeatureHasRetAddrStack, + FeatureAvoidPartialCPSR]>; + +def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4, + FeatureHasRetAddrStack, + FeatureSlowFPBrcc, + FeatureHasSlowFPVMLx, + FeatureVFP3, + FeatureD16, + FeatureAvoidPartialCPSR]>; + +def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5, + FeatureHasRetAddrStack, + FeatureVFP3, + FeatureD16, + FeatureSlowFPBrcc, + FeatureHWDivARM, + FeatureHasSlowFPVMLx, + FeatureAvoidPartialCPSR]>; + +def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7, + FeatureHasRetAddrStack, + FeatureVFP3, + FeatureD16, + FeatureFP16, + FeatureMP, + FeatureSlowFPBrcc, + FeatureHWDivARM, + FeatureHasSlowFPVMLx, + FeatureAvoidPartialCPSR]>; + +def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r, + FeatureHasRetAddrStack, + FeatureVFP3, + FeatureD16, + FeatureFP16, + FeatureMP, + 
FeatureSlowFPBrcc, + FeatureHWDivARM, + FeatureHasSlowFPVMLx, + FeatureAvoidPartialCPSR]>; + +def : ProcessorModel<"cortex-m3", CortexM3Model, [ARMv7m, + ProcM3, + FeatureHasNoBranchPredictor]>; + +def : ProcessorModel<"sc300", CortexM3Model, [ARMv7m, + ProcM3, + FeatureHasNoBranchPredictor]>; + +def : ProcessorModel<"cortex-m4", CortexM3Model, [ARMv7em, + FeatureVFP4, + FeatureVFPOnlySP, + FeatureD16, + FeatureHasNoBranchPredictor]>; + +def : ProcNoItin<"cortex-m7", [ARMv7em, + FeatureFPARMv8, + FeatureD16]>; + +def : ProcNoItin<"cortex-m23", [ARMv8mBaseline, + FeatureNoMovt]>; + +def : ProcessorModel<"cortex-m33", CortexM3Model, [ARMv8mMainline, + FeatureDSP, + FeatureFPARMv8, + FeatureD16, + FeatureVFPOnlySP, + FeatureHasNoBranchPredictor]>; + +def : ProcNoItin<"cortex-a32", [ARMv8a, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC]>; + +def : ProcNoItin<"cortex-a35", [ARMv8a, ProcA35, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC]>; + +def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC, + FeatureFPAO]>; + +def : ProcNoItin<"cortex-a55", [ARMv82a, ProcA55, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureDotProd]>; + +def : ProcessorModel<"cortex-a57", CortexA57Model, [ARMv8a, ProcA57, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC, + FeatureFPAO, + FeatureAvoidPartialCPSR, + FeatureCheapPredicableCPSR]>; + +def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC]>; + +def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC]>; + +def : ProcNoItin<"cortex-a75", [ARMv82a, ProcA75, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureDotProd]>; + +def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift, + FeatureHasRetAddrStack, + FeatureNEONForFP, + FeatureVFP4, + FeatureMP, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureAvoidPartialCPSR, + FeatureAvoidMOVsShOp, + FeatureHasSlowFPVMLx, + FeatureCrypto, + FeatureUseMISched, + FeatureZCZeroing, + FeatureNoPostRASched]>; + +def : ProcNoItin<"exynos-m1", [ARMv8a, ProcExynosM1, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC]>; + +def : ProcNoItin<"exynos-m2", [ARMv8a, ProcExynosM1, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC]>; + +def : ProcNoItin<"exynos-m3", [ARMv8a, ProcExynosM1, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC]>; + +def : ProcNoItin<"exynos-m4", [ARMv8a, ProcExynosM1, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC]>; + +def : ProcNoItin<"kryo", [ARMv8a, ProcKryo, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC]>; + +def : ProcessorModel<"cortex-r52", CortexR52Model, [ARMv8r, ProcR52, + FeatureUseMISched, + FeatureFPAO, + FeatureUseAA]>; + +//===----------------------------------------------------------------------===// +// Register File Description +//===----------------------------------------------------------------------===// + +include "ARMRegisterInfo-digit.td" +include "ARMRegisterBanks.td" +include "ARMCallingConv.td" + +//===----------------------------------------------------------------------===// +// Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "ARMInstrInfo.td" +def ARMInstrInfo : InstrInfo; + 
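+// Worked example (drawn from the feature records earlier in this file): the
+// SubtargetFeature definitions form implication chains, so enabling "crypto"
+// transitively enables sha2, aes, neon, vfp3 and vfp2:
+//   FeatureCrypto           -> [FeatureNEON, FeatureSHA2, FeatureAES]
+//   FeatureSHA2/FeatureAES  -> [FeatureNEON]
+//   FeatureNEON             -> [FeatureVFP3] -> [FeatureVFP2]
+// A hypothetical extension would hook into the same chain like so (sketch
+// only; the feature name and subtarget field are placeholders):
+//   def FeatureExampleSIMD : SubtargetFeature<"example-simd", "HasExampleSIMD",
+//                                             "true", "Example SIMD extension",
+//                                             [FeatureNEON]>;
+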
+//===----------------------------------------------------------------------===// +// Declare the target which we are implementing +//===----------------------------------------------------------------------===// + +def ARMAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + int PassSubtarget = 1; + int Variant = 0; + bit isMCAsmWriter = 1; +} + +def ARMAsmParser : AsmParser { + bit ReportMultipleNearMisses = 1; +} + +def ARMAsmParserVariant : AsmParserVariant { + int Variant = 0; + string Name = "ARM"; + string BreakCharacters = "."; +} + +def ARM : Target { + // Pull in Instruction Info. + let InstructionSet = ARMInstrInfo; + let AssemblyWriters = [ARMAsmWriter]; + let AssemblyParsers = [ARMAsmParser]; + let AssemblyParserVariants = [ARMAsmParserVariant]; + let AllowRegisterRenaming = 1; +} diff --git a/capstone/suite/synctools/tablegen/ARM/ARM.td b/capstone/suite/synctools/tablegen/ARM/ARM.td new file mode 100644 index 000000000..2e62a0790 --- /dev/null +++ b/capstone/suite/synctools/tablegen/ARM/ARM.td @@ -0,0 +1,1098 @@ +//===-- ARM.td - Describe the ARM Target Machine -----------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces which we are implementing +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// ARM Subtarget state. +// + +def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", + "true", "Thumb mode">; + +def ModeSoftFloat : SubtargetFeature<"soft-float","UseSoftFloat", + "true", "Use software floating " + "point features.">; + + +//===----------------------------------------------------------------------===// +// ARM Subtarget features. 
+// + +// Floating Point, HW Division and Neon Support +def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true", + "Enable VFP2 instructions">; + +def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true", + "Enable VFP3 instructions", + [FeatureVFP2]>; + +def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", + "Enable NEON instructions", + [FeatureVFP3]>; + +def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", + "Enable half-precision " + "floating point">; + +def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", + "Enable VFP4 instructions", + [FeatureVFP3, FeatureFP16]>; + +def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", + "true", "Enable ARMv8 FP", + [FeatureVFP4]>; + +def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", + "Enable full half-precision " + "floating point", + [FeatureFPARMv8]>; + +def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true", + "Floating point unit supports " + "single precision only">; + +def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true", + "Restrict FP to 16 double registers">; + +def FeatureHWDivThumb : SubtargetFeature<"hwdiv", + "HasHardwareDivideInThumb", "true", + "Enable divide instructions in Thumb">; + +def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm", + "HasHardwareDivideInARM", "true", + "Enable divide instructions in ARM mode">; + +// Atomic Support +def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true", + "Has data barrier (dmb/dsb) instructions">; + +def FeatureV7Clrex : SubtargetFeature<"v7clrex", "HasV7Clrex", "true", + "Has v7 clrex instruction">; + +def FeatureDFB : SubtargetFeature<"dfb", "HasFullDataBarrier", "true", + "Has full data barrier (dfb) instruction">; + +def FeatureAcquireRelease : SubtargetFeature<"acquire-release", + "HasAcquireRelease", "true", + "Has v8 acquire/release (lda/ldaex " + " etc) instructions">; + + +def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true", + "FP compare + branch is slow">; + +def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", + "Enable support for Performance " + "Monitor extensions">; + + +// TrustZone Security Extensions +def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true", + "Enable support for TrustZone " + "security extensions">; + +def Feature8MSecExt : SubtargetFeature<"8msecext", "Has8MSecExt", "true", + "Enable support for ARMv8-M " + "Security Extensions">; + +def FeatureSHA2 : SubtargetFeature<"sha2", "HasSHA2", "true", + "Enable SHA1 and SHA256 support", [FeatureNEON]>; + +def FeatureAES : SubtargetFeature<"aes", "HasAES", "true", + "Enable AES support", [FeatureNEON]>; + +def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", + "Enable support for " + "Cryptography extensions", + [FeatureNEON, FeatureSHA2, FeatureAES]>; + +def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", + "Enable support for CRC instructions">; + +def FeatureDotProd : SubtargetFeature<"dotprod", "HasDotProd", "true", + "Enable support for dot product instructions", + [FeatureNEON]>; + +// Not to be confused with FeatureHasRetAddrStack (return address stack) +def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", + "Enable Reliability, Availability " + "and Serviceability extensions">; + +// Fast computation of non-negative address offsets +def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true", + "Enable fast computation of " + "positive address offsets">; + +// Fast execution of AES crypto operations +def 
FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true", + "CPU fuses AES crypto operations">; + +// Fast execution of bottom and top halves of literal generation +def FeatureFuseLiterals : SubtargetFeature<"fuse-literals", "HasFuseLiterals", "true", + "CPU fuses literal generation operations">; + +// The way of reading thread pointer +def FeatureReadTp : SubtargetFeature<"read-tp-hard", "ReadTPHard", "true", + "Reading thread pointer from register">; + +// Cyclone can zero VFP registers in 0 cycles. +def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", + "Has zero-cycle zeroing instructions">; + +// Whether it is profitable to unpredicate certain instructions during if-conversion +def FeatureProfUnpredicate : SubtargetFeature<"prof-unpr", + "IsProfitableToUnpredicate", "true", + "Is profitable to unpredicate">; + +// Some targets (e.g. Swift) have microcoded VGETLNi32. +def FeatureSlowVGETLNi32 : SubtargetFeature<"slow-vgetlni32", + "HasSlowVGETLNi32", "true", + "Has slow VGETLNi32 - prefer VMOV">; + +// Some targets (e.g. Swift) have microcoded VDUP32. +def FeatureSlowVDUP32 : SubtargetFeature<"slow-vdup32", "HasSlowVDUP32", + "true", + "Has slow VDUP32 - prefer VMOV">; + +// Some targets (e.g. Cortex-A9) prefer VMOVSR to VMOVDRR even when using NEON +// for scalar FP, as this allows more effective execution domain optimization. +def FeaturePreferVMOVSR : SubtargetFeature<"prefer-vmovsr", "PreferVMOVSR", + "true", "Prefer VMOVSR">; + +// Swift has ISHST barriers compatible with Atomic Release semantics but weaker +// than ISH +def FeaturePrefISHSTBarrier : SubtargetFeature<"prefer-ishst", "PreferISHST", + "true", "Prefer ISHST barriers">; + +// Some targets (e.g. Cortex-A9) have muxed AGU and NEON/FPU. +def FeatureMuxedUnits : SubtargetFeature<"muxed-units", "HasMuxedUnits", + "true", + "Has muxed AGU and NEON/FPU">; + +// Whether VLDM/VSTM starting with odd register number need more microops +// than single VLDRS +def FeatureSlowOddRegister : SubtargetFeature<"slow-odd-reg", "SlowOddRegister", + "true", "VLDM/VSTM starting " + "with an odd register is slow">; + +// Some targets have a renaming dependency when loading into D subregisters. +def FeatureSlowLoadDSubreg : SubtargetFeature<"slow-load-D-subreg", + "SlowLoadDSubregister", "true", + "Loading into D subregs is slow">; + +// Some targets (e.g. Cortex-A15) never want VMOVS to be widened to VMOVD. +def FeatureDontWidenVMOVS : SubtargetFeature<"dont-widen-vmovs", + "DontWidenVMOVS", "true", + "Don't widen VMOVS to VMOVD">; + +// Some targets (e.g. Cortex-A15) prefer to avoid mixing operations on different +// VFP register widths. +def FeatureSplatVFPToNeon : SubtargetFeature<"splat-vfp-neon", + "SplatVFPToNeon", "true", + "Splat register from VFP to NEON", + [FeatureDontWidenVMOVS]>; + +// Whether or not it is profitable to expand VFP/NEON MLA/MLS instructions. +def FeatureExpandMLx : SubtargetFeature<"expand-fp-mlx", + "ExpandMLx", "true", + "Expand VFP/NEON MLA/MLS instructions">; + +// Some targets have special RAW hazards for VFP/NEON VMLA/VMLS. +def FeatureHasVMLxHazards : SubtargetFeature<"vmlx-hazards", "HasVMLxHazards", + "true", "Has VMLx hazards">; + +// Some targets (e.g. Cortex-A9) want to convert VMOVRS, VMOVSR and VMOVS from +// VFP to NEON, as an execution domain optimization. 
+def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs", + "UseNEONForFPMovs", "true", + "Convert VMOVSR, VMOVRS, " + "VMOVS to NEON">; + +// Some processors benefit from using NEON instructions for scalar +// single-precision FP operations. This affects instruction selection and should +// only be enabled if the handling of denormals is not important. +def FeatureNEONForFP : SubtargetFeature<"neonfp", + "UseNEONForSinglePrecisionFP", + "true", + "Use NEON for single precision FP">; + +// On some processors, VLDn instructions that access unaligned data take one +// extra cycle. Take that into account when computing operand latencies. +def FeatureCheckVLDnAlign : SubtargetFeature<"vldn-align", "CheckVLDnAlign", + "true", + "Check for VLDn unaligned access">; + +// Some processors have a nonpipelined VFP coprocessor. +def FeatureNonpipelinedVFP : SubtargetFeature<"nonpipelined-vfp", + "NonpipelinedVFP", "true", + "VFP instructions are not pipelined">; + +// Some processors have FP multiply-accumulate instructions that don't +// play nicely with other VFP / NEON instructions, and it's generally better +// to just not use them. +def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true", + "Disable VFP / NEON MAC instructions">; + +// Cortex-A8 / A9 Advanced SIMD has multiplier accumulator forwarding. +def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding", + "HasVMLxForwarding", "true", + "Has multiplier accumulator forwarding">; + +// Disable 32-bit to 16-bit narrowing for experimentation. +def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true", + "Prefer 32-bit Thumb instrs">; + +/// Some instructions update CPSR partially, which can add false dependency for +/// out-of-order implementation, e.g. Cortex-A9, unless each individual bit is +/// mapped to a separate physical register. Avoid partial CPSR update for these +/// processors. +def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr", + "AvoidCPSRPartialUpdate", "true", + "Avoid CPSR partial update for OOO execution">; + +/// Disable +1 predication cost for instructions updating CPSR. +/// Enabled for Cortex-A57. +def FeatureCheapPredicableCPSR : SubtargetFeature<"cheap-predicable-cpsr", + "CheapPredicableCPSRDef", + "true", + "Disable +1 predication cost for instructions updating CPSR">; + +def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop", + "AvoidMOVsShifterOperand", "true", + "Avoid movs instructions with " + "shifter operand">; + +// Some processors perform return stack prediction. CodeGen should avoid issue +// "normal" call instructions to callees which do not return. +def FeatureHasRetAddrStack : SubtargetFeature<"ret-addr-stack", + "HasRetAddrStack", "true", + "Has return address stack">; + +// Some processors have no branch predictor, which changes the expected cost of +// taking a branch which affects the choice of whether to use predicated +// instructions. +def FeatureHasNoBranchPredictor : SubtargetFeature<"no-branch-predictor", + "HasBranchPredictor", "false", + "Has no branch predictor">; + +/// DSP extension. +def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", + "Supports DSP instructions in " + "ARM and/or Thumb2">; + +// Multiprocessing extension. +def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", + "Supports Multiprocessing extension">; + +// Virtualization extension - requires HW divide (ARMv7-AR ARMARM - 4.4.8). 
+def FeatureVirtualization : SubtargetFeature<"virtualization", + "HasVirtualization", "true", + "Supports Virtualization extension", + [FeatureHWDivThumb, FeatureHWDivARM]>; + +// Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too. +// See ARMInstrInfo.td for details. +def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true", + "NaCl trap">; + +def FeatureStrictAlign : SubtargetFeature<"strict-align", + "StrictAlign", "true", + "Disallow all unaligned memory " + "access">; + +def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true", + "Generate calls via indirect call " + "instructions">; + +def FeatureExecuteOnly : SubtargetFeature<"execute-only", + "GenExecuteOnly", "true", + "Enable the generation of " + "execute only code.">; + +def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true", + "Reserve R9, making it unavailable" + " as GPR">; + +def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true", + "Don't use movt/movw pairs for " + "32-bit imms">; + +def FeatureNoNegativeImmediates + : SubtargetFeature<"no-neg-immediates", + "NegativeImmediates", "false", + "Convert immediates and instructions " + "to their negated or complemented " + "equivalent when the immediate does " + "not fit in the encoding.">; + +// Use the MachineScheduler for instruction scheduling for the subtarget. +def FeatureUseMISched: SubtargetFeature<"use-misched", "UseMISched", "true", + "Use the MachineScheduler">; + +def FeatureNoPostRASched : SubtargetFeature<"disable-postra-scheduler", + "DisablePostRAScheduler", "true", + "Don't schedule again after register allocation">; + +// Enable use of alias analysis during code generation +def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true", + "Use alias analysis during codegen">; + +//===----------------------------------------------------------------------===// +// ARM architecture class +// + +// A-series ISA +def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass", + "Is application profile ('A' series)">; + +// R-series ISA +def FeatureRClass : SubtargetFeature<"rclass", "ARMProcClass", "RClass", + "Is realtime profile ('R' series)">; + +// M-series ISA +def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass", + "Is microcontroller profile ('M' series)">; + + +def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true", + "Enable Thumb2 instructions">; + +def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", + "Does not support ARM mode execution">; + +//===----------------------------------------------------------------------===// +// ARM ISAa. 
+// + +def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true", + "Support ARM v4T instructions">; + +def HasV5TOps : SubtargetFeature<"v5t", "HasV5TOps", "true", + "Support ARM v5T instructions", + [HasV4TOps]>; + +def HasV5TEOps : SubtargetFeature<"v5te", "HasV5TEOps", "true", + "Support ARM v5TE, v5TEj, and " + "v5TExp instructions", + [HasV5TOps]>; + +def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true", + "Support ARM v6 instructions", + [HasV5TEOps]>; + +def HasV6MOps : SubtargetFeature<"v6m", "HasV6MOps", "true", + "Support ARM v6M instructions", + [HasV6Ops]>; + +def HasV8MBaselineOps : SubtargetFeature<"v8m", "HasV8MBaselineOps", "true", + "Support ARM v8M Baseline instructions", + [HasV6MOps]>; + +def HasV6KOps : SubtargetFeature<"v6k", "HasV6KOps", "true", + "Support ARM v6k instructions", + [HasV6Ops]>; + +def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true", + "Support ARM v6t2 instructions", + [HasV8MBaselineOps, HasV6KOps, FeatureThumb2]>; + +def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true", + "Support ARM v7 instructions", + [HasV6T2Ops, FeaturePerfMon, + FeatureV7Clrex]>; + +def HasV8MMainlineOps : + SubtargetFeature<"v8m.main", "HasV8MMainlineOps", "true", + "Support ARM v8M Mainline instructions", + [HasV7Ops]>; + +def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true", + "Support ARM v8 instructions", + [HasV7Ops, FeatureAcquireRelease]>; + +def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", + "Support ARM v8.1a instructions", + [HasV8Ops]>; + +def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true", + "Support ARM v8.2a instructions", + [HasV8_1aOps]>; + +def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true", + "Support ARM v8.3a instructions", + [HasV8_2aOps]>; + +def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true", + "Support ARM v8.4a instructions", + [HasV8_3aOps, FeatureDotProd]>; + +//===----------------------------------------------------------------------===// +// ARM Processor subtarget features. 
+// + +def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5", + "Cortex-A5 ARM processors", []>; +def ProcA7 : SubtargetFeature<"a7", "ARMProcFamily", "CortexA7", + "Cortex-A7 ARM processors", []>; +def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8", + "Cortex-A8 ARM processors", []>; +def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9", + "Cortex-A9 ARM processors", []>; +def ProcA12 : SubtargetFeature<"a12", "ARMProcFamily", "CortexA12", + "Cortex-A12 ARM processors", []>; +def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15", + "Cortex-A15 ARM processors", []>; +def ProcA17 : SubtargetFeature<"a17", "ARMProcFamily", "CortexA17", + "Cortex-A17 ARM processors", []>; +def ProcA32 : SubtargetFeature<"a32", "ARMProcFamily", "CortexA32", + "Cortex-A32 ARM processors", []>; +def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35", + "Cortex-A35 ARM processors", []>; +def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", + "Cortex-A53 ARM processors", []>; +def ProcA55 : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55", + "Cortex-A55 ARM processors", []>; +def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", + "Cortex-A57 ARM processors", []>; +def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72", + "Cortex-A72 ARM processors", []>; +def ProcA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73", + "Cortex-A73 ARM processors", []>; +def ProcA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75", + "Cortex-A75 ARM processors", []>; + +def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait", + "Qualcomm Krait processors", []>; +def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo", + "Qualcomm Kryo processors", []>; +def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift", + "Swift ARM processors", []>; + +def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1", + "Samsung Exynos-Mx processors", []>; + +def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4", + "Cortex-R4 ARM processors", []>; +def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5", + "Cortex-R5 ARM processors", []>; +def ProcR7 : SubtargetFeature<"r7", "ARMProcFamily", "CortexR7", + "Cortex-R7 ARM processors", []>; +def ProcR52 : SubtargetFeature<"r52", "ARMProcFamily", "CortexR52", + "Cortex-R52 ARM processors", []>; + +def ProcM3 : SubtargetFeature<"m3", "ARMProcFamily", "CortexM3", + "Cortex-M3 ARM processors", []>; + + +//===----------------------------------------------------------------------===// +// ARM Helper classes. 
+// + +class Architecture features> + : SubtargetFeature; + +class ProcNoItin Features> + : Processor; + + +//===----------------------------------------------------------------------===// +// ARM architectures +// + +def ARMv2 : Architecture<"armv2", "ARMv2", []>; + +def ARMv2a : Architecture<"armv2a", "ARMv2a", []>; + +def ARMv3 : Architecture<"armv3", "ARMv3", []>; + +def ARMv3m : Architecture<"armv3m", "ARMv3m", []>; + +def ARMv4 : Architecture<"armv4", "ARMv4", []>; + +def ARMv4t : Architecture<"armv4t", "ARMv4t", [HasV4TOps]>; + +def ARMv5t : Architecture<"armv5t", "ARMv5t", [HasV5TOps]>; + +def ARMv5te : Architecture<"armv5te", "ARMv5te", [HasV5TEOps]>; + +def ARMv5tej : Architecture<"armv5tej", "ARMv5tej", [HasV5TEOps]>; + +def ARMv6 : Architecture<"armv6", "ARMv6", [HasV6Ops, + FeatureDSP]>; + +def ARMv6t2 : Architecture<"armv6t2", "ARMv6t2", [HasV6T2Ops, + FeatureDSP]>; + +def ARMv6k : Architecture<"armv6k", "ARMv6k", [HasV6KOps]>; + +def ARMv6kz : Architecture<"armv6kz", "ARMv6kz", [HasV6KOps, + FeatureTrustZone]>; + +def ARMv6m : Architecture<"armv6-m", "ARMv6m", [HasV6MOps, + FeatureNoARM, + ModeThumb, + FeatureDB, + FeatureMClass, + FeatureStrictAlign]>; + +def ARMv6sm : Architecture<"armv6s-m", "ARMv6sm", [HasV6MOps, + FeatureNoARM, + ModeThumb, + FeatureDB, + FeatureMClass, + FeatureStrictAlign]>; + +def ARMv7a : Architecture<"armv7-a", "ARMv7a", [HasV7Ops, + FeatureNEON, + FeatureDB, + FeatureDSP, + FeatureAClass]>; + +def ARMv7ve : Architecture<"armv7ve", "ARMv7ve", [HasV7Ops, + FeatureNEON, + FeatureDB, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureAClass]>; + +def ARMv7r : Architecture<"armv7-r", "ARMv7r", [HasV7Ops, + FeatureDB, + FeatureDSP, + FeatureHWDivThumb, + FeatureRClass]>; + +def ARMv7m : Architecture<"armv7-m", "ARMv7m", [HasV7Ops, + FeatureThumb2, + FeatureNoARM, + ModeThumb, + FeatureDB, + FeatureHWDivThumb, + FeatureMClass]>; + +def ARMv7em : Architecture<"armv7e-m", "ARMv7em", [HasV7Ops, + FeatureThumb2, + FeatureNoARM, + ModeThumb, + FeatureDB, + FeatureHWDivThumb, + FeatureMClass, + FeatureDSP]>; + +def ARMv8a : Architecture<"armv8-a", "ARMv8a", [HasV8Ops, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC]>; + +def ARMv81a : Architecture<"armv8.1-a", "ARMv81a", [HasV8_1aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC]>; + +def ARMv82a : Architecture<"armv8.2-a", "ARMv82a", [HasV8_2aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC, + FeatureRAS]>; + +def ARMv83a : Architecture<"armv8.3-a", "ARMv83a", [HasV8_3aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC, + FeatureRAS]>; + +def ARMv84a : Architecture<"armv8.4-a", "ARMv84a", [HasV8_4aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC, + FeatureRAS, + FeatureDotProd]>; + +def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops, + FeatureRClass, + FeatureDB, + FeatureDFB, + FeatureDSP, + FeatureCRC, + FeatureMP, + FeatureVirtualization, + FeatureFPARMv8, + FeatureNEON]>; + +def 
ARMv8mBaseline : Architecture<"armv8-m.base", "ARMv8mBaseline", + [HasV8MBaselineOps, + FeatureNoARM, + ModeThumb, + FeatureDB, + FeatureHWDivThumb, + FeatureV7Clrex, + Feature8MSecExt, + FeatureAcquireRelease, + FeatureMClass, + FeatureStrictAlign]>; + +def ARMv8mMainline : Architecture<"armv8-m.main", "ARMv8mMainline", + [HasV8MMainlineOps, + FeatureNoARM, + ModeThumb, + FeatureDB, + FeatureHWDivThumb, + Feature8MSecExt, + FeatureAcquireRelease, + FeatureMClass]>; + +// Aliases +def IWMMXT : Architecture<"iwmmxt", "ARMv5te", [ARMv5te]>; +def IWMMXT2 : Architecture<"iwmmxt2", "ARMv5te", [ARMv5te]>; +def XScale : Architecture<"xscale", "ARMv5te", [ARMv5te]>; +def ARMv6j : Architecture<"armv6j", "ARMv7a", [ARMv6]>; +def ARMv7k : Architecture<"armv7k", "ARMv7a", [ARMv7a]>; +def ARMv7s : Architecture<"armv7s", "ARMv7a", [ARMv7a]>; + + +//===----------------------------------------------------------------------===// +// ARM schedules. +//===----------------------------------------------------------------------===// +// +include "ARMSchedule.td" + +//===----------------------------------------------------------------------===// +// ARM processors +// + +// Dummy CPU, used to target architectures +def : ProcessorModel<"generic", CortexA8Model, []>; + +// FIXME: Several processors below are not using their own scheduler +// model, but one of similar/previous processor. These should be fixed. + +def : ProcNoItin<"arm8", [ARMv4]>; +def : ProcNoItin<"arm810", [ARMv4]>; +def : ProcNoItin<"strongarm", [ARMv4]>; +def : ProcNoItin<"strongarm110", [ARMv4]>; +def : ProcNoItin<"strongarm1100", [ARMv4]>; +def : ProcNoItin<"strongarm1110", [ARMv4]>; + +def : ProcNoItin<"arm7tdmi", [ARMv4t]>; +def : ProcNoItin<"arm7tdmi-s", [ARMv4t]>; +def : ProcNoItin<"arm710t", [ARMv4t]>; +def : ProcNoItin<"arm720t", [ARMv4t]>; +def : ProcNoItin<"arm9", [ARMv4t]>; +def : ProcNoItin<"arm9tdmi", [ARMv4t]>; +def : ProcNoItin<"arm920", [ARMv4t]>; +def : ProcNoItin<"arm920t", [ARMv4t]>; +def : ProcNoItin<"arm922t", [ARMv4t]>; +def : ProcNoItin<"arm940t", [ARMv4t]>; +def : ProcNoItin<"ep9312", [ARMv4t]>; + +def : ProcNoItin<"arm10tdmi", [ARMv5t]>; +def : ProcNoItin<"arm1020t", [ARMv5t]>; + +def : ProcNoItin<"arm9e", [ARMv5te]>; +def : ProcNoItin<"arm926ej-s", [ARMv5te]>; +def : ProcNoItin<"arm946e-s", [ARMv5te]>; +def : ProcNoItin<"arm966e-s", [ARMv5te]>; +def : ProcNoItin<"arm968e-s", [ARMv5te]>; +def : ProcNoItin<"arm10e", [ARMv5te]>; +def : ProcNoItin<"arm1020e", [ARMv5te]>; +def : ProcNoItin<"arm1022e", [ARMv5te]>; +def : ProcNoItin<"xscale", [ARMv5te]>; +def : ProcNoItin<"iwmmxt", [ARMv5te]>; + +def : Processor<"arm1136j-s", ARMV6Itineraries, [ARMv6]>; +def : Processor<"arm1136jf-s", ARMV6Itineraries, [ARMv6, + FeatureVFP2, + FeatureHasSlowFPVMLx]>; + +def : Processor<"cortex-m0", ARMV6Itineraries, [ARMv6m]>; +def : Processor<"cortex-m0plus", ARMV6Itineraries, [ARMv6m]>; +def : Processor<"cortex-m1", ARMV6Itineraries, [ARMv6m]>; +def : Processor<"sc000", ARMV6Itineraries, [ARMv6m]>; + +def : Processor<"arm1176j-s", ARMV6Itineraries, [ARMv6kz]>; +def : Processor<"arm1176jz-s", ARMV6Itineraries, [ARMv6kz]>; +def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ARMv6kz, + FeatureVFP2, + FeatureHasSlowFPVMLx]>; + +def : Processor<"mpcorenovfp", ARMV6Itineraries, [ARMv6k]>; +def : Processor<"mpcore", ARMV6Itineraries, [ARMv6k, + FeatureVFP2, + FeatureHasSlowFPVMLx]>; + +def : Processor<"arm1156t2-s", ARMV6Itineraries, [ARMv6t2]>; +def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ARMv6t2, + FeatureVFP2, + FeatureHasSlowFPVMLx]>; + 
+def : ProcessorModel<"cortex-a5", CortexA8Model, [ARMv7a, ProcA5, + FeatureHasRetAddrStack, + FeatureTrustZone, + FeatureSlowFPBrcc, + FeatureHasSlowFPVMLx, + FeatureVMLxForwarding, + FeatureMP, + FeatureVFP4]>; + +def : ProcessorModel<"cortex-a7", CortexA8Model, [ARMv7a, ProcA7, + FeatureHasRetAddrStack, + FeatureTrustZone, + FeatureSlowFPBrcc, + FeatureHasVMLxHazards, + FeatureHasSlowFPVMLx, + FeatureVMLxForwarding, + FeatureMP, + FeatureVFP4, + FeatureVirtualization]>; + +def : ProcessorModel<"cortex-a8", CortexA8Model, [ARMv7a, ProcA8, + FeatureHasRetAddrStack, + FeatureNonpipelinedVFP, + FeatureTrustZone, + FeatureSlowFPBrcc, + FeatureHasVMLxHazards, + FeatureHasSlowFPVMLx, + FeatureVMLxForwarding]>; + +def : ProcessorModel<"cortex-a9", CortexA9Model, [ARMv7a, ProcA9, + FeatureHasRetAddrStack, + FeatureTrustZone, + FeatureHasVMLxHazards, + FeatureVMLxForwarding, + FeatureFP16, + FeatureAvoidPartialCPSR, + FeatureExpandMLx, + FeaturePreferVMOVSR, + FeatureMuxedUnits, + FeatureNEONForFPMovs, + FeatureCheckVLDnAlign, + FeatureMP]>; + +def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12, + FeatureHasRetAddrStack, + FeatureTrustZone, + FeatureVMLxForwarding, + FeatureVFP4, + FeatureAvoidPartialCPSR, + FeatureVirtualization, + FeatureMP]>; + +def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15, + FeatureDontWidenVMOVS, + FeatureSplatVFPToNeon, + FeatureHasRetAddrStack, + FeatureMuxedUnits, + FeatureTrustZone, + FeatureVFP4, + FeatureMP, + FeatureCheckVLDnAlign, + FeatureAvoidPartialCPSR, + FeatureVirtualization]>; + +def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17, + FeatureHasRetAddrStack, + FeatureTrustZone, + FeatureMP, + FeatureVMLxForwarding, + FeatureVFP4, + FeatureAvoidPartialCPSR, + FeatureVirtualization]>; + +// FIXME: krait has currently the same features as A9 plus VFP4 and HWDiv +def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait, + FeatureHasRetAddrStack, + FeatureMuxedUnits, + FeatureCheckVLDnAlign, + FeatureVMLxForwarding, + FeatureFP16, + FeatureAvoidPartialCPSR, + FeatureVFP4, + FeatureHWDivThumb, + FeatureHWDivARM]>; + +def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift, + FeatureHasRetAddrStack, + FeatureNEONForFP, + FeatureVFP4, + FeatureMP, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureAvoidPartialCPSR, + FeatureAvoidMOVsShOp, + FeatureHasSlowFPVMLx, + FeatureHasVMLxHazards, + FeatureProfUnpredicate, + FeaturePrefISHSTBarrier, + FeatureSlowOddRegister, + FeatureSlowLoadDSubreg, + FeatureSlowVGETLNi32, + FeatureSlowVDUP32, + FeatureUseMISched, + FeatureNoPostRASched]>; + +def : ProcessorModel<"cortex-r4", CortexA8Model, [ARMv7r, ProcR4, + FeatureHasRetAddrStack, + FeatureAvoidPartialCPSR]>; + +def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4, + FeatureHasRetAddrStack, + FeatureSlowFPBrcc, + FeatureHasSlowFPVMLx, + FeatureVFP3, + FeatureD16, + FeatureAvoidPartialCPSR]>; + +def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5, + FeatureHasRetAddrStack, + FeatureVFP3, + FeatureD16, + FeatureSlowFPBrcc, + FeatureHWDivARM, + FeatureHasSlowFPVMLx, + FeatureAvoidPartialCPSR]>; + +def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7, + FeatureHasRetAddrStack, + FeatureVFP3, + FeatureD16, + FeatureFP16, + FeatureMP, + FeatureSlowFPBrcc, + FeatureHWDivARM, + FeatureHasSlowFPVMLx, + FeatureAvoidPartialCPSR]>; + +def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r, + FeatureHasRetAddrStack, + FeatureVFP3, + FeatureD16, + FeatureFP16, + FeatureMP, + 
FeatureSlowFPBrcc, + FeatureHWDivARM, + FeatureHasSlowFPVMLx, + FeatureAvoidPartialCPSR]>; + +def : ProcessorModel<"cortex-m3", CortexM3Model, [ARMv7m, + ProcM3, + FeatureHasNoBranchPredictor]>; + +def : ProcessorModel<"sc300", CortexM3Model, [ARMv7m, + ProcM3, + FeatureHasNoBranchPredictor]>; + +def : ProcessorModel<"cortex-m4", CortexM3Model, [ARMv7em, + FeatureVFP4, + FeatureVFPOnlySP, + FeatureD16, + FeatureHasNoBranchPredictor]>; + +def : ProcNoItin<"cortex-m7", [ARMv7em, + FeatureFPARMv8, + FeatureD16]>; + +def : ProcNoItin<"cortex-m23", [ARMv8mBaseline, + FeatureNoMovt]>; + +def : ProcessorModel<"cortex-m33", CortexM3Model, [ARMv8mMainline, + FeatureDSP, + FeatureFPARMv8, + FeatureD16, + FeatureVFPOnlySP, + FeatureHasNoBranchPredictor]>; + +def : ProcNoItin<"cortex-a32", [ARMv8a, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC]>; + +def : ProcNoItin<"cortex-a35", [ARMv8a, ProcA35, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC]>; + +def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC, + FeatureFPAO]>; + +def : ProcNoItin<"cortex-a55", [ARMv82a, ProcA55, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureDotProd]>; + +def : ProcessorModel<"cortex-a57", CortexA57Model, [ARMv8a, ProcA57, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC, + FeatureFPAO, + FeatureAvoidPartialCPSR, + FeatureCheapPredicableCPSR]>; + +def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC]>; + +def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC]>; + +def : ProcNoItin<"cortex-a75", [ARMv82a, ProcA75, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureDotProd]>; + +def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift, + FeatureHasRetAddrStack, + FeatureNEONForFP, + FeatureVFP4, + FeatureMP, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureAvoidPartialCPSR, + FeatureAvoidMOVsShOp, + FeatureHasSlowFPVMLx, + FeatureCrypto, + FeatureUseMISched, + FeatureZCZeroing, + FeatureNoPostRASched]>; + +def : ProcNoItin<"exynos-m1", [ARMv8a, ProcExynosM1, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC]>; + +def : ProcNoItin<"exynos-m2", [ARMv8a, ProcExynosM1, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC]>; + +def : ProcNoItin<"exynos-m3", [ARMv8a, ProcExynosM1, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC]>; + +def : ProcNoItin<"exynos-m4", [ARMv8a, ProcExynosM1, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC]>; + +def : ProcNoItin<"kryo", [ARMv8a, ProcKryo, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC]>; + +def : ProcessorModel<"cortex-r52", CortexR52Model, [ARMv8r, ProcR52, + FeatureUseMISched, + FeatureFPAO, + FeatureUseAA]>; + +//===----------------------------------------------------------------------===// +// Register File Description +//===----------------------------------------------------------------------===// + +include "ARMRegisterInfo.td" +include "ARMRegisterBanks.td" +include "ARMCallingConv.td" + +//===----------------------------------------------------------------------===// +// Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "ARMInstrInfo.td" +def ARMInstrInfo : InstrInfo; + 
+//===----------------------------------------------------------------------===// +// Declare the target which we are implementing +//===----------------------------------------------------------------------===// + +def ARMAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + int PassSubtarget = 1; + int Variant = 0; + bit isMCAsmWriter = 1; +} + +def ARMAsmParser : AsmParser { + bit ReportMultipleNearMisses = 1; +} + +def ARMAsmParserVariant : AsmParserVariant { + int Variant = 0; + string Name = "ARM"; + string BreakCharacters = "."; +} + +def ARM : Target { + // Pull in Instruction Info. + let InstructionSet = ARMInstrInfo; + let AssemblyWriters = [ARMAsmWriter]; + let AssemblyParsers = [ARMAsmParser]; + let AssemblyParserVariants = [ARMAsmParserVariant]; + let AllowRegisterRenaming = 1; +} diff --git a/capstone/suite/synctools/tablegen/ARM/ARMCallingConv.td b/capstone/suite/synctools/tablegen/ARM/ARMCallingConv.td new file mode 100644 index 000000000..f173e423f --- /dev/null +++ b/capstone/suite/synctools/tablegen/ARM/ARMCallingConv.td @@ -0,0 +1,318 @@ +//===-- ARMCallingConv.td - Calling Conventions for ARM ----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This describes the calling conventions for ARM architecture. +//===----------------------------------------------------------------------===// + +/// CCIfAlign - Match of the original alignment of the arg +class CCIfAlign: + CCIf; + +//===----------------------------------------------------------------------===// +// ARM APCS Calling Convention +//===----------------------------------------------------------------------===// +def CC_ARM_APCS : CallingConv<[ + + // Handles byval parameters. + CCIfByVal>, + + CCIfType<[i1, i8, i16], CCPromoteToType>, + + // Pass SwiftSelf in a callee saved register. + CCIfSwiftSelf>>, + + // A SwiftError is passed in R8. + CCIfSwiftError>>, + + // Handle all vector types as either f64 or v2f64. + CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType>, + + // f64 and v2f64 are passed in adjacent GPRs, possibly split onto the stack + CCIfType<[f64, v2f64], CCCustom<"CC_ARM_APCS_Custom_f64">>, + + CCIfType<[f32], CCBitConvertToType>, + CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>, + + CCIfType<[i32], CCAssignToStack<4, 4>>, + CCIfType<[f64], CCAssignToStack<8, 4>>, + CCIfType<[v2f64], CCAssignToStack<16, 4>> +]>; + +def RetCC_ARM_APCS : CallingConv<[ + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[f32], CCBitConvertToType>, + + // Pass SwiftSelf in a callee saved register. + CCIfSwiftSelf>>, + + // A SwiftError is returned in R8. + CCIfSwiftError>>, + + // Handle all vector types as either f64 or v2f64. 
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType>, + + CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_APCS_Custom_f64">>, + + CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>> +]>; + +//===----------------------------------------------------------------------===// +// ARM APCS Calling Convention for FastCC (when VFP2 or later is available) +//===----------------------------------------------------------------------===// +def FastCC_ARM_APCS : CallingConv<[ + // Handle all vector types as either f64 or v2f64. + CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType>, + + CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>, + CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, + CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8, + S9, S10, S11, S12, S13, S14, S15]>>, + + // CPRCs may be allocated to co-processor registers or the stack - they + // may never be allocated to core registers. + CCIfType<[f32], CCAssignToStackWithShadow<4, 4, [Q0, Q1, Q2, Q3]>>, + CCIfType<[f64], CCAssignToStackWithShadow<8, 4, [Q0, Q1, Q2, Q3]>>, + CCIfType<[v2f64], CCAssignToStackWithShadow<16, 4, [Q0, Q1, Q2, Q3]>>, + + CCDelegateTo +]>; + +def RetFastCC_ARM_APCS : CallingConv<[ + // Handle all vector types as either f64 or v2f64. + CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType>, + + CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>, + CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, + CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8, + S9, S10, S11, S12, S13, S14, S15]>>, + CCDelegateTo +]>; + +//===----------------------------------------------------------------------===// +// ARM APCS Calling Convention for GHC +//===----------------------------------------------------------------------===// + +def CC_ARM_APCS_GHC : CallingConv<[ + // Handle all vector types as either f64 or v2f64. + CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType>, + + CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>, + CCIfType<[f64], CCAssignToReg<[D8, D9, D10, D11]>>, + CCIfType<[f32], CCAssignToReg<[S16, S17, S18, S19, S20, S21, S22, S23]>>, + + // Promote i8/i16 arguments to i32. 
+ CCIfType<[i8, i16], CCPromoteToType>, + + // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, SpLim + CCIfType<[i32], CCAssignToReg<[R4, R5, R6, R7, R8, R9, R10, R11]>> +]>; + +//===----------------------------------------------------------------------===// +// ARM AAPCS (EABI) Calling Convention, common parts +//===----------------------------------------------------------------------===// + +def CC_ARM_AAPCS_Common : CallingConv<[ + + CCIfType<[i1, i8, i16], CCPromoteToType>, + + // i64/f64 is passed in even pairs of GPRs + // i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register + // (and the same is true for f64 if VFP is not enabled) + CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>, + CCIfType<[i32], CCIf<"ArgFlags.getOrigAlign() != 8", + CCAssignToReg<[R0, R1, R2, R3]>>>, + + CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, [R0, R1, R2, R3]>>>, + CCIfType<[i32], CCAssignToStackWithShadow<4, 4, [R0, R1, R2, R3]>>, + CCIfType<[f32], CCAssignToStackWithShadow<4, 4, [Q0, Q1, Q2, Q3]>>, + CCIfType<[f64], CCAssignToStackWithShadow<8, 8, [Q0, Q1, Q2, Q3]>>, + CCIfType<[v2f64], CCIfAlign<"16", + CCAssignToStackWithShadow<16, 16, [Q0, Q1, Q2, Q3]>>>, + CCIfType<[v2f64], CCAssignToStackWithShadow<16, 8, [Q0, Q1, Q2, Q3]>> +]>; + +def RetCC_ARM_AAPCS_Common : CallingConv<[ + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>> +]>; + +//===----------------------------------------------------------------------===// +// ARM AAPCS (EABI) Calling Convention +//===----------------------------------------------------------------------===// + +def CC_ARM_AAPCS : CallingConv<[ + // Handles byval parameters. + CCIfByVal>, + + // The 'nest' parameter, if any, is passed in R12. + CCIfNest>, + + // Handle all vector types as either f64 or v2f64. + CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType>, + CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType>, + + // Pass SwiftSelf in a callee saved register. + CCIfSwiftSelf>>, + + // A SwiftError is passed in R8. + CCIfSwiftError>>, + + CCIfType<[f64, v2f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>, + CCIfType<[f32], CCBitConvertToType>, + CCDelegateTo +]>; + +def RetCC_ARM_AAPCS : CallingConv<[ + // Handle all vector types as either f64 or v2f64. + CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType>, + CCIfType<[v2i64, v4i32, v8i16, v8f16,v16i8, v4f32], CCBitConvertToType>, + + // Pass SwiftSelf in a callee saved register. + CCIfSwiftSelf>>, + + // A SwiftError is returned in R8. + CCIfSwiftError>>, + + CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>, + CCIfType<[f32], CCBitConvertToType>, + + CCDelegateTo +]>; + +//===----------------------------------------------------------------------===// +// ARM AAPCS-VFP (EABI) Calling Convention +// Also used for FastCC (when VFP2 or later is available) +//===----------------------------------------------------------------------===// + +def CC_ARM_AAPCS_VFP : CallingConv<[ + // Handles byval parameters. + CCIfByVal>, + + // Handle all vector types as either f64 or v2f64. + CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType>, + CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType>, + + // Pass SwiftSelf in a callee saved register. + CCIfSwiftSelf>>, + + // A SwiftError is passed in R8. 
+ CCIfSwiftError>>, + + // HFAs are passed in a contiguous block of registers, or on the stack + CCIfConsecutiveRegs>, + + CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>, + CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, + CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8, + S9, S10, S11, S12, S13, S14, S15]>>, + CCDelegateTo +]>; + +def RetCC_ARM_AAPCS_VFP : CallingConv<[ + // Handle all vector types as either f64 or v2f64. + CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType>, + CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType>, + + // Pass SwiftSelf in a callee saved register. + CCIfSwiftSelf>>, + + // A SwiftError is returned in R8. + CCIfSwiftError>>, + + CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>, + CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, + CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8, + S9, S10, S11, S12, S13, S14, S15]>>, + CCDelegateTo +]>; + +//===----------------------------------------------------------------------===// +// Callee-saved register lists. +//===----------------------------------------------------------------------===// + +def CSR_NoRegs : CalleeSavedRegs<(add)>; +def CSR_FPRegs : CalleeSavedRegs<(add (sequence "D%u", 0, 31))>; + +def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4, + (sequence "D%u", 15, 8))>; + +// R8 is used to pass swifterror, remove it from CSR. +def CSR_AAPCS_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS, R8)>; + +// The order of callee-saved registers needs to match the order we actually push +// them in FrameLowering, because this order is what's used by +// PrologEpilogInserter to allocate frame index slots. So when R7 is the frame +// pointer, we use this AAPCS alternative. +def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R7, R6, R5, R4, + R11, R10, R9, R8, + (sequence "D%u", 15, 8))>; + +// R8 is used to pass swifterror, remove it from CSR. +def CSR_AAPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS_SplitPush, + R8)>; + +// Constructors and destructors return 'this' in the ARM C++ ABI; since 'this' +// and the pointer return value are both passed in R0 in these cases, this can +// be partially modelled by treating R0 as a callee-saved register +// Only the resulting RegMask is used; the SaveList is ignored +def CSR_AAPCS_ThisReturn : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, + R5, R4, (sequence "D%u", 15, 8), + R0)>; + +// iOS ABI deviates from ARM standard ABI. R9 is not a callee-saved register. +// Also save R7-R4 first to match the stack frame fixed spill areas. +def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>; + +// R8 is used to pass swifterror, remove it from CSR. +def CSR_iOS_SwiftError : CalleeSavedRegs<(sub CSR_iOS, R8)>; + +def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4, + (sub CSR_AAPCS_ThisReturn, R9))>; + +def CSR_iOS_TLSCall + : CalleeSavedRegs<(add LR, SP, (sub(sequence "R%u", 12, 1), R9, R12), + (sequence "D%u", 31, 0))>; + +// C++ TLS access function saves all registers except SP. Try to match +// the order of CSRs in CSR_iOS. +def CSR_iOS_CXX_TLS : CalleeSavedRegs<(add CSR_iOS, (sequence "R%u", 12, 1), + (sequence "D%u", 31, 0))>; + +// CSRs that are handled by prologue, epilogue. +def CSR_iOS_CXX_TLS_PE : CalleeSavedRegs<(add LR, R12, R11, R7, R5, R4)>; + +// CSRs that are handled explicitly via copies. 
+def CSR_iOS_CXX_TLS_ViaCopy : CalleeSavedRegs<(sub CSR_iOS_CXX_TLS, + CSR_iOS_CXX_TLS_PE)>; + +// The "interrupt" attribute is used to generate code that is acceptable in +// exception-handlers of various kinds. It makes us use a different return +// instruction (handled elsewhere) and affects which registers we must return to +// our "caller" in the same state as we receive them. + +// For most interrupts, all registers except SP and LR are shared with +// user-space. We mark LR to be saved anyway, since this is what the ARM backend +// generally does rather than tracking its liveness as a normal register. +def CSR_GenericInt : CalleeSavedRegs<(add LR, (sequence "R%u", 12, 0))>; + +// The fast interrupt handlers have more private state and get their own copies +// of R8-R12, in addition to SP and LR. As before, mark LR for saving too. + +// FIXME: we mark R11 as callee-saved since it's often the frame-pointer, and +// current frame lowering expects to encounter it while processing callee-saved +// registers. +def CSR_FIQ : CalleeSavedRegs<(add LR, R11, (sequence "R%u", 7, 0))>; + + diff --git a/capstone/suite/synctools/tablegen/ARM/ARMInstrFormats.td b/capstone/suite/synctools/tablegen/ARM/ARMInstrFormats.td new file mode 100644 index 000000000..1d3b1414f --- /dev/null +++ b/capstone/suite/synctools/tablegen/ARM/ARMInstrFormats.td @@ -0,0 +1,2620 @@ +//===-- ARMInstrFormats.td - ARM Instruction Formats -------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// +// ARM Instruction Format Definitions. +// + +// Format specifies the encoding used by the instruction. This is part of the +// ad-hoc solution used to emit machine instruction encodings by our machine +// code emitter. +class Format val> { + bits<6> Value = val; +} + +def Pseudo : Format<0>; +def MulFrm : Format<1>; +def BrFrm : Format<2>; +def BrMiscFrm : Format<3>; + +def DPFrm : Format<4>; +def DPSoRegRegFrm : Format<5>; + +def LdFrm : Format<6>; +def StFrm : Format<7>; +def LdMiscFrm : Format<8>; +def StMiscFrm : Format<9>; +def LdStMulFrm : Format<10>; + +def LdStExFrm : Format<11>; + +def ArithMiscFrm : Format<12>; +def SatFrm : Format<13>; +def ExtFrm : Format<14>; + +def VFPUnaryFrm : Format<15>; +def VFPBinaryFrm : Format<16>; +def VFPConv1Frm : Format<17>; +def VFPConv2Frm : Format<18>; +def VFPConv3Frm : Format<19>; +def VFPConv4Frm : Format<20>; +def VFPConv5Frm : Format<21>; +def VFPLdStFrm : Format<22>; +def VFPLdStMulFrm : Format<23>; +def VFPMiscFrm : Format<24>; + +def ThumbFrm : Format<25>; +def MiscFrm : Format<26>; + +def NGetLnFrm : Format<27>; +def NSetLnFrm : Format<28>; +def NDupFrm : Format<29>; +def NLdStFrm : Format<30>; +def N1RegModImmFrm: Format<31>; +def N2RegFrm : Format<32>; +def NVCVTFrm : Format<33>; +def NVDupLnFrm : Format<34>; +def N2RegVShLFrm : Format<35>; +def N2RegVShRFrm : Format<36>; +def N3RegFrm : Format<37>; +def N3RegVShFrm : Format<38>; +def NVExtFrm : Format<39>; +def NVMulSLFrm : Format<40>; +def NVTBLFrm : Format<41>; +def DPSoRegImmFrm : Format<42>; +def N3RegCplxFrm : Format<43>; + +// Misc flags. + +// The instruction has an Rn register operand. +// UnaryDP - Indicates this is a unary data processing instruction, i.e. +// it doesn't have a Rn operand. 
+class UnaryDP { bit isUnaryDataProc = 1; } + +// Xform16Bit - Indicates this Thumb2 instruction may be transformed into +// a 16-bit Thumb instruction if certain conditions are met. +class Xform16Bit { bit canXformTo16Bit = 1; } + +//===----------------------------------------------------------------------===// +// ARM Instruction flags. These need to match ARMBaseInstrInfo.h. +// + +// FIXME: Once the JIT is MC-ized, these can go away. +// Addressing mode. +class AddrMode val> { + bits<5> Value = val; +} +def AddrModeNone : AddrMode<0>; +def AddrMode1 : AddrMode<1>; +def AddrMode2 : AddrMode<2>; +def AddrMode3 : AddrMode<3>; +def AddrMode4 : AddrMode<4>; +def AddrMode5 : AddrMode<5>; +def AddrMode6 : AddrMode<6>; +def AddrModeT1_1 : AddrMode<7>; +def AddrModeT1_2 : AddrMode<8>; +def AddrModeT1_4 : AddrMode<9>; +def AddrModeT1_s : AddrMode<10>; +def AddrModeT2_i12 : AddrMode<11>; +def AddrModeT2_i8 : AddrMode<12>; +def AddrModeT2_so : AddrMode<13>; +def AddrModeT2_pc : AddrMode<14>; +def AddrModeT2_i8s4 : AddrMode<15>; +def AddrMode_i12 : AddrMode<16>; +def AddrMode5FP16 : AddrMode<17>; +def AddrModeT2_ldrex : AddrMode<18>; + +// Load / store index mode. +class IndexMode val> { + bits<2> Value = val; +} +def IndexModeNone : IndexMode<0>; +def IndexModePre : IndexMode<1>; +def IndexModePost : IndexMode<2>; +def IndexModeUpd : IndexMode<3>; + +// Instruction execution domain. +class Domain val> { + bits<3> Value = val; +} +def GenericDomain : Domain<0>; +def VFPDomain : Domain<1>; // Instructions in VFP domain only +def NeonDomain : Domain<2>; // Instructions in Neon domain only +def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains +def VFPNeonA8Domain : Domain<5>; // Instructions in VFP & Neon under A8 + +//===----------------------------------------------------------------------===// +// ARM special operands. +// + +// ARM imod and iflag operands, used only by the CPS instruction. +def imod_op : Operand { + let PrintMethod = "printCPSIMod"; +} + +def ProcIFlagsOperand : AsmOperandClass { + let Name = "ProcIFlags"; + let ParserMethod = "parseProcIFlagsOperand"; +} +def iflags_op : Operand { + let PrintMethod = "printCPSIFlag"; + let ParserMatchClass = ProcIFlagsOperand; +} + +// ARM Predicate operand. Default to 14 = always (AL). Second part is CC +// register whose default is 0 (no register). +def CondCodeOperand : AsmOperandClass { let Name = "CondCode"; } +def pred : PredicateOperand { + let PrintMethod = "printPredicateOperand"; + let ParserMatchClass = CondCodeOperand; + let DecoderMethod = "DecodePredicateOperand"; +} + +// Selectable predicate operand for CMOV instructions. We can't use a normal +// predicate because the default values interfere with instruction selection. In +// all other respects it is identical though: pseudo-instruction expansion +// relies on the MachineOperands being compatible. +def cmovpred : Operand, PredicateOp, + ComplexPattern { + let MIOperandInfo = (ops i32imm, i32imm); + let PrintMethod = "printPredicateOperand"; +} + +// Conditional code result for instructions whose 's' bit is set, e.g. subs. +def CCOutOperand : AsmOperandClass { let Name = "CCOut"; } +def cc_out : OptionalDefOperand { + let EncoderMethod = "getCCOutOpValue"; + let PrintMethod = "printSBitModifierOperand"; + let ParserMatchClass = CCOutOperand; + let DecoderMethod = "DecodeCCOutOperand"; +} + +// Same as cc_out except it defaults to setting CPSR. 
+def s_cc_out : OptionalDefOperand { + let EncoderMethod = "getCCOutOpValue"; + let PrintMethod = "printSBitModifierOperand"; + let ParserMatchClass = CCOutOperand; + let DecoderMethod = "DecodeCCOutOperand"; +} + +// ARM special operands for disassembly only. +// +def SetEndAsmOperand : ImmAsmOperand<0,1> { + let Name = "SetEndImm"; + let ParserMethod = "parseSetEndImm"; +} +def setend_op : Operand { + let PrintMethod = "printSetendOperand"; + let ParserMatchClass = SetEndAsmOperand; +} + +def MSRMaskOperand : AsmOperandClass { + let Name = "MSRMask"; + let ParserMethod = "parseMSRMaskOperand"; +} +def msr_mask : Operand { + let PrintMethod = "printMSRMaskOperand"; + let DecoderMethod = "DecodeMSRMask"; + let ParserMatchClass = MSRMaskOperand; +} + +def BankedRegOperand : AsmOperandClass { + let Name = "BankedReg"; + let ParserMethod = "parseBankedRegOperand"; +} +def banked_reg : Operand { + let PrintMethod = "printBankedRegOperand"; + let DecoderMethod = "DecodeBankedReg"; + let ParserMatchClass = BankedRegOperand; +} + +// Shift Right Immediate - A shift right immediate is encoded differently from +// other shift immediates. The imm6 field is encoded like so: +// +// Offset Encoding +// 8 imm6<5:3> = '001', 8 - is encoded in imm6<2:0> +// 16 imm6<5:4> = '01', 16 - is encoded in imm6<3:0> +// 32 imm6<5> = '1', 32 - is encoded in imm6<4:0> +// 64 64 - is encoded in imm6<5:0> +def shr_imm8_asm_operand : ImmAsmOperand<1,8> { let Name = "ShrImm8"; } +def shr_imm8 : Operand, ImmLeaf 0 && Imm <= 8; }]> { + let EncoderMethod = "getShiftRight8Imm"; + let DecoderMethod = "DecodeShiftRight8Imm"; + let ParserMatchClass = shr_imm8_asm_operand; +} +def shr_imm16_asm_operand : ImmAsmOperand<1,16> { let Name = "ShrImm16"; } +def shr_imm16 : Operand, ImmLeaf 0 && Imm <= 16; }]> { + let EncoderMethod = "getShiftRight16Imm"; + let DecoderMethod = "DecodeShiftRight16Imm"; + let ParserMatchClass = shr_imm16_asm_operand; +} +def shr_imm32_asm_operand : ImmAsmOperand<1,32> { let Name = "ShrImm32"; } +def shr_imm32 : Operand, ImmLeaf 0 && Imm <= 32; }]> { + let EncoderMethod = "getShiftRight32Imm"; + let DecoderMethod = "DecodeShiftRight32Imm"; + let ParserMatchClass = shr_imm32_asm_operand; +} +def shr_imm64_asm_operand : ImmAsmOperand<1,64> { let Name = "ShrImm64"; } +def shr_imm64 : Operand, ImmLeaf 0 && Imm <= 64; }]> { + let EncoderMethod = "getShiftRight64Imm"; + let DecoderMethod = "DecodeShiftRight64Imm"; + let ParserMatchClass = shr_imm64_asm_operand; +} + + +// ARM Assembler operand for ldr Rd, =expression which generates an offset +// to a constant pool entry or a MOV depending on the value of expression +def const_pool_asm_operand : AsmOperandClass { let Name = "ConstPoolAsmImm"; } +def const_pool_asm_imm : Operand { + let ParserMatchClass = const_pool_asm_operand; +} + + +//===----------------------------------------------------------------------===// +// ARM Assembler alias templates. 
+// +// Note: When EmitPriority == 1, the alias will be used for printing +class ARMInstAlias + : InstAlias, Requires<[IsARM]>; +class ARMInstSubst + : InstAlias, + Requires<[IsARM,UseNegativeImmediates]>; +class tInstAlias + : InstAlias, Requires<[IsThumb]>; +class tInstSubst + : InstAlias, + Requires<[IsThumb,UseNegativeImmediates]>; +class t2InstAlias + : InstAlias, Requires<[IsThumb2]>; +class t2InstSubst + : InstAlias, + Requires<[IsThumb2,UseNegativeImmediates]>; +class VFP2InstAlias + : InstAlias, Requires<[HasVFP2]>; +class VFP2DPInstAlias + : InstAlias, Requires<[HasVFP2,HasDPVFP]>; +class VFP3InstAlias + : InstAlias, Requires<[HasVFP3]>; +class NEONInstAlias + : InstAlias, Requires<[HasNEON]>; + + +class VFP2MnemonicAlias : MnemonicAlias, + Requires<[HasVFP2]>; +class NEONMnemonicAlias : MnemonicAlias, + Requires<[HasNEON]>; + +//===----------------------------------------------------------------------===// +// ARM Instruction templates. +// + + +class InstTemplate + : Instruction { + let Namespace = "ARM"; + + AddrMode AM = am; + int Size = sz; + IndexMode IM = im; + bits<2> IndexModeBits = IM.Value; + Format F = f; + bits<6> Form = F.Value; + Domain D = d; + bit isUnaryDataProc = 0; + bit canXformTo16Bit = 0; + // The instruction is a 16-bit flag setting Thumb instruction. Used + // by the parser to determine whether to require the 'S' suffix on the + // mnemonic (when not in an IT block) or preclude it (when in an IT block). + bit thumbArithFlagSetting = 0; + + // If this is a pseudo instruction, mark it isCodeGenOnly. + let isCodeGenOnly = !eq(!cast(f), "Pseudo"); + + // The layout of TSFlags should be kept in sync with ARMBaseInfo.h. + let TSFlags{4-0} = AM.Value; + let TSFlags{6-5} = IndexModeBits; + let TSFlags{12-7} = Form; + let TSFlags{13} = isUnaryDataProc; + let TSFlags{14} = canXformTo16Bit; + let TSFlags{17-15} = D.Value; + let TSFlags{18} = thumbArithFlagSetting; + + let Constraints = cstr; + let Itinerary = itin; +} + +class Encoding { + field bits<32> Inst; + // Mask of bits that cause an encoding to be UNPREDICTABLE. + // If a bit is set, then if the corresponding bit in the + // target encoding differs from its value in the "Inst" field, + // the instruction is UNPREDICTABLE (SoftFail in abstract parlance). + field bits<32> Unpredictable = 0; + // SoftFail is the generic name for this field, but we alias it so + // as to make it more obvious what it means in ARM-land. + field bits<32> SoftFail = Unpredictable; +} + +class InstARM + : InstTemplate, Encoding { + let DecoderNamespace = "ARM"; +} + +// This Encoding-less class is used by Thumb1 to specify the encoding bits later +// on by adding flavors to specific instructions. +class InstThumb + : InstTemplate { + let DecoderNamespace = "Thumb"; +} + +// Pseudo-instructions for alternate assembly syntax (never used by codegen). +// These are aliases that require C++ handling to convert to the target +// instruction, while InstAliases can be handled directly by tblgen. +class AsmPseudoInst + : InstTemplate { + let OutOperandList = oops; + let InOperandList = iops; + let Pattern = []; + let isCodeGenOnly = 0; // So we get asm matcher for it. 
+ let AsmString = asm; + let isPseudo = 1; +} + +class ARMAsmPseudo + : AsmPseudoInst, Requires<[IsARM]>; +class tAsmPseudo + : AsmPseudoInst, Requires<[IsThumb]>; +class t2AsmPseudo + : AsmPseudoInst, Requires<[IsThumb2]>; +class VFP2AsmPseudo + : AsmPseudoInst, Requires<[HasVFP2]>; +class NEONAsmPseudo + : AsmPseudoInst, Requires<[HasNEON]>; + +// Pseudo instructions for the code generator. +class PseudoInst pattern> + : InstTemplate { + let OutOperandList = oops; + let InOperandList = iops; + let Pattern = pattern; + let isCodeGenOnly = 1; + let isPseudo = 1; +} + +// PseudoInst that's ARM-mode only. +class ARMPseudoInst pattern> + : PseudoInst { + let Size = sz; + list Predicates = [IsARM]; +} + +// PseudoInst that's Thumb-mode only. +class tPseudoInst pattern> + : PseudoInst { + let Size = sz; + list Predicates = [IsThumb]; +} + +// PseudoInst that's in ARMv8-M baseline (Somewhere between Thumb and Thumb2) +class t2basePseudoInst pattern> + : PseudoInst { + let Size = sz; + list Predicates = [IsThumb,HasV8MBaseline]; +} + +// PseudoInst that's Thumb2-mode only. +class t2PseudoInst pattern> + : PseudoInst { + let Size = sz; + list Predicates = [IsThumb2]; +} + +class ARMPseudoExpand pattern, + dag Result> + : ARMPseudoInst, + PseudoInstExpansion; + +class tPseudoExpand pattern, + dag Result> + : tPseudoInst, + PseudoInstExpansion; + +class t2PseudoExpand pattern, + dag Result> + : t2PseudoInst, + PseudoInstExpansion; + +// Almost all ARM instructions are predicable. +class I pattern> + : InstARM { + bits<4> p; + let Inst{31-28} = p; + let OutOperandList = oops; + let InOperandList = !con(iops, (ins pred:$p)); + let AsmString = !strconcat(opc, "${p}", asm); + let Pattern = pattern; + list Predicates = [IsARM]; +} + +// A few are not predicable +class InoP pattern> + : InstARM { + let OutOperandList = oops; + let InOperandList = iops; + let AsmString = !strconcat(opc, asm); + let Pattern = pattern; + let isPredicable = 0; + list Predicates = [IsARM]; +} + +// Same as I except it can optionally modify CPSR. Note it's modeled as an input +// operand since by default it's a zero register. It will become an implicit def +// once it's "flipped". 
+class sI pattern> + : InstARM { + bits<4> p; // Predicate operand + bits<1> s; // condition-code set flag ('1' if the insn should set the flags) + let Inst{31-28} = p; + let Inst{20} = s; + + let OutOperandList = oops; + let InOperandList = !con(iops, (ins pred:$p, cc_out:$s)); + let AsmString = !strconcat(opc, "${s}${p}", asm); + let Pattern = pattern; + list Predicates = [IsARM]; +} + +// Special cases +class XI pattern> + : InstARM { + let OutOperandList = oops; + let InOperandList = iops; + let AsmString = asm; + let Pattern = pattern; + list Predicates = [IsARM]; +} + +class AI pattern> + : I; +class AsI pattern> + : sI; +class AXI pattern> + : XI; +class AXIM pattern> + : XI; +class AInoP pattern> + : InoP; + +// Ctrl flow instructions +class ABI opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : I { + let Inst{27-24} = opcod; +} +class ABXI opcod, dag oops, dag iops, InstrItinClass itin, + string asm, list pattern> + : XI { + let Inst{27-24} = opcod; +} + +// BR_JT instructions +class JTI pattern> + : XI; + +class AIldr_ex_or_acq opcod, bits<2> opcod2, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : I { + bits<4> Rt; + bits<4> addr; + let Inst{27-23} = 0b00011; + let Inst{22-21} = opcod; + let Inst{20} = 1; + let Inst{19-16} = addr; + let Inst{15-12} = Rt; + let Inst{11-10} = 0b11; + let Inst{9-8} = opcod2; + let Inst{7-0} = 0b10011111; +} +class AIstr_ex_or_rel opcod, bits<2> opcod2, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : I { + bits<4> Rt; + bits<4> addr; + let Inst{27-23} = 0b00011; + let Inst{22-21} = opcod; + let Inst{20} = 0; + let Inst{19-16} = addr; + let Inst{11-10} = 0b11; + let Inst{9-8} = opcod2; + let Inst{7-4} = 0b1001; + let Inst{3-0} = Rt; +} +// Atomic load/store instructions +class AIldrex opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : AIldr_ex_or_acq; + +class AIstrex opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : AIstr_ex_or_rel { + bits<4> Rd; + let Inst{15-12} = Rd; +} + +// Exclusive load/store instructions + +class AIldaex opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : AIldr_ex_or_acq, + Requires<[IsARM, HasAcquireRelease, HasV7Clrex]>; + +class AIstlex opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : AIstr_ex_or_rel, + Requires<[IsARM, HasAcquireRelease, HasV7Clrex]> { + bits<4> Rd; + let Inst{15-12} = Rd; +} + +class AIswp pattern> + : AI { + bits<4> Rt; + bits<4> Rt2; + bits<4> addr; + let Inst{27-23} = 0b00010; + let Inst{22} = b; + let Inst{21-20} = 0b00; + let Inst{19-16} = addr; + let Inst{15-12} = Rt; + let Inst{11-4} = 0b00001001; + let Inst{3-0} = Rt2; + + let Unpredictable{11-8} = 0b1111; + let DecoderMethod = "DecodeSwap"; +} +// Acquire/Release load/store instructions +class AIldracq opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : AIldr_ex_or_acq, + Requires<[IsARM, HasAcquireRelease]>; + +class AIstrrel opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : AIstr_ex_or_rel, + Requires<[IsARM, HasAcquireRelease]> { + let Inst{15-12} = 0b1111; +} + +// addrmode1 instructions +class AI1 opcod, dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string asm, list pattern> + : I { + let Inst{24-21} = opcod; + let Inst{27-26} = 0b00; +} +class AsI1 opcod, dag 
oops, dag iops, Format f, InstrItinClass itin, + string opc, string asm, list pattern> + : sI { + let Inst{24-21} = opcod; + let Inst{27-26} = 0b00; +} +class AXI1 opcod, dag oops, dag iops, Format f, InstrItinClass itin, + string asm, list pattern> + : XI { + let Inst{24-21} = opcod; + let Inst{27-26} = 0b00; +} + +// loads + +// LDR/LDRB/STR/STRB/... +class AI2ldst op, bit isLd, bit isByte, dag oops, dag iops, AddrMode am, + Format f, InstrItinClass itin, string opc, string asm, + list pattern> + : I { + let Inst{27-25} = op; + let Inst{24} = 1; // 24 == P + // 23 == U + let Inst{22} = isByte; + let Inst{21} = 0; // 21 == W + let Inst{20} = isLd; +} +// Indexed load/stores +class AI2ldstidx pattern> + : I { + bits<4> Rt; + let Inst{27-26} = 0b01; + let Inst{24} = isPre; // P bit + let Inst{22} = isByte; // B bit + let Inst{21} = isPre; // W bit + let Inst{20} = isLd; // L bit + let Inst{15-12} = Rt; +} +class AI2stridx_reg pattern> + : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr, + pattern> { + // AM2 store w/ two operands: (GPR, am2offset) + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> Rn; + let Inst{25} = 1; + let Inst{23} = offset{12}; + let Inst{19-16} = Rn; + let Inst{11-5} = offset{11-5}; + let Inst{4} = 0; + let Inst{3-0} = offset{3-0}; +} + +class AI2stridx_imm pattern> + : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr, + pattern> { + // AM2 store w/ two operands: (GPR, am2offset) + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> Rn; + let Inst{25} = 0; + let Inst{23} = offset{12}; + let Inst{19-16} = Rn; + let Inst{11-0} = offset{11-0}; +} + + +// FIXME: Merge with the above class when addrmode2 gets used for STR, STRB +// but for now use this class for STRT and STRBT. +class AI2stridxT pattern> + : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr, + pattern> { + // AM2 store w/ two operands: (GPR, am2offset) + // {17-14} Rn + // {13} 1 == Rm, 0 == imm12 + // {12} isAdd + // {11-0} imm12/Rm + bits<18> addr; + let Inst{25} = addr{13}; + let Inst{23} = addr{12}; + let Inst{19-16} = addr{17-14}; + let Inst{11-0} = addr{11-0}; +} + +// addrmode3 instructions +class AI3ld op, bit op20, dag oops, dag iops, Format f, + InstrItinClass itin, string opc, string asm, list pattern> + : I { + bits<14> addr; + bits<4> Rt; + let Inst{27-25} = 0b000; + let Inst{24} = 1; // P bit + let Inst{23} = addr{8}; // U bit + let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm + let Inst{21} = 0; // W bit + let Inst{20} = op20; // L bit + let Inst{19-16} = addr{12-9}; // Rn + let Inst{15-12} = Rt; // Rt + let Inst{11-8} = addr{7-4}; // imm7_4/zero + let Inst{7-4} = op; + let Inst{3-0} = addr{3-0}; // imm3_0/Rm + + let DecoderMethod = "DecodeAddrMode3Instruction"; +} + +class AI3ldstidx op, bit op20, bit isPre, dag oops, dag iops, + IndexMode im, Format f, InstrItinClass itin, string opc, + string asm, string cstr, list pattern> + : I { + bits<4> Rt; + let Inst{27-25} = 0b000; + let Inst{24} = isPre; // P bit + let Inst{21} = isPre; // W bit + let Inst{20} = op20; // L bit + let Inst{15-12} = Rt; // Rt + let Inst{7-4} = op; +} + +// FIXME: Merge with the above class when addrmode2 gets used for LDR, LDRB +// but for now use this class for LDRSBT, LDRHT, LDSHT. 
+class AI3ldstidxT op, bit isLoad, dag oops, dag iops, + IndexMode im, Format f, InstrItinClass itin, string opc, + string asm, string cstr, list pattern> + : I { + // {13} 1 == imm8, 0 == Rm + // {12-9} Rn + // {8} isAdd + // {7-4} imm7_4/zero + // {3-0} imm3_0/Rm + bits<4> addr; + bits<4> Rt; + let Inst{27-25} = 0b000; + let Inst{24} = 0; // P bit + let Inst{21} = 1; + let Inst{20} = isLoad; // L bit + let Inst{19-16} = addr; // Rn + let Inst{15-12} = Rt; // Rt + let Inst{7-4} = op; +} + +// stores +class AI3str op, dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string asm, list pattern> + : I { + bits<14> addr; + bits<4> Rt; + let Inst{27-25} = 0b000; + let Inst{24} = 1; // P bit + let Inst{23} = addr{8}; // U bit + let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm + let Inst{21} = 0; // W bit + let Inst{20} = 0; // L bit + let Inst{19-16} = addr{12-9}; // Rn + let Inst{15-12} = Rt; // Rt + let Inst{11-8} = addr{7-4}; // imm7_4/zero + let Inst{7-4} = op; + let Inst{3-0} = addr{3-0}; // imm3_0/Rm + let DecoderMethod = "DecodeAddrMode3Instruction"; +} + +// addrmode4 instructions +class AXI4 pattern> + : XI { + bits<4> p; + bits<16> regs; + bits<4> Rn; + let Inst{31-28} = p; + let Inst{27-25} = 0b100; + let Inst{22} = 0; // S bit + let Inst{19-16} = Rn; + let Inst{15-0} = regs; +} + +// Unsigned multiply, multiply-accumulate instructions. +class AMul1I opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : I { + let Inst{7-4} = 0b1001; + let Inst{20} = 0; // S bit + let Inst{27-21} = opcod; +} +class AsMul1I opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : sI { + let Inst{7-4} = 0b1001; + let Inst{27-21} = opcod; +} + +// Most significant word multiply +class AMul2I opcod, bits<4> opc7_4, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list pattern> + : I { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + let Inst{7-4} = opc7_4; + let Inst{20} = 1; + let Inst{27-21} = opcod; + let Inst{19-16} = Rd; + let Inst{11-8} = Rm; + let Inst{3-0} = Rn; +} +// MSW multiple w/ Ra operand +class AMul2Ia opcod, bits<4> opc7_4, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list pattern> + : AMul2I { + bits<4> Ra; + let Inst{15-12} = Ra; +} + +// SMUL / SMULW / SMLA / SMLAW +class AMulxyIbase opcod, bits<2> bit6_5, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list pattern> + : I { + bits<4> Rn; + bits<4> Rm; + let Inst{4} = 0; + let Inst{7} = 1; + let Inst{20} = 0; + let Inst{27-21} = opcod; + let Inst{6-5} = bit6_5; + let Inst{11-8} = Rm; + let Inst{3-0} = Rn; +} +class AMulxyI opcod, bits<2> bit6_5, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list pattern> + : AMulxyIbase { + bits<4> Rd; + let Inst{19-16} = Rd; +} + +// AMulxyI with Ra operand +class AMulxyIa opcod, bits<2> bit6_5, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list pattern> + : AMulxyI { + bits<4> Ra; + let Inst{15-12} = Ra; +} +// SMLAL* +class AMulxyI64 opcod, bits<2> bit6_5, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list pattern> + : AMulxyIbase { + bits<4> RdLo; + bits<4> RdHi; + let Inst{19-16} = RdHi; + let Inst{15-12} = RdLo; +} + +// Extend instructions. +class AExtI opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : I { + // All AExtI instructions have Rd and Rm register operands. 
+ bits<4> Rd; + bits<4> Rm; + let Inst{15-12} = Rd; + let Inst{3-0} = Rm; + let Inst{7-4} = 0b0111; + let Inst{9-8} = 0b00; + let Inst{27-20} = opcod; + + let Unpredictable{9-8} = 0b11; +} + +// Misc Arithmetic instructions. +class AMiscA1I opcod, bits<4> opc7_4, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list pattern> + : I { + bits<4> Rd; + bits<4> Rm; + let Inst{27-20} = opcod; + let Inst{19-16} = 0b1111; + let Inst{15-12} = Rd; + let Inst{11-8} = 0b1111; + let Inst{7-4} = opc7_4; + let Inst{3-0} = Rm; +} + +// Division instructions. +class ADivA1I opcod, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list pattern> + : I { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + let Inst{27-23} = 0b01110; + let Inst{22-20} = opcod; + let Inst{19-16} = Rd; + let Inst{15-12} = 0b1111; + let Inst{11-8} = Rm; + let Inst{7-4} = 0b0001; + let Inst{3-0} = Rn; +} + +// PKH instructions +def PKHLSLAsmOperand : ImmAsmOperand<0,31> { + let Name = "PKHLSLImm"; + let ParserMethod = "parsePKHLSLImm"; +} +def pkh_lsl_amt: Operand, ImmLeaf= 0 && Imm < 32; }]>{ + let PrintMethod = "printPKHLSLShiftImm"; + let ParserMatchClass = PKHLSLAsmOperand; +} +def PKHASRAsmOperand : AsmOperandClass { + let Name = "PKHASRImm"; + let ParserMethod = "parsePKHASRImm"; +} +def pkh_asr_amt: Operand, ImmLeaf 0 && Imm <= 32; }]>{ + let PrintMethod = "printPKHASRShiftImm"; + let ParserMatchClass = PKHASRAsmOperand; +} + +class APKHI opcod, bit tb, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : I { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + bits<5> sh; + let Inst{27-20} = opcod; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-7} = sh; + let Inst{6} = tb; + let Inst{5-4} = 0b01; + let Inst{3-0} = Rm; +} + +//===----------------------------------------------------------------------===// + +// ARMPat - Same as Pat<>, but requires that the compiler be in ARM mode. +class ARMPat : Pat { + list Predicates = [IsARM]; +} +class ARMV5TPat : Pat { + list Predicates = [IsARM, HasV5T]; +} +class ARMV5TEPat : Pat { + list Predicates = [IsARM, HasV5TE]; +} +// ARMV5MOPat - Same as ARMV5TEPat with UseMulOps. +class ARMV5MOPat : Pat { + list Predicates = [IsARM, HasV5TE, UseMulOps]; +} +class ARMV6Pat : Pat { + list Predicates = [IsARM, HasV6]; +} +class VFPPat : Pat { + list Predicates = [HasVFP2]; +} +class VFPNoNEONPat : Pat { + list Predicates = [HasVFP2, DontUseNEONForFP]; +} +class Thumb2DSPPat : Pat { + list Predicates = [IsThumb2, HasDSP]; +} +class Thumb2DSPMulPat : Pat { + list Predicates = [IsThumb2, UseMulOps, HasDSP]; +} +class FP16Pat : Pat { + list Predicates = [HasFP16]; +} +class FullFP16Pat : Pat { + list Predicates = [HasFullFP16]; +} +//===----------------------------------------------------------------------===// +// Thumb Instruction Format Definitions. +// + +class ThumbI pattern> + : InstThumb { + let OutOperandList = oops; + let InOperandList = iops; + let AsmString = asm; + let Pattern = pattern; + list Predicates = [IsThumb]; +} + +// TI - Thumb instruction. 
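+// As a sketch of how these wrappers are meant to be combined (the mnemonic
+// and opcode below are hypothetical, not a real instruction definition):
+//   def tFOO : T1pI<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iALUr,
+//                   "foo", "\t$Rd, $Rm", []>,
+//              T1DataProcessing<0b1111>;
+// i.e. one class supplies operands/asm/predicates, and a second mixin class
+// supplies the fixed opcode bits of the 16-bit encoding.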
+class TI pattern> + : ThumbI; + +// Two-address instructions +class TIt pattern> + : ThumbI; + +// tBL, tBX 32-bit instructions +class TIx2 opcod1, bits<2> opcod2, bit opcod3, + dag oops, dag iops, InstrItinClass itin, string asm, + list pattern> + : ThumbI, + Encoding { + let Inst{31-27} = opcod1; + let Inst{15-14} = opcod2; + let Inst{12} = opcod3; +} + +// BR_JT instructions +class TJTI pattern> + : ThumbI; + +// Thumb1 only +class Thumb1I pattern> + : InstThumb { + let OutOperandList = oops; + let InOperandList = iops; + let AsmString = asm; + let Pattern = pattern; + list Predicates = [IsThumb, IsThumb1Only]; +} + +class T1I pattern> + : Thumb1I; +class T1Ix2 pattern> + : Thumb1I; + +// Two-address instructions +class T1It pattern> + : Thumb1I; + +// Thumb1 instruction that can either be predicated or set CPSR. +class Thumb1sI pattern> + : InstThumb { + let OutOperandList = !con(oops, (outs s_cc_out:$s)); + let InOperandList = !con(iops, (ins pred:$p)); + let AsmString = !strconcat(opc, "${s}${p}", asm); + let Pattern = pattern; + let thumbArithFlagSetting = 1; + list Predicates = [IsThumb, IsThumb1Only]; + let DecoderNamespace = "ThumbSBit"; +} + +class T1sI pattern> + : Thumb1sI; + +// Two-address instructions +class T1sIt pattern> + : Thumb1sI; + +// Thumb1 instruction that can be predicated. +class Thumb1pI pattern> + : InstThumb { + let OutOperandList = oops; + let InOperandList = !con(iops, (ins pred:$p)); + let AsmString = !strconcat(opc, "${p}", asm); + let Pattern = pattern; + list Predicates = [IsThumb, IsThumb1Only]; +} + +class T1pI pattern> + : Thumb1pI; + +// Two-address instructions +class T1pIt pattern> + : Thumb1pI; + +class T1pIs pattern> + : Thumb1pI; + +class Encoding16 : Encoding { + let Inst{31-16} = 0x0000; +} + +// A6.2 16-bit Thumb instruction encoding +class T1Encoding opcode> : Encoding16 { + let Inst{15-10} = opcode; +} + +// A6.2.1 Shift (immediate), add, subtract, move, and compare encoding. +class T1General opcode> : Encoding16 { + let Inst{15-14} = 0b00; + let Inst{13-9} = opcode; +} + +// A6.2.2 Data-processing encoding. +class T1DataProcessing opcode> : Encoding16 { + let Inst{15-10} = 0b010000; + let Inst{9-6} = opcode; +} + +// A6.2.3 Special data instructions and branch and exchange encoding. +class T1Special opcode> : Encoding16 { + let Inst{15-10} = 0b010001; + let Inst{9-6} = opcode; +} + +// A6.2.4 Load/store single data item encoding. +class T1LoadStore opA, bits<3> opB> : Encoding16 { + let Inst{15-12} = opA; + let Inst{11-9} = opB; +} +class T1LdStSP opB> : T1LoadStore<0b1001, opB>; // SP relative + +class T1BranchCond opcode> : Encoding16 { + let Inst{15-12} = opcode; +} + +// Helper classes to encode Thumb1 loads and stores. For immediates, the +// following bits are used for "opA" (see A6.2.4): +// +// 0b0110 => Immediate, 4 bytes +// 0b1000 => Immediate, 2 bytes +// 0b0111 => Immediate, 1 byte +class T1pILdStEncode opcode, dag oops, dag iops, AddrMode am, + InstrItinClass itin, string opc, string asm, + list pattern> + : Thumb1pI, + T1LoadStore<0b0101, opcode> { + bits<3> Rt; + bits<8> addr; + let Inst{8-6} = addr{5-3}; // Rm + let Inst{5-3} = addr{2-0}; // Rn + let Inst{2-0} = Rt; +} +class T1pILdStEncodeImm opA, bit opB, dag oops, dag iops, AddrMode am, + InstrItinClass itin, string opc, string asm, + list pattern> + : Thumb1pI, + T1LoadStore { + bits<3> Rt; + bits<8> addr; + let Inst{10-6} = addr{7-3}; // imm5 + let Inst{5-3} = addr{2-0}; // Rn + let Inst{2-0} = Rt; +} + +// A6.2.5 Miscellaneous 16-bit instructions encoding. 
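+// (This group covers things like ADD/SUB SP, SXTB/UXTB, CBZ/CBNZ, PUSH/POP,
+// REV and the IT/hint instructions, all of which share the 0b1011 prefix in
+// bits 15-12 set by T1Misc below.)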
+class T1Misc opcode> : Encoding16 { + let Inst{15-12} = 0b1011; + let Inst{11-5} = opcode; +} + +// Thumb2I - Thumb2 instruction. Almost all Thumb2 instructions are predicable. +class Thumb2I pattern> + : InstARM { + let OutOperandList = oops; + let InOperandList = !con(iops, (ins pred:$p)); + let AsmString = !strconcat(opc, "${p}", asm); + let Pattern = pattern; + list Predicates = [IsThumb2]; + let DecoderNamespace = "Thumb2"; +} + +// Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as an +// input operand since by default it's a zero register. It will become an +// implicit def once it's "flipped". +// +// FIXME: This uses unified syntax so {s} comes before {p}. We should make it +// more consistent. +class Thumb2sI pattern> + : InstARM { + bits<1> s; // condition-code set flag ('1' if the insn should set the flags) + let Inst{20} = s; + + let OutOperandList = oops; + let InOperandList = !con(iops, (ins pred:$p, cc_out:$s)); + let AsmString = !strconcat(opc, "${s}${p}", asm); + let Pattern = pattern; + list Predicates = [IsThumb2]; + let DecoderNamespace = "Thumb2"; +} + +// Special cases +class Thumb2XI pattern> + : InstARM { + let OutOperandList = oops; + let InOperandList = iops; + let AsmString = asm; + let Pattern = pattern; + list Predicates = [IsThumb2]; + let DecoderNamespace = "Thumb2"; +} + +class ThumbXI pattern> + : InstARM { + let OutOperandList = oops; + let InOperandList = iops; + let AsmString = asm; + let Pattern = pattern; + list Predicates = [IsThumb, IsThumb1Only]; + let DecoderNamespace = "Thumb"; +} + +class T2I pattern> + : Thumb2I; +class T2Ii12 pattern> + : Thumb2I; +class T2Ii8 pattern> + : Thumb2I; +class T2Iso pattern> + : Thumb2I; +class T2Ipc pattern> + : Thumb2I; +class T2Ii8s4 pattern> + : Thumb2I { + bits<4> Rt; + bits<4> Rt2; + bits<13> addr; + let Inst{31-25} = 0b1110100; + let Inst{24} = P; + let Inst{23} = addr{8}; + let Inst{22} = 1; + let Inst{21} = W; + let Inst{20} = isLoad; + let Inst{19-16} = addr{12-9}; + let Inst{15-12} = Rt{3-0}; + let Inst{11-8} = Rt2{3-0}; + let Inst{7-0} = addr{7-0}; +} +class T2Ii8s4post pattern> + : Thumb2I { + bits<4> Rt; + bits<4> Rt2; + bits<4> addr; + bits<9> imm; + let Inst{31-25} = 0b1110100; + let Inst{24} = P; + let Inst{23} = imm{8}; + let Inst{22} = 1; + let Inst{21} = W; + let Inst{20} = isLoad; + let Inst{19-16} = addr; + let Inst{15-12} = Rt{3-0}; + let Inst{11-8} = Rt2{3-0}; + let Inst{7-0} = imm{7-0}; +} + +class T2sI pattern> + : Thumb2sI; + +class T2XI pattern> + : Thumb2XI; +class T2JTI pattern> + : Thumb2XI; + +// Move to/from coprocessor instructions +class T2Cop opc, dag oops, dag iops, string opcstr, string asm, + list pattern> + : T2I , Requires<[IsThumb2]> { + let Inst{31-28} = opc; +} + +// Two-address instructions +class T2XIt pattern> + : Thumb2XI; + +// T2Ipreldst - Thumb2 pre-indexed load / store instructions. 
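+// For reference, the two writeback addressing forms these classes model,
+// written in assembly (registers chosen arbitrarily):
+//   ldr r0, [r1, #4]!   @ pre-indexed:  r1 is updated first, then used
+//   ldr r0, [r1], #4    @ post-indexed: r1 is used first, then updated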
+class T2Ipreldst opcod, bit load, bit pre, + dag oops, dag iops, + AddrMode am, IndexMode im, InstrItinClass itin, + string opc, string asm, string cstr, list pattern> + : InstARM { + let OutOperandList = oops; + let InOperandList = !con(iops, (ins pred:$p)); + let AsmString = !strconcat(opc, "${p}", asm); + let Pattern = pattern; + list Predicates = [IsThumb2]; + let DecoderNamespace = "Thumb2"; + + bits<4> Rt; + bits<13> addr; + let Inst{31-27} = 0b11111; + let Inst{26-25} = 0b00; + let Inst{24} = signed; + let Inst{23} = 0; + let Inst{22-21} = opcod; + let Inst{20} = load; + let Inst{19-16} = addr{12-9}; + let Inst{15-12} = Rt{3-0}; + let Inst{11} = 1; + // (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed + let Inst{10} = pre; // The P bit. + let Inst{9} = addr{8}; // Sign bit + let Inst{8} = 1; // The W bit. + let Inst{7-0} = addr{7-0}; + + let DecoderMethod = "DecodeT2LdStPre"; +} + +// T2Ipostldst - Thumb2 post-indexed load / store instructions. +class T2Ipostldst opcod, bit load, bit pre, + dag oops, dag iops, + AddrMode am, IndexMode im, InstrItinClass itin, + string opc, string asm, string cstr, list pattern> + : InstARM { + let OutOperandList = oops; + let InOperandList = !con(iops, (ins pred:$p)); + let AsmString = !strconcat(opc, "${p}", asm); + let Pattern = pattern; + list Predicates = [IsThumb2]; + let DecoderNamespace = "Thumb2"; + + bits<4> Rt; + bits<4> Rn; + bits<9> offset; + let Inst{31-27} = 0b11111; + let Inst{26-25} = 0b00; + let Inst{24} = signed; + let Inst{23} = 0; + let Inst{22-21} = opcod; + let Inst{20} = load; + let Inst{19-16} = Rn; + let Inst{15-12} = Rt{3-0}; + let Inst{11} = 1; + // (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed + let Inst{10} = pre; // The P bit. + let Inst{9} = offset{8}; // Sign bit + let Inst{8} = 1; // The W bit. + let Inst{7-0} = offset{7-0}; + + let DecoderMethod = "DecodeT2LdStPre"; +} + +// T1Pat - Same as Pat<>, but requires that the compiler be in Thumb1 mode. +class T1Pat : Pat { + list Predicates = [IsThumb, IsThumb1Only]; +} + +// T2v6Pat - Same as Pat<>, but requires V6T2 Thumb2 mode. +class T2v6Pat : Pat { + list Predicates = [IsThumb2, HasV6T2]; +} + +// T2Pat - Same as Pat<>, but requires that the compiler be in Thumb2 mode. +class T2Pat : Pat { + list Predicates = [IsThumb2]; +} + +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ARM VFP Instruction templates. +// + +// Almost all VFP instructions are predicable. +class VFPI pattern> + : InstARM { + bits<4> p; + let Inst{31-28} = p; + let OutOperandList = oops; + let InOperandList = !con(iops, (ins pred:$p)); + let AsmString = !strconcat(opc, "${p}", asm); + let Pattern = pattern; + let PostEncoderMethod = "VFPThumb2PostEncoder"; + let DecoderNamespace = "VFP"; + list Predicates = [HasVFP2]; +} + +// Special cases +class VFPXI pattern> + : InstARM { + bits<4> p; + let Inst{31-28} = p; + let OutOperandList = oops; + let InOperandList = iops; + let AsmString = asm; + let Pattern = pattern; + let PostEncoderMethod = "VFPThumb2PostEncoder"; + let DecoderNamespace = "VFP"; + list Predicates = [HasVFP2]; +} + +class VFPAI pattern> + : VFPI { + let PostEncoderMethod = "VFPThumb2PostEncoder"; +} + +// ARM VFP addrmode5 loads and stores +class ADI5 opcod1, bits<2> opcod2, dag oops, dag iops, + InstrItinClass itin, + string opc, string asm, list pattern> + : VFPI { + // Instruction operands. 
+ bits<5> Dd; + bits<13> addr; + + // Encode instruction operands. + let Inst{23} = addr{8}; // U (add = (U == '1')) + let Inst{22} = Dd{4}; + let Inst{19-16} = addr{12-9}; // Rn + let Inst{15-12} = Dd{3-0}; + let Inst{7-0} = addr{7-0}; // imm8 + + let Inst{27-24} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-9} = 0b101; + let Inst{8} = 1; // Double precision + + // Loads & stores operate on both NEON and VFP pipelines. + let D = VFPNeonDomain; +} + +class ASI5 opcod1, bits<2> opcod2, dag oops, dag iops, + InstrItinClass itin, + string opc, string asm, list pattern> + : VFPI { + // Instruction operands. + bits<5> Sd; + bits<13> addr; + + // Encode instruction operands. + let Inst{23} = addr{8}; // U (add = (U == '1')) + let Inst{22} = Sd{0}; + let Inst{19-16} = addr{12-9}; // Rn + let Inst{15-12} = Sd{4-1}; + let Inst{7-0} = addr{7-0}; // imm8 + + let Inst{27-24} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-9} = 0b101; + let Inst{8} = 0; // Single precision + + // Loads & stores operate on both NEON and VFP pipelines. + let D = VFPNeonDomain; +} + +class AHI5 opcod1, bits<2> opcod2, dag oops, dag iops, + InstrItinClass itin, + string opc, string asm, list pattern> + : VFPI { + list Predicates = [HasFullFP16]; + + // Instruction operands. + bits<5> Sd; + bits<13> addr; + + // Encode instruction operands. + let Inst{23} = addr{8}; // U (add = (U == '1')) + let Inst{22} = Sd{0}; + let Inst{19-16} = addr{12-9}; // Rn + let Inst{15-12} = Sd{4-1}; + let Inst{7-0} = addr{7-0}; // imm8 + + let Inst{27-24} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-8} = 0b1001; // Half precision + + // Loads & stores operate on both NEON and VFP pipelines. + let D = VFPNeonDomain; +} + +// VFP Load / store multiple pseudo instructions. +class PseudoVFPLdStM pattern> + : InstARM { + let OutOperandList = oops; + let InOperandList = !con(iops, (ins pred:$p)); + let Pattern = pattern; + list Predicates = [HasVFP2]; +} + +// Load / store multiple + +// Unknown precision +class AXXI4 pattern> + : VFPXI { + // Instruction operands. + bits<4> Rn; + bits<13> regs; + + // Encode instruction operands. + let Inst{19-16} = Rn; + let Inst{22} = 0; + let Inst{15-12} = regs{11-8}; + let Inst{7-1} = regs{7-1}; + + let Inst{27-25} = 0b110; + let Inst{11-8} = 0b1011; + let Inst{0} = 1; +} + +// Double precision +class AXDI4 pattern> + : VFPXI { + // Instruction operands. + bits<4> Rn; + bits<13> regs; + + // Encode instruction operands. + let Inst{19-16} = Rn; + let Inst{22} = regs{12}; + let Inst{15-12} = regs{11-8}; + let Inst{7-1} = regs{7-1}; + + let Inst{27-25} = 0b110; + let Inst{11-9} = 0b101; + let Inst{8} = 1; // Double precision + let Inst{0} = 0; +} + +// Single Precision +class AXSI4 pattern> + : VFPXI { + // Instruction operands. + bits<4> Rn; + bits<13> regs; + + // Encode instruction operands. + let Inst{19-16} = Rn; + let Inst{22} = regs{8}; + let Inst{15-12} = regs{12-9}; + let Inst{7-0} = regs{7-0}; + + let Inst{27-25} = 0b110; + let Inst{11-9} = 0b101; + let Inst{8} = 0; // Single precision +} + +// Double precision, unary +class ADuI opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, + bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, + string asm, list pattern> + : VFPAI { + // Instruction operands. + bits<5> Dd; + bits<5> Dm; + + // Encode instruction operands. 
+ let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{15-12} = Dd{3-0}; + let Inst{22} = Dd{4}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{19-16} = opcod3; + let Inst{11-9} = 0b101; + let Inst{8} = 1; // Double precision + let Inst{7-6} = opcod4; + let Inst{4} = opcod5; + + let Predicates = [HasVFP2, HasDPVFP]; +} + +// Double precision, unary, not-predicated +class ADuInp opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, + bit opcod5, dag oops, dag iops, InstrItinClass itin, + string asm, list pattern> + : VFPXI { + // Instruction operands. + bits<5> Dd; + bits<5> Dm; + + let Inst{31-28} = 0b1111; + + // Encode instruction operands. + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{15-12} = Dd{3-0}; + let Inst{22} = Dd{4}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{19-16} = opcod3; + let Inst{11-9} = 0b101; + let Inst{8} = 1; // Double precision + let Inst{7-6} = opcod4; + let Inst{4} = opcod5; +} + +// Double precision, binary +class ADbI opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, + dag iops, InstrItinClass itin, string opc, string asm, + list pattern> + : VFPAI { + // Instruction operands. + bits<5> Dd; + bits<5> Dn; + bits<5> Dm; + + // Encode instruction operands. + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{19-16} = Dn{3-0}; + let Inst{7} = Dn{4}; + let Inst{15-12} = Dd{3-0}; + let Inst{22} = Dd{4}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-9} = 0b101; + let Inst{8} = 1; // Double precision + let Inst{6} = op6; + let Inst{4} = op4; + + let Predicates = [HasVFP2, HasDPVFP]; +} + +// FP, binary, not predicated +class ADbInp opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops, + InstrItinClass itin, string asm, list pattern> + : VFPXI +{ + // Instruction operands. + bits<5> Dd; + bits<5> Dn; + bits<5> Dm; + + let Inst{31-28} = 0b1111; + + // Encode instruction operands. + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{19-16} = Dn{3-0}; + let Inst{7} = Dn{4}; + let Inst{15-12} = Dd{3-0}; + let Inst{22} = Dd{4}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-9} = 0b101; + let Inst{8} = 1; // double precision + let Inst{6} = opcod3; + let Inst{4} = 0; + + let Predicates = [HasVFP2, HasDPVFP]; +} + +// Single precision, unary, predicated +class ASuI opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, + bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, + string asm, list pattern> + : VFPAI { + // Instruction operands. + bits<5> Sd; + bits<5> Sm; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{19-16} = opcod3; + let Inst{11-9} = 0b101; + let Inst{8} = 0; // Single precision + let Inst{7-6} = opcod4; + let Inst{4} = opcod5; +} + +// Single precision, unary, non-predicated +class ASuInp opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, + bit opcod5, dag oops, dag iops, InstrItinClass itin, + string asm, list pattern> + : VFPXI { + // Instruction operands. + bits<5> Sd; + bits<5> Sm; + + let Inst{31-28} = 0b1111; + + // Encode instruction operands. 
+ let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{19-16} = opcod3; + let Inst{11-9} = 0b101; + let Inst{8} = 0; // Single precision + let Inst{7-6} = opcod4; + let Inst{4} = opcod5; +} + +// Single precision unary, if no NEON. Same as ASuI except not available if +// NEON is enabled. +class ASuIn opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, + bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, + string asm, list pattern> + : ASuI { + list Predicates = [HasVFP2,DontUseNEONForFP]; +} + +// Single precision, binary +class ASbI opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list pattern> + : VFPAI { + // Instruction operands. + bits<5> Sd; + bits<5> Sn; + bits<5> Sm; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{19-16} = Sn{4-1}; + let Inst{7} = Sn{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-9} = 0b101; + let Inst{8} = 0; // Single precision + let Inst{6} = op6; + let Inst{4} = op4; +} + +// Single precision, binary, not predicated +class ASbInp opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops, + InstrItinClass itin, string asm, list pattern> + : VFPXI +{ + // Instruction operands. + bits<5> Sd; + bits<5> Sn; + bits<5> Sm; + + let Inst{31-28} = 0b1111; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{19-16} = Sn{4-1}; + let Inst{7} = Sn{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-9} = 0b101; + let Inst{8} = 0; // Single precision + let Inst{6} = opcod3; + let Inst{4} = 0; +} + +// Single precision binary, if no NEON. Same as ASbI except not available if +// NEON is enabled. +class ASbIn opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, + dag iops, InstrItinClass itin, string opc, string asm, + list pattern> + : ASbI { + list Predicates = [HasVFP2,DontUseNEONForFP]; + + // Instruction operands. + bits<5> Sd; + bits<5> Sn; + bits<5> Sm; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{19-16} = Sn{4-1}; + let Inst{7} = Sn{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; +} + +// Half precision, unary, predicated +class AHuI opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, + bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, + string asm, list pattern> + : VFPAI { + list Predicates = [HasFullFP16]; + + // Instruction operands. + bits<5> Sd; + bits<5> Sm; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{19-16} = opcod3; + let Inst{11-8} = 0b1001; // Half precision + let Inst{7-6} = opcod4; + let Inst{4} = opcod5; +} + +// Half precision, unary, non-predicated +class AHuInp opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, + bit opcod5, dag oops, dag iops, InstrItinClass itin, + string asm, list pattern> + : VFPXI { + list Predicates = [HasFullFP16]; + + // Instruction operands. + bits<5> Sd; + bits<5> Sm; + + let Inst{31-28} = 0b1111; + + // Encode instruction operands. 
+ let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{19-16} = opcod3; + let Inst{11-8} = 0b1001; // Half precision + let Inst{7-6} = opcod4; + let Inst{4} = opcod5; +} + +// Half precision, binary +class AHbI opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list pattern> + : VFPAI { + list Predicates = [HasFullFP16]; + + // Instruction operands. + bits<5> Sd; + bits<5> Sn; + bits<5> Sm; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{19-16} = Sn{4-1}; + let Inst{7} = Sn{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-8} = 0b1001; // Half precision + let Inst{6} = op6; + let Inst{4} = op4; +} + +// Half precision, binary, not predicated +class AHbInp opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops, + InstrItinClass itin, string asm, list pattern> + : VFPXI { + list Predicates = [HasFullFP16]; + + // Instruction operands. + bits<5> Sd; + bits<5> Sn; + bits<5> Sm; + + let Inst{31-28} = 0b1111; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{19-16} = Sn{4-1}; + let Inst{7} = Sn{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-8} = 0b1001; // Half precision + let Inst{6} = opcod3; + let Inst{4} = 0; +} + +// VFP conversion instructions +class AVConv1I opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4, + dag oops, dag iops, InstrItinClass itin, string opc, string asm, + list pattern> + : VFPAI { + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{19-16} = opcod3; + let Inst{11-8} = opcod4; + let Inst{6} = 1; + let Inst{4} = 0; +} + +// VFP conversion between floating-point and fixed-point +class AVConv1XI op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, + dag oops, dag iops, InstrItinClass itin, string opc, string asm, + list pattern> + : AVConv1I { + bits<5> fbits; + // size (fixed-point number): sx == 0 ? 16 : 32 + let Inst{7} = op5; // sx + let Inst{5} = fbits{0}; + let Inst{3-0} = fbits{4-1}; +} + +// VFP conversion instructions, if no NEON +class AVConv1In opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4, + dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : AVConv1I { + list Predicates = [HasVFP2,DontUseNEONForFP]; +} + +class AVConvXI opcod1, bits<4> opcod2, dag oops, dag iops, Format f, + InstrItinClass itin, + string opc, string asm, list pattern> + : VFPAI { + let Inst{27-20} = opcod1; + let Inst{11-8} = opcod2; + let Inst{4} = 1; +} + +class AVConv2I opcod1, bits<4> opcod2, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list pattern> + : AVConvXI; + +class AVConv3I opcod1, bits<4> opcod2, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list pattern> + : AVConvXI; + +class AVConv4I opcod1, bits<4> opcod2, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list pattern> + : AVConvXI; + +class AVConv5I opcod1, bits<4> opcod2, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list pattern> + : AVConvXI; + +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ARM NEON Instruction templates. 
+// + +class NeonI pattern> + : InstARM { + let OutOperandList = oops; + let InOperandList = !con(iops, (ins pred:$p)); + let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm); + let Pattern = pattern; + list Predicates = [HasNEON]; + let DecoderNamespace = "NEON"; +} + +// Same as NeonI except it does not have a "data type" specifier. +class NeonXI pattern> + : InstARM { + let OutOperandList = oops; + let InOperandList = !con(iops, (ins pred:$p)); + let AsmString = !strconcat(opc, "${p}", "\t", asm); + let Pattern = pattern; + list Predicates = [HasNEON]; + let DecoderNamespace = "NEON"; +} + +// Same as NeonI except it is not predicated +class NeonInp pattern> + : InstARM { + let OutOperandList = oops; + let InOperandList = iops; + let AsmString = !strconcat(opc, ".", dt, "\t", asm); + let Pattern = pattern; + list Predicates = [HasNEON]; + let DecoderNamespace = "NEON"; + + let Inst{31-28} = 0b1111; +} + +class NLdSt op21_20, bits<4> op11_8, bits<4> op7_4, + dag oops, dag iops, InstrItinClass itin, + string opc, string dt, string asm, string cstr, list pattern> + : NeonI { + let Inst{31-24} = 0b11110100; + let Inst{23} = op23; + let Inst{21-20} = op21_20; + let Inst{11-8} = op11_8; + let Inst{7-4} = op7_4; + + let PostEncoderMethod = "NEONThumb2LoadStorePostEncoder"; + let DecoderNamespace = "NEONLoadStore"; + + bits<5> Vd; + bits<6> Rn; + bits<4> Rm; + + let Inst{22} = Vd{4}; + let Inst{15-12} = Vd{3-0}; + let Inst{19-16} = Rn{3-0}; + let Inst{3-0} = Rm{3-0}; +} + +class NLdStLn op21_20, bits<4> op11_8, bits<4> op7_4, + dag oops, dag iops, InstrItinClass itin, + string opc, string dt, string asm, string cstr, list pattern> + : NLdSt { + bits<3> lane; +} + +class PseudoNLdSt + : InstARM { + let OutOperandList = oops; + let InOperandList = !con(iops, (ins pred:$p)); + list Predicates = [HasNEON]; +} + +class PseudoNeonI pattern> + : InstARM { + let OutOperandList = oops; + let InOperandList = !con(iops, (ins pred:$p)); + let Pattern = pattern; + list Predicates = [HasNEON]; +} + +class NDataI pattern> + : NeonI { + let Inst{31-25} = 0b1111001; + let PostEncoderMethod = "NEONThumb2DataIPostEncoder"; + let DecoderNamespace = "NEONData"; +} + +class NDataXI pattern> + : NeonXI { + let Inst{31-25} = 0b1111001; + let PostEncoderMethod = "NEONThumb2DataIPostEncoder"; + let DecoderNamespace = "NEONData"; +} + +// NEON "one register and a modified immediate" format. +class N1ModImm op21_19, bits<4> op11_8, bit op7, bit op6, + bit op5, bit op4, + dag oops, dag iops, InstrItinClass itin, + string opc, string dt, string asm, string cstr, + list pattern> + : NDataI { + let Inst{23} = op23; + let Inst{21-19} = op21_19; + let Inst{11-8} = op11_8; + let Inst{7} = op7; + let Inst{6} = op6; + let Inst{5} = op5; + let Inst{4} = op4; + + // Instruction operands. + bits<5> Vd; + bits<13> SIMM; + + let Inst{15-12} = Vd{3-0}; + let Inst{22} = Vd{4}; + let Inst{24} = SIMM{7}; + let Inst{18-16} = SIMM{6-4}; + let Inst{3-0} = SIMM{3-0}; + let DecoderMethod = "DecodeNEONModImmInstruction"; +} + +// NEON 2 vector register format. +class N2V op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, + bits<5> op11_7, bit op6, bit op4, + dag oops, dag iops, InstrItinClass itin, + string opc, string dt, string asm, string cstr, list pattern> + : NDataI { + let Inst{24-23} = op24_23; + let Inst{21-20} = op21_20; + let Inst{19-18} = op19_18; + let Inst{17-16} = op17_16; + let Inst{11-7} = op11_7; + let Inst{6} = op6; + let Inst{4} = op4; + + // Instruction operands. 
+ bits<5> Vd; + bits<5> Vm; + + let Inst{15-12} = Vd{3-0}; + let Inst{22} = Vd{4}; + let Inst{3-0} = Vm{3-0}; + let Inst{5} = Vm{4}; +} + +// Same as N2V but not predicated. +class N2Vnp op19_18, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6, + dag oops, dag iops, InstrItinClass itin, string OpcodeStr, + string Dt, list pattern> + : NeonInp { + bits<5> Vd; + bits<5> Vm; + + // Encode instruction operands + let Inst{22} = Vd{4}; + let Inst{15-12} = Vd{3-0}; + let Inst{5} = Vm{4}; + let Inst{3-0} = Vm{3-0}; + + // Encode constant bits + let Inst{27-23} = 0b00111; + let Inst{21-20} = 0b11; + let Inst{19-18} = op19_18; + let Inst{17-16} = op17_16; + let Inst{11} = 0; + let Inst{10-8} = op10_8; + let Inst{7} = op7; + let Inst{6} = op6; + let Inst{4} = 0; + + let DecoderNamespace = "NEON"; +} + +// Same as N2V except it doesn't have a datatype suffix. +class N2VX op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, + bits<5> op11_7, bit op6, bit op4, + dag oops, dag iops, InstrItinClass itin, + string opc, string asm, string cstr, list pattern> + : NDataXI { + let Inst{24-23} = op24_23; + let Inst{21-20} = op21_20; + let Inst{19-18} = op19_18; + let Inst{17-16} = op17_16; + let Inst{11-7} = op11_7; + let Inst{6} = op6; + let Inst{4} = op4; + + // Instruction operands. + bits<5> Vd; + bits<5> Vm; + + let Inst{15-12} = Vd{3-0}; + let Inst{22} = Vd{4}; + let Inst{3-0} = Vm{3-0}; + let Inst{5} = Vm{4}; +} + +// NEON 2 vector register with immediate. +class N2VImm op11_8, bit op7, bit op6, bit op4, + dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string dt, string asm, string cstr, list pattern> + : NDataI { + let Inst{24} = op24; + let Inst{23} = op23; + let Inst{11-8} = op11_8; + let Inst{7} = op7; + let Inst{6} = op6; + let Inst{4} = op4; + + // Instruction operands. + bits<5> Vd; + bits<5> Vm; + bits<6> SIMM; + + let Inst{15-12} = Vd{3-0}; + let Inst{22} = Vd{4}; + let Inst{3-0} = Vm{3-0}; + let Inst{5} = Vm{4}; + let Inst{21-16} = SIMM{5-0}; +} + +// NEON 3 vector register format. + +class N3VCommon op21_20, bits<4> op11_8, bit op6, + bit op4, dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string dt, string asm, string cstr, + list pattern> + : NDataI { + let Inst{24} = op24; + let Inst{23} = op23; + let Inst{21-20} = op21_20; + let Inst{11-8} = op11_8; + let Inst{6} = op6; + let Inst{4} = op4; +} + +class N3V op21_20, bits<4> op11_8, bit op6, bit op4, + dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string dt, string asm, string cstr, list pattern> + : N3VCommon { + // Instruction operands. 
+ bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + + let Inst{15-12} = Vd{3-0}; + let Inst{22} = Vd{4}; + let Inst{19-16} = Vn{3-0}; + let Inst{7} = Vn{4}; + let Inst{3-0} = Vm{3-0}; + let Inst{5} = Vm{4}; +} + +class N3Vnp op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, dag oops, dag iops,Format f, InstrItinClass itin, + string OpcodeStr, string Dt, list pattern> + : NeonInp { + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + + // Encode instruction operands + let Inst{22} = Vd{4}; + let Inst{15-12} = Vd{3-0}; + let Inst{19-16} = Vn{3-0}; + let Inst{7} = Vn{4}; + let Inst{5} = Vm{4}; + let Inst{3-0} = Vm{3-0}; + + // Encode constant bits + let Inst{27-23} = op27_23; + let Inst{21-20} = op21_20; + let Inst{11-8} = op11_8; + let Inst{6} = op6; + let Inst{4} = op4; +} + +class N3VLane32 op21_20, bits<4> op11_8, bit op6, + bit op4, dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string dt, string asm, string cstr, + list pattern> + : N3VCommon { + + // Instruction operands. + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + bit lane; + + let Inst{15-12} = Vd{3-0}; + let Inst{22} = Vd{4}; + let Inst{19-16} = Vn{3-0}; + let Inst{7} = Vn{4}; + let Inst{3-0} = Vm{3-0}; + let Inst{5} = lane; +} + +class N3VLane16 op21_20, bits<4> op11_8, bit op6, + bit op4, dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string dt, string asm, string cstr, + list pattern> + : N3VCommon { + + // Instruction operands. + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + bits<2> lane; + + let Inst{15-12} = Vd{3-0}; + let Inst{22} = Vd{4}; + let Inst{19-16} = Vn{3-0}; + let Inst{7} = Vn{4}; + let Inst{2-0} = Vm{2-0}; + let Inst{5} = lane{1}; + let Inst{3} = lane{0}; +} + +// Same as N3V except it doesn't have a data type suffix. +class N3VX op21_20, bits<4> op11_8, bit op6, + bit op4, + dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string asm, string cstr, list pattern> + : NDataXI { + let Inst{24} = op24; + let Inst{23} = op23; + let Inst{21-20} = op21_20; + let Inst{11-8} = op11_8; + let Inst{6} = op6; + let Inst{4} = op4; + + // Instruction operands. + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + + let Inst{15-12} = Vd{3-0}; + let Inst{22} = Vd{4}; + let Inst{19-16} = Vn{3-0}; + let Inst{7} = Vn{4}; + let Inst{3-0} = Vm{3-0}; + let Inst{5} = Vm{4}; +} + +// NEON VMOVs between scalar and core registers. 
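+// Typical assembly forms handled by these classes (illustrative):
+//   vmov.32  d0[1], r2     @ core register -> vector lane   (NVSetLane)
+//   vmov.s16 r3, d5[2]     @ vector lane   -> core register (NVGetLane)
+//   vdup.8   q1, r0        @ core register -> all lanes     (NVDup)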
+class NVLaneOp opcod1, bits<4> opcod2, bits<2> opcod3, + dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string dt, string asm, list pattern> + : InstARM { + let Inst{27-20} = opcod1; + let Inst{11-8} = opcod2; + let Inst{6-5} = opcod3; + let Inst{4} = 1; + // A8.6.303, A8.6.328, A8.6.329 + let Inst{3-0} = 0b0000; + + let OutOperandList = oops; + let InOperandList = !con(iops, (ins pred:$p)); + let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm); + let Pattern = pattern; + list Predicates = [HasNEON]; + + let PostEncoderMethod = "NEONThumb2DupPostEncoder"; + let DecoderNamespace = "NEONDup"; + + bits<5> V; + bits<4> R; + bits<4> p; + bits<4> lane; + + let Inst{31-28} = p{3-0}; + let Inst{7} = V{4}; + let Inst{19-16} = V{3-0}; + let Inst{15-12} = R{3-0}; +} +class NVGetLane opcod1, bits<4> opcod2, bits<2> opcod3, + dag oops, dag iops, InstrItinClass itin, + string opc, string dt, string asm, list pattern> + : NVLaneOp; +class NVSetLane opcod1, bits<4> opcod2, bits<2> opcod3, + dag oops, dag iops, InstrItinClass itin, + string opc, string dt, string asm, list pattern> + : NVLaneOp; +class NVDup opcod1, bits<4> opcod2, bits<2> opcod3, + dag oops, dag iops, InstrItinClass itin, + string opc, string dt, string asm, list pattern> + : NVLaneOp; + +// Vector Duplicate Lane (from scalar to all elements) +class NVDupLane op19_16, bit op6, dag oops, dag iops, + InstrItinClass itin, string opc, string dt, string asm, + list pattern> + : NDataI { + let Inst{24-23} = 0b11; + let Inst{21-20} = 0b11; + let Inst{19-16} = op19_16; + let Inst{11-7} = 0b11000; + let Inst{6} = op6; + let Inst{4} = 0; + + bits<5> Vd; + bits<5> Vm; + + let Inst{22} = Vd{4}; + let Inst{15-12} = Vd{3-0}; + let Inst{5} = Vm{4}; + let Inst{3-0} = Vm{3-0}; +} + +// NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON +// for single-precision FP. +class NEONFPPat : Pat { + list Predicates = [HasNEON,UseNEONForFP]; +} + +// VFP/NEON Instruction aliases for type suffices. +// Note: When EmitPriority == 1, the alias will be used for printing +class VFPDataTypeInstAlias : + InstAlias, Requires<[HasVFP2]>; + +// Note: When EmitPriority == 1, the alias will be used for printing +multiclass VFPDTAnyInstAlias { + def : VFPDataTypeInstAlias; + def : VFPDataTypeInstAlias; + def : VFPDataTypeInstAlias; + def : VFPDataTypeInstAlias; +} + +// Note: When EmitPriority == 1, the alias will be used for printing +multiclass NEONDTAnyInstAlias { + let Predicates = [HasNEON] in { + def : VFPDataTypeInstAlias; + def : VFPDataTypeInstAlias; + def : VFPDataTypeInstAlias; + def : VFPDataTypeInstAlias; +} +} + +// The same alias classes using AsmPseudo instead, for the more complex +// stuff in NEON that InstAlias can't quite handle. +// Note that we can't use anonymous defm references here like we can +// above, as we care about the ultimate instruction enum names generated, unlike +// for instalias defs. +class NEONDataTypeAsmPseudoInst : + AsmPseudoInst, Requires<[HasNEON]>; + +// Extension of NEON 3-vector data processing instructions in coprocessor 8 +// encoding space, introduced in ARMv8.3-A. 
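+// (These formats back the ARMv8.3-A complex-arithmetic instructions, VCMLA
+// and VCADD, which take an explicit rotation operand; see complexrotateop /
+// complexrotateopodd further down.)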
+class N3VCP8 op24_23, bits<2> op21_20, bit op6, bit op4, + dag oops, dag iops, InstrItinClass itin, + string opc, string dt, string asm, string cstr, list pattern> + : NeonInp { + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + + let DecoderNamespace = "VFPV8"; + // These have the same encodings in ARM and Thumb2 + let PostEncoderMethod = ""; + + let Inst{31-25} = 0b1111110; + let Inst{24-23} = op24_23; + let Inst{22} = Vd{4}; + let Inst{21-20} = op21_20; + let Inst{19-16} = Vn{3-0}; + let Inst{15-12} = Vd{3-0}; + let Inst{11-8} = 0b1000; + let Inst{7} = Vn{4}; + let Inst{6} = op6; + let Inst{5} = Vm{4}; + let Inst{4} = op4; + let Inst{3-0} = Vm{3-0}; +} + +// Extension of NEON 2-vector-and-scalar data processing instructions in +// coprocessor 8 encoding space, introduced in ARMv8.3-A. +class N3VLaneCP8 op21_20, bit op6, bit op4, + dag oops, dag iops, InstrItinClass itin, + string opc, string dt, string asm, string cstr, list pattern> + : NeonInp { + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + + let DecoderNamespace = "VFPV8"; + // These have the same encodings in ARM and Thumb2 + let PostEncoderMethod = ""; + + let Inst{31-24} = 0b11111110; + let Inst{23} = op23; + let Inst{22} = Vd{4}; + let Inst{21-20} = op21_20; + let Inst{19-16} = Vn{3-0}; + let Inst{15-12} = Vd{3-0}; + let Inst{11-8} = 0b1000; + let Inst{7} = Vn{4}; + let Inst{6} = op6; + // Bit 5 set by sub-classes + let Inst{4} = op4; + let Inst{3-0} = Vm{3-0}; +} + +// Operand types for complex instructions +class ComplexRotationOperand + : AsmOperandClass { + let PredicateMethod = "isComplexRotation<" # Angle # ", " # Remainder # ">"; + let DiagnosticString = "complex rotation must be " # Diag; + let Name = "ComplexRotation" # Type; +} +def complexrotateop : Operand { + let ParserMatchClass = ComplexRotationOperand<90, 0, "Even", "0, 90, 180 or 270">; + let PrintMethod = "printComplexRotationOp<90, 0>"; +} +def complexrotateopodd : Operand { + let ParserMatchClass = ComplexRotationOperand<180, 90, "Odd", "90 or 270">; + let PrintMethod = "printComplexRotationOp<180, 90>"; +} + +// Data type suffix token aliases. Implements Table A7-3 in the ARM ARM. +def : TokenAlias<".s8", ".i8">; +def : TokenAlias<".u8", ".i8">; +def : TokenAlias<".s16", ".i16">; +def : TokenAlias<".u16", ".i16">; +def : TokenAlias<".s32", ".i32">; +def : TokenAlias<".u32", ".i32">; +def : TokenAlias<".s64", ".i64">; +def : TokenAlias<".u64", ".i64">; + +def : TokenAlias<".i8", ".8">; +def : TokenAlias<".i16", ".16">; +def : TokenAlias<".i32", ".32">; +def : TokenAlias<".i64", ".64">; + +def : TokenAlias<".p8", ".8">; +def : TokenAlias<".p16", ".16">; + +def : TokenAlias<".f32", ".32">; +def : TokenAlias<".f64", ".64">; +def : TokenAlias<".f", ".f32">; +def : TokenAlias<".d", ".f64">; diff --git a/capstone/suite/synctools/tablegen/ARM/ARMInstrInfo.td b/capstone/suite/synctools/tablegen/ARM/ARMInstrInfo.td new file mode 100644 index 000000000..d4c342cee --- /dev/null +++ b/capstone/suite/synctools/tablegen/ARM/ARMInstrInfo.td @@ -0,0 +1,6167 @@ +//===- ARMInstrInfo.td - Target Description for ARM Target -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the ARM instructions in TableGen format. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ARM specific DAG Nodes. +// + +// Type profiles. +def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>, + SDTCisVT<1, i32> ]>; +def SDT_ARMCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; +def SDT_ARMStructByVal : SDTypeProfile<0, 4, + [SDTCisVT<0, i32>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; + +def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>; + +def SDT_ARMcall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; + +def SDT_ARMCMov : SDTypeProfile<1, 3, + [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisVT<3, i32>]>; + +def SDT_ARMBrcond : SDTypeProfile<0, 2, + [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>; + +def SDT_ARMBrJT : SDTypeProfile<0, 2, + [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>; + +def SDT_ARMBr2JT : SDTypeProfile<0, 3, + [SDTCisPtrTy<0>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>]>; + +def SDT_ARMBCC_i64 : SDTypeProfile<0, 6, + [SDTCisVT<0, i32>, + SDTCisVT<1, i32>, SDTCisVT<2, i32>, + SDTCisVT<3, i32>, SDTCisVT<4, i32>, + SDTCisVT<5, OtherVT>]>; + +def SDT_ARMAnd : SDTypeProfile<1, 2, + [SDTCisVT<0, i32>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>]>; + +def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; +def SDT_ARMFCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>]>; + +def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, + SDTCisPtrTy<1>, SDTCisVT<2, i32>]>; + +def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>; +def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>, + SDTCisInt<2>]>; +def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>; +def SDT_ARMEH_SJLJ_SetupDispatch: SDTypeProfile<0, 0, []>; + +def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>; + +def SDT_ARMPREFETCH : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisSameAs<1, 2>, + SDTCisInt<1>]>; + +def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; + +def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; + +def SDT_WIN__DBZCHK : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; + +def SDT_ARMMEMCPY : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>, + SDTCisVT<4, i32>]>; + +def SDTBinaryArithWithFlags : SDTypeProfile<2, 2, + [SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisInt<0>, SDTCisVT<1, i32>]>; + +// SDTBinaryArithWithFlagsInOut - RES1, CPSR = op LHS, RHS, CPSR +def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, + [SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisInt<0>, + SDTCisVT<1, i32>, + SDTCisVT<4, i32>]>; + +def SDT_LongMac : SDTypeProfile<2, 4, [SDTCisVT<0, i32>, + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisSameAs<0, 4>, + SDTCisSameAs<0, 5>]>; + +def ARMSmlald : SDNode<"ARMISD::SMLALD", SDT_LongMac>; +def ARMSmlaldx : SDNode<"ARMISD::SMLALDX", SDT_LongMac>; +def ARMSmlsld : SDNode<"ARMISD::SMLSLD", SDT_LongMac>; +def ARMSmlsldx : SDNode<"ARMISD::SMLSLDX", SDT_LongMac>; + +def SDT_MulHSR : SDTypeProfile<1, 3, [SDTCisVT<0,i32>, + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>]>; + +def ARMsmmlar : SDNode<"ARMISD::SMMLAR", SDT_MulHSR>; +def ARMsmmlsr : SDNode<"ARMISD::SMMLSR", SDT_MulHSR>; + +// Node definitions. 
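+// Each def below binds an ARMISD opcode to its type profile and DAG-node
+// properties so that instruction-selection patterns can refer to it, e.g.
+// (sketch only; "ARMfoo" / "ARMISD::FOO" are made-up names):
+//   def ARMfoo : SDNode<"ARMISD::FOO", SDTIntBinOp, [SDNPCommutative]>;
+// which a later Pat<> can then match as (ARMfoo GPR:$a, GPR:$b).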
+def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; +def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>; +def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntUnaryOp>; + +def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart, + [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>; +def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd, + [SDNPHasChain, SDNPSideEffect, + SDNPOptInGlue, SDNPOutGlue]>; +def ARMcopystructbyval : SDNode<"ARMISD::COPY_STRUCT_BYVAL" , + SDT_ARMStructByVal, + [SDNPHasChain, SDNPInGlue, SDNPOutGlue, + SDNPMayStore, SDNPMayLoad]>; + +def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; +def ARMcall_pred : SDNode<"ARMISD::CALL_PRED", SDT_ARMcall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; +def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; + +def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def ARMintretflag : SDNode<"ARMISD::INTRET_FLAG", SDT_ARMcall, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov, + [SDNPInGlue]>; + +def ARMssatnoshift : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>; + +def ARMusatnoshift : SDNode<"ARMISD::USAT", SDTIntSatNoShOp, []>; + +def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond, + [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; + +def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT, + [SDNPHasChain]>; +def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT, + [SDNPHasChain]>; + +def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64, + [SDNPHasChain]>; + +def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp, + [SDNPOutGlue]>; + +def ARMcmn : SDNode<"ARMISD::CMN", SDT_ARMCmp, + [SDNPOutGlue]>; + +def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp, + [SDNPOutGlue, SDNPCommutative]>; + +def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>; + +def ARMsrl_flag : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>; +def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>; +def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>; + +def ARMaddc : SDNode<"ARMISD::ADDC", SDTBinaryArithWithFlags, + [SDNPCommutative]>; +def ARMsubc : SDNode<"ARMISD::SUBC", SDTBinaryArithWithFlags>; +def ARMadde : SDNode<"ARMISD::ADDE", SDTBinaryArithWithFlagsInOut>; +def ARMsube : SDNode<"ARMISD::SUBE", SDTBinaryArithWithFlagsInOut>; + +def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>; +def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", + SDT_ARMEH_SJLJ_Setjmp, + [SDNPHasChain, SDNPSideEffect]>; +def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP", + SDT_ARMEH_SJLJ_Longjmp, + [SDNPHasChain, SDNPSideEffect]>; +def ARMeh_sjlj_setup_dispatch: SDNode<"ARMISD::EH_SJLJ_SETUP_DISPATCH", + SDT_ARMEH_SJLJ_SetupDispatch, + [SDNPHasChain, SDNPSideEffect]>; + +def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER, + [SDNPHasChain, SDNPSideEffect]>; +def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH, + [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>; + +def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>; + +def ARMmemcopy : SDNode<"ARMISD::MEMCPY", SDT_ARMMEMCPY, + [SDNPHasChain, SDNPInGlue, SDNPOutGlue, + SDNPMayStore, SDNPMayLoad]>; + +def ARMsmulwb : SDNode<"ARMISD::SMULWB", SDTIntBinOp, 
[]>; +def ARMsmulwt : SDNode<"ARMISD::SMULWT", SDTIntBinOp, []>; +def ARMsmlalbb : SDNode<"ARMISD::SMLALBB", SDT_LongMac, []>; +def ARMsmlalbt : SDNode<"ARMISD::SMLALBT", SDT_LongMac, []>; +def ARMsmlaltb : SDNode<"ARMISD::SMLALTB", SDT_LongMac, []>; +def ARMsmlaltt : SDNode<"ARMISD::SMLALTT", SDT_LongMac, []>; + +//===----------------------------------------------------------------------===// +// ARM Instruction Predicate Definitions. +// +def HasV4T : Predicate<"Subtarget->hasV4TOps()">, + AssemblerPredicate<"HasV4TOps", "armv4t">; +def NoV4T : Predicate<"!Subtarget->hasV4TOps()">; +def HasV5T : Predicate<"Subtarget->hasV5TOps()">, + AssemblerPredicate<"HasV5TOps", "armv5t">; +def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">, + AssemblerPredicate<"HasV5TEOps", "armv5te">; +def HasV6 : Predicate<"Subtarget->hasV6Ops()">, + AssemblerPredicate<"HasV6Ops", "armv6">; +def NoV6 : Predicate<"!Subtarget->hasV6Ops()">; +def HasV6M : Predicate<"Subtarget->hasV6MOps()">, + AssemblerPredicate<"HasV6MOps", + "armv6m or armv6t2">; +def HasV8MBaseline : Predicate<"Subtarget->hasV8MBaselineOps()">, + AssemblerPredicate<"HasV8MBaselineOps", + "armv8m.base">; +def HasV8MMainline : Predicate<"Subtarget->hasV8MMainlineOps()">, + AssemblerPredicate<"HasV8MMainlineOps", + "armv8m.main">; +def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">, + AssemblerPredicate<"HasV6T2Ops", "armv6t2">; +def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">; +def HasV6K : Predicate<"Subtarget->hasV6KOps()">, + AssemblerPredicate<"HasV6KOps", "armv6k">; +def NoV6K : Predicate<"!Subtarget->hasV6KOps()">; +def HasV7 : Predicate<"Subtarget->hasV7Ops()">, + AssemblerPredicate<"HasV7Ops", "armv7">; +def HasV8 : Predicate<"Subtarget->hasV8Ops()">, + AssemblerPredicate<"HasV8Ops", "armv8">; +def PreV8 : Predicate<"!Subtarget->hasV8Ops()">, + AssemblerPredicate<"!HasV8Ops", "armv7 or earlier">; +def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">, + AssemblerPredicate<"HasV8_1aOps", "armv8.1a">; +def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">, + AssemblerPredicate<"HasV8_2aOps", "armv8.2a">; +def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">, + AssemblerPredicate<"HasV8_3aOps", "armv8.3a">; +def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">, + AssemblerPredicate<"HasV8_4aOps", "armv8.4a">; +def NoVFP : Predicate<"!Subtarget->hasVFP2()">; +def HasVFP2 : Predicate<"Subtarget->hasVFP2()">, + AssemblerPredicate<"FeatureVFP2", "VFP2">; +def HasVFP3 : Predicate<"Subtarget->hasVFP3()">, + AssemblerPredicate<"FeatureVFP3", "VFP3">; +def HasVFP4 : Predicate<"Subtarget->hasVFP4()">, + AssemblerPredicate<"FeatureVFP4", "VFP4">; +def HasDPVFP : Predicate<"!Subtarget->isFPOnlySP()">, + AssemblerPredicate<"!FeatureVFPOnlySP", + "double precision VFP">; +def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">, + AssemblerPredicate<"FeatureFPARMv8", "FPARMv8">; +def HasNEON : Predicate<"Subtarget->hasNEON()">, + AssemblerPredicate<"FeatureNEON", "NEON">; +def HasSHA2 : Predicate<"Subtarget->hasSHA2()">, + AssemblerPredicate<"FeatureSHA2", "sha2">; +def HasAES : Predicate<"Subtarget->hasAES()">, + AssemblerPredicate<"FeatureAES", "aes">; +def HasCrypto : Predicate<"Subtarget->hasCrypto()">, + AssemblerPredicate<"FeatureCrypto", "crypto">; +def HasDotProd : Predicate<"Subtarget->hasDotProd()">, + AssemblerPredicate<"FeatureDotProd", "dotprod">; +def HasCRC : Predicate<"Subtarget->hasCRC()">, + AssemblerPredicate<"FeatureCRC", "crc">; +def HasRAS : Predicate<"Subtarget->hasRAS()">, + AssemblerPredicate<"FeatureRAS", "ras">; +def HasFP16 : 
Predicate<"Subtarget->hasFP16()">, + AssemblerPredicate<"FeatureFP16","half-float conversions">; +def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">, + AssemblerPredicate<"FeatureFullFP16","full half-float">; +def HasDivideInThumb : Predicate<"Subtarget->hasDivideInThumbMode()">, + AssemblerPredicate<"FeatureHWDivThumb", "divide in THUMB">; +def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">, + AssemblerPredicate<"FeatureHWDivARM", "divide in ARM">; +def HasDSP : Predicate<"Subtarget->hasDSP()">, + AssemblerPredicate<"FeatureDSP", "dsp">; +def HasDB : Predicate<"Subtarget->hasDataBarrier()">, + AssemblerPredicate<"FeatureDB", + "data-barriers">; +def HasDFB : Predicate<"Subtarget->hasFullDataBarrier()">, + AssemblerPredicate<"FeatureDFB", + "full-data-barrier">; +def HasV7Clrex : Predicate<"Subtarget->hasV7Clrex()">, + AssemblerPredicate<"FeatureV7Clrex", + "v7 clrex">; +def HasAcquireRelease : Predicate<"Subtarget->hasAcquireRelease()">, + AssemblerPredicate<"FeatureAcquireRelease", + "acquire/release">; +def HasMP : Predicate<"Subtarget->hasMPExtension()">, + AssemblerPredicate<"FeatureMP", + "mp-extensions">; +def HasVirtualization: Predicate<"false">, + AssemblerPredicate<"FeatureVirtualization", + "virtualization-extensions">; +def HasTrustZone : Predicate<"Subtarget->hasTrustZone()">, + AssemblerPredicate<"FeatureTrustZone", + "TrustZone">; +def Has8MSecExt : Predicate<"Subtarget->has8MSecExt()">, + AssemblerPredicate<"Feature8MSecExt", + "ARMv8-M Security Extensions">; +def HasZCZ : Predicate<"Subtarget->hasZeroCycleZeroing()">; +def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">; +def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">; +def IsThumb : Predicate<"Subtarget->isThumb()">, + AssemblerPredicate<"ModeThumb", "thumb">; +def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">; +def IsThumb2 : Predicate<"Subtarget->isThumb2()">, + AssemblerPredicate<"ModeThumb,FeatureThumb2", + "thumb2">; +def IsMClass : Predicate<"Subtarget->isMClass()">, + AssemblerPredicate<"FeatureMClass", "armv*m">; +def IsNotMClass : Predicate<"!Subtarget->isMClass()">, + AssemblerPredicate<"!FeatureMClass", + "!armv*m">; +def IsARM : Predicate<"!Subtarget->isThumb()">, + AssemblerPredicate<"!ModeThumb", "arm-mode">; +def IsMachO : Predicate<"Subtarget->isTargetMachO()">; +def IsNotMachO : Predicate<"!Subtarget->isTargetMachO()">; +def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; +def IsWindows : Predicate<"Subtarget->isTargetWindows()">; +def IsNotWindows : Predicate<"!Subtarget->isTargetWindows()">; +def IsReadTPHard : Predicate<"Subtarget->isReadTPHard()">; +def IsReadTPSoft : Predicate<"!Subtarget->isReadTPHard()">; +def UseNaClTrap : Predicate<"Subtarget->useNaClTrap()">, + AssemblerPredicate<"FeatureNaClTrap", "NaCl">; +def DontUseNaClTrap : Predicate<"!Subtarget->useNaClTrap()">; + +def UseNegativeImmediates : + Predicate<"false">, + AssemblerPredicate<"!FeatureNoNegativeImmediates", + "NegativeImmediates">; + +// FIXME: Eventually this will be just "hasV6T2Ops". 
+let RecomputePerFunction = 1 in { + def UseMovt : Predicate<"Subtarget->useMovt(*MF)">; + def DontUseMovt : Predicate<"!Subtarget->useMovt(*MF)">; + def UseMovtInPic : Predicate<"Subtarget->useMovt(*MF) && Subtarget->allowPositionIndependentMovt()">; + def DontUseMovtInPic : Predicate<"!Subtarget->useMovt(*MF) || !Subtarget->allowPositionIndependentMovt()">; +} +def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">; +def UseMulOps : Predicate<"Subtarget->useMulOps()">; + +// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available. +// But only select them if more precision in FP computation is allowed. +// Do not use them for Darwin platforms. +def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion ==" + " FPOpFusion::Fast && " + " Subtarget->hasVFP4()) && " + "!Subtarget->isTargetDarwin()">; +def DontUseFusedMAC : Predicate<"!(TM.Options.AllowFPOpFusion ==" + " FPOpFusion::Fast &&" + " Subtarget->hasVFP4()) || " + "Subtarget->isTargetDarwin()">; + +def HasFastVGETLNi32 : Predicate<"!Subtarget->hasSlowVGETLNi32()">; +def HasSlowVGETLNi32 : Predicate<"Subtarget->hasSlowVGETLNi32()">; + +def HasFastVDUP32 : Predicate<"!Subtarget->hasSlowVDUP32()">; +def HasSlowVDUP32 : Predicate<"Subtarget->hasSlowVDUP32()">; + +def UseVMOVSR : Predicate<"Subtarget->preferVMOVSR() ||" + "!Subtarget->useNEONForSinglePrecisionFP()">; +def DontUseVMOVSR : Predicate<"!Subtarget->preferVMOVSR() &&" + "Subtarget->useNEONForSinglePrecisionFP()">; + +let RecomputePerFunction = 1 in { + def IsLE : Predicate<"MF->getDataLayout().isLittleEndian()">; + def IsBE : Predicate<"MF->getDataLayout().isBigEndian()">; +} + +def GenExecuteOnly : Predicate<"Subtarget->genExecuteOnly()">; + +//===----------------------------------------------------------------------===// +// ARM Flag Definitions. + +class RegConstraint { + string Constraints = C; +} + +//===----------------------------------------------------------------------===// +// ARM specific transformation functions and pattern fragments. +// + +// imm_neg_XFORM - Return the negation of an i32 immediate value. +def imm_neg_XFORM : SDNodeXFormgetTargetConstant(-(int)N->getZExtValue(), SDLoc(N), MVT::i32); +}]>; + +// imm_not_XFORM - Return the complement of a i32 immediate value. +def imm_not_XFORM : SDNodeXFormgetTargetConstant(~(int)N->getZExtValue(), SDLoc(N), MVT::i32); +}]>; + +/// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31]. +def imm16_31 : ImmLeaf= 16 && (int32_t)Imm < 32; +}]>; + +// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits. +def sext_16_node : PatLeaf<(i32 GPR:$a), [{ + if (CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17) + return true; + + if (N->getOpcode() != ISD::SRA) + return false; + if (N->getOperand(0).getOpcode() != ISD::SHL) + return false; + + auto *ShiftVal = dyn_cast(N->getOperand(1)); + if (!ShiftVal || ShiftVal->getZExtValue() != 16) + return false; + + ShiftVal = dyn_cast(N->getOperand(0)->getOperand(1)); + if (!ShiftVal || ShiftVal->getZExtValue() != 16) + return false; + + return true; +}]>; + +/// Split a 32-bit immediate into two 16 bit parts. +def hi16 : SDNodeXFormgetTargetConstant((uint32_t)N->getZExtValue() >> 16, SDLoc(N), + MVT::i32); +}]>; + +def lo16AllZero : PatLeaf<(i32 imm), [{ + // Returns true if all low 16-bits are 0. + return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0; +}], hi16>; + +class BinOpFrag : PatFrag<(ops node:$LHS, node:$RHS), res>; +class UnOpFrag : PatFrag<(ops node:$Src), res>; + +// An 'and' node with a single use. 
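+// Single-use fragments like the ones below let a pattern fold an operation
+// into a flag-setting or combined instruction only when no other user needs
+// the intermediate value, e.g. (sketch) selecting TST for
+// (ARMcmpZ (and_su GPR:$a, GPR:$b), 0) instead of keeping a separate AND.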
+def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{ + return N->hasOneUse(); +}]>; + +// An 'xor' node with a single use. +def xor_su : PatFrag<(ops node:$lhs, node:$rhs), (xor node:$lhs, node:$rhs), [{ + return N->hasOneUse(); +}]>; + +// An 'fmul' node with a single use. +def fmul_su : PatFrag<(ops node:$lhs, node:$rhs), (fmul node:$lhs, node:$rhs),[{ + return N->hasOneUse(); +}]>; + +// An 'fadd' node which checks for single non-hazardous use. +def fadd_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fadd node:$lhs, node:$rhs),[{ + return hasNoVMLxHazardUse(N); +}]>; + +// An 'fsub' node which checks for single non-hazardous use. +def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{ + return hasNoVMLxHazardUse(N); +}]>; + +//===----------------------------------------------------------------------===// +// Operand Definitions. +// + +// Immediate operands with a shared generic asm render method. +class ImmAsmOperand : AsmOperandClass { + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isImmediate<" # Low # "," # High # ">"; + let DiagnosticString = "operand must be an immediate in the range [" # Low # "," # High # "]"; +} + +class ImmAsmOperandMinusOne : AsmOperandClass { + let PredicateMethod = "isImmediate<" # Low # "," # High # ">"; + let DiagnosticType = "ImmRange" # Low # "_" # High; + let DiagnosticString = "operand must be an immediate in the range [" # Low # "," # High # "]"; +} + +// Operands that are part of a memory addressing mode. +class MemOperand : Operand { let OperandType = "OPERAND_MEMORY"; } + +// Branch target. +// FIXME: rename brtarget to t2_brtarget +def brtarget : Operand { + let EncoderMethod = "getBranchTargetOpValue"; + let OperandType = "OPERAND_PCREL"; + let DecoderMethod = "DecodeT2BROperand"; +} + +// Branches targeting ARM-mode must be divisible by 4 if they're a raw +// immediate. +def ARMBranchTarget : AsmOperandClass { + let Name = "ARMBranchTarget"; +} + +// Branches targeting Thumb-mode must be divisible by 2 if they're a raw +// immediate. +def ThumbBranchTarget : AsmOperandClass { + let Name = "ThumbBranchTarget"; +} + +def arm_br_target : Operand { + let ParserMatchClass = ARMBranchTarget; + let EncoderMethod = "getARMBranchTargetOpValue"; + let OperandType = "OPERAND_PCREL"; +} + +// Call target for ARM. Handles conditional/unconditional +// FIXME: rename bl_target to t2_bltarget? +def arm_bl_target : Operand { + let ParserMatchClass = ARMBranchTarget; + let EncoderMethod = "getARMBLTargetOpValue"; + let OperandType = "OPERAND_PCREL"; +} + +// Target for BLX *from* ARM mode. +def arm_blx_target : Operand { + let ParserMatchClass = ThumbBranchTarget; + let EncoderMethod = "getARMBLXTargetOpValue"; + let OperandType = "OPERAND_PCREL"; +} + +// A list of registers separated by comma. Used by load/store multiple. 
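+// For example the "{r4-r7, lr}" operand of push/pop and ldm/stm; the list is
+// encoded as a 16-bit mask with one bit per register.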
+def RegListAsmOperand : AsmOperandClass { let Name = "RegList"; } +def reglist : Operand { + let EncoderMethod = "getRegisterListOpValue"; + let ParserMatchClass = RegListAsmOperand; + let PrintMethod = "printRegisterList"; + let DecoderMethod = "DecodeRegListOperand"; +} + +def GPRPairOp : RegisterOperand; + +def DPRRegListAsmOperand : AsmOperandClass { + let Name = "DPRRegList"; + let DiagnosticType = "DPR_RegList"; +} +def dpr_reglist : Operand { + let EncoderMethod = "getRegisterListOpValue"; + let ParserMatchClass = DPRRegListAsmOperand; + let PrintMethod = "printRegisterList"; + let DecoderMethod = "DecodeDPRRegListOperand"; +} + +def SPRRegListAsmOperand : AsmOperandClass { + let Name = "SPRRegList"; + let DiagnosticString = "operand must be a list of registers in range [s0, s31]"; +} +def spr_reglist : Operand { + let EncoderMethod = "getRegisterListOpValue"; + let ParserMatchClass = SPRRegListAsmOperand; + let PrintMethod = "printRegisterList"; + let DecoderMethod = "DecodeSPRRegListOperand"; +} + +// An operand for the CONSTPOOL_ENTRY pseudo-instruction. +def cpinst_operand : Operand { + let PrintMethod = "printCPInstOperand"; +} + +// Local PC labels. +def pclabel : Operand { + let PrintMethod = "printPCLabel"; +} + +// ADR instruction labels. +def AdrLabelAsmOperand : AsmOperandClass { let Name = "AdrLabel"; } +def adrlabel : Operand { + let EncoderMethod = "getAdrLabelOpValue"; + let ParserMatchClass = AdrLabelAsmOperand; + let PrintMethod = "printAdrLabelOperand<0>"; +} + +def neon_vcvt_imm32 : Operand { + let EncoderMethod = "getNEONVcvtImm32OpValue"; + let DecoderMethod = "DecodeVCVTImmOperand"; +} + +// rot_imm: An integer that encodes a rotate amount. Must be 8, 16, or 24. +def rot_imm_XFORM: SDNodeXFormgetZExtValue()){ + default: llvm_unreachable(nullptr); + case 0: return CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); + case 8: return CurDAG->getTargetConstant(1, SDLoc(N), MVT::i32); + case 16: return CurDAG->getTargetConstant(2, SDLoc(N), MVT::i32); + case 24: return CurDAG->getTargetConstant(3, SDLoc(N), MVT::i32); + } +}]>; +def RotImmAsmOperand : AsmOperandClass { + let Name = "RotImm"; + let ParserMethod = "parseRotImm"; +} +def rot_imm : Operand, PatLeaf<(i32 imm), [{ + int32_t v = N->getZExtValue(); + return v == 8 || v == 16 || v == 24; }], + rot_imm_XFORM> { + let PrintMethod = "printRotImmOperand"; + let ParserMatchClass = RotImmAsmOperand; +} + +// shift_imm: An integer that encodes a shift amount and the type of shift +// (asr or lsl). The 6-bit immediate encodes as: +// {5} 0 ==> lsl +// 1 asr +// {4-0} imm5 shift amount. +// asr #32 encoded as imm5 == 0. +def ShifterImmAsmOperand : AsmOperandClass { + let Name = "ShifterImm"; + let ParserMethod = "parseShifterImm"; +} +def shift_imm : Operand { + let PrintMethod = "printShiftImmOperand"; + let ParserMatchClass = ShifterImmAsmOperand; +} + +// shifter_operand operands: so_reg_reg, so_reg_imm, and mod_imm. 
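+// These model the ARM "flexible second operand": a register shifted by a
+// second register (so_reg_reg, e.g. "r1, lsl r2"), a register shifted by an
+// immediate (so_reg_imm, e.g. "r1, lsl #3"), or an 8-bit value rotated right
+// by an even amount (mod_imm).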
+def ShiftedRegAsmOperand : AsmOperandClass { let Name = "RegShiftedReg"; } +def so_reg_reg : Operand, // reg reg imm + ComplexPattern { + let EncoderMethod = "getSORegRegOpValue"; + let PrintMethod = "printSORegRegOperand"; + let DecoderMethod = "DecodeSORegRegOperand"; + let ParserMatchClass = ShiftedRegAsmOperand; + let MIOperandInfo = (ops GPRnopc, GPRnopc, i32imm); +} + +def ShiftedImmAsmOperand : AsmOperandClass { let Name = "RegShiftedImm"; } +def so_reg_imm : Operand, // reg imm + ComplexPattern { + let EncoderMethod = "getSORegImmOpValue"; + let PrintMethod = "printSORegImmOperand"; + let DecoderMethod = "DecodeSORegImmOperand"; + let ParserMatchClass = ShiftedImmAsmOperand; + let MIOperandInfo = (ops GPR, i32imm); +} + +// FIXME: Does this need to be distinct from so_reg? +def shift_so_reg_reg : Operand, // reg reg imm + ComplexPattern { + let EncoderMethod = "getSORegRegOpValue"; + let PrintMethod = "printSORegRegOperand"; + let DecoderMethod = "DecodeSORegRegOperand"; + let ParserMatchClass = ShiftedRegAsmOperand; + let MIOperandInfo = (ops GPR, GPR, i32imm); +} + +// FIXME: Does this need to be distinct from so_reg? +def shift_so_reg_imm : Operand, // reg reg imm + ComplexPattern { + let EncoderMethod = "getSORegImmOpValue"; + let PrintMethod = "printSORegImmOperand"; + let DecoderMethod = "DecodeSORegImmOperand"; + let ParserMatchClass = ShiftedImmAsmOperand; + let MIOperandInfo = (ops GPR, i32imm); +} + +// mod_imm: match a 32-bit immediate operand, which can be encoded into +// a 12-bit immediate; an 8-bit integer and a 4-bit rotator (See ARMARM +// - "Modified Immediate Constants"). Within the MC layer we keep this +// immediate in its encoded form. +def ModImmAsmOperand: AsmOperandClass { + let Name = "ModImm"; + let ParserMethod = "parseModImm"; +} +def mod_imm : Operand, ImmLeaf { + let EncoderMethod = "getModImmOpValue"; + let PrintMethod = "printModImmOperand"; + let ParserMatchClass = ModImmAsmOperand; +} + +// Note: the patterns mod_imm_not and mod_imm_neg do not require an encoder +// method and such, as they are only used on aliases (Pat<> and InstAlias<>). +// The actual parsing, encoding, decoding are handled by the destination +// instructions, which use mod_imm. + +def ModImmNotAsmOperand : AsmOperandClass { let Name = "ModImmNot"; } +def mod_imm_not : Operand, PatLeaf<(imm), [{ + return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1; + }], imm_not_XFORM> { + let ParserMatchClass = ModImmNotAsmOperand; +} + +def ModImmNegAsmOperand : AsmOperandClass { let Name = "ModImmNeg"; } +def mod_imm_neg : Operand, PatLeaf<(imm), [{ + unsigned Value = -(unsigned)N->getZExtValue(); + return Value && ARM_AM::getSOImmVal(Value) != -1; + }], imm_neg_XFORM> { + let ParserMatchClass = ModImmNegAsmOperand; +} + +/// arm_i32imm - True for +V6T2, or when isSOImmTwoParVal() +def arm_i32imm : PatLeaf<(imm), [{ + if (Subtarget->useMovt(*MF)) + return true; + return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue()); +}]>; + +/// imm0_1 predicate - Immediate in the range [0,1]. +def Imm0_1AsmOperand: ImmAsmOperand<0,1> { let Name = "Imm0_1"; } +def imm0_1 : Operand { let ParserMatchClass = Imm0_1AsmOperand; } + +/// imm0_3 predicate - Immediate in the range [0,3]. +def Imm0_3AsmOperand: ImmAsmOperand<0,3> { let Name = "Imm0_3"; } +def imm0_3 : Operand { let ParserMatchClass = Imm0_3AsmOperand; } + +/// imm0_7 predicate - Immediate in the range [0,7]. 
+def Imm0_7AsmOperand: ImmAsmOperand<0,7> { + let Name = "Imm0_7"; +} +def imm0_7 : Operand, ImmLeaf= 0 && Imm < 8; +}]> { + let ParserMatchClass = Imm0_7AsmOperand; +} + +/// imm8_255 predicate - Immediate in the range [8,255]. +def Imm8_255AsmOperand: ImmAsmOperand<8,255> { let Name = "Imm8_255"; } +def imm8_255 : Operand, ImmLeaf= 8 && Imm < 256; +}]> { + let ParserMatchClass = Imm8_255AsmOperand; +} + +/// imm8 predicate - Immediate is exactly 8. +def Imm8AsmOperand: ImmAsmOperand<8,8> { let Name = "Imm8"; } +def imm8 : Operand, ImmLeaf { + let ParserMatchClass = Imm8AsmOperand; +} + +/// imm16 predicate - Immediate is exactly 16. +def Imm16AsmOperand: ImmAsmOperand<16,16> { let Name = "Imm16"; } +def imm16 : Operand, ImmLeaf { + let ParserMatchClass = Imm16AsmOperand; +} + +/// imm32 predicate - Immediate is exactly 32. +def Imm32AsmOperand: ImmAsmOperand<32,32> { let Name = "Imm32"; } +def imm32 : Operand, ImmLeaf { + let ParserMatchClass = Imm32AsmOperand; +} + +def imm8_or_16 : ImmLeaf; + +/// imm1_7 predicate - Immediate in the range [1,7]. +def Imm1_7AsmOperand: ImmAsmOperand<1,7> { let Name = "Imm1_7"; } +def imm1_7 : Operand, ImmLeaf 0 && Imm < 8; }]> { + let ParserMatchClass = Imm1_7AsmOperand; +} + +/// imm1_15 predicate - Immediate in the range [1,15]. +def Imm1_15AsmOperand: ImmAsmOperand<1,15> { let Name = "Imm1_15"; } +def imm1_15 : Operand, ImmLeaf 0 && Imm < 16; }]> { + let ParserMatchClass = Imm1_15AsmOperand; +} + +/// imm1_31 predicate - Immediate in the range [1,31]. +def Imm1_31AsmOperand: ImmAsmOperand<1,31> { let Name = "Imm1_31"; } +def imm1_31 : Operand, ImmLeaf 0 && Imm < 32; }]> { + let ParserMatchClass = Imm1_31AsmOperand; +} + +/// imm0_15 predicate - Immediate in the range [0,15]. +def Imm0_15AsmOperand: ImmAsmOperand<0,15> { + let Name = "Imm0_15"; +} +def imm0_15 : Operand, ImmLeaf= 0 && Imm < 16; +}]> { + let ParserMatchClass = Imm0_15AsmOperand; +} + +/// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31]. +def Imm0_31AsmOperand: ImmAsmOperand<0,31> { let Name = "Imm0_31"; } +def imm0_31 : Operand, ImmLeaf= 0 && Imm < 32; +}]> { + let ParserMatchClass = Imm0_31AsmOperand; +} + +/// imm0_32 predicate - True if the 32-bit immediate is in the range [0,32]. +def Imm0_32AsmOperand: ImmAsmOperand<0,32> { let Name = "Imm0_32"; } +def imm0_32 : Operand, ImmLeaf= 0 && Imm < 33; +}]> { + let ParserMatchClass = Imm0_32AsmOperand; +} + +/// imm0_63 predicate - True if the 32-bit immediate is in the range [0,63]. +def Imm0_63AsmOperand: ImmAsmOperand<0,63> { let Name = "Imm0_63"; } +def imm0_63 : Operand, ImmLeaf= 0 && Imm < 64; +}]> { + let ParserMatchClass = Imm0_63AsmOperand; +} + +/// imm0_239 predicate - Immediate in the range [0,239]. +def Imm0_239AsmOperand : ImmAsmOperand<0,239> { + let Name = "Imm0_239"; +} +def imm0_239 : Operand, ImmLeaf= 0 && Imm < 240; }]> { + let ParserMatchClass = Imm0_239AsmOperand; +} + +/// imm0_255 predicate - Immediate in the range [0,255]. +def Imm0_255AsmOperand : ImmAsmOperand<0,255> { let Name = "Imm0_255"; } +def imm0_255 : Operand, ImmLeaf= 0 && Imm < 256; }]> { + let ParserMatchClass = Imm0_255AsmOperand; +} + +/// imm0_65535 - An immediate is in the range [0,65535]. +def Imm0_65535AsmOperand: ImmAsmOperand<0,65535> { let Name = "Imm0_65535"; } +def imm0_65535 : Operand, ImmLeaf= 0 && Imm < 65536; +}]> { + let ParserMatchClass = Imm0_65535AsmOperand; +} + +// imm0_65535_neg - An immediate whose negative value is in the range [0.65535]. 
+def imm0_65535_neg : Operand, ImmLeaf= 0 && -Imm < 65536; +}]>; + +// imm0_65535_expr - For movt/movw - 16-bit immediate that can also reference +// a relocatable expression. +// +// FIXME: This really needs a Thumb version separate from the ARM version. +// While the range is the same, and can thus use the same match class, +// the encoding is different so it should have a different encoder method. +def Imm0_65535ExprAsmOperand: AsmOperandClass { + let Name = "Imm0_65535Expr"; + let RenderMethod = "addImmOperands"; + let DiagnosticString = "operand must be an immediate in the range [0,0xffff] or a relocatable expression"; +} + +def imm0_65535_expr : Operand { + let EncoderMethod = "getHiLo16ImmOpValue"; + let ParserMatchClass = Imm0_65535ExprAsmOperand; +} + +def Imm256_65535ExprAsmOperand: ImmAsmOperand<256,65535> { let Name = "Imm256_65535Expr"; } +def imm256_65535_expr : Operand { + let ParserMatchClass = Imm256_65535ExprAsmOperand; +} + +/// imm24b - True if the 32-bit immediate is encodable in 24 bits. +def Imm24bitAsmOperand: ImmAsmOperand<0,0xffffff> { + let Name = "Imm24bit"; + let DiagnosticString = "operand must be an immediate in the range [0,0xffffff]"; +} +def imm24b : Operand, ImmLeaf= 0 && Imm <= 0xffffff; +}]> { + let ParserMatchClass = Imm24bitAsmOperand; +} + + +/// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield +/// e.g., 0xf000ffff +def BitfieldAsmOperand : AsmOperandClass { + let Name = "Bitfield"; + let ParserMethod = "parseBitfield"; +} + +def bf_inv_mask_imm : Operand, + PatLeaf<(imm), [{ + return ARM::isBitFieldInvertedMask(N->getZExtValue()); +}] > { + let EncoderMethod = "getBitfieldInvertedMaskOpValue"; + let PrintMethod = "printBitfieldInvMaskImmOperand"; + let DecoderMethod = "DecodeBitfieldMaskOperand"; + let ParserMatchClass = BitfieldAsmOperand; + let GISelPredicateCode = [{ + // There's better methods of implementing this check. IntImmLeaf<> would be + // equivalent and have less boilerplate but we need a test for C++ + // predicates and this one causes new rules to be imported into GlobalISel + // without requiring additional features first. + const auto &MO = MI.getOperand(1); + if (!MO.isCImm()) + return false; + return ARM::isBitFieldInvertedMask(MO.getCImm()->getZExtValue()); + }]; +} + +def imm1_32_XFORM: SDNodeXFormgetTargetConstant((int)N->getZExtValue() - 1, SDLoc(N), + MVT::i32); +}]>; +def Imm1_32AsmOperand: ImmAsmOperandMinusOne<1,32> { + let Name = "Imm1_32"; +} +def imm1_32 : Operand, PatLeaf<(imm), [{ + uint64_t Imm = N->getZExtValue(); + return Imm > 0 && Imm <= 32; + }], + imm1_32_XFORM> { + let PrintMethod = "printImmPlusOneOperand"; + let ParserMatchClass = Imm1_32AsmOperand; +} + +def imm1_16_XFORM: SDNodeXFormgetTargetConstant((int)N->getZExtValue() - 1, SDLoc(N), + MVT::i32); +}]>; +def Imm1_16AsmOperand: ImmAsmOperandMinusOne<1,16> { let Name = "Imm1_16"; } +def imm1_16 : Operand, ImmLeaf 0 && Imm <= 16; + }], + imm1_16_XFORM> { + let PrintMethod = "printImmPlusOneOperand"; + let ParserMatchClass = Imm1_16AsmOperand; +} + +// Define ARM specific addressing modes. +// addrmode_imm12 := reg +/- imm12 +// +def MemImm12OffsetAsmOperand : AsmOperandClass { let Name = "MemImm12Offset"; } +class AddrMode_Imm12 : MemOperand, + ComplexPattern { + // 12-bit immediate operand. Note that instructions using this encode + // #0 and #-0 differently. We flag #-0 as the magic value INT32_MIN. All other + // immediate values are as normal. 
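+  // For example "ldr r0, [r1, #-0]" keeps INT32_MIN as its internal offset so
+  // it round-trips to an encoding with the U (add) bit clear, unlike
+  // "ldr r0, [r1, #0]".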
+ + let EncoderMethod = "getAddrModeImm12OpValue"; + let DecoderMethod = "DecodeAddrModeImm12Operand"; + let ParserMatchClass = MemImm12OffsetAsmOperand; + let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); +} + +def addrmode_imm12 : AddrMode_Imm12 { + let PrintMethod = "printAddrModeImm12Operand"; +} + +def addrmode_imm12_pre : AddrMode_Imm12 { + let PrintMethod = "printAddrModeImm12Operand"; +} + +// ldst_so_reg := reg +/- reg shop imm +// +def MemRegOffsetAsmOperand : AsmOperandClass { let Name = "MemRegOffset"; } +def ldst_so_reg : MemOperand, + ComplexPattern { + let EncoderMethod = "getLdStSORegOpValue"; + // FIXME: Simplify the printer + let PrintMethod = "printAddrMode2Operand"; + let DecoderMethod = "DecodeSORegMemOperand"; + let ParserMatchClass = MemRegOffsetAsmOperand; + let MIOperandInfo = (ops GPR:$base, GPRnopc:$offsreg, i32imm:$shift); +} + +// postidx_imm8 := +/- [0,255] +// +// 9 bit value: +// {8} 1 is imm8 is non-negative. 0 otherwise. +// {7-0} [0,255] imm8 value. +def PostIdxImm8AsmOperand : AsmOperandClass { let Name = "PostIdxImm8"; } +def postidx_imm8 : MemOperand { + let PrintMethod = "printPostIdxImm8Operand"; + let ParserMatchClass = PostIdxImm8AsmOperand; + let MIOperandInfo = (ops i32imm); +} + +// postidx_imm8s4 := +/- [0,1020] +// +// 9 bit value: +// {8} 1 is imm8 is non-negative. 0 otherwise. +// {7-0} [0,255] imm8 value, scaled by 4. +def PostIdxImm8s4AsmOperand : AsmOperandClass { let Name = "PostIdxImm8s4"; } +def postidx_imm8s4 : MemOperand { + let PrintMethod = "printPostIdxImm8s4Operand"; + let ParserMatchClass = PostIdxImm8s4AsmOperand; + let MIOperandInfo = (ops i32imm); +} + + +// postidx_reg := +/- reg +// +def PostIdxRegAsmOperand : AsmOperandClass { + let Name = "PostIdxReg"; + let ParserMethod = "parsePostIdxReg"; +} +def postidx_reg : MemOperand { + let EncoderMethod = "getPostIdxRegOpValue"; + let DecoderMethod = "DecodePostIdxReg"; + let PrintMethod = "printPostIdxRegOperand"; + let ParserMatchClass = PostIdxRegAsmOperand; + let MIOperandInfo = (ops GPRnopc, i32imm); +} + +def PostIdxRegShiftedAsmOperand : AsmOperandClass { + let Name = "PostIdxRegShifted"; + let ParserMethod = "parsePostIdxReg"; +} +def am2offset_reg : MemOperand, + ComplexPattern { + let EncoderMethod = "getAddrMode2OffsetOpValue"; + let PrintMethod = "printAddrMode2OffsetOperand"; + // When using this for assembly, it's always as a post-index offset. + let ParserMatchClass = PostIdxRegShiftedAsmOperand; + let MIOperandInfo = (ops GPRnopc, i32imm); +} + +// FIXME: am2offset_imm should only need the immediate, not the GPR. Having +// the GPR is purely vestigal at this point. +def AM2OffsetImmAsmOperand : AsmOperandClass { let Name = "AM2OffsetImm"; } +def am2offset_imm : MemOperand, + ComplexPattern { + let EncoderMethod = "getAddrMode2OffsetOpValue"; + let PrintMethod = "printAddrMode2OffsetOperand"; + let ParserMatchClass = AM2OffsetImmAsmOperand; + let MIOperandInfo = (ops GPRnopc, i32imm); +} + + +// addrmode3 := reg +/- reg +// addrmode3 := reg +/- imm8 +// +// FIXME: split into imm vs. reg versions. 
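+// Addressing mode 3 is used by the halfword, signed-byte and doubleword
+// loads/stores, e.g. "ldrh r0, [r1, #6]" or "ldrsb r0, [r1, -r2]".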
+def AddrMode3AsmOperand : AsmOperandClass { let Name = "AddrMode3"; } +class AddrMode3 : MemOperand, + ComplexPattern { + let EncoderMethod = "getAddrMode3OpValue"; + let ParserMatchClass = AddrMode3AsmOperand; + let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); +} + +def addrmode3 : AddrMode3 +{ + let PrintMethod = "printAddrMode3Operand"; +} + +def addrmode3_pre : AddrMode3 +{ + let PrintMethod = "printAddrMode3Operand"; +} + +// FIXME: split into imm vs. reg versions. +// FIXME: parser method to handle +/- register. +def AM3OffsetAsmOperand : AsmOperandClass { + let Name = "AM3Offset"; + let ParserMethod = "parseAM3Offset"; +} +def am3offset : MemOperand, + ComplexPattern { + let EncoderMethod = "getAddrMode3OffsetOpValue"; + let PrintMethod = "printAddrMode3OffsetOperand"; + let ParserMatchClass = AM3OffsetAsmOperand; + let MIOperandInfo = (ops GPR, i32imm); +} + +// ldstm_mode := {ia, ib, da, db} +// +def ldstm_mode : OptionalDefOperand { + let EncoderMethod = "getLdStmModeOpValue"; + let PrintMethod = "printLdStmModeOperand"; +} + +// addrmode5 := reg +/- imm8*4 +// +def AddrMode5AsmOperand : AsmOperandClass { let Name = "AddrMode5"; } +class AddrMode5 : MemOperand, + ComplexPattern { + let EncoderMethod = "getAddrMode5OpValue"; + let DecoderMethod = "DecodeAddrMode5Operand"; + let ParserMatchClass = AddrMode5AsmOperand; + let MIOperandInfo = (ops GPR:$base, i32imm); +} + +def addrmode5 : AddrMode5 { + let PrintMethod = "printAddrMode5Operand"; +} + +def addrmode5_pre : AddrMode5 { + let PrintMethod = "printAddrMode5Operand"; +} + +// addrmode5fp16 := reg +/- imm8*2 +// +def AddrMode5FP16AsmOperand : AsmOperandClass { let Name = "AddrMode5FP16"; } +class AddrMode5FP16 : Operand, + ComplexPattern { + let EncoderMethod = "getAddrMode5FP16OpValue"; + let DecoderMethod = "DecodeAddrMode5FP16Operand"; + let ParserMatchClass = AddrMode5FP16AsmOperand; + let MIOperandInfo = (ops GPR:$base, i32imm); +} + +def addrmode5fp16 : AddrMode5FP16 { + let PrintMethod = "printAddrMode5FP16Operand"; +} + +// addrmode6 := reg with optional alignment +// +def AddrMode6AsmOperand : AsmOperandClass { let Name = "AlignedMemory"; } +def addrmode6 : MemOperand, + ComplexPattern{ + let PrintMethod = "printAddrMode6Operand"; + let MIOperandInfo = (ops GPR:$addr, i32imm:$align); + let EncoderMethod = "getAddrMode6AddressOpValue"; + let DecoderMethod = "DecodeAddrMode6Operand"; + let ParserMatchClass = AddrMode6AsmOperand; +} + +def am6offset : MemOperand, + ComplexPattern { + let PrintMethod = "printAddrMode6OffsetOperand"; + let MIOperandInfo = (ops GPR); + let EncoderMethod = "getAddrMode6OffsetOpValue"; + let DecoderMethod = "DecodeGPRRegisterClass"; +} + +// Special version of addrmode6 to handle alignment encoding for VST1/VLD1 +// (single element from one lane) for size 32. +def addrmode6oneL32 : MemOperand, + ComplexPattern{ + let PrintMethod = "printAddrMode6Operand"; + let MIOperandInfo = (ops GPR:$addr, i32imm); + let EncoderMethod = "getAddrMode6OneLane32AddressOpValue"; +} + +// Base class for addrmode6 with specific alignment restrictions. +class AddrMode6Align : MemOperand, + ComplexPattern{ + let PrintMethod = "printAddrMode6Operand"; + let MIOperandInfo = (ops GPR:$addr, i32imm:$align); + let EncoderMethod = "getAddrMode6AddressOpValue"; + let DecoderMethod = "DecodeAddrMode6Operand"; +} + +// Special version of addrmode6 to handle no allowed alignment encoding for +// VLD/VST instructions and checking the alignment is not specified. 
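+// (In assembly the alignment is the ":<align>" suffix on the address, as in
+// "vld1.8 {d0, d1}, [r0:64]"; for this operand class no such suffix may be
+// present.)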
+def AddrMode6AlignNoneAsmOperand : AsmOperandClass { + let Name = "AlignedMemoryNone"; + let DiagnosticString = "alignment must be omitted"; +} +def addrmode6alignNone : AddrMode6Align { + // The alignment specifier can only be omitted. + let ParserMatchClass = AddrMode6AlignNoneAsmOperand; +} + +// Special version of addrmode6 to handle 16-bit alignment encoding for +// VLD/VST instructions and checking the alignment value. +def AddrMode6Align16AsmOperand : AsmOperandClass { + let Name = "AlignedMemory16"; + let DiagnosticString = "alignment must be 16 or omitted"; +} +def addrmode6align16 : AddrMode6Align { + // The alignment specifier can only be 16 or omitted. + let ParserMatchClass = AddrMode6Align16AsmOperand; +} + +// Special version of addrmode6 to handle 32-bit alignment encoding for +// VLD/VST instructions and checking the alignment value. +def AddrMode6Align32AsmOperand : AsmOperandClass { + let Name = "AlignedMemory32"; + let DiagnosticString = "alignment must be 32 or omitted"; +} +def addrmode6align32 : AddrMode6Align { + // The alignment specifier can only be 32 or omitted. + let ParserMatchClass = AddrMode6Align32AsmOperand; +} + +// Special version of addrmode6 to handle 64-bit alignment encoding for +// VLD/VST instructions and checking the alignment value. +def AddrMode6Align64AsmOperand : AsmOperandClass { + let Name = "AlignedMemory64"; + let DiagnosticString = "alignment must be 64 or omitted"; +} +def addrmode6align64 : AddrMode6Align { + // The alignment specifier can only be 64 or omitted. + let ParserMatchClass = AddrMode6Align64AsmOperand; +} + +// Special version of addrmode6 to handle 64-bit or 128-bit alignment encoding +// for VLD/VST instructions and checking the alignment value. +def AddrMode6Align64or128AsmOperand : AsmOperandClass { + let Name = "AlignedMemory64or128"; + let DiagnosticString = "alignment must be 64, 128 or omitted"; +} +def addrmode6align64or128 : AddrMode6Align { + // The alignment specifier can only be 64, 128 or omitted. + let ParserMatchClass = AddrMode6Align64or128AsmOperand; +} + +// Special version of addrmode6 to handle 64-bit, 128-bit or 256-bit alignment +// encoding for VLD/VST instructions and checking the alignment value. +def AddrMode6Align64or128or256AsmOperand : AsmOperandClass { + let Name = "AlignedMemory64or128or256"; + let DiagnosticString = "alignment must be 64, 128, 256 or omitted"; +} +def addrmode6align64or128or256 : AddrMode6Align { + // The alignment specifier can only be 64, 128, 256 or omitted. + let ParserMatchClass = AddrMode6Align64or128or256AsmOperand; +} + +// Special version of addrmode6 to handle alignment encoding for VLD-dup +// instructions, specifically VLD4-dup. +def addrmode6dup : MemOperand, + ComplexPattern{ + let PrintMethod = "printAddrMode6Operand"; + let MIOperandInfo = (ops GPR:$addr, i32imm); + let EncoderMethod = "getAddrMode6DupAddressOpValue"; + // FIXME: This is close, but not quite right. The alignment specifier is + // different. + let ParserMatchClass = AddrMode6AsmOperand; +} + +// Base class for addrmode6dup with specific alignment restrictions. +class AddrMode6DupAlign : MemOperand, + ComplexPattern{ + let PrintMethod = "printAddrMode6Operand"; + let MIOperandInfo = (ops GPR:$addr, i32imm); + let EncoderMethod = "getAddrMode6DupAddressOpValue"; +} + +// Special version of addrmode6 to handle no allowed alignment encoding for +// VLD-dup instruction and checking the alignment is not specified. 
+def AddrMode6dupAlignNoneAsmOperand : AsmOperandClass { + let Name = "DupAlignedMemoryNone"; + let DiagnosticString = "alignment must be omitted"; +} +def addrmode6dupalignNone : AddrMode6DupAlign { + // The alignment specifier can only be omitted. + let ParserMatchClass = AddrMode6dupAlignNoneAsmOperand; +} + +// Special version of addrmode6 to handle 16-bit alignment encoding for VLD-dup +// instruction and checking the alignment value. +def AddrMode6dupAlign16AsmOperand : AsmOperandClass { + let Name = "DupAlignedMemory16"; + let DiagnosticString = "alignment must be 16 or omitted"; +} +def addrmode6dupalign16 : AddrMode6DupAlign { + // The alignment specifier can only be 16 or omitted. + let ParserMatchClass = AddrMode6dupAlign16AsmOperand; +} + +// Special version of addrmode6 to handle 32-bit alignment encoding for VLD-dup +// instruction and checking the alignment value. +def AddrMode6dupAlign32AsmOperand : AsmOperandClass { + let Name = "DupAlignedMemory32"; + let DiagnosticString = "alignment must be 32 or omitted"; +} +def addrmode6dupalign32 : AddrMode6DupAlign { + // The alignment specifier can only be 32 or omitted. + let ParserMatchClass = AddrMode6dupAlign32AsmOperand; +} + +// Special version of addrmode6 to handle 64-bit alignment encoding for VLD +// instructions and checking the alignment value. +def AddrMode6dupAlign64AsmOperand : AsmOperandClass { + let Name = "DupAlignedMemory64"; + let DiagnosticString = "alignment must be 64 or omitted"; +} +def addrmode6dupalign64 : AddrMode6DupAlign { + // The alignment specifier can only be 64 or omitted. + let ParserMatchClass = AddrMode6dupAlign64AsmOperand; +} + +// Special version of addrmode6 to handle 64-bit or 128-bit alignment encoding +// for VLD instructions and checking the alignment value. +def AddrMode6dupAlign64or128AsmOperand : AsmOperandClass { + let Name = "DupAlignedMemory64or128"; + let DiagnosticString = "alignment must be 64, 128 or omitted"; +} +def addrmode6dupalign64or128 : AddrMode6DupAlign { + // The alignment specifier can only be 64, 128 or omitted. 
+ let ParserMatchClass = AddrMode6dupAlign64or128AsmOperand; +} + +// addrmodepc := pc + reg +// +def addrmodepc : MemOperand, + ComplexPattern { + let PrintMethod = "printAddrModePCOperand"; + let MIOperandInfo = (ops GPR, i32imm); +} + +// addr_offset_none := reg +// +def MemNoOffsetAsmOperand : AsmOperandClass { let Name = "MemNoOffset"; } +def addr_offset_none : MemOperand, + ComplexPattern { + let PrintMethod = "printAddrMode7Operand"; + let DecoderMethod = "DecodeAddrMode7Operand"; + let ParserMatchClass = MemNoOffsetAsmOperand; + let MIOperandInfo = (ops GPR:$base); +} + +def nohash_imm : Operand { + let PrintMethod = "printNoHashImmediate"; +} + +def CoprocNumAsmOperand : AsmOperandClass { + let Name = "CoprocNum"; + let ParserMethod = "parseCoprocNumOperand"; +} +def p_imm : Operand { + let PrintMethod = "printPImmediate"; + let ParserMatchClass = CoprocNumAsmOperand; + let DecoderMethod = "DecodeCoprocessor"; +} + +def CoprocRegAsmOperand : AsmOperandClass { + let Name = "CoprocReg"; + let ParserMethod = "parseCoprocRegOperand"; +} +def c_imm : Operand { + let PrintMethod = "printCImmediate"; + let ParserMatchClass = CoprocRegAsmOperand; +} +def CoprocOptionAsmOperand : AsmOperandClass { + let Name = "CoprocOption"; + let ParserMethod = "parseCoprocOptionOperand"; +} +def coproc_option_imm : Operand { + let PrintMethod = "printCoprocOptionImm"; + let ParserMatchClass = CoprocOptionAsmOperand; +} + +//===----------------------------------------------------------------------===// + +include "ARMInstrFormats.td" + +//===----------------------------------------------------------------------===// +// Multiclass helpers... +// + +/// AsI1_bin_irs - Defines a set of (op r, {mod_imm|r|so_reg}) patterns for a +/// binop that produces a value. +let TwoOperandAliasConstraint = "$Rn = $Rd" in +multiclass AsI1_bin_irs opcod, string opc, + InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, + SDPatternOperator opnode, bit Commutable = 0> { + // The register-immediate version is re-materializable. This is useful + // in particular for taking the address of a local. + let isReMaterializable = 1 in { + def ri : AsI1, + Sched<[WriteALU, ReadALU]> { + bits<4> Rd; + bits<4> Rn; + bits<12> imm; + let Inst{25} = 1; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-0} = imm; + } + } + def rr : AsI1, + Sched<[WriteALU, ReadALU, ReadALU]> { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + let Inst{25} = 0; + let isCommutable = Commutable; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-4} = 0b00000000; + let Inst{3-0} = Rm; + } + + def rsi : AsI1, + Sched<[WriteALUsi, ReadALU]> { + bits<4> Rd; + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-5} = shift{11-5}; + let Inst{4} = 0; + let Inst{3-0} = shift{3-0}; + } + + def rsr : AsI1, + Sched<[WriteALUsr, ReadALUsr]> { + bits<4> Rd; + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-8} = shift{11-8}; + let Inst{7} = 0; + let Inst{6-5} = shift{6-5}; + let Inst{4} = 1; + let Inst{3-0} = shift{3-0}; + } +} + +/// AsI1_rbin_irs - Same as AsI1_bin_irs except the order of operands are +/// reversed. The 'rr' form is only defined for the disassembler; for codegen +/// it is equivalent to the AsI1_bin_irs counterpart. 
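+/// For example RSB is defined via this multiclass: "rsb r0, r1, #10" computes
+/// r0 = 10 - r1, i.e. the immediate/shifted operand becomes the left-hand side
+/// of the subtraction.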
+let TwoOperandAliasConstraint = "$Rn = $Rd" in +multiclass AsI1_rbin_irs opcod, string opc, + InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, + SDNode opnode, bit Commutable = 0> { + // The register-immediate version is re-materializable. This is useful + // in particular for taking the address of a local. + let isReMaterializable = 1 in { + def ri : AsI1, + Sched<[WriteALU, ReadALU]> { + bits<4> Rd; + bits<4> Rn; + bits<12> imm; + let Inst{25} = 1; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-0} = imm; + } + } + def rr : AsI1, + Sched<[WriteALU, ReadALU, ReadALU]> { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + let Inst{11-4} = 0b00000000; + let Inst{25} = 0; + let Inst{3-0} = Rm; + let Inst{15-12} = Rd; + let Inst{19-16} = Rn; + } + + def rsi : AsI1, + Sched<[WriteALUsi, ReadALU]> { + bits<4> Rd; + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-5} = shift{11-5}; + let Inst{4} = 0; + let Inst{3-0} = shift{3-0}; + } + + def rsr : AsI1, + Sched<[WriteALUsr, ReadALUsr]> { + bits<4> Rd; + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-8} = shift{11-8}; + let Inst{7} = 0; + let Inst{6-5} = shift{6-5}; + let Inst{4} = 1; + let Inst{3-0} = shift{3-0}; + } +} + +/// AsI1_bin_s_irs - Same as AsI1_bin_irs except it sets the 's' bit by default. +/// +/// These opcodes will be converted to the real non-S opcodes by +/// AdjustInstrPostInstrSelection after giving them an optional CPSR operand. +let hasPostISelHook = 1, Defs = [CPSR] in { +multiclass AsI1_bin_s_irs { + def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm, pred:$p), + 4, iii, + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, mod_imm:$imm))]>, + Sched<[WriteALU, ReadALU]>; + + def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, pred:$p), + 4, iir, + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]>, + Sched<[WriteALU, ReadALU, ReadALU]> { + let isCommutable = Commutable; + } + def rsi : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$Rn, so_reg_imm:$shift, pred:$p), + 4, iis, + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, + so_reg_imm:$shift))]>, + Sched<[WriteALUsi, ReadALU]>; + + def rsr : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$Rn, so_reg_reg:$shift, pred:$p), + 4, iis, + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, + so_reg_reg:$shift))]>, + Sched<[WriteALUSsr, ReadALUsr]>; +} +} + +/// AsI1_rbin_s_is - Same as AsI1_bin_s_irs, except selection DAG +/// operands are reversed. +let hasPostISelHook = 1, Defs = [CPSR] in { +multiclass AsI1_rbin_s_is { + def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm, pred:$p), + 4, iii, + [(set GPR:$Rd, CPSR, (opnode mod_imm:$imm, GPR:$Rn))]>, + Sched<[WriteALU, ReadALU]>; + + def rsi : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$Rn, so_reg_imm:$shift, pred:$p), + 4, iis, + [(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift, + GPR:$Rn))]>, + Sched<[WriteALUsi, ReadALU]>; + + def rsr : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$Rn, so_reg_reg:$shift, pred:$p), + 4, iis, + [(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift, + GPR:$Rn))]>, + Sched<[WriteALUSsr, ReadALUsr]>; +} +} + +/// AI1_cmp_irs - Defines a set of (op r, {mod_imm|r|so_reg}) cmp / test +/// patterns. Similar to AsI1_bin_irs except the instruction does not produce +/// a explicit result, only implicitly set CPSR. 
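+/// CMP, CMN, TST and TEQ are built from this multiclass; they update only the
+/// condition flags in CPSR.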
+let isCompare = 1, Defs = [CPSR] in { +multiclass AI1_cmp_irs opcod, string opc, + InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, + SDPatternOperator opnode, bit Commutable = 0, + string rrDecoderMethod = ""> { + def ri : AI1, + Sched<[WriteCMP, ReadALU]> { + bits<4> Rn; + bits<12> imm; + let Inst{25} = 1; + let Inst{20} = 1; + let Inst{19-16} = Rn; + let Inst{15-12} = 0b0000; + let Inst{11-0} = imm; + + let Unpredictable{15-12} = 0b1111; + } + def rr : AI1, + Sched<[WriteCMP, ReadALU, ReadALU]> { + bits<4> Rn; + bits<4> Rm; + let isCommutable = Commutable; + let Inst{25} = 0; + let Inst{20} = 1; + let Inst{19-16} = Rn; + let Inst{15-12} = 0b0000; + let Inst{11-4} = 0b00000000; + let Inst{3-0} = Rm; + let DecoderMethod = rrDecoderMethod; + + let Unpredictable{15-12} = 0b1111; + } + def rsi : AI1, + Sched<[WriteCMPsi, ReadALU]> { + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{20} = 1; + let Inst{19-16} = Rn; + let Inst{15-12} = 0b0000; + let Inst{11-5} = shift{11-5}; + let Inst{4} = 0; + let Inst{3-0} = shift{3-0}; + + let Unpredictable{15-12} = 0b1111; + } + def rsr : AI1, + Sched<[WriteCMPsr, ReadALU]> { + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{20} = 1; + let Inst{19-16} = Rn; + let Inst{15-12} = 0b0000; + let Inst{11-8} = shift{11-8}; + let Inst{7} = 0; + let Inst{6-5} = shift{6-5}; + let Inst{4} = 1; + let Inst{3-0} = shift{3-0}; + + let Unpredictable{15-12} = 0b1111; + } + +} +} + +/// AI_ext_rrot - A unary operation with two forms: one whose operand is a +/// register and one whose operand is a register rotated by 8/16/24. +/// FIXME: Remove the 'r' variant. Its rot_imm is zero. +class AI_ext_rrot opcod, string opc, PatFrag opnode> + : AExtI, + Requires<[IsARM, HasV6]>, Sched<[WriteALUsi]> { + bits<4> Rd; + bits<4> Rm; + bits<2> rot; + let Inst{19-16} = 0b1111; + let Inst{15-12} = Rd; + let Inst{11-10} = rot; + let Inst{3-0} = Rm; +} + +class AI_ext_rrot_np opcod, string opc> + : AExtI, + Requires<[IsARM, HasV6]>, Sched<[WriteALUsi]> { + bits<2> rot; + let Inst{19-16} = 0b1111; + let Inst{11-10} = rot; + } + +/// AI_exta_rrot - A binary operation with two forms: one whose operand is a +/// register and one whose operand is a register rotated by 8/16/24. +class AI_exta_rrot opcod, string opc, PatFrag opnode> + : AExtI, + Requires<[IsARM, HasV6]>, Sched<[WriteALUsr]> { + bits<4> Rd; + bits<4> Rm; + bits<4> Rn; + bits<2> rot; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-10} = rot; + let Inst{9-4} = 0b000111; + let Inst{3-0} = Rm; +} + +class AI_exta_rrot_np opcod, string opc> + : AExtI, + Requires<[IsARM, HasV6]>, Sched<[WriteALUsr]> { + bits<4> Rn; + bits<2> rot; + let Inst{19-16} = Rn; + let Inst{11-10} = rot; +} + +/// AI1_adde_sube_irs - Define instructions and patterns for adde and sube. 
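+/// These become ADC and SBC, which consume the incoming carry flag and, in
+/// their flag-setting forms, produce a new one; hence the Uses = [CPSR] and
+/// Defs = [CPSR] below.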
+let TwoOperandAliasConstraint = "$Rn = $Rd" in +multiclass AI1_adde_sube_irs opcod, string opc, SDNode opnode, + bit Commutable = 0> { + let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in { + def ri : AsI1, + Requires<[IsARM]>, + Sched<[WriteALU, ReadALU]> { + bits<4> Rd; + bits<4> Rn; + bits<12> imm; + let Inst{25} = 1; + let Inst{15-12} = Rd; + let Inst{19-16} = Rn; + let Inst{11-0} = imm; + } + def rr : AsI1, + Requires<[IsARM]>, + Sched<[WriteALU, ReadALU, ReadALU]> { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + let Inst{11-4} = 0b00000000; + let Inst{25} = 0; + let isCommutable = Commutable; + let Inst{3-0} = Rm; + let Inst{15-12} = Rd; + let Inst{19-16} = Rn; + } + def rsi : AsI1, + Requires<[IsARM]>, + Sched<[WriteALUsi, ReadALU]> { + bits<4> Rd; + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-5} = shift{11-5}; + let Inst{4} = 0; + let Inst{3-0} = shift{3-0}; + } + def rsr : AsI1, + Requires<[IsARM]>, + Sched<[WriteALUsr, ReadALUsr]> { + bits<4> Rd; + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-8} = shift{11-8}; + let Inst{7} = 0; + let Inst{6-5} = shift{6-5}; + let Inst{4} = 1; + let Inst{3-0} = shift{3-0}; + } + } +} + +/// AI1_rsc_irs - Define instructions and patterns for rsc +let TwoOperandAliasConstraint = "$Rn = $Rd" in +multiclass AI1_rsc_irs opcod, string opc, SDNode opnode> { + let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in { + def ri : AsI1, + Requires<[IsARM]>, + Sched<[WriteALU, ReadALU]> { + bits<4> Rd; + bits<4> Rn; + bits<12> imm; + let Inst{25} = 1; + let Inst{15-12} = Rd; + let Inst{19-16} = Rn; + let Inst{11-0} = imm; + } + def rr : AsI1, + Sched<[WriteALU, ReadALU, ReadALU]> { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + let Inst{11-4} = 0b00000000; + let Inst{25} = 0; + let Inst{3-0} = Rm; + let Inst{15-12} = Rd; + let Inst{19-16} = Rn; + } + def rsi : AsI1, + Requires<[IsARM]>, + Sched<[WriteALUsi, ReadALU]> { + bits<4> Rd; + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-5} = shift{11-5}; + let Inst{4} = 0; + let Inst{3-0} = shift{3-0}; + } + def rsr : AsI1, + Requires<[IsARM]>, + Sched<[WriteALUsr, ReadALUsr]> { + bits<4> Rd; + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-8} = shift{11-8}; + let Inst{7} = 0; + let Inst{6-5} = shift{6-5}; + let Inst{4} = 1; + let Inst{3-0} = shift{3-0}; + } + } +} + +let canFoldAsLoad = 1, isReMaterializable = 1 in { +multiclass AI_ldr1 { + // Note: We use the complex addrmode_imm12 rather than just an input + // GPR and a constrained immediate so that we can use this to match + // frame index references and avoid matching constant pool references. 
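+  // For example "defm LDR : AI_ldr1<...>" later in this file produces the
+  // LDRi12 (immediate-offset) and LDRrs (register-shifted-offset) loads.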
+ def i12: AI2ldst<0b010, 1, isByte, (outs GPR:$Rt), (ins addrmode_imm12:$addr), + AddrMode_i12, LdFrm, iii, opc, "\t$Rt, $addr", + [(set GPR:$Rt, (opnode addrmode_imm12:$addr))]> { + bits<4> Rt; + bits<17> addr; + let Inst{23} = addr{12}; // U (add = ('U' == 1)) + let Inst{19-16} = addr{16-13}; // Rn + let Inst{15-12} = Rt; + let Inst{11-0} = addr{11-0}; // imm12 + } + def rs : AI2ldst<0b011, 1, isByte, (outs GPR:$Rt), (ins ldst_so_reg:$shift), + AddrModeNone, LdFrm, iir, opc, "\t$Rt, $shift", + [(set GPR:$Rt, (opnode ldst_so_reg:$shift))]> { + bits<4> Rt; + bits<17> shift; + let shift{4} = 0; // Inst{4} = 0 + let Inst{23} = shift{12}; // U (add = ('U' == 1)) + let Inst{19-16} = shift{16-13}; // Rn + let Inst{15-12} = Rt; + let Inst{11-0} = shift{11-0}; + } +} +} + +let canFoldAsLoad = 1, isReMaterializable = 1 in { +multiclass AI_ldr1nopc { + // Note: We use the complex addrmode_imm12 rather than just an input + // GPR and a constrained immediate so that we can use this to match + // frame index references and avoid matching constant pool references. + def i12: AI2ldst<0b010, 1, isByte, (outs GPRnopc:$Rt), + (ins addrmode_imm12:$addr), + AddrMode_i12, LdFrm, iii, opc, "\t$Rt, $addr", + [(set GPRnopc:$Rt, (opnode addrmode_imm12:$addr))]> { + bits<4> Rt; + bits<17> addr; + let Inst{23} = addr{12}; // U (add = ('U' == 1)) + let Inst{19-16} = addr{16-13}; // Rn + let Inst{15-12} = Rt; + let Inst{11-0} = addr{11-0}; // imm12 + } + def rs : AI2ldst<0b011, 1, isByte, (outs GPRnopc:$Rt), + (ins ldst_so_reg:$shift), + AddrModeNone, LdFrm, iir, opc, "\t$Rt, $shift", + [(set GPRnopc:$Rt, (opnode ldst_so_reg:$shift))]> { + bits<4> Rt; + bits<17> shift; + let shift{4} = 0; // Inst{4} = 0 + let Inst{23} = shift{12}; // U (add = ('U' == 1)) + let Inst{19-16} = shift{16-13}; // Rn + let Inst{15-12} = Rt; + let Inst{11-0} = shift{11-0}; + } +} +} + + +multiclass AI_str1 { + // Note: We use the complex addrmode_imm12 rather than just an input + // GPR and a constrained immediate so that we can use this to match + // frame index references and avoid matching constant pool references. + def i12 : AI2ldst<0b010, 0, isByte, (outs), + (ins GPR:$Rt, addrmode_imm12:$addr), + AddrMode_i12, StFrm, iii, opc, "\t$Rt, $addr", + [(opnode GPR:$Rt, addrmode_imm12:$addr)]> { + bits<4> Rt; + bits<17> addr; + let Inst{23} = addr{12}; // U (add = ('U' == 1)) + let Inst{19-16} = addr{16-13}; // Rn + let Inst{15-12} = Rt; + let Inst{11-0} = addr{11-0}; // imm12 + } + def rs : AI2ldst<0b011, 0, isByte, (outs), (ins GPR:$Rt, ldst_so_reg:$shift), + AddrModeNone, StFrm, iir, opc, "\t$Rt, $shift", + [(opnode GPR:$Rt, ldst_so_reg:$shift)]> { + bits<4> Rt; + bits<17> shift; + let shift{4} = 0; // Inst{4} = 0 + let Inst{23} = shift{12}; // U (add = ('U' == 1)) + let Inst{19-16} = shift{16-13}; // Rn + let Inst{15-12} = Rt; + let Inst{11-0} = shift{11-0}; + } +} + +multiclass AI_str1nopc { + // Note: We use the complex addrmode_imm12 rather than just an input + // GPR and a constrained immediate so that we can use this to match + // frame index references and avoid matching constant pool references. 
+ def i12 : AI2ldst<0b010, 0, isByte, (outs), + (ins GPRnopc:$Rt, addrmode_imm12:$addr), + AddrMode_i12, StFrm, iii, opc, "\t$Rt, $addr", + [(opnode GPRnopc:$Rt, addrmode_imm12:$addr)]> { + bits<4> Rt; + bits<17> addr; + let Inst{23} = addr{12}; // U (add = ('U' == 1)) + let Inst{19-16} = addr{16-13}; // Rn + let Inst{15-12} = Rt; + let Inst{11-0} = addr{11-0}; // imm12 + } + def rs : AI2ldst<0b011, 0, isByte, (outs), + (ins GPRnopc:$Rt, ldst_so_reg:$shift), + AddrModeNone, StFrm, iir, opc, "\t$Rt, $shift", + [(opnode GPRnopc:$Rt, ldst_so_reg:$shift)]> { + bits<4> Rt; + bits<17> shift; + let shift{4} = 0; // Inst{4} = 0 + let Inst{23} = shift{12}; // U (add = ('U' == 1)) + let Inst{19-16} = shift{16-13}; // Rn + let Inst{15-12} = Rt; + let Inst{11-0} = shift{11-0}; + } +} + + +//===----------------------------------------------------------------------===// +// Instructions +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. +// + +/// CONSTPOOL_ENTRY - This instruction represents a floating constant pool in +/// the function. The first operand is the ID# for this instruction, the second +/// is the index into the MachineConstantPool that this is, the third is the +/// size in bytes of this constant pool entry. +let hasSideEffects = 0, isNotDuplicable = 1 in +def CONSTPOOL_ENTRY : +PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, + i32imm:$size), NoItinerary, []>; + +/// A jumptable consisting of direct 32-bit addresses of the destination basic +/// blocks (either absolute, or relative to the start of the jump-table in PIC +/// mode). Used mostly in ARM and Thumb-1 modes. +def JUMPTABLE_ADDRS : +PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, + i32imm:$size), NoItinerary, []>; + +/// A jumptable consisting of 32-bit jump instructions. Used for Thumb-2 tables +/// that cannot be optimised to use TBB or TBH. +def JUMPTABLE_INSTS : +PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, + i32imm:$size), NoItinerary, []>; + +/// A jumptable consisting of 8-bit unsigned integers representing offsets from +/// a TBB instruction. +def JUMPTABLE_TBB : +PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, + i32imm:$size), NoItinerary, []>; + +/// A jumptable consisting of 16-bit unsigned integers representing offsets from +/// a TBH instruction. +def JUMPTABLE_TBH : +PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, + i32imm:$size), NoItinerary, []>; + + +// FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE +// from removing one half of the matched pairs. That breaks PEI, which assumes +// these will always be in pairs, and asserts if it finds otherwise. Better way? 
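+// ADJCALLSTACKDOWN/ADJCALLSTACKUP bracket every call sequence; Prologue/Epilogue
+// Insertion later folds them into SP adjustments (or deletes them when a
+// reserved call frame is used), which is why it expects them in pairs.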
+let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { +def ADJCALLSTACKUP : +PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary, + [(ARMcallseq_end timm:$amt1, timm:$amt2)]>; + +def ADJCALLSTACKDOWN : +PseudoInst<(outs), (ins i32imm:$amt, i32imm:$amt2, pred:$p), NoItinerary, + [(ARMcallseq_start timm:$amt, timm:$amt2)]>; +} + +def HINT : AI<(outs), (ins imm0_239:$imm), MiscFrm, NoItinerary, + "hint", "\t$imm", [(int_arm_hint imm0_239:$imm)]>, + Requires<[IsARM, HasV6]> { + bits<8> imm; + let Inst{27-8} = 0b00110010000011110000; + let Inst{7-0} = imm; + let DecoderMethod = "DecodeHINTInstruction"; +} + +def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6K]>; +def : InstAlias<"yield$p", (HINT 1, pred:$p)>, Requires<[IsARM, HasV6K]>; +def : InstAlias<"wfe$p", (HINT 2, pred:$p)>, Requires<[IsARM, HasV6K]>; +def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6K]>; +def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6K]>; +def : InstAlias<"sevl$p", (HINT 5, pred:$p)>, Requires<[IsARM, HasV8]>; +def : InstAlias<"esb$p", (HINT 16, pred:$p)>, Requires<[IsARM, HasRAS]>; +def : InstAlias<"csdb$p", (HINT 20, pred:$p)>, Requires<[IsARM, HasV6K]>; + +def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel", + "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (int_arm_sel GPR:$Rn, GPR:$Rm))]>, + Requires<[IsARM, HasV6]> { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + let Inst{3-0} = Rm; + let Inst{15-12} = Rd; + let Inst{19-16} = Rn; + let Inst{27-20} = 0b01101000; + let Inst{7-4} = 0b1011; + let Inst{11-8} = 0b1111; + let Unpredictable{11-8} = 0b1111; +} + +// The 16-bit operand $val can be used by a debugger to store more information +// about the breakpoint. +def BKPT : AInoP<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary, + "bkpt", "\t$val", []>, Requires<[IsARM]> { + bits<16> val; + let Inst{3-0} = val{3-0}; + let Inst{19-8} = val{15-4}; + let Inst{27-20} = 0b00010010; + let Inst{31-28} = 0xe; // AL + let Inst{7-4} = 0b0111; +} +// default immediate for breakpoint mnemonic +def : InstAlias<"bkpt", (BKPT 0), 0>, Requires<[IsARM]>; + +def HLT : AInoP<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary, + "hlt", "\t$val", []>, Requires<[IsARM, HasV8]> { + bits<16> val; + let Inst{3-0} = val{3-0}; + let Inst{19-8} = val{15-4}; + let Inst{27-20} = 0b00010000; + let Inst{31-28} = 0xe; // AL + let Inst{7-4} = 0b0111; +} + +// Change Processor State +// FIXME: We should use InstAlias to handle the optional operands. +class CPS + : AXI<(outs), iops, MiscFrm, NoItinerary, !strconcat("cps", asm_ops), + []>, Requires<[IsARM]> { + bits<2> imod; + bits<3> iflags; + bits<5> mode; + bit M; + + let Inst{31-28} = 0b1111; + let Inst{27-20} = 0b00010000; + let Inst{19-18} = imod; + let Inst{17} = M; // Enabled if mode is set; + let Inst{16-9} = 0b00000000; + let Inst{8-6} = iflags; + let Inst{5} = 0; + let Inst{4-0} = mode; +} + +let DecoderMethod = "DecodeCPSInstruction" in { +let M = 1 in + def CPS3p : CPS<(ins imod_op:$imod, iflags_op:$iflags, imm0_31:$mode), + "$imod\t$iflags, $mode">; +let mode = 0, M = 0 in + def CPS2p : CPS<(ins imod_op:$imod, iflags_op:$iflags), "$imod\t$iflags">; + +let imod = 0, iflags = 0, M = 1 in + def CPS1p : CPS<(ins imm0_31:$mode), "\t$mode">; +} + +// Preload signals the memory system of possible future data/instruction access. 
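+// The multiclass below produces PLD (data, read), PLDW (data, write, requires
+// the MP extension) and PLI (instruction), e.g. "pld [r0, #64]".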
+multiclass APreLoad read, bits<1> data, string opc> { + + def i12 : AXIM<(outs), (ins addrmode_imm12:$addr), AddrMode_i12, MiscFrm, + IIC_Preload, !strconcat(opc, "\t$addr"), + [(ARMPreload addrmode_imm12:$addr, (i32 read), (i32 data))]>, + Sched<[WritePreLd]> { + bits<4> Rt; + bits<17> addr; + let Inst{31-26} = 0b111101; + let Inst{25} = 0; // 0 for immediate form + let Inst{24} = data; + let Inst{23} = addr{12}; // U (add = ('U' == 1)) + let Inst{22} = read; + let Inst{21-20} = 0b01; + let Inst{19-16} = addr{16-13}; // Rn + let Inst{15-12} = 0b1111; + let Inst{11-0} = addr{11-0}; // imm12 + } + + def rs : AXI<(outs), (ins ldst_so_reg:$shift), MiscFrm, IIC_Preload, + !strconcat(opc, "\t$shift"), + [(ARMPreload ldst_so_reg:$shift, (i32 read), (i32 data))]>, + Sched<[WritePreLd]> { + bits<17> shift; + let Inst{31-26} = 0b111101; + let Inst{25} = 1; // 1 for register form + let Inst{24} = data; + let Inst{23} = shift{12}; // U (add = ('U' == 1)) + let Inst{22} = read; + let Inst{21-20} = 0b01; + let Inst{19-16} = shift{16-13}; // Rn + let Inst{15-12} = 0b1111; + let Inst{11-0} = shift{11-0}; + let Inst{4} = 0; + } +} + +defm PLD : APreLoad<1, 1, "pld">, Requires<[IsARM]>; +defm PLDW : APreLoad<0, 1, "pldw">, Requires<[IsARM,HasV7,HasMP]>; +defm PLI : APreLoad<1, 0, "pli">, Requires<[IsARM,HasV7]>; + +def SETEND : AXI<(outs), (ins setend_op:$end), MiscFrm, NoItinerary, + "setend\t$end", []>, Requires<[IsARM]>, Deprecated { + bits<1> end; + let Inst{31-10} = 0b1111000100000001000000; + let Inst{9} = end; + let Inst{8-0} = 0; +} + +def DBG : AI<(outs), (ins imm0_15:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt", + [(int_arm_dbg imm0_15:$opt)]>, Requires<[IsARM, HasV7]> { + bits<4> opt; + let Inst{27-4} = 0b001100100000111100001111; + let Inst{3-0} = opt; +} + +// A8.8.247 UDF - Undefined (Encoding A1) +def UDF : AInoP<(outs), (ins imm0_65535:$imm16), MiscFrm, NoItinerary, + "udf", "\t$imm16", [(int_arm_undefined imm0_65535:$imm16)]> { + bits<16> imm16; + let Inst{31-28} = 0b1110; // AL + let Inst{27-25} = 0b011; + let Inst{24-20} = 0b11111; + let Inst{19-8} = imm16{15-4}; + let Inst{7-4} = 0b1111; + let Inst{3-0} = imm16{3-0}; +} + +/* + * A5.4 Permanently UNDEFINED instructions. + * + * For most targets use UDF #65006, for which the OS will generate SIGTRAP. + * Other UDF encodings generate SIGILL. + * + * NaCl's OS instead chooses an ARM UDF encoding that's also a UDF in Thumb. + * Encoding A1: + * 1110 0111 1111 iiii iiii iiii 1111 iiii + * Encoding T1: + * 1101 1110 iiii iiii + * It uses the following encoding: + * 1110 0111 1111 1110 1101 1110 1111 0000 + * - In ARM: UDF #60896; + * - In Thumb: UDF #254 followed by a branch-to-self. + */ +let isBarrier = 1, isTerminator = 1 in +def TRAPNaCl : AXI<(outs), (ins), MiscFrm, NoItinerary, + "trap", [(trap)]>, + Requires<[IsARM,UseNaClTrap]> { + let Inst = 0xe7fedef0; +} +let isBarrier = 1, isTerminator = 1 in +def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary, + "trap", [(trap)]>, + Requires<[IsARM,DontUseNaClTrap]> { + let Inst = 0xe7ffdefe; +} + +// Address computation and loads and stores in PIC mode. 
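+// PICADD adds the PC to a previously materialised constant-pool offset; the
+// PICLDR*/PICSTR* pseudos instead fold that add into a pc-relative
+// load/store via addrmodepc.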
+let isNotDuplicable = 1 in { +def PICADD : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p), + 4, IIC_iALUr, + [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>, + Sched<[WriteALU, ReadALU]>; + +let AddedComplexity = 10 in { +def PICLDR : ARMPseudoInst<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), + 4, IIC_iLoad_r, + [(set GPR:$dst, (load addrmodepc:$addr))]>; + +def PICLDRH : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p), + 4, IIC_iLoad_bh_r, + [(set GPR:$Rt, (zextloadi16 addrmodepc:$addr))]>; + +def PICLDRB : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p), + 4, IIC_iLoad_bh_r, + [(set GPR:$Rt, (zextloadi8 addrmodepc:$addr))]>; + +def PICLDRSH : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p), + 4, IIC_iLoad_bh_r, + [(set GPR:$Rt, (sextloadi16 addrmodepc:$addr))]>; + +def PICLDRSB : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p), + 4, IIC_iLoad_bh_r, + [(set GPR:$Rt, (sextloadi8 addrmodepc:$addr))]>; +} +let AddedComplexity = 10 in { +def PICSTR : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), + 4, IIC_iStore_r, [(store GPR:$src, addrmodepc:$addr)]>; + +def PICSTRH : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), + 4, IIC_iStore_bh_r, [(truncstorei16 GPR:$src, + addrmodepc:$addr)]>; + +def PICSTRB : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), + 4, IIC_iStore_bh_r, [(truncstorei8 GPR:$src, addrmodepc:$addr)]>; +} +} // isNotDuplicable = 1 + + +// LEApcrel - Load a pc-relative address into a register without offending the +// assembler. +let hasSideEffects = 0, isReMaterializable = 1 in +// The 'adr' mnemonic encodes differently if the label is before or after +// the instruction. The {24-21} opcode bits are set by the fixup, as we don't +// know until then which form of the instruction will be used. +def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label), + MiscFrm, IIC_iALUi, "adr", "\t$Rd, $label", []>, + Sched<[WriteALU, ReadALU]> { + bits<4> Rd; + bits<14> label; + let Inst{27-25} = 0b001; + let Inst{24} = 0; + let Inst{23-22} = label{13-12}; + let Inst{21} = 0; + let Inst{20} = 0; + let Inst{19-16} = 0b1111; + let Inst{15-12} = Rd; + let Inst{11-0} = label{11-0}; +} + +let hasSideEffects = 1 in { +def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p), + 4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>; + +def LEApcrelJT : ARMPseudoInst<(outs GPR:$Rd), + (ins i32imm:$label, pred:$p), + 4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>; +} + +//===----------------------------------------------------------------------===// +// Control Flow Instructions. +// + +let isReturn = 1, isTerminator = 1, isBarrier = 1 in { + // ARMV4T and above + def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br, + "bx", "\tlr", [(ARMretflag)]>, + Requires<[IsARM, HasV4T]>, Sched<[WriteBr]> { + let Inst{27-0} = 0b0001001011111111111100011110; + } + + // ARMV4 only + def MOVPCLR : AI<(outs), (ins), BrMiscFrm, IIC_Br, + "mov", "\tpc, lr", [(ARMretflag)]>, + Requires<[IsARM, NoV4T]>, Sched<[WriteBr]> { + let Inst{27-0} = 0b0001101000001111000000001110; + } + + // Exception return: N.b. doesn't set CPSR as far as we're concerned (it sets + // the user-space one). 
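+  // Expands to "subs pc, lr, #offset", which also restores CPSR from SPSR when
+  // executed in an exception mode.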
+ def SUBS_PC_LR : ARMPseudoInst<(outs), (ins i32imm:$offset, pred:$p), + 4, IIC_Br, + [(ARMintretflag imm:$offset)]>; +} + +// Indirect branches +let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { + // ARMV4T and above + def BX : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "bx\t$dst", + [(brind GPR:$dst)]>, + Requires<[IsARM, HasV4T]>, Sched<[WriteBr]> { + bits<4> dst; + let Inst{31-4} = 0b1110000100101111111111110001; + let Inst{3-0} = dst; + } + + def BX_pred : AI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, + "bx", "\t$dst", [/* pattern left blank */]>, + Requires<[IsARM, HasV4T]>, Sched<[WriteBr]> { + bits<4> dst; + let Inst{27-4} = 0b000100101111111111110001; + let Inst{3-0} = dst; + } +} + +// SP is marked as a use to prevent stack-pointer assignments that appear +// immediately before calls from potentially appearing dead. +let isCall = 1, + // FIXME: Do we really need a non-predicated version? If so, it should + // at least be a pseudo instruction expanding to the predicated version + // at MC lowering time. + Defs = [LR], Uses = [SP] in { + def BL : ABXI<0b1011, (outs), (ins arm_bl_target:$func), + IIC_Br, "bl\t$func", + [(ARMcall tglobaladdr:$func)]>, + Requires<[IsARM]>, Sched<[WriteBrL]> { + let Inst{31-28} = 0b1110; + bits<24> func; + let Inst{23-0} = func; + let DecoderMethod = "DecodeBranchImmInstruction"; + } + + def BL_pred : ABI<0b1011, (outs), (ins arm_bl_target:$func), + IIC_Br, "bl", "\t$func", + [(ARMcall_pred tglobaladdr:$func)]>, + Requires<[IsARM]>, Sched<[WriteBrL]> { + bits<24> func; + let Inst{23-0} = func; + let DecoderMethod = "DecodeBranchImmInstruction"; + } + + // ARMv5T and above + def BLX : AXI<(outs), (ins GPR:$func), BrMiscFrm, + IIC_Br, "blx\t$func", + [(ARMcall GPR:$func)]>, + Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> { + bits<4> func; + let Inst{31-4} = 0b1110000100101111111111110011; + let Inst{3-0} = func; + } + + def BLX_pred : AI<(outs), (ins GPR:$func), BrMiscFrm, + IIC_Br, "blx", "\t$func", + [(ARMcall_pred GPR:$func)]>, + Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> { + bits<4> func; + let Inst{27-4} = 0b000100101111111111110011; + let Inst{3-0} = func; + } + + // ARMv4T + // Note: Restrict $func to the tGPR regclass to prevent it being in LR. + def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func), + 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, + Requires<[IsARM, HasV4T]>, Sched<[WriteBr]>; + + // ARMv4 + def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func), + 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, + Requires<[IsARM, NoV4T]>, Sched<[WriteBr]>; + + // mov lr, pc; b if callee is marked noreturn to avoid confusing the + // return stack predictor. + def BMOVPCB_CALL : ARMPseudoInst<(outs), (ins arm_bl_target:$func), + 8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, + Requires<[IsARM]>, Sched<[WriteBr]>; +} + +let isBranch = 1, isTerminator = 1 in { + // FIXME: should be able to write a pattern for ARMBrcond, but can't use + // a two-value operand where a dag node expects two operands. :( + def Bcc : ABI<0b1010, (outs), (ins arm_br_target:$target), + IIC_Br, "b", "\t$target", + [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]>, + Sched<[WriteBr]> { + bits<24> target; + let Inst{23-0} = target; + let DecoderMethod = "DecodeBranchImmInstruction"; + } + + let isBarrier = 1 in { + // B is "predicable" since it's just a Bcc with an 'always' condition. + let isPredicable = 1 in + // FIXME: We shouldn't need this pseudo at all. Just using Bcc directly + // should be sufficient. + // FIXME: Is B really a Barrier? 
That doesn't seem right. + def B : ARMPseudoExpand<(outs), (ins arm_br_target:$target), 4, IIC_Br, + [(br bb:$target)], (Bcc arm_br_target:$target, + (ops 14, zero_reg))>, + Sched<[WriteBr]>; + + let Size = 4, isNotDuplicable = 1, isIndirectBranch = 1 in { + def BR_JTr : ARMPseudoInst<(outs), + (ins GPR:$target, i32imm:$jt), + 0, IIC_Br, + [(ARMbrjt GPR:$target, tjumptable:$jt)]>, + Sched<[WriteBr]>; + def BR_JTm_i12 : ARMPseudoInst<(outs), + (ins addrmode_imm12:$target, i32imm:$jt), + 0, IIC_Br, + [(ARMbrjt (i32 (load addrmode_imm12:$target)), + tjumptable:$jt)]>, Sched<[WriteBrTbl]>; + def BR_JTm_rs : ARMPseudoInst<(outs), + (ins ldst_so_reg:$target, i32imm:$jt), + 0, IIC_Br, + [(ARMbrjt (i32 (load ldst_so_reg:$target)), + tjumptable:$jt)]>, Sched<[WriteBrTbl]>; + def BR_JTadd : ARMPseudoInst<(outs), + (ins GPR:$target, GPR:$idx, i32imm:$jt), + 0, IIC_Br, + [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt)]>, + Sched<[WriteBrTbl]>; + } // isNotDuplicable = 1, isIndirectBranch = 1 + } // isBarrier = 1 + +} + +// BLX (immediate) +def BLXi : AXI<(outs), (ins arm_blx_target:$target), BrMiscFrm, NoItinerary, + "blx\t$target", []>, + Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> { + let Inst{31-25} = 0b1111101; + bits<25> target; + let Inst{23-0} = target{24-1}; + let Inst{24} = target{0}; + let isCall = 1; +} + +// Branch and Exchange Jazelle +def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", + [/* pattern left blank */]>, Sched<[WriteBr]> { + bits<4> func; + let Inst{23-20} = 0b0010; + let Inst{19-8} = 0xfff; + let Inst{7-4} = 0b0010; + let Inst{3-0} = func; + let isBranch = 1; +} + +// Tail calls. + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in { + def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst), IIC_Br, []>, + Sched<[WriteBr]>; + + def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst), IIC_Br, []>, + Sched<[WriteBr]>; + + def TAILJMPd : ARMPseudoExpand<(outs), (ins arm_br_target:$dst), + 4, IIC_Br, [], + (Bcc arm_br_target:$dst, (ops 14, zero_reg))>, + Requires<[IsARM]>, Sched<[WriteBr]>; + + def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst), + 4, IIC_Br, [], + (BX GPR:$dst)>, Sched<[WriteBr]>, + Requires<[IsARM, HasV4T]>; +} + +// Secure Monitor Call is a system instruction. 
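+// SMC #imm traps to the Secure Monitor (Monitor mode / EL3 when the
+// Security Extensions are implemented). The 4-bit immediate is ignored by
+// the hardware and is only meaningful to the monitor software; "smi" is the
+// pre-UAL spelling, handled by the MnemonicAlias below.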
+def SMC : ABI<0b0001, (outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", + []>, Requires<[IsARM, HasTrustZone]> { + bits<4> opt; + let Inst{23-4} = 0b01100000000000000111; + let Inst{3-0} = opt; +} +def : MnemonicAlias<"smi", "smc">; + +// Supervisor Call (Software Interrupt) +let isCall = 1, Uses = [SP] in { +def SVC : ABI<0b1111, (outs), (ins imm24b:$svc), IIC_Br, "svc", "\t$svc", []>, + Sched<[WriteBr]> { + bits<24> svc; + let Inst{23-0} = svc; +} +} + +// Store Return State +class SRSI + : XI<(outs), (ins imm0_31:$mode), AddrModeNone, 4, IndexModeNone, BrFrm, + NoItinerary, asm, "", []> { + bits<5> mode; + let Inst{31-28} = 0b1111; + let Inst{27-25} = 0b100; + let Inst{22} = 1; + let Inst{21} = wb; + let Inst{20} = 0; + let Inst{19-16} = 0b1101; // SP + let Inst{15-5} = 0b00000101000; + let Inst{4-0} = mode; +} + +def SRSDA : SRSI<0, "srsda\tsp, $mode"> { + let Inst{24-23} = 0; +} +def SRSDA_UPD : SRSI<1, "srsda\tsp!, $mode"> { + let Inst{24-23} = 0; +} +def SRSDB : SRSI<0, "srsdb\tsp, $mode"> { + let Inst{24-23} = 0b10; +} +def SRSDB_UPD : SRSI<1, "srsdb\tsp!, $mode"> { + let Inst{24-23} = 0b10; +} +def SRSIA : SRSI<0, "srsia\tsp, $mode"> { + let Inst{24-23} = 0b01; +} +def SRSIA_UPD : SRSI<1, "srsia\tsp!, $mode"> { + let Inst{24-23} = 0b01; +} +def SRSIB : SRSI<0, "srsib\tsp, $mode"> { + let Inst{24-23} = 0b11; +} +def SRSIB_UPD : SRSI<1, "srsib\tsp!, $mode"> { + let Inst{24-23} = 0b11; +} + +def : ARMInstAlias<"srsda $mode", (SRSDA imm0_31:$mode)>; +def : ARMInstAlias<"srsda $mode!", (SRSDA_UPD imm0_31:$mode)>; + +def : ARMInstAlias<"srsdb $mode", (SRSDB imm0_31:$mode)>; +def : ARMInstAlias<"srsdb $mode!", (SRSDB_UPD imm0_31:$mode)>; + +def : ARMInstAlias<"srsia $mode", (SRSIA imm0_31:$mode)>; +def : ARMInstAlias<"srsia $mode!", (SRSIA_UPD imm0_31:$mode)>; + +def : ARMInstAlias<"srsib $mode", (SRSIB imm0_31:$mode)>; +def : ARMInstAlias<"srsib $mode!", (SRSIB_UPD imm0_31:$mode)>; + +// Return From Exception +class RFEI + : XI<(outs), (ins GPR:$Rn), AddrModeNone, 4, IndexModeNone, BrFrm, + NoItinerary, asm, "", []> { + bits<4> Rn; + let Inst{31-28} = 0b1111; + let Inst{27-25} = 0b100; + let Inst{22} = 0; + let Inst{21} = wb; + let Inst{20} = 1; + let Inst{19-16} = Rn; + let Inst{15-0} = 0xa00; +} + +def RFEDA : RFEI<0, "rfeda\t$Rn"> { + let Inst{24-23} = 0; +} +def RFEDA_UPD : RFEI<1, "rfeda\t$Rn!"> { + let Inst{24-23} = 0; +} +def RFEDB : RFEI<0, "rfedb\t$Rn"> { + let Inst{24-23} = 0b10; +} +def RFEDB_UPD : RFEI<1, "rfedb\t$Rn!"> { + let Inst{24-23} = 0b10; +} +def RFEIA : RFEI<0, "rfeia\t$Rn"> { + let Inst{24-23} = 0b01; +} +def RFEIA_UPD : RFEI<1, "rfeia\t$Rn!"> { + let Inst{24-23} = 0b01; +} +def RFEIB : RFEI<0, "rfeib\t$Rn"> { + let Inst{24-23} = 0b11; +} +def RFEIB_UPD : RFEI<1, "rfeib\t$Rn!"> { + let Inst{24-23} = 0b11; +} + +// Hypervisor Call is a system instruction +let isCall = 1 in { +def HVC : AInoP< (outs), (ins imm0_65535:$imm), BrFrm, NoItinerary, + "hvc", "\t$imm", []>, + Requires<[IsARM, HasVirtualization]> { + bits<16> imm; + + // Even though HVC isn't predicable, it's encoding includes a condition field. + // The instruction is undefined if the condition field is 0xf otherwise it is + // unpredictable if it isn't condition AL (0xe). + let Inst{31-28} = 0b1110; + let Unpredictable{31-28} = 0b1111; + let Inst{27-24} = 0b0001; + let Inst{23-20} = 0b0100; + let Inst{19-8} = imm{15-4}; + let Inst{7-4} = 0b0111; + let Inst{3-0} = imm{3-0}; +} +} + +// Return from exception in Hypervisor mode. 
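+// ERET restores the state saved on entry to Hyp mode (PC from ELR_hyp,
+// CPSR from SPSR_hyp), so it is modelled as a return/terminator/barrier
+// that defines PC and takes no operands.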
+let isReturn = 1, isBarrier = 1, isTerminator = 1, Defs = [PC] in +def ERET : ABI<0b0001, (outs), (ins), NoItinerary, "eret", "", []>, + Requires<[IsARM, HasVirtualization]> { + let Inst{23-0} = 0b011000000000000001101110; +} + +//===----------------------------------------------------------------------===// +// Load / Store Instructions. +// + +// Load + + +defm LDR : AI_ldr1<0, "ldr", IIC_iLoad_r, IIC_iLoad_si, load>; +defm LDRB : AI_ldr1nopc<1, "ldrb", IIC_iLoad_bh_r, IIC_iLoad_bh_si, + zextloadi8>; +defm STR : AI_str1<0, "str", IIC_iStore_r, IIC_iStore_si, store>; +defm STRB : AI_str1nopc<1, "strb", IIC_iStore_bh_r, IIC_iStore_bh_si, + truncstorei8>; + +// Special LDR for loads from non-pc-relative constpools. +let canFoldAsLoad = 1, mayLoad = 1, hasSideEffects = 0, + isReMaterializable = 1, isCodeGenOnly = 1 in +def LDRcp : AI2ldst<0b010, 1, 0, (outs GPR:$Rt), (ins addrmode_imm12:$addr), + AddrMode_i12, LdFrm, IIC_iLoad_r, "ldr", "\t$Rt, $addr", + []> { + bits<4> Rt; + bits<17> addr; + let Inst{23} = addr{12}; // U (add = ('U' == 1)) + let Inst{19-16} = 0b1111; + let Inst{15-12} = Rt; + let Inst{11-0} = addr{11-0}; // imm12 +} + +// Loads with zero extension +def LDRH : AI3ld<0b1011, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm, + IIC_iLoad_bh_r, "ldrh", "\t$Rt, $addr", + [(set GPR:$Rt, (zextloadi16 addrmode3:$addr))]>; + +// Loads with sign extension +def LDRSH : AI3ld<0b1111, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm, + IIC_iLoad_bh_r, "ldrsh", "\t$Rt, $addr", + [(set GPR:$Rt, (sextloadi16 addrmode3:$addr))]>; + +def LDRSB : AI3ld<0b1101, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm, + IIC_iLoad_bh_r, "ldrsb", "\t$Rt, $addr", + [(set GPR:$Rt, (sextloadi8 addrmode3:$addr))]>; + +let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { + // Load doubleword + def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rt, GPR:$Rt2), (ins addrmode3:$addr), + LdMiscFrm, IIC_iLoad_d_r, "ldrd", "\t$Rt, $Rt2, $addr", []>, + Requires<[IsARM, HasV5TE]>; +} + +def LDA : AIldracq<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "lda", "\t$Rt, $addr", []>; +def LDAB : AIldracq<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "ldab", "\t$Rt, $addr", []>; +def LDAH : AIldracq<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "ldah", "\t$Rt, $addr", []>; + +// Indexed loads +multiclass AI2_ldridx { + def _PRE_IMM : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addrmode_imm12_pre:$addr), IndexModePre, LdFrm, iii, + opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { + bits<17> addr; + let Inst{25} = 0; + let Inst{23} = addr{12}; + let Inst{19-16} = addr{16-13}; + let Inst{11-0} = addr{11-0}; + let DecoderMethod = "DecodeLDRPreImm"; + } + + def _PRE_REG : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb), + (ins ldst_so_reg:$addr), IndexModePre, LdFrm, iir, + opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { + bits<17> addr; + let Inst{25} = 1; + let Inst{23} = addr{12}; + let Inst{19-16} = addr{16-13}; + let Inst{11-0} = addr{11-0}; + let Inst{4} = 0; + let DecoderMethod = "DecodeLDRPreReg"; + } + + def _POST_REG : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$addr, am2offset_reg:$offset), + IndexModePost, LdFrm, iir, + opc, "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb", []> { + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> addr; + let Inst{25} = 1; + let Inst{23} = offset{12}; + let Inst{19-16} = addr; + let Inst{11-0} = offset{11-0}; + let Inst{4} = 0; + + let 
DecoderMethod = "DecodeAddrMode2IdxInstruction"; + } + + def _POST_IMM : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$addr, am2offset_imm:$offset), + IndexModePost, LdFrm, iii, + opc, "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb", []> { + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> addr; + let Inst{25} = 0; + let Inst{23} = offset{12}; + let Inst{19-16} = addr; + let Inst{11-0} = offset{11-0}; + + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; + } + +} + +let mayLoad = 1, hasSideEffects = 0 in { +// FIXME: for LDR_PRE_REG etc. the itineray should be either IIC_iLoad_ru or +// IIC_iLoad_siu depending on whether it the offset register is shifted. +defm LDR : AI2_ldridx<0, "ldr", IIC_iLoad_iu, IIC_iLoad_ru>; +defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_iu, IIC_iLoad_bh_ru>; +} + +multiclass AI3_ldridx op, string opc, InstrItinClass itin> { + def _PRE : AI3ldstidx { + bits<14> addr; + let Inst{23} = addr{8}; // U bit + let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm + let Inst{19-16} = addr{12-9}; // Rn + let Inst{11-8} = addr{7-4}; // imm7_4/zero + let Inst{3-0} = addr{3-0}; // imm3_0/Rm + let DecoderMethod = "DecodeAddrMode3Instruction"; + } + def _POST : AI3ldstidx { + bits<10> offset; + bits<4> addr; + let Inst{23} = offset{8}; // U bit + let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm + let Inst{19-16} = addr; + let Inst{11-8} = offset{7-4}; // imm7_4/zero + let Inst{3-0} = offset{3-0}; // imm3_0/Rm + let DecoderMethod = "DecodeAddrMode3Instruction"; + } +} + +let mayLoad = 1, hasSideEffects = 0 in { +defm LDRH : AI3_ldridx<0b1011, "ldrh", IIC_iLoad_bh_ru>; +defm LDRSH : AI3_ldridx<0b1111, "ldrsh", IIC_iLoad_bh_ru>; +defm LDRSB : AI3_ldridx<0b1101, "ldrsb", IIC_iLoad_bh_ru>; +let hasExtraDefRegAllocReq = 1 in { +def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb), + (ins addrmode3_pre:$addr), IndexModePre, + LdMiscFrm, IIC_iLoad_d_ru, + "ldrd", "\t$Rt, $Rt2, $addr!", + "$addr.base = $Rn_wb", []> { + bits<14> addr; + let Inst{23} = addr{8}; // U bit + let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm + let Inst{19-16} = addr{12-9}; // Rn + let Inst{11-8} = addr{7-4}; // imm7_4/zero + let Inst{3-0} = addr{3-0}; // imm3_0/Rm + let DecoderMethod = "DecodeAddrMode3Instruction"; +} +def LDRD_POST: AI3ldstidx<0b1101, 0, 0, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb), + (ins addr_offset_none:$addr, am3offset:$offset), + IndexModePost, LdMiscFrm, IIC_iLoad_d_ru, + "ldrd", "\t$Rt, $Rt2, $addr, $offset", + "$addr.base = $Rn_wb", []> { + bits<10> offset; + bits<4> addr; + let Inst{23} = offset{8}; // U bit + let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm + let Inst{19-16} = addr; + let Inst{11-8} = offset{7-4}; // imm7_4/zero + let Inst{3-0} = offset{3-0}; // imm3_0/Rm + let DecoderMethod = "DecodeAddrMode3Instruction"; +} +} // hasExtraDefRegAllocReq = 1 +} // mayLoad = 1, hasSideEffects = 0 + +// LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT. 
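+// These are the unprivileged ("user-mode") loads: executed from a
+// privileged mode they are checked against user permissions, which is why
+// they only come in post-indexed form and set Inst{21} (the W bit) on top
+// of P=0. For example, "ldrt r0, [r1], #4" performs a user-mode load
+// through r1 and then writes r1+4 back to r1.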
+let mayLoad = 1, hasSideEffects = 0 in { +def LDRT_POST_REG : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$addr, am2offset_reg:$offset), + IndexModePost, LdFrm, IIC_iLoad_ru, + "ldrt", "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb", []> { + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> addr; + let Inst{25} = 1; + let Inst{23} = offset{12}; + let Inst{21} = 1; // overwrite + let Inst{19-16} = addr; + let Inst{11-5} = offset{11-5}; + let Inst{4} = 0; + let Inst{3-0} = offset{3-0}; + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; +} + +def LDRT_POST_IMM + : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$addr, am2offset_imm:$offset), + IndexModePost, LdFrm, IIC_iLoad_ru, + "ldrt", "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", []> { + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> addr; + let Inst{25} = 0; + let Inst{23} = offset{12}; + let Inst{21} = 1; // overwrite + let Inst{19-16} = addr; + let Inst{11-0} = offset{11-0}; + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; +} + +def LDRBT_POST_REG : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$addr, am2offset_reg:$offset), + IndexModePost, LdFrm, IIC_iLoad_bh_ru, + "ldrbt", "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb", []> { + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> addr; + let Inst{25} = 1; + let Inst{23} = offset{12}; + let Inst{21} = 1; // overwrite + let Inst{19-16} = addr; + let Inst{11-5} = offset{11-5}; + let Inst{4} = 0; + let Inst{3-0} = offset{3-0}; + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; +} + +def LDRBT_POST_IMM + : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$addr, am2offset_imm:$offset), + IndexModePost, LdFrm, IIC_iLoad_bh_ru, + "ldrbt", "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", []> { + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> addr; + let Inst{25} = 0; + let Inst{23} = offset{12}; + let Inst{21} = 1; // overwrite + let Inst{19-16} = addr; + let Inst{11-0} = offset{11-0}; + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; +} + +multiclass AI3ldrT op, string opc> { + def i : AI3ldstidxT { + bits<9> offset; + let Inst{23} = offset{8}; + let Inst{22} = 1; + let Inst{11-8} = offset{7-4}; + let Inst{3-0} = offset{3-0}; + } + def r : AI3ldstidxT { + bits<5> Rm; + let Inst{23} = Rm{4}; + let Inst{22} = 0; + let Inst{11-8} = 0; + let Unpredictable{11-8} = 0b1111; + let Inst{3-0} = Rm{3-0}; + let DecoderMethod = "DecodeLDR"; + } +} + +defm LDRSBT : AI3ldrT<0b1101, "ldrsbt">; +defm LDRHT : AI3ldrT<0b1011, "ldrht">; +defm LDRSHT : AI3ldrT<0b1111, "ldrsht">; +} + +def LDRT_POST + : ARMAsmPseudo<"ldrt${q} $Rt, $addr", (ins addr_offset_none:$addr, pred:$q), + (outs GPR:$Rt)>; + +def LDRBT_POST + : ARMAsmPseudo<"ldrbt${q} $Rt, $addr", (ins addr_offset_none:$addr, pred:$q), + (outs GPR:$Rt)>; + +// Pseudo instruction ldr Rt, =immediate +def LDRConstPool + : ARMAsmPseudo<"ldr${q} $Rt, $immediate", + (ins const_pool_asm_imm:$immediate, pred:$q), + (outs GPR:$Rt)>; + +// Store + +// Stores with truncate +def STRH : AI3str<0b1011, (outs), (ins GPR:$Rt, addrmode3:$addr), StMiscFrm, + IIC_iStore_bh_r, "strh", "\t$Rt, $addr", + [(truncstorei16 GPR:$Rt, addrmode3:$addr)]>; + +// Store doubleword +let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in { + def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$Rt2, addrmode3:$addr), + StMiscFrm, IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", []>, + 
Requires<[IsARM, HasV5TE]> { + let Inst{21} = 0; + } +} + +// Indexed stores +multiclass AI2_stridx { + def _PRE_IMM : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb), + (ins GPR:$Rt, addrmode_imm12_pre:$addr), IndexModePre, + StFrm, iii, + opc, "\t$Rt, $addr!", + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { + bits<17> addr; + let Inst{25} = 0; + let Inst{23} = addr{12}; // U (add = ('U' == 1)) + let Inst{19-16} = addr{16-13}; // Rn + let Inst{11-0} = addr{11-0}; // imm12 + let DecoderMethod = "DecodeSTRPreImm"; + } + + def _PRE_REG : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb), + (ins GPR:$Rt, ldst_so_reg:$addr), + IndexModePre, StFrm, iir, + opc, "\t$Rt, $addr!", + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { + bits<17> addr; + let Inst{25} = 1; + let Inst{23} = addr{12}; // U (add = ('U' == 1)) + let Inst{19-16} = addr{16-13}; // Rn + let Inst{11-0} = addr{11-0}; + let Inst{4} = 0; // Inst{4} = 0 + let DecoderMethod = "DecodeSTRPreReg"; + } + def _POST_REG : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset), + IndexModePost, StFrm, iir, + opc, "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> addr; + let Inst{25} = 1; + let Inst{23} = offset{12}; + let Inst{19-16} = addr; + let Inst{11-0} = offset{11-0}; + let Inst{4} = 0; + + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; + } + + def _POST_IMM : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset), + IndexModePost, StFrm, iii, + opc, "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> addr; + let Inst{25} = 0; + let Inst{23} = offset{12}; + let Inst{19-16} = addr; + let Inst{11-0} = offset{11-0}; + + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; + } +} + +let mayStore = 1, hasSideEffects = 0 in { +// FIXME: for STR_PRE_REG etc. the itineray should be either IIC_iStore_ru or +// IIC_iStore_siu depending on whether it the offset register is shifted. +defm STR : AI2_stridx<0, "str", IIC_iStore_iu, IIC_iStore_ru>; +defm STRB : AI2_stridx<1, "strb", IIC_iStore_bh_iu, IIC_iStore_bh_ru>; +} + +def : ARMPat<(post_store GPR:$Rt, addr_offset_none:$addr, + am2offset_reg:$offset), + (STR_POST_REG GPR:$Rt, addr_offset_none:$addr, + am2offset_reg:$offset)>; +def : ARMPat<(post_store GPR:$Rt, addr_offset_none:$addr, + am2offset_imm:$offset), + (STR_POST_IMM GPR:$Rt, addr_offset_none:$addr, + am2offset_imm:$offset)>; +def : ARMPat<(post_truncsti8 GPR:$Rt, addr_offset_none:$addr, + am2offset_reg:$offset), + (STRB_POST_REG GPR:$Rt, addr_offset_none:$addr, + am2offset_reg:$offset)>; +def : ARMPat<(post_truncsti8 GPR:$Rt, addr_offset_none:$addr, + am2offset_imm:$offset), + (STRB_POST_IMM GPR:$Rt, addr_offset_none:$addr, + am2offset_imm:$offset)>; + +// Pseudo-instructions for pattern matching the pre-indexed stores. We can't +// put the patterns on the instruction definitions directly as ISel wants +// the address base and offset to be separate operands, not a single +// complex operand like we represent the instructions themselves. The +// pseudos map between the two. 
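+// Rough shape of the mapping: ISel matches, e.g.,
+//   (pre_store GPR:$Rt, GPR:$Rn, am2offset_imm:$offset)
+// onto STRi_preidx, and the custom inserter then rewrites the pseudo into
+// the real pre-indexed store, folding $Rn and $offset back into the single
+// complex address operand the hardware instruction expects.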
+let usesCustomInserter = 1, + Constraints = "$Rn = $Rn_wb,@earlyclobber $Rn_wb" in { +def STRi_preidx: ARMPseudoInst<(outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rn, am2offset_imm:$offset, pred:$p), + 4, IIC_iStore_ru, + [(set GPR:$Rn_wb, + (pre_store GPR:$Rt, GPR:$Rn, am2offset_imm:$offset))]>; +def STRr_preidx: ARMPseudoInst<(outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rn, am2offset_reg:$offset, pred:$p), + 4, IIC_iStore_ru, + [(set GPR:$Rn_wb, + (pre_store GPR:$Rt, GPR:$Rn, am2offset_reg:$offset))]>; +def STRBi_preidx: ARMPseudoInst<(outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rn, am2offset_imm:$offset, pred:$p), + 4, IIC_iStore_ru, + [(set GPR:$Rn_wb, + (pre_truncsti8 GPR:$Rt, GPR:$Rn, am2offset_imm:$offset))]>; +def STRBr_preidx: ARMPseudoInst<(outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rn, am2offset_reg:$offset, pred:$p), + 4, IIC_iStore_ru, + [(set GPR:$Rn_wb, + (pre_truncsti8 GPR:$Rt, GPR:$Rn, am2offset_reg:$offset))]>; +def STRH_preidx: ARMPseudoInst<(outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rn, am3offset:$offset, pred:$p), + 4, IIC_iStore_ru, + [(set GPR:$Rn_wb, + (pre_truncsti16 GPR:$Rt, GPR:$Rn, am3offset:$offset))]>; +} + + + +def STRH_PRE : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb), + (ins GPR:$Rt, addrmode3_pre:$addr), IndexModePre, + StMiscFrm, IIC_iStore_bh_ru, + "strh", "\t$Rt, $addr!", + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { + bits<14> addr; + let Inst{23} = addr{8}; // U bit + let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm + let Inst{19-16} = addr{12-9}; // Rn + let Inst{11-8} = addr{7-4}; // imm7_4/zero + let Inst{3-0} = addr{3-0}; // imm3_0/Rm + let DecoderMethod = "DecodeAddrMode3Instruction"; +} + +def STRH_POST : AI3ldstidx<0b1011, 0, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, addr_offset_none:$addr, am3offset:$offset), + IndexModePost, StMiscFrm, IIC_iStore_bh_ru, + "strh", "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", + [(set GPR:$Rn_wb, (post_truncsti16 GPR:$Rt, + addr_offset_none:$addr, + am3offset:$offset))]> { + bits<10> offset; + bits<4> addr; + let Inst{23} = offset{8}; // U bit + let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm + let Inst{19-16} = addr; + let Inst{11-8} = offset{7-4}; // imm7_4/zero + let Inst{3-0} = offset{3-0}; // imm3_0/Rm + let DecoderMethod = "DecodeAddrMode3Instruction"; +} + +let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in { +def STRD_PRE : AI3ldstidx<0b1111, 0, 1, (outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rt2, addrmode3_pre:$addr), + IndexModePre, StMiscFrm, IIC_iStore_d_ru, + "strd", "\t$Rt, $Rt2, $addr!", + "$addr.base = $Rn_wb", []> { + bits<14> addr; + let Inst{23} = addr{8}; // U bit + let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm + let Inst{19-16} = addr{12-9}; // Rn + let Inst{11-8} = addr{7-4}; // imm7_4/zero + let Inst{3-0} = addr{3-0}; // imm3_0/Rm + let DecoderMethod = "DecodeAddrMode3Instruction"; +} + +def STRD_POST: AI3ldstidx<0b1111, 0, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rt2, addr_offset_none:$addr, + am3offset:$offset), + IndexModePost, StMiscFrm, IIC_iStore_d_ru, + "strd", "\t$Rt, $Rt2, $addr, $offset", + "$addr.base = $Rn_wb", []> { + bits<10> offset; + bits<4> addr; + let Inst{23} = offset{8}; // U bit + let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm + let Inst{19-16} = addr; + let Inst{11-8} = offset{7-4}; // imm7_4/zero + let Inst{3-0} = offset{3-0}; // imm3_0/Rm + let DecoderMethod = "DecodeAddrMode3Instruction"; +} +} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 + +// STRT, STRBT, and STRHT + +def STRBT_POST_REG : AI2ldstidx<0, 1, 0, (outs 
GPR:$Rn_wb), + (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset), + IndexModePost, StFrm, IIC_iStore_bh_ru, + "strbt", "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb", []> { + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> addr; + let Inst{25} = 1; + let Inst{23} = offset{12}; + let Inst{21} = 1; // overwrite + let Inst{19-16} = addr; + let Inst{11-5} = offset{11-5}; + let Inst{4} = 0; + let Inst{3-0} = offset{3-0}; + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; +} + +def STRBT_POST_IMM + : AI2ldstidx<0, 1, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset), + IndexModePost, StFrm, IIC_iStore_bh_ru, + "strbt", "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", []> { + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> addr; + let Inst{25} = 0; + let Inst{23} = offset{12}; + let Inst{21} = 1; // overwrite + let Inst{19-16} = addr; + let Inst{11-0} = offset{11-0}; + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; +} + +def STRBT_POST + : ARMAsmPseudo<"strbt${q} $Rt, $addr", + (ins GPR:$Rt, addr_offset_none:$addr, pred:$q)>; + +let mayStore = 1, hasSideEffects = 0 in { +def STRT_POST_REG : AI2ldstidx<0, 0, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset), + IndexModePost, StFrm, IIC_iStore_ru, + "strt", "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb", []> { + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> addr; + let Inst{25} = 1; + let Inst{23} = offset{12}; + let Inst{21} = 1; // overwrite + let Inst{19-16} = addr; + let Inst{11-5} = offset{11-5}; + let Inst{4} = 0; + let Inst{3-0} = offset{3-0}; + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; +} + +def STRT_POST_IMM + : AI2ldstidx<0, 0, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset), + IndexModePost, StFrm, IIC_iStore_ru, + "strt", "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", []> { + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> addr; + let Inst{25} = 0; + let Inst{23} = offset{12}; + let Inst{21} = 1; // overwrite + let Inst{19-16} = addr; + let Inst{11-0} = offset{11-0}; + let DecoderMethod = "DecodeAddrMode2IdxInstruction"; +} +} + +def STRT_POST + : ARMAsmPseudo<"strt${q} $Rt, $addr", + (ins GPR:$Rt, addr_offset_none:$addr, pred:$q)>; + +multiclass AI3strT op, string opc> { + def i : AI3ldstidxT { + bits<9> offset; + let Inst{23} = offset{8}; + let Inst{22} = 1; + let Inst{11-8} = offset{7-4}; + let Inst{3-0} = offset{3-0}; + } + def r : AI3ldstidxT { + bits<5> Rm; + let Inst{23} = Rm{4}; + let Inst{22} = 0; + let Inst{11-8} = 0; + let Inst{3-0} = Rm{3-0}; + } +} + + +defm STRHT : AI3strT<0b1011, "strht">; + +def STL : AIstrrel<0b00, (outs), (ins GPR:$Rt, addr_offset_none:$addr), + NoItinerary, "stl", "\t$Rt, $addr", []>; +def STLB : AIstrrel<0b10, (outs), (ins GPR:$Rt, addr_offset_none:$addr), + NoItinerary, "stlb", "\t$Rt, $addr", []>; +def STLH : AIstrrel<0b11, (outs), (ins GPR:$Rt, addr_offset_none:$addr), + NoItinerary, "stlh", "\t$Rt, $addr", []>; + +//===----------------------------------------------------------------------===// +// Load / store multiple Instructions. +// + +multiclass arm_ldst_mult { + // IA is the default, so no need for an explicit suffix on the + // mnemonic here. Without it is the canonical spelling. 
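+  // The variants map to the four LDM/STM addressing modes: IA = increment
+  // after, IB = increment before, DA = decrement after, DB = decrement
+  // before, selected by the P/U bits in Inst{24-23}. The _UPD forms add
+  // base writeback ("!"), e.g. "ldmia sp!, {r4-r7, pc}".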
+ def IA : + AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), + IndexModeNone, f, itin, + !strconcat(asm, "${p}\t$Rn, $regs", sfx), "", []> { + let Inst{24-23} = 0b01; // Increment After + let Inst{22} = P_bit; + let Inst{21} = 0; // No writeback + let Inst{20} = L_bit; + } + def IA_UPD : + AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), + IndexModeUpd, f, itin_upd, + !strconcat(asm, "${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> { + let Inst{24-23} = 0b01; // Increment After + let Inst{22} = P_bit; + let Inst{21} = 1; // Writeback + let Inst{20} = L_bit; + + let DecoderMethod = "DecodeMemMultipleWritebackInstruction"; + } + def DA : + AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), + IndexModeNone, f, itin, + !strconcat(asm, "da${p}\t$Rn, $regs", sfx), "", []> { + let Inst{24-23} = 0b00; // Decrement After + let Inst{22} = P_bit; + let Inst{21} = 0; // No writeback + let Inst{20} = L_bit; + } + def DA_UPD : + AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), + IndexModeUpd, f, itin_upd, + !strconcat(asm, "da${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> { + let Inst{24-23} = 0b00; // Decrement After + let Inst{22} = P_bit; + let Inst{21} = 1; // Writeback + let Inst{20} = L_bit; + + let DecoderMethod = "DecodeMemMultipleWritebackInstruction"; + } + def DB : + AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), + IndexModeNone, f, itin, + !strconcat(asm, "db${p}\t$Rn, $regs", sfx), "", []> { + let Inst{24-23} = 0b10; // Decrement Before + let Inst{22} = P_bit; + let Inst{21} = 0; // No writeback + let Inst{20} = L_bit; + } + def DB_UPD : + AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), + IndexModeUpd, f, itin_upd, + !strconcat(asm, "db${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> { + let Inst{24-23} = 0b10; // Decrement Before + let Inst{22} = P_bit; + let Inst{21} = 1; // Writeback + let Inst{20} = L_bit; + + let DecoderMethod = "DecodeMemMultipleWritebackInstruction"; + } + def IB : + AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), + IndexModeNone, f, itin, + !strconcat(asm, "ib${p}\t$Rn, $regs", sfx), "", []> { + let Inst{24-23} = 0b11; // Increment Before + let Inst{22} = P_bit; + let Inst{21} = 0; // No writeback + let Inst{20} = L_bit; + } + def IB_UPD : + AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), + IndexModeUpd, f, itin_upd, + !strconcat(asm, "ib${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> { + let Inst{24-23} = 0b11; // Increment Before + let Inst{22} = P_bit; + let Inst{21} = 1; // Writeback + let Inst{20} = L_bit; + + let DecoderMethod = "DecodeMemMultipleWritebackInstruction"; + } +} + +let hasSideEffects = 0 in { + +let mayLoad = 1, hasExtraDefRegAllocReq = 1 in +defm LDM : arm_ldst_mult<"ldm", "", 1, 0, LdStMulFrm, IIC_iLoad_m, + IIC_iLoad_mu>, ComplexDeprecationPredicate<"ARMLoad">; + +let mayStore = 1, hasExtraSrcRegAllocReq = 1 in +defm STM : arm_ldst_mult<"stm", "", 0, 0, LdStMulFrm, IIC_iStore_m, + IIC_iStore_mu>, + ComplexDeprecationPredicate<"ARMStore">; + +} // hasSideEffects + +// FIXME: remove when we have a way to marking a MI with these properties. +// FIXME: Should pc be an implicit operand like PICADD, etc? 
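+// LDMIA_RET models the common epilogue "ldmia sp!, {..., pc}": loading PC
+// from the register list turns the load-multiple into a return, so the
+// pseudo carries the return/barrier flags and simply expands to LDMIA_UPD.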
+let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, + hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in +def LDMIA_RET : ARMPseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, + reglist:$regs, variable_ops), + 4, IIC_iLoad_mBr, [], + (LDMIA_UPD GPR:$wb, GPR:$Rn, pred:$p, reglist:$regs)>, + RegConstraint<"$Rn = $wb">; + +let mayLoad = 1, hasExtraDefRegAllocReq = 1 in +defm sysLDM : arm_ldst_mult<"ldm", " ^", 1, 1, LdStMulFrm, IIC_iLoad_m, + IIC_iLoad_mu>; + +let mayStore = 1, hasExtraSrcRegAllocReq = 1 in +defm sysSTM : arm_ldst_mult<"stm", " ^", 0, 1, LdStMulFrm, IIC_iStore_m, + IIC_iStore_mu>; + + + +//===----------------------------------------------------------------------===// +// Move Instructions. +// + +let hasSideEffects = 0, isMoveReg = 1 in +def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr, + "mov", "\t$Rd, $Rm", []>, UnaryDP, Sched<[WriteALU]> { + bits<4> Rd; + bits<4> Rm; + + let Inst{19-16} = 0b0000; + let Inst{11-4} = 0b00000000; + let Inst{25} = 0; + let Inst{3-0} = Rm; + let Inst{15-12} = Rd; +} + +// A version for the smaller set of tail call registers. +let hasSideEffects = 0 in +def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm, + IIC_iMOVr, "mov", "\t$Rd, $Rm", []>, UnaryDP, Sched<[WriteALU]> { + bits<4> Rd; + bits<4> Rm; + + let Inst{11-4} = 0b00000000; + let Inst{25} = 0; + let Inst{3-0} = Rm; + let Inst{15-12} = Rd; +} + +def MOVsr : AsI1<0b1101, (outs GPRnopc:$Rd), (ins shift_so_reg_reg:$src), + DPSoRegRegFrm, IIC_iMOVsr, + "mov", "\t$Rd, $src", + [(set GPRnopc:$Rd, shift_so_reg_reg:$src)]>, UnaryDP, + Sched<[WriteALU]> { + bits<4> Rd; + bits<12> src; + let Inst{15-12} = Rd; + let Inst{19-16} = 0b0000; + let Inst{11-8} = src{11-8}; + let Inst{7} = 0; + let Inst{6-5} = src{6-5}; + let Inst{4} = 1; + let Inst{3-0} = src{3-0}; + let Inst{25} = 0; +} + +def MOVsi : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg_imm:$src), + DPSoRegImmFrm, IIC_iMOVsr, + "mov", "\t$Rd, $src", [(set GPR:$Rd, shift_so_reg_imm:$src)]>, + UnaryDP, Sched<[WriteALU]> { + bits<4> Rd; + bits<12> src; + let Inst{15-12} = Rd; + let Inst{19-16} = 0b0000; + let Inst{11-5} = src{11-5}; + let Inst{4} = 0; + let Inst{3-0} = src{3-0}; + let Inst{25} = 0; +} + +let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in +def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins mod_imm:$imm), DPFrm, IIC_iMOVi, + "mov", "\t$Rd, $imm", [(set GPR:$Rd, mod_imm:$imm)]>, UnaryDP, + Sched<[WriteALU]> { + bits<4> Rd; + bits<12> imm; + let Inst{25} = 1; + let Inst{15-12} = Rd; + let Inst{19-16} = 0b0000; + let Inst{11-0} = imm; +} + +let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in +def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins imm0_65535_expr:$imm), + DPFrm, IIC_iMOVi, + "movw", "\t$Rd, $imm", + [(set GPR:$Rd, imm0_65535:$imm)]>, + Requires<[IsARM, HasV6T2]>, UnaryDP, Sched<[WriteALU]> { + bits<4> Rd; + bits<16> imm; + let Inst{15-12} = Rd; + let Inst{11-0} = imm{11-0}; + let Inst{19-16} = imm{15-12}; + let Inst{20} = 0; + let Inst{25} = 1; + let DecoderMethod = "DecodeArmMOVTWInstruction"; +} + +def : InstAlias<"mov${p} $Rd, $imm", + (MOVi16 GPR:$Rd, imm0_65535_expr:$imm, pred:$p), 0>, + Requires<[IsARM, HasV6T2]>; + +def MOVi16_ga_pcrel : PseudoInst<(outs GPR:$Rd), + (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>, + Sched<[WriteALU]>; + +let Constraints = "$src = $Rd" in { +def MOVTi16 : AI1<0b1010, (outs GPRnopc:$Rd), + (ins GPR:$src, imm0_65535_expr:$imm), + DPFrm, IIC_iMOVi, + "movt", "\t$Rd, $imm", + [(set GPRnopc:$Rd, + (or (and GPR:$src, 
0xffff), + lo16AllZero:$imm))]>, UnaryDP, + Requires<[IsARM, HasV6T2]>, Sched<[WriteALU]> { + bits<4> Rd; + bits<16> imm; + let Inst{15-12} = Rd; + let Inst{11-0} = imm{11-0}; + let Inst{19-16} = imm{15-12}; + let Inst{20} = 0; + let Inst{25} = 1; + let DecoderMethod = "DecodeArmMOVTWInstruction"; +} + +def MOVTi16_ga_pcrel : PseudoInst<(outs GPR:$Rd), + (ins GPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>, + Sched<[WriteALU]>; + +} // Constraints + +def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>, + Requires<[IsARM, HasV6T2]>; + +let Uses = [CPSR] in +def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi, + [(set GPR:$Rd, (ARMrrx GPR:$Rm))]>, UnaryDP, + Requires<[IsARM]>, Sched<[WriteALU]>; + +// These aren't really mov instructions, but we have to define them this way +// due to flag operands. + +let Defs = [CPSR] in { +def MOVsrl_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, + [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP, + Sched<[WriteALU]>, Requires<[IsARM]>; +def MOVsra_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, + [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP, + Sched<[WriteALU]>, Requires<[IsARM]>; +} + +//===----------------------------------------------------------------------===// +// Extend Instructions. +// + +// Sign extenders + +def SXTB : AI_ext_rrot<0b01101010, + "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>; +def SXTH : AI_ext_rrot<0b01101011, + "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>; + +def SXTAB : AI_exta_rrot<0b01101010, + "sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>; +def SXTAH : AI_exta_rrot<0b01101011, + "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>; + +def : ARMV6Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, rot_imm:$rot), i8)), + (SXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; +def : ARMV6Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, imm8_or_16:$rot), + i16)), + (SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; + +def SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">; +def : ARMV6Pat<(int_arm_sxtb16 GPR:$Src), + (SXTB16 GPR:$Src, 0)>; + +def SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">; +def : ARMV6Pat<(int_arm_sxtab16 GPR:$LHS, GPR:$RHS), + (SXTAB16 GPR:$LHS, GPR:$RHS, 0)>; + +// Zero extenders + +let AddedComplexity = 16 in { +def UXTB : AI_ext_rrot<0b01101110, + "uxtb" , UnOpFrag<(and node:$Src, 0x000000FF)>>; +def UXTH : AI_ext_rrot<0b01101111, + "uxth" , UnOpFrag<(and node:$Src, 0x0000FFFF)>>; +def UXTB16 : AI_ext_rrot<0b01101100, + "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; + +// FIXME: This pattern incorrectly assumes the shl operator is a rotate. +// The transformation should probably be done as a combiner action +// instead so we can include a check for masking back in the upper +// eight bits of the source into the lower eight bits of the result. 
+//def : ARMV6Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF), +// (UXTB16r_rot GPR:$Src, 3)>; +def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF), + (UXTB16 GPR:$Src, 1)>; +def : ARMV6Pat<(int_arm_uxtb16 GPR:$Src), + (UXTB16 GPR:$Src, 0)>; + +def UXTAB : AI_exta_rrot<0b01101110, "uxtab", + BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; +def UXTAH : AI_exta_rrot<0b01101111, "uxtah", + BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>; + +def : ARMV6Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot), 0xFF)), + (UXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; +def : ARMV6Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)), + (UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; +} + +// This isn't safe in general, the add is two 16-bit units, not a 32-bit add. +def UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">; +def : ARMV6Pat<(int_arm_uxtab16 GPR:$LHS, GPR:$RHS), + (UXTAB16 GPR:$LHS, GPR:$RHS, 0)>; + + +def SBFX : I<(outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, imm0_31:$lsb, imm1_32:$width), + AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi, + "sbfx", "\t$Rd, $Rn, $lsb, $width", "", []>, + Requires<[IsARM, HasV6T2]> { + bits<4> Rd; + bits<4> Rn; + bits<5> lsb; + bits<5> width; + let Inst{27-21} = 0b0111101; + let Inst{6-4} = 0b101; + let Inst{20-16} = width; + let Inst{15-12} = Rd; + let Inst{11-7} = lsb; + let Inst{3-0} = Rn; +} + +def UBFX : I<(outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, imm0_31:$lsb, imm1_32:$width), + AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi, + "ubfx", "\t$Rd, $Rn, $lsb, $width", "", []>, + Requires<[IsARM, HasV6T2]> { + bits<4> Rd; + bits<4> Rn; + bits<5> lsb; + bits<5> width; + let Inst{27-21} = 0b0111111; + let Inst{6-4} = 0b101; + let Inst{20-16} = width; + let Inst{15-12} = Rd; + let Inst{11-7} = lsb; + let Inst{3-0} = Rn; +} + +//===----------------------------------------------------------------------===// +// Arithmetic Instructions. +// + +let isAdd = 1 in +defm ADD : AsI1_bin_irs<0b0100, "add", + IIC_iALUi, IIC_iALUr, IIC_iALUsr, add, 1>; +defm SUB : AsI1_bin_irs<0b0010, "sub", + IIC_iALUi, IIC_iALUr, IIC_iALUsr, sub>; + +// ADD and SUB with 's' bit set. +// +// Currently, ADDS/SUBS are pseudo opcodes that exist only in the +// selection DAG. They are "lowered" to real ADD/SUB opcodes by +// AdjustInstrPostInstrSelection where we determine whether or not to +// set the "s" bit based on CPSR liveness. +// +// FIXME: Eliminate ADDS/SUBS pseudo opcodes after adding tablegen +// support for an optional CPSR definition that corresponds to the DAG +// node's second value. We can then eliminate the implicit def of CPSR. +let isAdd = 1 in +defm ADDS : AsI1_bin_s_irs; +defm SUBS : AsI1_bin_s_irs; + +let isAdd = 1 in +defm ADC : AI1_adde_sube_irs<0b0101, "adc", ARMadde, 1>; +defm SBC : AI1_adde_sube_irs<0b0110, "sbc", ARMsube>; + +defm RSB : AsI1_rbin_irs<0b0011, "rsb", + IIC_iALUi, IIC_iALUr, IIC_iALUsr, + sub>; + +// FIXME: Eliminate them if we can write def : Pat patterns which defines +// CPSR and the implicit def of CPSR is not needed. +defm RSBS : AsI1_rbin_s_is; + +defm RSC : AI1_rsc_irs<0b0111, "rsc", ARMsube>; + +// (sub X, imm) gets canonicalized to (add X, -imm). Match this form. +// The assume-no-carry-in form uses the negation of the input since add/sub +// assume opposite meanings of the carry flag (i.e., carry == !borrow). +// See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory +// details. 
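+// Worked example: "add r0, r1, #-8" is selected as "sub r0, r1, #8"
+// (plain negation, no carry in). The add-with-carry form instead uses the
+// bitwise NOT of the immediate, because SBC computes
+//   Rn - imm - !C = Rn + ~imm + C,
+// so the +1 of two's-complement negation is absorbed by the carry bit.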
+def : ARMPat<(add GPR:$src, mod_imm_neg:$imm), + (SUBri GPR:$src, mod_imm_neg:$imm)>; +def : ARMPat<(ARMaddc GPR:$src, mod_imm_neg:$imm), + (SUBSri GPR:$src, mod_imm_neg:$imm)>; + +def : ARMPat<(add GPR:$src, imm0_65535_neg:$imm), + (SUBrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>, + Requires<[IsARM, HasV6T2]>; +def : ARMPat<(ARMaddc GPR:$src, imm0_65535_neg:$imm), + (SUBSrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>, + Requires<[IsARM, HasV6T2]>; + +// The with-carry-in form matches bitwise not instead of the negation. +// Effectively, the inverse interpretation of the carry flag already accounts +// for part of the negation. +def : ARMPat<(ARMadde GPR:$src, mod_imm_not:$imm, CPSR), + (SBCri GPR:$src, mod_imm_not:$imm)>; +def : ARMPat<(ARMadde GPR:$src, imm0_65535_neg:$imm, CPSR), + (SBCrr GPR:$src, (MOVi16 (imm_not_XFORM imm:$imm)))>, + Requires<[IsARM, HasV6T2]>; + +// Note: These are implemented in C++ code, because they have to generate +// ADD/SUBrs instructions, which use a complex pattern that a xform function +// cannot produce. +// (mul X, 2^n+1) -> (add (X << n), X) +// (mul X, 2^n-1) -> (rsb X, (X << n)) + +// ARM Arithmetic Instruction +// GPR:$dst = GPR:$a op GPR:$b +class AAI op27_20, bits<8> op11_4, string opc, + list pattern = [], + dag iops = (ins GPRnopc:$Rn, GPRnopc:$Rm), + string asm = "\t$Rd, $Rn, $Rm"> + : AI<(outs GPRnopc:$Rd), iops, DPFrm, IIC_iALUr, opc, asm, pattern>, + Sched<[WriteALU, ReadALU, ReadALU]> { + bits<4> Rn; + bits<4> Rd; + bits<4> Rm; + let Inst{27-20} = op27_20; + let Inst{11-4} = op11_4; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{3-0} = Rm; + + let Unpredictable{11-8} = 0b1111; +} + +// Wrappers around the AAI class +class AAIRevOpr op27_20, bits<8> op11_4, string opc, + list pattern = []> + : AAI; + +class AAIIntrinsic op27_20, bits<8> op11_4, string opc, + Intrinsic intrinsic> + : AAI; + +// Saturating add/subtract +let hasSideEffects = 1 in { +def QADD8 : AAIIntrinsic<0b01100010, 0b11111001, "qadd8", int_arm_qadd8>; +def QADD16 : AAIIntrinsic<0b01100010, 0b11110001, "qadd16", int_arm_qadd16>; +def QSUB16 : AAIIntrinsic<0b01100010, 0b11110111, "qsub16", int_arm_qsub16>; +def QSUB8 : AAIIntrinsic<0b01100010, 0b11111111, "qsub8", int_arm_qsub8>; + +def QDADD : AAIRevOpr<0b00010100, 0b00000101, "qdadd", + [(set GPRnopc:$Rd, (int_arm_qadd (int_arm_qadd GPRnopc:$Rm, + GPRnopc:$Rm), + GPRnopc:$Rn))]>; +def QDSUB : AAIRevOpr<0b00010110, 0b00000101, "qdsub", + [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, + (int_arm_qadd GPRnopc:$Rn, GPRnopc:$Rn)))]>; +def QSUB : AAIRevOpr<0b00010010, 0b00000101, "qsub", + [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, GPRnopc:$Rn))]>; +let DecoderMethod = "DecodeQADDInstruction" in + def QADD : AAIRevOpr<0b00010000, 0b00000101, "qadd", + [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))]>; +} + +def UQADD16 : AAIIntrinsic<0b01100110, 0b11110001, "uqadd16", int_arm_uqadd16>; +def UQADD8 : AAIIntrinsic<0b01100110, 0b11111001, "uqadd8", int_arm_uqadd8>; +def UQSUB16 : AAIIntrinsic<0b01100110, 0b11110111, "uqsub16", int_arm_uqsub16>; +def UQSUB8 : AAIIntrinsic<0b01100110, 0b11111111, "uqsub8", int_arm_uqsub8>; +def QASX : AAIIntrinsic<0b01100010, 0b11110011, "qasx", int_arm_qasx>; +def QSAX : AAIIntrinsic<0b01100010, 0b11110101, "qsax", int_arm_qsax>; +def UQASX : AAIIntrinsic<0b01100110, 0b11110011, "uqasx", int_arm_uqasx>; +def UQSAX : AAIIntrinsic<0b01100110, 0b11110101, "uqsax", int_arm_uqsax>; + +// Signed/Unsigned add/subtract + +def SASX : AAIIntrinsic<0b01100001, 0b11110011, "sasx", 
int_arm_sasx>; +def SADD16 : AAIIntrinsic<0b01100001, 0b11110001, "sadd16", int_arm_sadd16>; +def SADD8 : AAIIntrinsic<0b01100001, 0b11111001, "sadd8", int_arm_sadd8>; +def SSAX : AAIIntrinsic<0b01100001, 0b11110101, "ssax", int_arm_ssax>; +def SSUB16 : AAIIntrinsic<0b01100001, 0b11110111, "ssub16", int_arm_ssub16>; +def SSUB8 : AAIIntrinsic<0b01100001, 0b11111111, "ssub8", int_arm_ssub8>; +def UASX : AAIIntrinsic<0b01100101, 0b11110011, "uasx", int_arm_uasx>; +def UADD16 : AAIIntrinsic<0b01100101, 0b11110001, "uadd16", int_arm_uadd16>; +def UADD8 : AAIIntrinsic<0b01100101, 0b11111001, "uadd8", int_arm_uadd8>; +def USAX : AAIIntrinsic<0b01100101, 0b11110101, "usax", int_arm_usax>; +def USUB16 : AAIIntrinsic<0b01100101, 0b11110111, "usub16", int_arm_usub16>; +def USUB8 : AAIIntrinsic<0b01100101, 0b11111111, "usub8", int_arm_usub8>; + +// Signed/Unsigned halving add/subtract + +def SHASX : AAIIntrinsic<0b01100011, 0b11110011, "shasx", int_arm_shasx>; +def SHADD16 : AAIIntrinsic<0b01100011, 0b11110001, "shadd16", int_arm_shadd16>; +def SHADD8 : AAIIntrinsic<0b01100011, 0b11111001, "shadd8", int_arm_shadd8>; +def SHSAX : AAIIntrinsic<0b01100011, 0b11110101, "shsax", int_arm_shsax>; +def SHSUB16 : AAIIntrinsic<0b01100011, 0b11110111, "shsub16", int_arm_shsub16>; +def SHSUB8 : AAIIntrinsic<0b01100011, 0b11111111, "shsub8", int_arm_shsub8>; +def UHASX : AAIIntrinsic<0b01100111, 0b11110011, "uhasx", int_arm_uhasx>; +def UHADD16 : AAIIntrinsic<0b01100111, 0b11110001, "uhadd16", int_arm_uhadd16>; +def UHADD8 : AAIIntrinsic<0b01100111, 0b11111001, "uhadd8", int_arm_uhadd8>; +def UHSAX : AAIIntrinsic<0b01100111, 0b11110101, "uhsax", int_arm_uhsax>; +def UHSUB16 : AAIIntrinsic<0b01100111, 0b11110111, "uhsub16", int_arm_uhsub16>; +def UHSUB8 : AAIIntrinsic<0b01100111, 0b11111111, "uhsub8", int_arm_uhsub8>; + +// Unsigned Sum of Absolute Differences [and Accumulate]. 
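+// usad8 treats each operand as four unsigned bytes and sums the absolute
+// differences of the corresponding lanes, e.g.
+//   Rd = |Rn[7:0]-Rm[7:0]| + |Rn[15:8]-Rm[15:8]|
+//      + |Rn[23:16]-Rm[23:16]| + |Rn[31:24]-Rm[31:24]|;
+// usada8 additionally accumulates the sum into Ra.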
+ +def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), + MulFrm /* for convenience */, NoItinerary, "usad8", + "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (int_arm_usad8 GPR:$Rn, GPR:$Rm))]>, + Requires<[IsARM, HasV6]>, Sched<[WriteALU, ReadALU, ReadALU]> { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + let Inst{27-20} = 0b01111000; + let Inst{15-12} = 0b1111; + let Inst{7-4} = 0b0001; + let Inst{19-16} = Rd; + let Inst{11-8} = Rm; + let Inst{3-0} = Rn; +} +def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), + MulFrm /* for convenience */, NoItinerary, "usada8", + "\t$Rd, $Rn, $Rm, $Ra", + [(set GPR:$Rd, (int_arm_usada8 GPR:$Rn, GPR:$Rm, GPR:$Ra))]>, + Requires<[IsARM, HasV6]>, Sched<[WriteALU, ReadALU, ReadALU]>{ + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + bits<4> Ra; + let Inst{27-20} = 0b01111000; + let Inst{7-4} = 0b0001; + let Inst{19-16} = Rd; + let Inst{15-12} = Ra; + let Inst{11-8} = Rm; + let Inst{3-0} = Rn; +} + +// Signed/Unsigned saturate +def SSAT : AI<(outs GPRnopc:$Rd), + (ins imm1_32:$sat_imm, GPRnopc:$Rn, shift_imm:$sh), + SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []>, + Requires<[IsARM,HasV6]>{ + bits<4> Rd; + bits<5> sat_imm; + bits<4> Rn; + bits<8> sh; + let Inst{27-21} = 0b0110101; + let Inst{5-4} = 0b01; + let Inst{20-16} = sat_imm; + let Inst{15-12} = Rd; + let Inst{11-7} = sh{4-0}; + let Inst{6} = sh{5}; + let Inst{3-0} = Rn; +} + +def SSAT16 : AI<(outs GPRnopc:$Rd), + (ins imm1_16:$sat_imm, GPRnopc:$Rn), SatFrm, + NoItinerary, "ssat16", "\t$Rd, $sat_imm, $Rn", []>, + Requires<[IsARM,HasV6]>{ + bits<4> Rd; + bits<4> sat_imm; + bits<4> Rn; + let Inst{27-20} = 0b01101010; + let Inst{11-4} = 0b11110011; + let Inst{15-12} = Rd; + let Inst{19-16} = sat_imm; + let Inst{3-0} = Rn; +} + +def USAT : AI<(outs GPRnopc:$Rd), + (ins imm0_31:$sat_imm, GPRnopc:$Rn, shift_imm:$sh), + SatFrm, NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []>, + Requires<[IsARM,HasV6]> { + bits<4> Rd; + bits<5> sat_imm; + bits<4> Rn; + bits<8> sh; + let Inst{27-21} = 0b0110111; + let Inst{5-4} = 0b01; + let Inst{15-12} = Rd; + let Inst{11-7} = sh{4-0}; + let Inst{6} = sh{5}; + let Inst{20-16} = sat_imm; + let Inst{3-0} = Rn; +} + +def USAT16 : AI<(outs GPRnopc:$Rd), + (ins imm0_15:$sat_imm, GPRnopc:$Rn), SatFrm, + NoItinerary, "usat16", "\t$Rd, $sat_imm, $Rn", []>, + Requires<[IsARM,HasV6]>{ + bits<4> Rd; + bits<4> sat_imm; + bits<4> Rn; + let Inst{27-20} = 0b01101110; + let Inst{11-4} = 0b11110011; + let Inst{15-12} = Rd; + let Inst{19-16} = sat_imm; + let Inst{3-0} = Rn; +} + +def : ARMV6Pat<(int_arm_ssat GPRnopc:$a, imm1_32:$pos), + (SSAT imm1_32:$pos, GPRnopc:$a, 0)>; +def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm0_31:$pos), + (USAT imm0_31:$pos, GPRnopc:$a, 0)>; +def : ARMPat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm), + (SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>; +def : ARMPat<(ARMusatnoshift GPRnopc:$Rn, imm0_31:$imm), + (USAT imm0_31:$imm, GPRnopc:$Rn, 0)>; +def : ARMV6Pat<(int_arm_ssat16 GPRnopc:$a, imm1_16:$pos), + (SSAT16 imm1_16:$pos, GPRnopc:$a)>; +def : ARMV6Pat<(int_arm_usat16 GPRnopc:$a, imm0_15:$pos), + (USAT16 imm0_15:$pos, GPRnopc:$a)>; + +//===----------------------------------------------------------------------===// +// Bitwise Instructions. 
+// + +defm AND : AsI1_bin_irs<0b0000, "and", + IIC_iBITi, IIC_iBITr, IIC_iBITsr, and, 1>; +defm ORR : AsI1_bin_irs<0b1100, "orr", + IIC_iBITi, IIC_iBITr, IIC_iBITsr, or, 1>; +defm EOR : AsI1_bin_irs<0b0001, "eor", + IIC_iBITi, IIC_iBITr, IIC_iBITsr, xor, 1>; +defm BIC : AsI1_bin_irs<0b1110, "bic", + IIC_iBITi, IIC_iBITr, IIC_iBITsr, + BinOpFrag<(and node:$LHS, (not node:$RHS))>>; + +// FIXME: bf_inv_mask_imm should be two operands, the lsb and the msb, just +// like in the actual instruction encoding. The complexity of mapping the mask +// to the lsb/msb pair should be handled by ISel, not encapsulated in the +// instruction description. +def BFC : I<(outs GPR:$Rd), (ins GPR:$src, bf_inv_mask_imm:$imm), + AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi, + "bfc", "\t$Rd, $imm", "$src = $Rd", + [(set GPR:$Rd, (and GPR:$src, bf_inv_mask_imm:$imm))]>, + Requires<[IsARM, HasV6T2]> { + bits<4> Rd; + bits<10> imm; + let Inst{27-21} = 0b0111110; + let Inst{6-0} = 0b0011111; + let Inst{15-12} = Rd; + let Inst{11-7} = imm{4-0}; // lsb + let Inst{20-16} = imm{9-5}; // msb +} + +// A8.6.18 BFI - Bitfield insert (Encoding A1) +def BFI:I<(outs GPRnopc:$Rd), (ins GPRnopc:$src, GPR:$Rn, bf_inv_mask_imm:$imm), + AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi, + "bfi", "\t$Rd, $Rn, $imm", "$src = $Rd", + [(set GPRnopc:$Rd, (ARMbfi GPRnopc:$src, GPR:$Rn, + bf_inv_mask_imm:$imm))]>, + Requires<[IsARM, HasV6T2]> { + bits<4> Rd; + bits<4> Rn; + bits<10> imm; + let Inst{27-21} = 0b0111110; + let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15 + let Inst{15-12} = Rd; + let Inst{11-7} = imm{4-0}; // lsb + let Inst{20-16} = imm{9-5}; // width + let Inst{3-0} = Rn; +} + +def MVNr : AsI1<0b1111, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMVNr, + "mvn", "\t$Rd, $Rm", + [(set GPR:$Rd, (not GPR:$Rm))]>, UnaryDP, Sched<[WriteALU]> { + bits<4> Rd; + bits<4> Rm; + let Inst{25} = 0; + let Inst{19-16} = 0b0000; + let Inst{11-4} = 0b00000000; + let Inst{15-12} = Rd; + let Inst{3-0} = Rm; + + let Unpredictable{19-16} = 0b1111; +} +def MVNsi : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_imm:$shift), + DPSoRegImmFrm, IIC_iMVNsr, "mvn", "\t$Rd, $shift", + [(set GPR:$Rd, (not so_reg_imm:$shift))]>, UnaryDP, + Sched<[WriteALU]> { + bits<4> Rd; + bits<12> shift; + let Inst{25} = 0; + let Inst{19-16} = 0b0000; + let Inst{15-12} = Rd; + let Inst{11-5} = shift{11-5}; + let Inst{4} = 0; + let Inst{3-0} = shift{3-0}; + + let Unpredictable{19-16} = 0b1111; +} +def MVNsr : AsI1<0b1111, (outs GPRnopc:$Rd), (ins so_reg_reg:$shift), + DPSoRegRegFrm, IIC_iMVNsr, "mvn", "\t$Rd, $shift", + [(set GPRnopc:$Rd, (not so_reg_reg:$shift))]>, UnaryDP, + Sched<[WriteALU]> { + bits<4> Rd; + bits<12> shift; + let Inst{25} = 0; + let Inst{19-16} = 0b0000; + let Inst{15-12} = Rd; + let Inst{11-8} = shift{11-8}; + let Inst{7} = 0; + let Inst{6-5} = shift{6-5}; + let Inst{4} = 1; + let Inst{3-0} = shift{3-0}; + + let Unpredictable{19-16} = 0b1111; +} +let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in +def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins mod_imm:$imm), DPFrm, + IIC_iMVNi, "mvn", "\t$Rd, $imm", + [(set GPR:$Rd, mod_imm_not:$imm)]>,UnaryDP, Sched<[WriteALU]> { + bits<4> Rd; + bits<12> imm; + let Inst{25} = 1; + let Inst{19-16} = 0b0000; + let Inst{15-12} = Rd; + let Inst{11-0} = imm; +} + +let AddedComplexity = 1 in +def : ARMPat<(and GPR:$src, mod_imm_not:$imm), + (BICri GPR:$src, mod_imm_not:$imm)>; + +//===----------------------------------------------------------------------===// +// Multiply Instructions. 
+// +class AsMul1I32 opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : AsMul1I { + bits<4> Rd; + bits<4> Rm; + bits<4> Rn; + let Inst{19-16} = Rd; + let Inst{11-8} = Rm; + let Inst{3-0} = Rn; +} +class AsMul1I64 opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : AsMul1I { + bits<4> RdLo; + bits<4> RdHi; + bits<4> Rm; + bits<4> Rn; + let Inst{19-16} = RdHi; + let Inst{15-12} = RdLo; + let Inst{11-8} = Rm; + let Inst{3-0} = Rn; +} +class AsMla1I64 opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : AsMul1I { + bits<4> RdLo; + bits<4> RdHi; + bits<4> Rm; + bits<4> Rn; + let Inst{19-16} = RdHi; + let Inst{15-12} = RdLo; + let Inst{11-8} = Rm; + let Inst{3-0} = Rn; +} + +// FIXME: The v5 pseudos are only necessary for the additional Constraint +// property. Remove them when it's possible to add those properties +// on an individual MachineInstr, not just an instruction description. +let isCommutable = 1, TwoOperandAliasConstraint = "$Rn = $Rd" in { +def MUL : AsMul1I32<0b0000000, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm), + IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", + [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))]>, + Requires<[IsARM, HasV6]>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]> { + let Inst{15-12} = 0b0000; + let Unpredictable{15-12} = 0b1111; +} + +let Constraints = "@earlyclobber $Rd" in +def MULv5: ARMPseudoExpand<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, + pred:$p, cc_out:$s), + 4, IIC_iMUL32, + [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))], + (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>, + Requires<[IsARM, NoV6, UseMulOps]>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; +} + +def MLA : AsMul1I32<0b0000001, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra), + IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra", + [(set GPRnopc:$Rd, (add (mul GPRnopc:$Rn, GPRnopc:$Rm), GPRnopc:$Ra))]>, + Requires<[IsARM, HasV6, UseMulOps]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> { + bits<4> Ra; + let Inst{15-12} = Ra; +} + +let Constraints = "@earlyclobber $Rd" in +def MLAv5: ARMPseudoExpand<(outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra, + pred:$p, cc_out:$s), 4, IIC_iMAC32, + [(set GPRnopc:$Rd, (add (mul GPRnopc:$Rn, GPRnopc:$Rm), GPRnopc:$Ra))], + (MLA GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra, pred:$p, cc_out:$s)>, + Requires<[IsARM, NoV6]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; + +def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), + IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra", + [(set GPR:$Rd, (sub GPR:$Ra, (mul GPR:$Rn, GPR:$Rm)))]>, + Requires<[IsARM, HasV6T2, UseMulOps]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> { + bits<4> Rd; + bits<4> Rm; + bits<4> Rn; + bits<4> Ra; + let Inst{19-16} = Rd; + let Inst{15-12} = Ra; + let Inst{11-8} = Rm; + let Inst{3-0} = Rn; +} + +// Extra precision multiplies with low / high results +let hasSideEffects = 0 in { +let isCommutable = 1 in { +def SMULL : AsMul1I64<0b0000110, (outs GPR:$RdLo, GPR:$RdHi), + (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64, + "smull", "\t$RdLo, $RdHi, $Rn, $Rm", + [(set GPR:$RdLo, GPR:$RdHi, + (smullohi GPR:$Rn, GPR:$Rm))]>, + Requires<[IsARM, HasV6]>, + Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>; + +def UMULL : AsMul1I64<0b0000100, (outs GPR:$RdLo, GPR:$RdHi), + (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64, + "umull", "\t$RdLo, $RdHi, $Rn, $Rm", + [(set GPR:$RdLo, GPR:$RdHi, + (umullohi GPR:$Rn, 
GPR:$Rm))]>, + Requires<[IsARM, HasV6]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL]>; + +let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in { +def SMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), + (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), + 4, IIC_iMUL64, + [(set GPR:$RdLo, GPR:$RdHi, + (smullohi GPR:$Rn, GPR:$Rm))], + (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, + Requires<[IsARM, NoV6]>, + Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>; + +def UMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), + (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), + 4, IIC_iMUL64, + [(set GPR:$RdLo, GPR:$RdHi, + (umullohi GPR:$Rn, GPR:$Rm))], + (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, + Requires<[IsARM, NoV6]>, + Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>; +} +} + +// Multiply + accumulate +def SMLAL : AsMla1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi), + (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64, + "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; +def UMLAL : AsMla1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi), + (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64, + "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; + +def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi), + (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), + IIC_iMAC64, + "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]> { + bits<4> RdLo; + bits<4> RdHi; + bits<4> Rm; + bits<4> Rn; + let Inst{19-16} = RdHi; + let Inst{15-12} = RdLo; + let Inst{11-8} = Rm; + let Inst{3-0} = Rn; +} + +let Constraints = + "@earlyclobber $RdLo,@earlyclobber $RdHi,$RLo = $RdLo,$RHi = $RdHi" in { +def SMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), + (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s), + 4, IIC_iMAC64, [], + (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, + pred:$p, cc_out:$s)>, + Requires<[IsARM, NoV6]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; +def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), + (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s), + 4, IIC_iMAC64, [], + (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, + pred:$p, cc_out:$s)>, + Requires<[IsARM, NoV6]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; +} + +} // hasSideEffects + +// Most significant word multiply +def SMMUL : AMul2I <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), + IIC_iMUL32, "smmul", "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (mulhs GPR:$Rn, GPR:$Rm))]>, + Requires<[IsARM, HasV6]>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]> { + let Inst{15-12} = 0b1111; +} + +def SMMULR : AMul2I <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), + IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (ARMsmmlar GPR:$Rn, GPR:$Rm, (i32 0)))]>, + Requires<[IsARM, HasV6]>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]> { + let Inst{15-12} = 0b1111; +} + +def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs GPR:$Rd), + (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), + IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra", + [(set GPR:$Rd, (add (mulhs 
GPR:$Rn, GPR:$Rm), GPR:$Ra))]>, + Requires<[IsARM, HasV6, UseMulOps]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; + +def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd), + (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), + IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra", + [(set GPR:$Rd, (ARMsmmlar GPR:$Rn, GPR:$Rm, GPR:$Ra))]>, + Requires<[IsARM, HasV6]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; + +def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd), + (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), + IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra", []>, + Requires<[IsARM, HasV6, UseMulOps]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; + +def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd), + (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), + IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", + [(set GPR:$Rd, (ARMsmmlsr GPR:$Rn, GPR:$Rm, GPR:$Ra))]>, + Requires<[IsARM, HasV6]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; + +multiclass AI_smul { + def BB : AMulxyI<0b0001011, 0b00, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), + IIC_iMUL16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16), + (sext_inreg GPR:$Rm, i16)))]>, + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMUL16, ReadMUL, ReadMUL]>; + + def BT : AMulxyI<0b0001011, 0b10, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), + IIC_iMUL16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16), + (sra GPR:$Rm, (i32 16))))]>, + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMUL16, ReadMUL, ReadMUL]>; + + def TB : AMulxyI<0b0001011, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), + IIC_iMUL16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)), + (sext_inreg GPR:$Rm, i16)))]>, + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMUL16, ReadMUL, ReadMUL]>; + + def TT : AMulxyI<0b0001011, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), + IIC_iMUL16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)), + (sra GPR:$Rm, (i32 16))))]>, + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMUL16, ReadMUL, ReadMUL]>; + + def WB : AMulxyI<0b0001001, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), + IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (ARMsmulwb GPR:$Rn, GPR:$Rm))]>, + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMUL16, ReadMUL, ReadMUL]>; + + def WT : AMulxyI<0b0001001, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), + IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (ARMsmulwt GPR:$Rn, GPR:$Rm))]>, + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMUL16, ReadMUL, ReadMUL]>; +} + + +multiclass AI_smla { + let DecoderMethod = "DecodeSMLAInstruction" in { + def BB : AMulxyIa<0b0001000, 0b00, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), + IIC_iMAC16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra", + [(set GPRnopc:$Rd, (add GPR:$Ra, + (mul (sext_inreg GPRnopc:$Rn, i16), + (sext_inreg GPRnopc:$Rm, i16))))]>, + Requires<[IsARM, HasV5TE, UseMulOps]>, + Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; + + def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), + IIC_iMAC16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra", + [(set GPRnopc:$Rd, + (add GPR:$Ra, (mul (sext_inreg GPRnopc:$Rn, i16), + (sra GPRnopc:$Rm, (i32 16)))))]>, + Requires<[IsARM, HasV5TE, UseMulOps]>, + Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; + + def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), + IIC_iMAC16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, 
$Ra", + [(set GPRnopc:$Rd, + (add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)), + (sext_inreg GPRnopc:$Rm, i16))))]>, + Requires<[IsARM, HasV5TE, UseMulOps]>, + Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; + + def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), + IIC_iMAC16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra", + [(set GPRnopc:$Rd, + (add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)), + (sra GPRnopc:$Rm, (i32 16)))))]>, + Requires<[IsARM, HasV5TE, UseMulOps]>, + Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; + + def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), + IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra", + [(set GPRnopc:$Rd, + (add GPR:$Ra, (ARMsmulwb GPRnopc:$Rn, GPRnopc:$Rm)))]>, + Requires<[IsARM, HasV5TE, UseMulOps]>, + Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; + + def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), + IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra", + [(set GPRnopc:$Rd, + (add GPR:$Ra, (ARMsmulwt GPRnopc:$Rn, GPRnopc:$Rm)))]>, + Requires<[IsARM, HasV5TE, UseMulOps]>, + Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; + } +} + +defm SMUL : AI_smul<"smul">; +defm SMLA : AI_smla<"smla">; + +// Halfword multiply accumulate long: SMLAL. +class SMLAL opc1, string asm> + : AMulxyI64<0b0001010, opc1, + (outs GPRnopc:$RdLo, GPRnopc:$RdHi), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + IIC_iMAC64, asm, "\t$RdLo, $RdHi, $Rn, $Rm", []>, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; + +def SMLALBB : SMLAL<0b00, "smlalbb">; +def SMLALBT : SMLAL<0b10, "smlalbt">; +def SMLALTB : SMLAL<0b01, "smlaltb">; +def SMLALTT : SMLAL<0b11, "smlaltt">; + +def : ARMV5TEPat<(ARMsmlalbb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), + (SMLALBB $Rn, $Rm, $RLo, $RHi)>; +def : ARMV5TEPat<(ARMsmlalbt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), + (SMLALBT $Rn, $Rm, $RLo, $RHi)>; +def : ARMV5TEPat<(ARMsmlaltb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), + (SMLALTB $Rn, $Rm, $RLo, $RHi)>; +def : ARMV5TEPat<(ARMsmlaltt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), + (SMLALTT $Rn, $Rm, $RLo, $RHi)>; + +// Helper class for AI_smld. 
+class AMulDualIbase + : AI, + Requires<[IsARM, HasV6]> { + bits<4> Rn; + bits<4> Rm; + let Inst{27-23} = 0b01110; + let Inst{22} = long; + let Inst{21-20} = 0b00; + let Inst{11-8} = Rm; + let Inst{7} = 0; + let Inst{6} = sub; + let Inst{5} = swap; + let Inst{4} = 1; + let Inst{3-0} = Rn; +} +class AMulDualI + : AMulDualIbase { + bits<4> Rd; + let Inst{15-12} = 0b1111; + let Inst{19-16} = Rd; +} +class AMulDualIa + : AMulDualIbase { + bits<4> Ra; + bits<4> Rd; + let Inst{19-16} = Rd; + let Inst{15-12} = Ra; +} +class AMulDualI64 + : AMulDualIbase { + bits<4> RdLo; + bits<4> RdHi; + let Inst{19-16} = RdHi; + let Inst{15-12} = RdLo; +} + +multiclass AI_smld { + + def D : AMulDualIa<0, sub, 0, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), + NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm, $Ra">, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; + + def DX: AMulDualIa<0, sub, 1, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), + NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm, $Ra">, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; + + def LD: AMulDualI64<1, sub, 0, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + NoItinerary, + !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; + + def LDX : AMulDualI64<1, sub, 1, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + NoItinerary, + !strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, + Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>; +} + +defm SMLA : AI_smld<0, "smla">; +defm SMLS : AI_smld<1, "smls">; + +def : ARMV6Pat<(int_arm_smlad GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), + (SMLAD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>; +def : ARMV6Pat<(int_arm_smladx GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), + (SMLADX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>; +def : ARMV6Pat<(int_arm_smlsd GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), + (SMLSD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>; +def : ARMV6Pat<(int_arm_smlsdx GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), + (SMLSDX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>; +def : ARMV6Pat<(ARMSmlald GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + (SMLALD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>; +def : ARMV6Pat<(ARMSmlaldx GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + (SMLALDX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>; +def : ARMV6Pat<(ARMSmlsld GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + (SMLSLD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>; +def : ARMV6Pat<(ARMSmlsldx GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + (SMLSLDX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>; + +multiclass AI_sdml { + + def D:AMulDualI<0, sub, 0, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm), + NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; + def DX:AMulDualI<0, sub, 1, (outs GPRnopc:$Rd),(ins GPRnopc:$Rn, GPRnopc:$Rm), + NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; +} + +defm SMUA : AI_sdml<0, "smua">; +defm SMUS : AI_sdml<1, "smus">; + +def : ARMV6Pat<(int_arm_smuad GPRnopc:$Rn, GPRnopc:$Rm), + (SMUAD GPRnopc:$Rn, GPRnopc:$Rm)>; +def : ARMV6Pat<(int_arm_smuadx GPRnopc:$Rn, GPRnopc:$Rm), + (SMUADX GPRnopc:$Rn, GPRnopc:$Rm)>; +def : ARMV6Pat<(int_arm_smusd GPRnopc:$Rn, 
GPRnopc:$Rm), + (SMUSD GPRnopc:$Rn, GPRnopc:$Rm)>; +def : ARMV6Pat<(int_arm_smusdx GPRnopc:$Rn, GPRnopc:$Rm), + (SMUSDX GPRnopc:$Rn, GPRnopc:$Rm)>; + +//===----------------------------------------------------------------------===// +// Division Instructions (ARMv7-A with virtualization extension) +// +def SDIV : ADivA1I<0b001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV, + "sdiv", "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (sdiv GPR:$Rn, GPR:$Rm))]>, + Requires<[IsARM, HasDivideInARM]>, + Sched<[WriteDIV]>; + +def UDIV : ADivA1I<0b011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV, + "udiv", "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (udiv GPR:$Rn, GPR:$Rm))]>, + Requires<[IsARM, HasDivideInARM]>, + Sched<[WriteDIV]>; + +//===----------------------------------------------------------------------===// +// Misc. Arithmetic Instructions. +// + +def CLZ : AMiscA1I<0b00010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm), + IIC_iUNAr, "clz", "\t$Rd, $Rm", + [(set GPR:$Rd, (ctlz GPR:$Rm))]>, Requires<[IsARM, HasV5T]>, + Sched<[WriteALU]>; + +def RBIT : AMiscA1I<0b01101111, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm), + IIC_iUNAr, "rbit", "\t$Rd, $Rm", + [(set GPR:$Rd, (bitreverse GPR:$Rm))]>, + Requires<[IsARM, HasV6T2]>, + Sched<[WriteALU]>; + +def REV : AMiscA1I<0b01101011, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm), + IIC_iUNAr, "rev", "\t$Rd, $Rm", + [(set GPR:$Rd, (bswap GPR:$Rm))]>, Requires<[IsARM, HasV6]>, + Sched<[WriteALU]>; + +let AddedComplexity = 5 in +def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm), + IIC_iUNAr, "rev16", "\t$Rd, $Rm", + [(set GPR:$Rd, (rotr (bswap GPR:$Rm), (i32 16)))]>, + Requires<[IsARM, HasV6]>, + Sched<[WriteALU]>; + +def : ARMV6Pat<(srl (bswap (extloadi16 addrmode3:$addr)), (i32 16)), + (REV16 (LDRH addrmode3:$addr))>; +def : ARMV6Pat<(truncstorei16 (srl (bswap GPR:$Rn), (i32 16)), addrmode3:$addr), + (STRH (REV16 GPR:$Rn), addrmode3:$addr)>; + +let AddedComplexity = 5 in +def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm), + IIC_iUNAr, "revsh", "\t$Rd, $Rm", + [(set GPR:$Rd, (sra (bswap GPR:$Rm), (i32 16)))]>, + Requires<[IsARM, HasV6]>, + Sched<[WriteALU]>; + +def : ARMV6Pat<(or (sra (shl GPR:$Rm, (i32 24)), (i32 16)), + (and (srl GPR:$Rm, (i32 8)), 0xFF)), + (REVSH GPR:$Rm)>; + +def PKHBT : APKHI<0b01101000, 0, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, pkh_lsl_amt:$sh), + IIC_iALUsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh", + [(set GPRnopc:$Rd, (or (and GPRnopc:$Rn, 0xFFFF), + (and (shl GPRnopc:$Rm, pkh_lsl_amt:$sh), + 0xFFFF0000)))]>, + Requires<[IsARM, HasV6]>, + Sched<[WriteALUsi, ReadALU]>; + +// Alternate cases for PKHBT where identities eliminate some nodes. +def : ARMV6Pat<(or (and GPRnopc:$Rn, 0xFFFF), (and GPRnopc:$Rm, 0xFFFF0000)), + (PKHBT GPRnopc:$Rn, GPRnopc:$Rm, 0)>; +def : ARMV6Pat<(or (and GPRnopc:$Rn, 0xFFFF), (shl GPRnopc:$Rm, imm16_31:$sh)), + (PKHBT GPRnopc:$Rn, GPRnopc:$Rm, imm16_31:$sh)>; + +// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and +// will match the pattern below. +def PKHTB : APKHI<0b01101000, 1, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, pkh_asr_amt:$sh), + IIC_iBITsi, "pkhtb", "\t$Rd, $Rn, $Rm$sh", + [(set GPRnopc:$Rd, (or (and GPRnopc:$Rn, 0xFFFF0000), + (and (sra GPRnopc:$Rm, pkh_asr_amt:$sh), + 0xFFFF)))]>, + Requires<[IsARM, HasV6]>, + Sched<[WriteALUsi, ReadALU]>; + +// Alternate cases for PKHTB where identities eliminate some nodes. Note that +// a shift amount of 0 is *not legal* here, it is PKHBT instead. 
+// We also can not replace a srl (17..31) by an arithmetic shift we would use in +// pkhtb src1, src2, asr (17..31). +def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000), + (srl GPRnopc:$src2, imm16:$sh)), + (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm16:$sh)>; +def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000), + (sra GPRnopc:$src2, imm16_31:$sh)), + (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm16_31:$sh)>; +def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000), + (and (srl GPRnopc:$src2, imm1_15:$sh), 0xFFFF)), + (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm1_15:$sh)>; + +//===----------------------------------------------------------------------===// +// CRC Instructions +// +// Polynomials: +// + CRC32{B,H,W} 0x04C11DB7 +// + CRC32C{B,H,W} 0x1EDC6F41 +// + +class AI_crc32 sz, string suffix, SDPatternOperator builtin> + : AInoP<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm), MiscFrm, NoItinerary, + !strconcat("crc32", suffix), "\t$Rd, $Rn, $Rm", + [(set GPRnopc:$Rd, (builtin GPRnopc:$Rn, GPRnopc:$Rm))]>, + Requires<[IsARM, HasV8, HasCRC]> { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + + let Inst{31-28} = 0b1110; + let Inst{27-23} = 0b00010; + let Inst{22-21} = sz; + let Inst{20} = 0; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-10} = 0b00; + let Inst{9} = C; + let Inst{8} = 0; + let Inst{7-4} = 0b0100; + let Inst{3-0} = Rm; + + let Unpredictable{11-8} = 0b1101; +} + +def CRC32B : AI_crc32<0, 0b00, "b", int_arm_crc32b>; +def CRC32CB : AI_crc32<1, 0b00, "cb", int_arm_crc32cb>; +def CRC32H : AI_crc32<0, 0b01, "h", int_arm_crc32h>; +def CRC32CH : AI_crc32<1, 0b01, "ch", int_arm_crc32ch>; +def CRC32W : AI_crc32<0, 0b10, "w", int_arm_crc32w>; +def CRC32CW : AI_crc32<1, 0b10, "cw", int_arm_crc32cw>; + +//===----------------------------------------------------------------------===// +// ARMv8.1a Privilege Access Never extension +// +// SETPAN #imm1 + +def SETPAN : AInoP<(outs), (ins imm0_1:$imm), MiscFrm, NoItinerary, "setpan", + "\t$imm", []>, Requires<[IsARM, HasV8, HasV8_1a]> { + bits<1> imm; + + let Inst{31-28} = 0b1111; + let Inst{27-20} = 0b00010001; + let Inst{19-16} = 0b0000; + let Inst{15-10} = 0b000000; + let Inst{9} = imm; + let Inst{8} = 0b0; + let Inst{7-4} = 0b0000; + let Inst{3-0} = 0b0000; + + let Unpredictable{19-16} = 0b1111; + let Unpredictable{15-10} = 0b111111; + let Unpredictable{8} = 0b1; + let Unpredictable{3-0} = 0b1111; +} + +//===----------------------------------------------------------------------===// +// Comparison Instructions... +// + +defm CMP : AI1_cmp_irs<0b1010, "cmp", + IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr, ARMcmp>; + +// ARMcmpZ can re-use the above instruction definitions. 
+def : ARMPat<(ARMcmpZ GPR:$src, mod_imm:$imm), + (CMPri GPR:$src, mod_imm:$imm)>; +def : ARMPat<(ARMcmpZ GPR:$src, GPR:$rhs), + (CMPrr GPR:$src, GPR:$rhs)>; +def : ARMPat<(ARMcmpZ GPR:$src, so_reg_imm:$rhs), + (CMPrsi GPR:$src, so_reg_imm:$rhs)>; +def : ARMPat<(ARMcmpZ GPR:$src, so_reg_reg:$rhs), + (CMPrsr GPR:$src, so_reg_reg:$rhs)>; + +// CMN register-integer +let isCompare = 1, Defs = [CPSR] in { +def CMNri : AI1<0b1011, (outs), (ins GPR:$Rn, mod_imm:$imm), DPFrm, IIC_iCMPi, + "cmn", "\t$Rn, $imm", + [(ARMcmn GPR:$Rn, mod_imm:$imm)]>, + Sched<[WriteCMP, ReadALU]> { + bits<4> Rn; + bits<12> imm; + let Inst{25} = 1; + let Inst{20} = 1; + let Inst{19-16} = Rn; + let Inst{15-12} = 0b0000; + let Inst{11-0} = imm; + + let Unpredictable{15-12} = 0b1111; +} + +// CMN register-register/shift +def CMNzrr : AI1<0b1011, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, IIC_iCMPr, + "cmn", "\t$Rn, $Rm", + [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> + GPR:$Rn, GPR:$Rm)]>, Sched<[WriteCMP, ReadALU, ReadALU]> { + bits<4> Rn; + bits<4> Rm; + let isCommutable = 1; + let Inst{25} = 0; + let Inst{20} = 1; + let Inst{19-16} = Rn; + let Inst{15-12} = 0b0000; + let Inst{11-4} = 0b00000000; + let Inst{3-0} = Rm; + + let Unpredictable{15-12} = 0b1111; +} + +def CMNzrsi : AI1<0b1011, (outs), + (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, IIC_iCMPsr, + "cmn", "\t$Rn, $shift", + [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> + GPR:$Rn, so_reg_imm:$shift)]>, + Sched<[WriteCMPsi, ReadALU]> { + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{20} = 1; + let Inst{19-16} = Rn; + let Inst{15-12} = 0b0000; + let Inst{11-5} = shift{11-5}; + let Inst{4} = 0; + let Inst{3-0} = shift{3-0}; + + let Unpredictable{15-12} = 0b1111; +} + +def CMNzrsr : AI1<0b1011, (outs), + (ins GPRnopc:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, IIC_iCMPsr, + "cmn", "\t$Rn, $shift", + [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> + GPRnopc:$Rn, so_reg_reg:$shift)]>, + Sched<[WriteCMPsr, ReadALU]> { + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{20} = 1; + let Inst{19-16} = Rn; + let Inst{15-12} = 0b0000; + let Inst{11-8} = shift{11-8}; + let Inst{7} = 0; + let Inst{6-5} = shift{6-5}; + let Inst{4} = 1; + let Inst{3-0} = shift{3-0}; + + let Unpredictable{15-12} = 0b1111; +} + +} + +def : ARMPat<(ARMcmp GPR:$src, mod_imm_neg:$imm), + (CMNri GPR:$src, mod_imm_neg:$imm)>; + +def : ARMPat<(ARMcmpZ GPR:$src, mod_imm_neg:$imm), + (CMNri GPR:$src, mod_imm_neg:$imm)>; + +// Note that TST/TEQ don't set all the same flags that CMP does! +defm TST : AI1_cmp_irs<0b1000, "tst", + IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr, + BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, 1, + "DecodeTSTInstruction">; +defm TEQ : AI1_cmp_irs<0b1001, "teq", + IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr, + BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>, 1>; + +// Pseudo i64 compares for some floating point compares. 
+let usesCustomInserter = 1, isBranch = 1, isTerminator = 1, + Defs = [CPSR] in { +def BCCi64 : PseudoInst<(outs), + (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst), + IIC_Br, + [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>, + Sched<[WriteBr]>; + +def BCCZi64 : PseudoInst<(outs), + (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst), IIC_Br, + [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>, + Sched<[WriteBr]>; +} // usesCustomInserter + + +// Conditional moves +let hasSideEffects = 0 in { + +let isCommutable = 1, isSelect = 1 in +def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, GPR:$Rm, cmovpred:$p), + 4, IIC_iCMOVr, + [(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, + cmovpred:$p))]>, + RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; + +def MOVCCsi : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, so_reg_imm:$shift, cmovpred:$p), + 4, IIC_iCMOVsr, + [(set GPR:$Rd, + (ARMcmov GPR:$false, so_reg_imm:$shift, + cmovpred:$p))]>, + RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; +def MOVCCsr : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, so_reg_reg:$shift, cmovpred:$p), + 4, IIC_iCMOVsr, + [(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_reg:$shift, + cmovpred:$p))]>, + RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; + + +let isMoveImm = 1 in +def MOVCCi16 + : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, imm0_65535_expr:$imm, cmovpred:$p), + 4, IIC_iMOVi, + [(set GPR:$Rd, (ARMcmov GPR:$false, imm0_65535:$imm, + cmovpred:$p))]>, + RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>, + Sched<[WriteALU]>; + +let isMoveImm = 1 in +def MOVCCi : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, mod_imm:$imm, cmovpred:$p), + 4, IIC_iCMOVi, + [(set GPR:$Rd, (ARMcmov GPR:$false, mod_imm:$imm, + cmovpred:$p))]>, + RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; + +// Two instruction predicate mov immediate. 
+let isMoveImm = 1 in +def MOVCCi32imm + : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, i32imm:$src, cmovpred:$p), + 8, IIC_iCMOVix2, + [(set GPR:$Rd, (ARMcmov GPR:$false, imm:$src, + cmovpred:$p))]>, + RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>; + +let isMoveImm = 1 in +def MVNCCi : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, mod_imm:$imm, cmovpred:$p), + 4, IIC_iCMOVi, + [(set GPR:$Rd, (ARMcmov GPR:$false, mod_imm_not:$imm, + cmovpred:$p))]>, + RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; + +} // hasSideEffects + + +//===----------------------------------------------------------------------===// +// Atomic operations intrinsics +// + +def MemBarrierOptOperand : AsmOperandClass { + let Name = "MemBarrierOpt"; + let ParserMethod = "parseMemBarrierOptOperand"; +} +def memb_opt : Operand { + let PrintMethod = "printMemBOption"; + let ParserMatchClass = MemBarrierOptOperand; + let DecoderMethod = "DecodeMemBarrierOption"; +} + +def InstSyncBarrierOptOperand : AsmOperandClass { + let Name = "InstSyncBarrierOpt"; + let ParserMethod = "parseInstSyncBarrierOptOperand"; +} +def instsyncb_opt : Operand { + let PrintMethod = "printInstSyncBOption"; + let ParserMatchClass = InstSyncBarrierOptOperand; + let DecoderMethod = "DecodeInstSyncBarrierOption"; +} + +def TraceSyncBarrierOptOperand : AsmOperandClass { + let Name = "TraceSyncBarrierOpt"; + let ParserMethod = "parseTraceSyncBarrierOptOperand"; +} +def tsb_opt : Operand { + let PrintMethod = "printTraceSyncBOption"; + let ParserMatchClass = TraceSyncBarrierOptOperand; +} + +// Memory barriers protect the atomic sequences +let hasSideEffects = 1 in { +def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, + "dmb", "\t$opt", [(int_arm_dmb (i32 imm0_15:$opt))]>, + Requires<[IsARM, HasDB]> { + bits<4> opt; + let Inst{31-4} = 0xf57ff05; + let Inst{3-0} = opt; +} + +def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, + "dsb", "\t$opt", [(int_arm_dsb (i32 imm0_15:$opt))]>, + Requires<[IsARM, HasDB]> { + bits<4> opt; + let Inst{31-4} = 0xf57ff04; + let Inst{3-0} = opt; +} + +// ISB has only full system option +def ISB : AInoP<(outs), (ins instsyncb_opt:$opt), MiscFrm, NoItinerary, + "isb", "\t$opt", [(int_arm_isb (i32 imm0_15:$opt))]>, + Requires<[IsARM, HasDB]> { + bits<4> opt; + let Inst{31-4} = 0xf57ff06; + let Inst{3-0} = opt; +} + +let hasNoSchedulingInfo = 1 in +def TSB : AInoP<(outs), (ins tsb_opt:$opt), MiscFrm, NoItinerary, + "tsb", "\t$opt", []>, Requires<[IsARM, HasV8_4a]> { + let Inst{31-0} = 0xe320f012; +} + +} + +let usesCustomInserter = 1, Defs = [CPSR] in { + +// Pseudo instruction that combines movs + predicated rsbmi +// to implement integer ABS + def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>; +} + +let usesCustomInserter = 1 in { + def COPY_STRUCT_BYVAL_I32 : PseudoInst< + (outs), (ins GPR:$dst, GPR:$src, i32imm:$size, i32imm:$alignment), + NoItinerary, + [(ARMcopystructbyval GPR:$dst, GPR:$src, imm:$size, imm:$alignment)]>; +} + +let hasPostISelHook = 1, Constraints = "$newdst = $dst, $newsrc = $src" in { + // %newsrc, %newdst = MEMCPY %dst, %src, N, ...N scratch regs... + // Copies N registers worth of memory from address %src to address %dst + // and returns the incremented addresses. N scratch register will + // be attached for the copy to use. 
+ def MEMCPY : PseudoInst< + (outs GPR:$newdst, GPR:$newsrc), + (ins GPR:$dst, GPR:$src, i32imm:$nreg, variable_ops), + NoItinerary, + [(set GPR:$newdst, GPR:$newsrc, + (ARMmemcopy GPR:$dst, GPR:$src, imm:$nreg))]>; +} + +def ldrex_1 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i8; +}]>; + +def ldrex_2 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i16; +}]>; + +def ldrex_4 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i32; +}]>; + +def strex_1 : PatFrag<(ops node:$val, node:$ptr), + (int_arm_strex node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i8; +}]>; + +def strex_2 : PatFrag<(ops node:$val, node:$ptr), + (int_arm_strex node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i16; +}]>; + +def strex_4 : PatFrag<(ops node:$val, node:$ptr), + (int_arm_strex node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i32; +}]>; + +def ldaex_1 : PatFrag<(ops node:$ptr), (int_arm_ldaex node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i8; +}]>; + +def ldaex_2 : PatFrag<(ops node:$ptr), (int_arm_ldaex node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i16; +}]>; + +def ldaex_4 : PatFrag<(ops node:$ptr), (int_arm_ldaex node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i32; +}]>; + +def stlex_1 : PatFrag<(ops node:$val, node:$ptr), + (int_arm_stlex node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i8; +}]>; + +def stlex_2 : PatFrag<(ops node:$val, node:$ptr), + (int_arm_stlex node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i16; +}]>; + +def stlex_4 : PatFrag<(ops node:$val, node:$ptr), + (int_arm_stlex node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i32; +}]>; + +let mayLoad = 1 in { +def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "ldrexb", "\t$Rt, $addr", + [(set GPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>; +def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "ldrexh", "\t$Rt, $addr", + [(set GPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>; +def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "ldrex", "\t$Rt, $addr", + [(set GPR:$Rt, (ldrex_4 addr_offset_none:$addr))]>; +let hasExtraDefRegAllocReq = 1 in +def LDREXD : AIldrex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr), + NoItinerary, "ldrexd", "\t$Rt, $addr", []> { + let DecoderMethod = "DecodeDoubleRegLoad"; +} + +def LDAEXB : AIldaex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "ldaexb", "\t$Rt, $addr", + [(set GPR:$Rt, (ldaex_1 addr_offset_none:$addr))]>; +def LDAEXH : AIldaex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "ldaexh", "\t$Rt, $addr", + [(set GPR:$Rt, (ldaex_2 addr_offset_none:$addr))]>; +def LDAEX : AIldaex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "ldaex", "\t$Rt, $addr", + [(set GPR:$Rt, (ldaex_4 addr_offset_none:$addr))]>; +let hasExtraDefRegAllocReq = 1 in +def LDAEXD : AIldaex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr), + NoItinerary, "ldaexd", "\t$Rt, $addr", []> { + let DecoderMethod = "DecodeDoubleRegLoad"; +} +} + +let mayStore = 1, Constraints = "@earlyclobber $Rd" in { +def STREXB: AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), + NoItinerary, "strexb", "\t$Rd, $Rt, $addr", + [(set GPR:$Rd, (strex_1 GPR:$Rt, + addr_offset_none:$addr))]>; +def 
STREXH: AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), + NoItinerary, "strexh", "\t$Rd, $Rt, $addr", + [(set GPR:$Rd, (strex_2 GPR:$Rt, + addr_offset_none:$addr))]>; +def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), + NoItinerary, "strex", "\t$Rd, $Rt, $addr", + [(set GPR:$Rd, (strex_4 GPR:$Rt, + addr_offset_none:$addr))]>; +let hasExtraSrcRegAllocReq = 1 in +def STREXD : AIstrex<0b01, (outs GPR:$Rd), + (ins GPRPairOp:$Rt, addr_offset_none:$addr), + NoItinerary, "strexd", "\t$Rd, $Rt, $addr", []> { + let DecoderMethod = "DecodeDoubleRegStore"; +} +def STLEXB: AIstlex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), + NoItinerary, "stlexb", "\t$Rd, $Rt, $addr", + [(set GPR:$Rd, + (stlex_1 GPR:$Rt, addr_offset_none:$addr))]>; +def STLEXH: AIstlex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), + NoItinerary, "stlexh", "\t$Rd, $Rt, $addr", + [(set GPR:$Rd, + (stlex_2 GPR:$Rt, addr_offset_none:$addr))]>; +def STLEX : AIstlex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), + NoItinerary, "stlex", "\t$Rd, $Rt, $addr", + [(set GPR:$Rd, + (stlex_4 GPR:$Rt, addr_offset_none:$addr))]>; +let hasExtraSrcRegAllocReq = 1 in +def STLEXD : AIstlex<0b01, (outs GPR:$Rd), + (ins GPRPairOp:$Rt, addr_offset_none:$addr), + NoItinerary, "stlexd", "\t$Rd, $Rt, $addr", []> { + let DecoderMethod = "DecodeDoubleRegStore"; +} +} + +def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", + [(int_arm_clrex)]>, + Requires<[IsARM, HasV6K]> { + let Inst{31-0} = 0b11110101011111111111000000011111; +} + +def : ARMPat<(strex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr), + (STREXB GPR:$Rt, addr_offset_none:$addr)>; +def : ARMPat<(strex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr), + (STREXH GPR:$Rt, addr_offset_none:$addr)>; + +def : ARMPat<(stlex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr), + (STLEXB GPR:$Rt, addr_offset_none:$addr)>; +def : ARMPat<(stlex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr), + (STLEXH GPR:$Rt, addr_offset_none:$addr)>; + +class acquiring_load + : PatFrag<(ops node:$ptr), (base node:$ptr), [{ + AtomicOrdering Ordering = cast(N)->getOrdering(); + return isAcquireOrStronger(Ordering); +}]>; + +def atomic_load_acquire_8 : acquiring_load; +def atomic_load_acquire_16 : acquiring_load; +def atomic_load_acquire_32 : acquiring_load; + +class releasing_store + : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ + AtomicOrdering Ordering = cast(N)->getOrdering(); + return isReleaseOrStronger(Ordering); +}]>; + +def atomic_store_release_8 : releasing_store; +def atomic_store_release_16 : releasing_store; +def atomic_store_release_32 : releasing_store; + +let AddedComplexity = 8 in { + def : ARMPat<(atomic_load_acquire_8 addr_offset_none:$addr), (LDAB addr_offset_none:$addr)>; + def : ARMPat<(atomic_load_acquire_16 addr_offset_none:$addr), (LDAH addr_offset_none:$addr)>; + def : ARMPat<(atomic_load_acquire_32 addr_offset_none:$addr), (LDA addr_offset_none:$addr)>; + def : ARMPat<(atomic_store_release_8 addr_offset_none:$addr, GPR:$val), (STLB GPR:$val, addr_offset_none:$addr)>; + def : ARMPat<(atomic_store_release_16 addr_offset_none:$addr, GPR:$val), (STLH GPR:$val, addr_offset_none:$addr)>; + def : ARMPat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (STL GPR:$val, addr_offset_none:$addr)>; +} + +// SWP/SWPB are deprecated in V6/V7 and optional in v7VE. +// FIXME Use InstAlias to generate LDREX/STREX pairs instead. 
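+// A minimal sketch (illustrative only, not part of the upstream file) of an
+// exclusive-access sequence equivalent to "swp r0, r0, [r1]", using
+// placeholder register names; only the LDREX/STREX pairing is the point:
+//   1: ldrex  r2, [r1]
+//      strex  r3, r0, [r1]
+//      cmp    r3, #0
+//      bne    1b
+//      mov    r0, r2        @ r0 now holds the old memory contents, as SWP would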
+let mayLoad = 1, mayStore = 1 in { +def SWP : AIswp<0, (outs GPRnopc:$Rt), + (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swp", []>, + Requires<[IsARM,PreV8]>; +def SWPB: AIswp<1, (outs GPRnopc:$Rt), + (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swpb", []>, + Requires<[IsARM,PreV8]>; +} + +//===----------------------------------------------------------------------===// +// Coprocessor Instructions. +// + +def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, + c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), + NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", + [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, + imm:$CRm, imm:$opc2)]>, + Requires<[IsARM,PreV8]> { + bits<4> opc1; + bits<4> CRn; + bits<4> CRd; + bits<4> cop; + bits<3> opc2; + bits<4> CRm; + + let Inst{3-0} = CRm; + let Inst{4} = 0; + let Inst{7-5} = opc2; + let Inst{11-8} = cop; + let Inst{15-12} = CRd; + let Inst{19-16} = CRn; + let Inst{23-20} = opc1; + + let DecoderNamespace = "CoProc"; +} + +def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, + c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), + NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", + [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, + imm:$CRm, imm:$opc2)]>, + Requires<[IsARM,PreV8]> { + let Inst{31-28} = 0b1111; + bits<4> opc1; + bits<4> CRn; + bits<4> CRd; + bits<4> cop; + bits<3> opc2; + bits<4> CRm; + + let Inst{3-0} = CRm; + let Inst{4} = 0; + let Inst{7-5} = opc2; + let Inst{11-8} = cop; + let Inst{15-12} = CRd; + let Inst{19-16} = CRn; + let Inst{23-20} = opc1; + + let DecoderNamespace = "CoProc"; +} + +class ACI pattern, IndexMode im = IndexModeNone> + : I { + let Inst{27-25} = 0b110; +} +class ACInoP pattern, IndexMode im = IndexModeNone> + : InoP { + let Inst{31-28} = 0b1111; + let Inst{27-25} = 0b110; +} + +let DecoderNamespace = "CoProc" in { +multiclass LdStCop pattern> { + def _OFFSET : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr), + asm, "\t$cop, $CRd, $addr", pattern> { + bits<13> addr; + bits<4> cop; + bits<4> CRd; + let Inst{24} = 1; // P = 1 + let Inst{23} = addr{8}; + let Inst{22} = Dbit; + let Inst{21} = 0; // W = 0 + let Inst{20} = load; + let Inst{19-16} = addr{12-9}; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = addr{7-0}; + let DecoderMethod = "DecodeCopMemInstruction"; + } + def _PRE : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5_pre:$addr), + asm, "\t$cop, $CRd, $addr!", [], IndexModePre> { + bits<13> addr; + bits<4> cop; + bits<4> CRd; + let Inst{24} = 1; // P = 1 + let Inst{23} = addr{8}; + let Inst{22} = Dbit; + let Inst{21} = 1; // W = 1 + let Inst{20} = load; + let Inst{19-16} = addr{12-9}; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = addr{7-0}; + let DecoderMethod = "DecodeCopMemInstruction"; + } + def _POST: ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr, + postidx_imm8s4:$offset), + asm, "\t$cop, $CRd, $addr, $offset", [], IndexModePost> { + bits<9> offset; + bits<4> addr; + bits<4> cop; + bits<4> CRd; + let Inst{24} = 0; // P = 0 + let Inst{23} = offset{8}; + let Inst{22} = Dbit; + let Inst{21} = 1; // W = 1 + let Inst{20} = load; + let Inst{19-16} = addr; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = offset{7-0}; + let DecoderMethod = "DecodeCopMemInstruction"; + } + def _OPTION : ACI<(outs), + (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr, + coproc_option_imm:$option), + asm, "\t$cop, $CRd, $addr, $option", []> { + bits<8> option; + bits<4> addr; + bits<4> cop; 
+ bits<4> CRd; + let Inst{24} = 0; // P = 0 + let Inst{23} = 1; // U = 1 + let Inst{22} = Dbit; + let Inst{21} = 0; // W = 0 + let Inst{20} = load; + let Inst{19-16} = addr; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = option; + let DecoderMethod = "DecodeCopMemInstruction"; + } +} +multiclass LdSt2Cop pattern> { + def _OFFSET : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr), + asm, "\t$cop, $CRd, $addr", pattern> { + bits<13> addr; + bits<4> cop; + bits<4> CRd; + let Inst{24} = 1; // P = 1 + let Inst{23} = addr{8}; + let Inst{22} = Dbit; + let Inst{21} = 0; // W = 0 + let Inst{20} = load; + let Inst{19-16} = addr{12-9}; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = addr{7-0}; + let DecoderMethod = "DecodeCopMemInstruction"; + } + def _PRE : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5_pre:$addr), + asm, "\t$cop, $CRd, $addr!", [], IndexModePre> { + bits<13> addr; + bits<4> cop; + bits<4> CRd; + let Inst{24} = 1; // P = 1 + let Inst{23} = addr{8}; + let Inst{22} = Dbit; + let Inst{21} = 1; // W = 1 + let Inst{20} = load; + let Inst{19-16} = addr{12-9}; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = addr{7-0}; + let DecoderMethod = "DecodeCopMemInstruction"; + } + def _POST: ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr, + postidx_imm8s4:$offset), + asm, "\t$cop, $CRd, $addr, $offset", [], IndexModePost> { + bits<9> offset; + bits<4> addr; + bits<4> cop; + bits<4> CRd; + let Inst{24} = 0; // P = 0 + let Inst{23} = offset{8}; + let Inst{22} = Dbit; + let Inst{21} = 1; // W = 1 + let Inst{20} = load; + let Inst{19-16} = addr; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = offset{7-0}; + let DecoderMethod = "DecodeCopMemInstruction"; + } + def _OPTION : ACInoP<(outs), + (ins p_imm:$cop, c_imm:$CRd, addr_offset_none:$addr, + coproc_option_imm:$option), + asm, "\t$cop, $CRd, $addr, $option", []> { + bits<8> option; + bits<4> addr; + bits<4> cop; + bits<4> CRd; + let Inst{24} = 0; // P = 0 + let Inst{23} = 1; // U = 1 + let Inst{22} = Dbit; + let Inst{21} = 0; // W = 0 + let Inst{20} = load; + let Inst{19-16} = addr; + let Inst{15-12} = CRd; + let Inst{11-8} = cop; + let Inst{7-0} = option; + let DecoderMethod = "DecodeCopMemInstruction"; + } +} + +defm LDC : LdStCop <1, 0, "ldc", [(int_arm_ldc imm:$cop, imm:$CRd, addrmode5:$addr)]>; +defm LDCL : LdStCop <1, 1, "ldcl", [(int_arm_ldcl imm:$cop, imm:$CRd, addrmode5:$addr)]>; +defm LDC2 : LdSt2Cop<1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; +defm LDC2L : LdSt2Cop<1, 1, "ldc2l", [(int_arm_ldc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; + +defm STC : LdStCop <0, 0, "stc", [(int_arm_stc imm:$cop, imm:$CRd, addrmode5:$addr)]>; +defm STCL : LdStCop <0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5:$addr)]>; +defm STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; +defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; + +} // DecoderNamespace = "CoProc" + +//===----------------------------------------------------------------------===// +// Move between coprocessor and ARM core register. 
+// + +class MovRCopro pattern> + : ABI<0b1110, oops, iops, NoItinerary, opc, + "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2", pattern> { + let Inst{20} = direction; + let Inst{4} = 1; + + bits<4> Rt; + bits<4> cop; + bits<3> opc1; + bits<3> opc2; + bits<4> CRm; + bits<4> CRn; + + let Inst{15-12} = Rt; + let Inst{11-8} = cop; + let Inst{23-21} = opc1; + let Inst{7-5} = opc2; + let Inst{3-0} = CRm; + let Inst{19-16} = CRn; + + let DecoderNamespace = "CoProc"; +} + +def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */, + (outs), + (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, + c_imm:$CRm, imm0_7:$opc2), + [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, + imm:$CRm, imm:$opc2)]>, + ComplexDeprecationPredicate<"MCR">; +def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm", + (MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, + c_imm:$CRm, 0, pred:$p)>; +def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */, + (outs GPRwithAPSR:$Rt), + (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, + imm0_7:$opc2), []>; +def : ARMInstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm", + (MRC GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + c_imm:$CRm, 0, pred:$p)>; + +def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), + (MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; + +class MovRCopro2 pattern> + : ABXI<0b1110, oops, iops, NoItinerary, + !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), pattern> { + let Inst{31-24} = 0b11111110; + let Inst{20} = direction; + let Inst{4} = 1; + + bits<4> Rt; + bits<4> cop; + bits<3> opc1; + bits<3> opc2; + bits<4> CRm; + bits<4> CRn; + + let Inst{15-12} = Rt; + let Inst{11-8} = cop; + let Inst{23-21} = opc1; + let Inst{7-5} = opc2; + let Inst{3-0} = CRm; + let Inst{19-16} = CRn; + + let DecoderNamespace = "CoProc"; +} + +def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */, + (outs), + (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, + c_imm:$CRm, imm0_7:$opc2), + [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, + imm:$CRm, imm:$opc2)]>, + Requires<[IsARM,PreV8]>; +def : ARMInstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm", + (MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, + c_imm:$CRm, 0)>; +def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */, + (outs GPRwithAPSR:$Rt), + (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, + imm0_7:$opc2), []>, + Requires<[IsARM,PreV8]>; +def : ARMInstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm", + (MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + c_imm:$CRm, 0)>; + +def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, + imm:$CRm, imm:$opc2), + (MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; + +class MovRRCopro + pattern = []> + : ABI<0b1100, oops, iops, NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", + pattern> { + + let Inst{23-21} = 0b010; + let Inst{20} = direction; + + bits<4> Rt; + bits<4> Rt2; + bits<4> cop; + bits<4> opc1; + bits<4> CRm; + + let Inst{15-12} = Rt; + let Inst{19-16} = Rt2; + let Inst{11-8} = cop; + let Inst{7-4} = opc1; + let Inst{3-0} = CRm; +} + +def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */, + (outs), (ins p_imm:$cop, imm0_15:$opc1, GPRnopc:$Rt, + GPRnopc:$Rt2, c_imm:$CRm), + [(int_arm_mcrr imm:$cop, imm:$opc1, GPRnopc:$Rt, + GPRnopc:$Rt2, imm:$CRm)]>; +def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */, + (outs GPRnopc:$Rt, GPRnopc:$Rt2), + (ins p_imm:$cop, 
imm0_15:$opc1, c_imm:$CRm), []>; + +class MovRRCopro2 pattern = []> + : ABXI<0b1100, oops, iops, NoItinerary, + !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern>, + Requires<[IsARM,PreV8]> { + let Inst{31-28} = 0b1111; + let Inst{23-21} = 0b010; + let Inst{20} = direction; + + bits<4> Rt; + bits<4> Rt2; + bits<4> cop; + bits<4> opc1; + bits<4> CRm; + + let Inst{15-12} = Rt; + let Inst{19-16} = Rt2; + let Inst{11-8} = cop; + let Inst{7-4} = opc1; + let Inst{3-0} = CRm; + + let DecoderMethod = "DecoderForMRRC2AndMCRR2"; +} + +def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */, + (outs), (ins p_imm:$cop, imm0_15:$opc1, GPRnopc:$Rt, + GPRnopc:$Rt2, c_imm:$CRm), + [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPRnopc:$Rt, + GPRnopc:$Rt2, imm:$CRm)]>; + +def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */, + (outs GPRnopc:$Rt, GPRnopc:$Rt2), + (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRm), []>; + +//===----------------------------------------------------------------------===// +// Move between special register and ARM core register +// + +// Move to ARM core register from Special Register +def MRS : ABI<0b0001, (outs GPRnopc:$Rd), (ins), NoItinerary, + "mrs", "\t$Rd, apsr", []> { + bits<4> Rd; + let Inst{23-16} = 0b00001111; + let Unpredictable{19-17} = 0b111; + + let Inst{15-12} = Rd; + + let Inst{11-0} = 0b000000000000; + let Unpredictable{11-0} = 0b110100001111; +} + +def : InstAlias<"mrs${p} $Rd, cpsr", (MRS GPRnopc:$Rd, pred:$p), 0>, + Requires<[IsARM]>; + +// The MRSsys instruction is the MRS instruction from the ARM ARM, +// section B9.3.9, with the R bit set to 1. +def MRSsys : ABI<0b0001, (outs GPRnopc:$Rd), (ins), NoItinerary, + "mrs", "\t$Rd, spsr", []> { + bits<4> Rd; + let Inst{23-16} = 0b01001111; + let Unpredictable{19-16} = 0b1111; + + let Inst{15-12} = Rd; + + let Inst{11-0} = 0b000000000000; + let Unpredictable{11-0} = 0b110100001111; +} + +// However, the MRS (banked register) system instruction (ARMv7VE) *does* have a +// separate encoding (distinguished by bit 5. +def MRSbanked : ABI<0b0001, (outs GPRnopc:$Rd), (ins banked_reg:$banked), + NoItinerary, "mrs", "\t$Rd, $banked", []>, + Requires<[IsARM, HasVirtualization]> { + bits<6> banked; + bits<4> Rd; + + let Inst{23} = 0; + let Inst{22} = banked{5}; // R bit + let Inst{21-20} = 0b00; + let Inst{19-16} = banked{3-0}; + let Inst{15-12} = Rd; + let Inst{11-9} = 0b001; + let Inst{8} = banked{4}; + let Inst{7-0} = 0b00000000; +} + +// Move from ARM core register to Special Register +// +// No need to have both system and application versions of MSR (immediate) or +// MSR (register), the encodings are the same and the assembly parser has no way +// to distinguish between them. The mask operand contains the special register +// (R Bit) in bit 4 and bits 3-0 contains the mask with the fields to be +// accessed in the special register. 
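+// Worked example of the mask encoding described above (illustrative only):
+//   "msr cpsr_fc, r0"   -> $mask = 0b01001 (R = 0; f and c fields selected)
+//   "msr spsr_fsxc, r1" -> $mask = 0b11111 (R = 1; all four fields selected)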
+let Defs = [CPSR] in +def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary, + "msr", "\t$mask, $Rn", []> { + bits<5> mask; + bits<4> Rn; + + let Inst{23} = 0; + let Inst{22} = mask{4}; // R bit + let Inst{21-20} = 0b10; + let Inst{19-16} = mask{3-0}; + let Inst{15-12} = 0b1111; + let Inst{11-4} = 0b00000000; + let Inst{3-0} = Rn; +} + +let Defs = [CPSR] in +def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, mod_imm:$imm), NoItinerary, + "msr", "\t$mask, $imm", []> { + bits<5> mask; + bits<12> imm; + + let Inst{23} = 0; + let Inst{22} = mask{4}; // R bit + let Inst{21-20} = 0b10; + let Inst{19-16} = mask{3-0}; + let Inst{15-12} = 0b1111; + let Inst{11-0} = imm; +} + +// However, the MSR (banked register) system instruction (ARMv7VE) *does* have a +// separate encoding (distinguished by bit 5. +def MSRbanked : ABI<0b0001, (outs), (ins banked_reg:$banked, GPRnopc:$Rn), + NoItinerary, "msr", "\t$banked, $Rn", []>, + Requires<[IsARM, HasVirtualization]> { + bits<6> banked; + bits<4> Rn; + + let Inst{23} = 0; + let Inst{22} = banked{5}; // R bit + let Inst{21-20} = 0b10; + let Inst{19-16} = banked{3-0}; + let Inst{15-12} = 0b1111; + let Inst{11-9} = 0b001; + let Inst{8} = banked{4}; + let Inst{7-4} = 0b0000; + let Inst{3-0} = Rn; +} + +// Dynamic stack allocation yields a _chkstk for Windows targets. These calls +// are needed to probe the stack when allocating more than +// 4k bytes in one go. Touching the stack at 4K increments is necessary to +// ensure that the guard pages used by the OS virtual memory manager are +// allocated in correct sequence. +// The main point of having separate instruction are extra unmodelled effects +// (compared to ordinary calls) like stack pointer change. + +def win__chkstk : SDNode<"ARMISD::WIN__CHKSTK", SDTNone, + [SDNPHasChain, SDNPSideEffect]>; +let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP] in + def WIN__CHKSTK : PseudoInst<(outs), (ins), NoItinerary, [(win__chkstk)]>; + +def win__dbzchk : SDNode<"ARMISD::WIN__DBZCHK", SDT_WIN__DBZCHK, + [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>; +let usesCustomInserter = 1, Defs = [CPSR] in + def WIN__DBZCHK : PseudoInst<(outs), (ins tGPR:$divisor), NoItinerary, + [(win__dbzchk tGPR:$divisor)]>; + +//===----------------------------------------------------------------------===// +// TLS Instructions +// + +// __aeabi_read_tp preserves the registers r1-r3. +// This is a pseudo inst so that we can get the encoding right, +// complete with fixup for the aeabi_read_tp function. +// TPsoft is valid for ARM mode only, in case of Thumb mode a tTPsoft pattern +// is defined in "ARMInstrThumb.td". +let isCall = 1, + Defs = [R0, R12, LR, CPSR], Uses = [SP] in { + def TPsoft : ARMPseudoInst<(outs), (ins), 4, IIC_Br, + [(set R0, ARMthread_pointer)]>, Sched<[WriteBr]>, + Requires<[IsARM, IsReadTPSoft]>; +} + +// Reading thread pointer from coprocessor register +def : ARMPat<(ARMthread_pointer), (MRC 15, 0, 13, 0, 3)>, + Requires<[IsARM, IsReadTPHard]>; + +//===----------------------------------------------------------------------===// +// SJLJ Exception handling intrinsics +// eh_sjlj_setjmp() is an instruction sequence to store the return +// address and save #0 in R0 for the non-longjmp case. 
+// Since by its nature we may be coming from some other function to get +// here, and we're using the stack frame for the containing function to +// save/restore registers, we can't keep anything live in regs across +// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon +// when we get here from a longjmp(). We force everything out of registers +// except for our own input by listing the relevant registers in Defs. By +// doing so, we also cause the prologue/epilogue code to actively preserve +// all of the callee-saved resgisters, which is exactly what we want. +// A constant value is passed in $val, and we use the location as a scratch. +// +// These are pseudo-instructions and are lowered to individual MC-insts, so +// no encoding information is necessary. +let Defs = + [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15 ], + hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { + def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val), + NoItinerary, + [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, + Requires<[IsARM, HasVFP2]>; +} + +let Defs = + [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ], + hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { + def Int_eh_sjlj_setjmp_nofp : PseudoInst<(outs), (ins GPR:$src, GPR:$val), + NoItinerary, + [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, + Requires<[IsARM, NoVFP]>; +} + +// FIXME: Non-IOS version(s) +let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, + Defs = [ R7, LR, SP ] in { +def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch), + NoItinerary, + [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>, + Requires<[IsARM]>; +} + +let isBarrier = 1, hasSideEffects = 1, usesCustomInserter = 1 in +def Int_eh_sjlj_setup_dispatch : PseudoInst<(outs), (ins), NoItinerary, + [(ARMeh_sjlj_setup_dispatch)]>; + +// eh.sjlj.dispatchsetup pseudo-instruction. +// This pseudo is used for both ARM and Thumb. Any differences are handled when +// the pseudo is expanded (which happens before any passes that need the +// instruction size). +let isBarrier = 1 in +def Int_eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>; + + +//===----------------------------------------------------------------------===// +// Non-Instruction Patterns +// + +// ARMv4 indirect branch using (MOVr PC, dst) +let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in + def MOVPCRX : ARMPseudoExpand<(outs), (ins GPR:$dst), + 4, IIC_Br, [(brind GPR:$dst)], + (MOVr PC, GPR:$dst, (ops 14, zero_reg), zero_reg)>, + Requires<[IsARM, NoV4T]>, Sched<[WriteBr]>; + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in + def TAILJMPr4 : ARMPseudoExpand<(outs), (ins GPR:$dst), + 4, IIC_Br, [], + (MOVr PC, GPR:$dst, (ops 14, zero_reg), zero_reg)>, + Requires<[IsARM, NoV4T]>, Sched<[WriteBr]>; + +// Large immediate handling. + +// 32-bit immediate using two piece mod_imms or movw + movt. +// This is a single pseudo instruction, the benefit is that it can be remat'd +// as a single unit instead of having to handle reg inputs. +// FIXME: Remove this when we can do generalized remat. 
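+// Illustrative examples (placeholder register rD, not from the upstream file):
+//   MOVi32imm 0x12345678 -> movw rD, #0x5678 ; movt rD, #0x1234      (with MOVW/MOVT)
+//   MOVi32imm 0x00FF00FF -> mov rD, #0x00FF0000 ; orr rD, rD, #0xFF  (two mod_imm pieces)
+// i.e. either a movw/movt pair or two mod_imm data-processing instructions.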
+let isReMaterializable = 1, isMoveImm = 1 in +def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2, + [(set GPR:$dst, (arm_i32imm:$src))]>, + Requires<[IsARM]>; + +def LDRLIT_ga_abs : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iLoad_i, + [(set GPR:$dst, (ARMWrapper tglobaladdr:$src))]>, + Requires<[IsARM, DontUseMovt]>; + +// Pseudo instruction that combines movw + movt + add pc (if PIC). +// It also makes it possible to rematerialize the instructions. +// FIXME: Remove this when we can do generalized remat and when machine licm +// can properly the instructions. +let isReMaterializable = 1 in { +def MOV_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), + IIC_iMOVix2addpc, + [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>, + Requires<[IsARM, UseMovtInPic]>; + +def LDRLIT_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), + IIC_iLoadiALU, + [(set GPR:$dst, + (ARMWrapperPIC tglobaladdr:$addr))]>, + Requires<[IsARM, DontUseMovtInPic]>; + +let AddedComplexity = 10 in +def LDRLIT_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), + NoItinerary, + [(set GPR:$dst, + (load (ARMWrapperPIC tglobaladdr:$addr)))]>, + Requires<[IsARM, DontUseMovtInPic]>; + +let AddedComplexity = 10 in +def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), + IIC_iMOVix2ld, + [(set GPR:$dst, (load (ARMWrapperPIC tglobaladdr:$addr)))]>, + Requires<[IsARM, UseMovtInPic]>; +} // isReMaterializable + +// The many different faces of TLS access. +def : ARMPat<(ARMWrapper tglobaltlsaddr :$dst), + (MOVi32imm tglobaltlsaddr :$dst)>, + Requires<[IsARM, UseMovt]>; + +def : Pat<(ARMWrapper tglobaltlsaddr:$src), + (LDRLIT_ga_abs tglobaltlsaddr:$src)>, + Requires<[IsARM, DontUseMovt]>; + +def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr), + (MOV_ga_pcrel tglobaltlsaddr:$addr)>, Requires<[IsARM, UseMovtInPic]>; + +def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr), + (LDRLIT_ga_pcrel tglobaltlsaddr:$addr)>, + Requires<[IsARM, DontUseMovtInPic]>; +let AddedComplexity = 10 in +def : Pat<(load (ARMWrapperPIC tglobaltlsaddr:$addr)), + (MOV_ga_pcrel_ldr tglobaltlsaddr:$addr)>, + Requires<[IsARM, UseMovtInPic]>; + + +// ConstantPool, GlobalAddress, and JumpTable +def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>; +def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>, + Requires<[IsARM, UseMovt]>; +def : ARMPat<(ARMWrapper texternalsym :$dst), (MOVi32imm texternalsym :$dst)>, + Requires<[IsARM, UseMovt]>; +def : ARMPat<(ARMWrapperJT tjumptable:$dst), + (LEApcrelJT tjumptable:$dst)>; + +// TODO: add,sub,and, 3-instr forms? + +// Tail calls. These patterns also apply to Thumb mode. 
+def : Pat<(ARMtcret tcGPR:$dst), (TCRETURNri tcGPR:$dst)>; +def : Pat<(ARMtcret (i32 tglobaladdr:$dst)), (TCRETURNdi texternalsym:$dst)>; +def : Pat<(ARMtcret (i32 texternalsym:$dst)), (TCRETURNdi texternalsym:$dst)>; + +// Direct calls +def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>; +def : ARMPat<(ARMcall_nolink texternalsym:$func), + (BMOVPCB_CALL texternalsym:$func)>; + +// zextload i1 -> zextload i8 +def : ARMPat<(zextloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>; +def : ARMPat<(zextloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>; + +// extload -> zextload +def : ARMPat<(extloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>; +def : ARMPat<(extloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>; +def : ARMPat<(extloadi8 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>; +def : ARMPat<(extloadi8 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>; + +def : ARMPat<(extloadi16 addrmode3:$addr), (LDRH addrmode3:$addr)>; + +def : ARMPat<(extloadi8 addrmodepc:$addr), (PICLDRB addrmodepc:$addr)>; +def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>; + +// smul* and smla* +def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b), + (SMULBB GPR:$a, GPR:$b)>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; +def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))), + (SMULBT GPR:$a, GPR:$b)>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; +def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b), + (SMULTB GPR:$a, GPR:$b)>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; +def : ARMV5MOPat<(add GPR:$acc, + (mul sext_16_node:$a, sext_16_node:$b)), + (SMLABB GPR:$a, GPR:$b, GPR:$acc)>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; +def : ARMV5MOPat<(add GPR:$acc, + (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))), + (SMLABT GPR:$a, GPR:$b, GPR:$acc)>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; +def : ARMV5MOPat<(add GPR:$acc, + (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)), + (SMLATB GPR:$a, GPR:$b, GPR:$acc)>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; + +def : ARMV5TEPat<(int_arm_smulbb GPR:$a, GPR:$b), + (SMULBB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(int_arm_smulbt GPR:$a, GPR:$b), + (SMULBT GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(int_arm_smultb GPR:$a, GPR:$b), + (SMULTB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(int_arm_smultt GPR:$a, GPR:$b), + (SMULTT GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(int_arm_smulwb GPR:$a, GPR:$b), + (SMULWB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(int_arm_smulwt GPR:$a, GPR:$b), + (SMULWT GPR:$a, GPR:$b)>; + +def : ARMV5TEPat<(int_arm_smlabb GPR:$a, GPR:$b, GPR:$acc), + (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(int_arm_smlabt GPR:$a, GPR:$b, GPR:$acc), + (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(int_arm_smlatb GPR:$a, GPR:$b, GPR:$acc), + (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(int_arm_smlatt GPR:$a, GPR:$b, GPR:$acc), + (SMLATT GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(int_arm_smlawb GPR:$a, GPR:$b, GPR:$acc), + (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(int_arm_smlawt GPR:$a, GPR:$b, GPR:$acc), + (SMLAWT GPR:$a, GPR:$b, GPR:$acc)>; + +// Pre-v7 uses MCR for synchronization barriers. 
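+// (The pattern below selects the legacy CP15 encoding
+// "mcr p15, 0, Rt, c7, c10, 5", which acts as a data memory barrier on ARMv6;
+// the $zero operand is conventionally a register holding 0.)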
+def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>, + Requires<[IsARM, HasV6]>; + +// SXT/UXT with no rotate +let AddedComplexity = 16 in { +def : ARMV6Pat<(and GPR:$Src, 0x000000FF), (UXTB GPR:$Src, 0)>; +def : ARMV6Pat<(and GPR:$Src, 0x0000FFFF), (UXTH GPR:$Src, 0)>; +def : ARMV6Pat<(and GPR:$Src, 0x00FF00FF), (UXTB16 GPR:$Src, 0)>; +def : ARMV6Pat<(add GPR:$Rn, (and GPR:$Rm, 0x00FF)), + (UXTAB GPR:$Rn, GPR:$Rm, 0)>; +def : ARMV6Pat<(add GPR:$Rn, (and GPR:$Rm, 0xFFFF)), + (UXTAH GPR:$Rn, GPR:$Rm, 0)>; +} + +def : ARMV6Pat<(sext_inreg GPR:$Src, i8), (SXTB GPR:$Src, 0)>; +def : ARMV6Pat<(sext_inreg GPR:$Src, i16), (SXTH GPR:$Src, 0)>; + +def : ARMV6Pat<(add GPR:$Rn, (sext_inreg GPRnopc:$Rm, i8)), + (SXTAB GPR:$Rn, GPRnopc:$Rm, 0)>; +def : ARMV6Pat<(add GPR:$Rn, (sext_inreg GPRnopc:$Rm, i16)), + (SXTAH GPR:$Rn, GPRnopc:$Rm, 0)>; + +// Atomic load/store patterns +def : ARMPat<(atomic_load_8 ldst_so_reg:$src), + (LDRBrs ldst_so_reg:$src)>; +def : ARMPat<(atomic_load_8 addrmode_imm12:$src), + (LDRBi12 addrmode_imm12:$src)>; +def : ARMPat<(atomic_load_16 addrmode3:$src), + (LDRH addrmode3:$src)>; +def : ARMPat<(atomic_load_32 ldst_so_reg:$src), + (LDRrs ldst_so_reg:$src)>; +def : ARMPat<(atomic_load_32 addrmode_imm12:$src), + (LDRi12 addrmode_imm12:$src)>; +def : ARMPat<(atomic_store_8 ldst_so_reg:$ptr, GPR:$val), + (STRBrs GPR:$val, ldst_so_reg:$ptr)>; +def : ARMPat<(atomic_store_8 addrmode_imm12:$ptr, GPR:$val), + (STRBi12 GPR:$val, addrmode_imm12:$ptr)>; +def : ARMPat<(atomic_store_16 addrmode3:$ptr, GPR:$val), + (STRH GPR:$val, addrmode3:$ptr)>; +def : ARMPat<(atomic_store_32 ldst_so_reg:$ptr, GPR:$val), + (STRrs GPR:$val, ldst_so_reg:$ptr)>; +def : ARMPat<(atomic_store_32 addrmode_imm12:$ptr, GPR:$val), + (STRi12 GPR:$val, addrmode_imm12:$ptr)>; + + +//===----------------------------------------------------------------------===// +// Thumb Support +// + +include "ARMInstrThumb.td" + +//===----------------------------------------------------------------------===// +// Thumb2 Support +// + +include "ARMInstrThumb2.td" + +//===----------------------------------------------------------------------===// +// Floating Point Support +// + +include "ARMInstrVFP.td" + +//===----------------------------------------------------------------------===// +// Advanced SIMD (NEON) Support +// + +include "ARMInstrNEON.td" + +//===----------------------------------------------------------------------===// +// Assembler aliases +// + +// Memory barriers +def : InstAlias<"dmb", (DMB 0xf), 0>, Requires<[IsARM, HasDB]>; +def : InstAlias<"dsb", (DSB 0xf), 0>, Requires<[IsARM, HasDB]>; +def : InstAlias<"isb", (ISB 0xf), 0>, Requires<[IsARM, HasDB]>; +// Armv8-R 'Data Full Barrier' +def : InstAlias<"dfb", (DSB 0xc), 1>, Requires<[IsARM, HasDFB]>; + +// System instructions +def : MnemonicAlias<"swi", "svc">; + +// Load / Store Multiple +def : MnemonicAlias<"ldmfd", "ldm">; +def : MnemonicAlias<"ldmia", "ldm">; +def : MnemonicAlias<"ldmea", "ldmdb">; +def : MnemonicAlias<"stmfd", "stmdb">; +def : MnemonicAlias<"stmia", "stm">; +def : MnemonicAlias<"stmea", "stm">; + +// PKHBT/PKHTB with default shift amount. PKHTB is equivalent to PKHBT with the +// input operands swapped when the shift amount is zero (i.e., unspecified). 
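+// For example (illustrative register names): "pkhtb r0, r1, r2" with no shift
+// computes r0 = (r1 & 0xFFFF0000) | (r2 & 0x0000FFFF), which is exactly what
+// "pkhbt r0, r2, r1" produces, so the alias below maps PKHTB onto PKHBT with
+// the Rn/Rm operands swapped.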
+def : InstAlias<"pkhbt${p} $Rd, $Rn, $Rm", + (PKHBT GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, 0, pred:$p), 0>, + Requires<[IsARM, HasV6]>; +def : InstAlias<"pkhtb${p} $Rd, $Rn, $Rm", + (PKHBT GPRnopc:$Rd, GPRnopc:$Rm, GPRnopc:$Rn, 0, pred:$p), 0>, + Requires<[IsARM, HasV6]>; + +// PUSH/POP aliases for STM/LDM +def : ARMInstAlias<"push${p} $regs", (STMDB_UPD SP, pred:$p, reglist:$regs)>; +def : ARMInstAlias<"pop${p} $regs", (LDMIA_UPD SP, pred:$p, reglist:$regs)>; + +// SSAT/USAT optional shift operand. +def : ARMInstAlias<"ssat${p} $Rd, $sat_imm, $Rn", + (SSAT GPRnopc:$Rd, imm1_32:$sat_imm, GPRnopc:$Rn, 0, pred:$p)>; +def : ARMInstAlias<"usat${p} $Rd, $sat_imm, $Rn", + (USAT GPRnopc:$Rd, imm0_31:$sat_imm, GPRnopc:$Rn, 0, pred:$p)>; + + +// Extend instruction optional rotate operand. +def : ARMInstAlias<"sxtab${p} $Rd, $Rn, $Rm", + (SXTAB GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>; +def : ARMInstAlias<"sxtah${p} $Rd, $Rn, $Rm", + (SXTAH GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>; +def : ARMInstAlias<"sxtab16${p} $Rd, $Rn, $Rm", + (SXTAB16 GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>; +def : ARMInstAlias<"sxtb${p} $Rd, $Rm", + (SXTB GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>; +def : ARMInstAlias<"sxtb16${p} $Rd, $Rm", + (SXTB16 GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>; +def : ARMInstAlias<"sxth${p} $Rd, $Rm", + (SXTH GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>; + +def : ARMInstAlias<"uxtab${p} $Rd, $Rn, $Rm", + (UXTAB GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>; +def : ARMInstAlias<"uxtah${p} $Rd, $Rn, $Rm", + (UXTAH GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>; +def : ARMInstAlias<"uxtab16${p} $Rd, $Rn, $Rm", + (UXTAB16 GPRnopc:$Rd, GPR:$Rn, GPRnopc:$Rm, 0, pred:$p)>; +def : ARMInstAlias<"uxtb${p} $Rd, $Rm", + (UXTB GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>; +def : ARMInstAlias<"uxtb16${p} $Rd, $Rm", + (UXTB16 GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>; +def : ARMInstAlias<"uxth${p} $Rd, $Rm", + (UXTH GPRnopc:$Rd, GPRnopc:$Rm, 0, pred:$p)>; + + +// RFE aliases +def : MnemonicAlias<"rfefa", "rfeda">; +def : MnemonicAlias<"rfeea", "rfedb">; +def : MnemonicAlias<"rfefd", "rfeia">; +def : MnemonicAlias<"rfeed", "rfeib">; +def : MnemonicAlias<"rfe", "rfeia">; + +// SRS aliases +def : MnemonicAlias<"srsfa", "srsib">; +def : MnemonicAlias<"srsea", "srsia">; +def : MnemonicAlias<"srsfd", "srsdb">; +def : MnemonicAlias<"srsed", "srsda">; +def : MnemonicAlias<"srs", "srsia">; + +// QSAX == QSUBADDX +def : MnemonicAlias<"qsubaddx", "qsax">; +// SASX == SADDSUBX +def : MnemonicAlias<"saddsubx", "sasx">; +// SHASX == SHADDSUBX +def : MnemonicAlias<"shaddsubx", "shasx">; +// SHSAX == SHSUBADDX +def : MnemonicAlias<"shsubaddx", "shsax">; +// SSAX == SSUBADDX +def : MnemonicAlias<"ssubaddx", "ssax">; +// UASX == UADDSUBX +def : MnemonicAlias<"uaddsubx", "uasx">; +// UHASX == UHADDSUBX +def : MnemonicAlias<"uhaddsubx", "uhasx">; +// UHSAX == UHSUBADDX +def : MnemonicAlias<"uhsubaddx", "uhsax">; +// UQASX == UQADDSUBX +def : MnemonicAlias<"uqaddsubx", "uqasx">; +// UQSAX == UQSUBADDX +def : MnemonicAlias<"uqsubaddx", "uqsax">; +// USAX == USUBADDX +def : MnemonicAlias<"usubaddx", "usax">; + +// "mov Rd, mod_imm_not" can be handled via "mvn" in assembly, just like +// for isel. 
+def : ARMInstSubst<"mov${s}${p} $Rd, $imm", + (MVNi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>; +def : ARMInstSubst<"mvn${s}${p} $Rd, $imm", + (MOVi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>; +// Same for AND <--> BIC +def : ARMInstSubst<"bic${s}${p} $Rd, $Rn, $imm", + (ANDri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : ARMInstSubst<"bic${s}${p} $Rdn, $imm", + (ANDri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : ARMInstSubst<"and${s}${p} $Rd, $Rn, $imm", + (BICri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : ARMInstSubst<"and${s}${p} $Rdn, $imm", + (BICri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm, + pred:$p, cc_out:$s)>; + +// Likewise, "add Rd, mod_imm_neg" -> sub +def : ARMInstSubst<"add${s}${p} $Rd, $Rn, $imm", + (SUBri GPR:$Rd, GPR:$Rn, mod_imm_neg:$imm, pred:$p, cc_out:$s)>; +def : ARMInstSubst<"add${s}${p} $Rd, $imm", + (SUBri GPR:$Rd, GPR:$Rd, mod_imm_neg:$imm, pred:$p, cc_out:$s)>; +// Likewise, "sub Rd, mod_imm_neg" -> add +def : ARMInstSubst<"sub${s}${p} $Rd, $Rn, $imm", + (ADDri GPR:$Rd, GPR:$Rn, mod_imm_neg:$imm, pred:$p, cc_out:$s)>; +def : ARMInstSubst<"sub${s}${p} $Rd, $imm", + (ADDri GPR:$Rd, GPR:$Rd, mod_imm_neg:$imm, pred:$p, cc_out:$s)>; + + +def : ARMInstSubst<"adc${s}${p} $Rd, $Rn, $imm", + (SBCri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; +def : ARMInstSubst<"adc${s}${p} $Rdn, $imm", + (SBCri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; +def : ARMInstSubst<"sbc${s}${p} $Rd, $Rn, $imm", + (ADCri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; +def : ARMInstSubst<"sbc${s}${p} $Rdn, $imm", + (ADCri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; + +// Same for CMP <--> CMN via mod_imm_neg +def : ARMInstSubst<"cmp${p} $Rd, $imm", + (CMNri rGPR:$Rd, mod_imm_neg:$imm, pred:$p)>; +def : ARMInstSubst<"cmn${p} $Rd, $imm", + (CMPri rGPR:$Rd, mod_imm_neg:$imm, pred:$p)>; + +// The shifter forms of the MOV instruction are aliased to the ASR, LSL, +// LSR, ROR, and RRX instructions. +// FIXME: We need C++ parser hooks to map the alias to the MOV +// encoding. It seems we should be able to do that sort of thing +// in tblgen, but it could get ugly. 
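The ARMAsmPseudo records below give the UAL shift mnemonics their own parse entries; as the FIXME notes, mapping them back onto MOV-with-shifter-operand encodings is done by C++ hooks rather than in tblgen. Illustrative correspondences (assembly shown for orientation only):
//   asr r0, r1, #2    @ encoded as:  mov r0, r1, asr #2
//   lsl r3, r3, r4    @ encoded as:  mov r3, r3, lsl r4
//   rrx r5, r6        @ encoded as:  mov r5, r6, rrx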
+let TwoOperandAliasConstraint = "$Rm = $Rd" in {
+def ASRi : ARMAsmPseudo<"asr${s}${p} $Rd, $Rm, $imm",
+           (ins GPR:$Rd, GPR:$Rm, imm0_32:$imm, pred:$p,
+            cc_out:$s)>;
+def LSRi : ARMAsmPseudo<"lsr${s}${p} $Rd, $Rm, $imm",
+           (ins GPR:$Rd, GPR:$Rm, imm0_32:$imm, pred:$p,
+            cc_out:$s)>;
+def LSLi : ARMAsmPseudo<"lsl${s}${p} $Rd, $Rm, $imm",
+           (ins GPR:$Rd, GPR:$Rm, imm0_31:$imm, pred:$p,
+            cc_out:$s)>;
+def RORi : ARMAsmPseudo<"ror${s}${p} $Rd, $Rm, $imm",
+           (ins GPR:$Rd, GPR:$Rm, imm0_31:$imm, pred:$p,
+            cc_out:$s)>;
+}
+def RRXi : ARMAsmPseudo<"rrx${s}${p} $Rd, $Rm",
+           (ins GPR:$Rd, GPR:$Rm, pred:$p, cc_out:$s)>;
+let TwoOperandAliasConstraint = "$Rn = $Rd" in {
+def ASRr : ARMAsmPseudo<"asr${s}${p} $Rd, $Rn, $Rm",
+           (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+            cc_out:$s)>;
+def LSRr : ARMAsmPseudo<"lsr${s}${p} $Rd, $Rn, $Rm",
+           (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+            cc_out:$s)>;
+def LSLr : ARMAsmPseudo<"lsl${s}${p} $Rd, $Rn, $Rm",
+           (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+            cc_out:$s)>;
+def RORr : ARMAsmPseudo<"ror${s}${p} $Rd, $Rn, $Rm",
+           (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+            cc_out:$s)>;
+}
+
+// "neg" is an alias for "rsb rd, rn, #0"
+def : ARMInstAlias<"neg${s}${p} $Rd, $Rm",
+                   (RSBri GPR:$Rd, GPR:$Rm, 0, pred:$p, cc_out:$s)>;
+
+// Pre-v6, 'mov r0, r0' was used as a NOP encoding.
+def : InstAlias<"nop${p}", (MOVr R0, R0, pred:$p, zero_reg)>,
+         Requires<[IsARM, NoV6]>;
+
+// MUL/UMLAL/SMLAL/UMULL/SMULL are available on all arches, but
+// the instruction definitions need different constraints pre-v6.
+// Use these aliases for the assembly parsing on pre-v6.
+def : InstAlias<"mul${s}${p} $Rd, $Rn, $Rm",
+      (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s), 0>,
+         Requires<[IsARM, NoV6]>;
+def : InstAlias<"mla${s}${p} $Rd, $Rn, $Rm, $Ra",
+      (MLA GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra,
+       pred:$p, cc_out:$s), 0>,
+         Requires<[IsARM, NoV6]>;
+def : InstAlias<"smlal${s}${p} $RdLo, $RdHi, $Rn, $Rm",
+      (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), 0>,
+         Requires<[IsARM, NoV6]>;
+def : InstAlias<"umlal${s}${p} $RdLo, $RdHi, $Rn, $Rm",
+      (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), 0>,
+         Requires<[IsARM, NoV6]>;
+def : InstAlias<"smull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
+      (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), 0>,
+         Requires<[IsARM, NoV6]>;
+def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
+      (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), 0>,
+         Requires<[IsARM, NoV6]>;
+
+// 'it' blocks in ARM mode just validate the predicates. The IT itself
+// is discarded.
+def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>,
+    ComplexDeprecationPredicate<"IT">;
+
+let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
+def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn),
+                       NoItinerary,
+                       [(set GPR:$Rd, (int_arm_space imm:$size, GPR:$Rn))]>;
+
+//===----------------------------------
+// Atomic cmpxchg for -O0
+//===----------------------------------
+
+// The fast register allocator used during -O0 inserts spills to cover any VRegs
+// live across basic block boundaries. When this happens between an LDREX and an
+// STREX it can clear the exclusive monitor, causing all cmpxchg attempts to
+// fail.
+
+// Unfortunately, this means we have to have an alternative (expanded
+// post-regalloc) path for -O0 compilations.
Fortunately this path can be +// significantly more naive than the standard expansion: we conservatively +// assume seq_cst, strong cmpxchg and omit clrex on failure. + +let Constraints = "@earlyclobber $Rd,@earlyclobber $temp", + mayLoad = 1, mayStore = 1 in { +def CMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, GPR:$temp), + (ins GPR:$addr, GPR:$desired, GPR:$new), + NoItinerary, []>, Sched<[]>; + +def CMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, GPR:$temp), + (ins GPR:$addr, GPR:$desired, GPR:$new), + NoItinerary, []>, Sched<[]>; + +def CMP_SWAP_32 : PseudoInst<(outs GPR:$Rd, GPR:$temp), + (ins GPR:$addr, GPR:$desired, GPR:$new), + NoItinerary, []>, Sched<[]>; + +def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$temp), + (ins GPR:$addr, GPRPair:$desired, GPRPair:$new), + NoItinerary, []>, Sched<[]>; +} + +def CompilerBarrier : PseudoInst<(outs), (ins i32imm:$ordering), NoItinerary, + [(atomic_fence imm:$ordering, 0)]> { + let hasSideEffects = 1; + let Size = 0; + let AsmString = "@ COMPILER BARRIER"; +} diff --git a/capstone/suite/synctools/tablegen/ARM/ARMInstrNEON.td b/capstone/suite/synctools/tablegen/ARM/ARMInstrNEON.td new file mode 100644 index 000000000..4525eec8d --- /dev/null +++ b/capstone/suite/synctools/tablegen/ARM/ARMInstrNEON.td @@ -0,0 +1,8545 @@ +//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the ARM NEON instruction set. +// +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// NEON-specific Operands. 
+//===----------------------------------------------------------------------===// +def nModImm : Operand { + let PrintMethod = "printNEONModImmOperand"; +} + +def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; } +def nImmSplatI8 : Operand { + let PrintMethod = "printNEONModImmOperand"; + let ParserMatchClass = nImmSplatI8AsmOperand; +} +def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; } +def nImmSplatI16 : Operand { + let PrintMethod = "printNEONModImmOperand"; + let ParserMatchClass = nImmSplatI16AsmOperand; +} +def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; } +def nImmSplatI32 : Operand { + let PrintMethod = "printNEONModImmOperand"; + let ParserMatchClass = nImmSplatI32AsmOperand; +} +def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; } +def nImmSplatNotI16 : Operand { + let ParserMatchClass = nImmSplatNotI16AsmOperand; +} +def nImmSplatNotI32AsmOperand : AsmOperandClass { let Name = "NEONi32splatNot"; } +def nImmSplatNotI32 : Operand { + let ParserMatchClass = nImmSplatNotI32AsmOperand; +} +def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; } +def nImmVMOVI32 : Operand { + let PrintMethod = "printNEONModImmOperand"; + let ParserMatchClass = nImmVMOVI32AsmOperand; +} + +class nImmVMOVIAsmOperandReplicate + : AsmOperandClass { + let Name = "NEONi" # To.Size # "vmovi" # From.Size # "Replicate"; + let PredicateMethod = "isNEONmovReplicate<" # From.Size # ", " # To.Size # ">"; + let RenderMethod = "addNEONvmovi" # From.Size # "ReplicateOperands"; +} + +class nImmVINVIAsmOperandReplicate + : AsmOperandClass { + let Name = "NEONi" # To.Size # "invi" # From.Size # "Replicate"; + let PredicateMethod = "isNEONinvReplicate<" # From.Size # ", " # To.Size # ">"; + let RenderMethod = "addNEONinvi" # From.Size # "ReplicateOperands"; +} + +class nImmVMOVIReplicate : Operand { + let PrintMethod = "printNEONModImmOperand"; + let ParserMatchClass = nImmVMOVIAsmOperandReplicate; +} + +class nImmVINVIReplicate : Operand { + let PrintMethod = "printNEONModImmOperand"; + let ParserMatchClass = nImmVINVIAsmOperandReplicate; +} + +def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; } +def nImmVMOVI32Neg : Operand { + let PrintMethod = "printNEONModImmOperand"; + let ParserMatchClass = nImmVMOVI32NegAsmOperand; +} +def nImmVMOVF32 : Operand { + let PrintMethod = "printFPImmOperand"; + let ParserMatchClass = FPImmOperand; +} +def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; } +def nImmSplatI64 : Operand { + let PrintMethod = "printNEONModImmOperand"; + let ParserMatchClass = nImmSplatI64AsmOperand; +} + +def VectorIndex8Operand : AsmOperandClass { let Name = "VectorIndex8"; } +def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; } +def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; } +def VectorIndex64Operand : AsmOperandClass { let Name = "VectorIndex64"; } +def VectorIndex8 : Operand, ImmLeaf { + let ParserMatchClass = VectorIndex8Operand; + let PrintMethod = "printVectorIndex"; + let MIOperandInfo = (ops i32imm); +} +def VectorIndex16 : Operand, ImmLeaf { + let ParserMatchClass = VectorIndex16Operand; + let PrintMethod = "printVectorIndex"; + let MIOperandInfo = (ops i32imm); +} +def VectorIndex32 : Operand, ImmLeaf { + let ParserMatchClass = VectorIndex32Operand; + let PrintMethod = "printVectorIndex"; + let MIOperandInfo = (ops i32imm); +} +def VectorIndex64 : Operand, ImmLeaf { + let 
ParserMatchClass = VectorIndex64Operand; + let PrintMethod = "printVectorIndex"; + let MIOperandInfo = (ops i32imm); +} + +// Register list of one D register. +def VecListOneDAsmOperand : AsmOperandClass { + let Name = "VecListOneD"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListOneD : RegisterOperand { + let ParserMatchClass = VecListOneDAsmOperand; +} +// Register list of two sequential D registers. +def VecListDPairAsmOperand : AsmOperandClass { + let Name = "VecListDPair"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListDPair : RegisterOperand { + let ParserMatchClass = VecListDPairAsmOperand; +} +// Register list of three sequential D registers. +def VecListThreeDAsmOperand : AsmOperandClass { + let Name = "VecListThreeD"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListThreeD : RegisterOperand { + let ParserMatchClass = VecListThreeDAsmOperand; +} +// Register list of four sequential D registers. +def VecListFourDAsmOperand : AsmOperandClass { + let Name = "VecListFourD"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListFourD : RegisterOperand { + let ParserMatchClass = VecListFourDAsmOperand; +} +// Register list of two D registers spaced by 2 (two sequential Q registers). +def VecListDPairSpacedAsmOperand : AsmOperandClass { + let Name = "VecListDPairSpaced"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListDPairSpaced : RegisterOperand { + let ParserMatchClass = VecListDPairSpacedAsmOperand; +} +// Register list of three D registers spaced by 2 (three Q registers). +def VecListThreeQAsmOperand : AsmOperandClass { + let Name = "VecListThreeQ"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListThreeQ : RegisterOperand { + let ParserMatchClass = VecListThreeQAsmOperand; +} +// Register list of three D registers spaced by 2 (three Q registers). +def VecListFourQAsmOperand : AsmOperandClass { + let Name = "VecListFourQ"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListFourQ : RegisterOperand { + let ParserMatchClass = VecListFourQAsmOperand; +} + +// Register list of one D register, with "all lanes" subscripting. +def VecListOneDAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListOneDAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListOneDAllLanes : RegisterOperand { + let ParserMatchClass = VecListOneDAllLanesAsmOperand; +} +// Register list of two D registers, with "all lanes" subscripting. +def VecListDPairAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListDPairAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListDPairAllLanes : RegisterOperand { + let ParserMatchClass = VecListDPairAllLanesAsmOperand; +} +// Register list of two D registers spaced by 2 (two sequential Q registers). +def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListDPairSpacedAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListDPairSpacedAllLanes : RegisterOperand { + let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand; +} +// Register list of three D registers, with "all lanes" subscripting. 
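The "AllLanes" operand classes correspond to the empty lane index in the syntax of the duplicating loads; hand-written examples for orientation (not taken from this file):
//   vld1.32 {d0[]}, [r0]              @ one D register, all-lanes form
//   vld3.8  {d0[], d1[], d2[]}, [r1]  @ three D registers, all-lanes form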
+def VecListThreeDAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListThreeDAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListThreeDAllLanes : RegisterOperand { + let ParserMatchClass = VecListThreeDAllLanesAsmOperand; +} +// Register list of three D registers spaced by 2 (three sequential Q regs). +def VecListThreeQAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListThreeQAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListThreeQAllLanes : RegisterOperand { + let ParserMatchClass = VecListThreeQAllLanesAsmOperand; +} +// Register list of four D registers, with "all lanes" subscripting. +def VecListFourDAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListFourDAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListFourDAllLanes : RegisterOperand { + let ParserMatchClass = VecListFourDAllLanesAsmOperand; +} +// Register list of four D registers spaced by 2 (four sequential Q regs). +def VecListFourQAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListFourQAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListFourQAllLanes : RegisterOperand { + let ParserMatchClass = VecListFourQAllLanesAsmOperand; +} + + +// Register list of one D register, with byte lane subscripting. +def VecListOneDByteIndexAsmOperand : AsmOperandClass { + let Name = "VecListOneDByteIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListOneDByteIndexed : Operand { + let ParserMatchClass = VecListOneDByteIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with half-word lane subscripting. +def VecListOneDHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListOneDHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListOneDHWordIndexed : Operand { + let ParserMatchClass = VecListOneDHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListOneDWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListOneDWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListOneDWordIndexed : Operand { + let ParserMatchClass = VecListOneDWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} + +// Register list of two D registers with byte lane subscripting. +def VecListTwoDByteIndexAsmOperand : AsmOperandClass { + let Name = "VecListTwoDByteIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListTwoDByteIndexed : Operand { + let ParserMatchClass = VecListTwoDByteIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with half-word lane subscripting. +def VecListTwoDHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListTwoDHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListTwoDHWordIndexed : Operand { + let ParserMatchClass = VecListTwoDHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. 
+def VecListTwoDWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListTwoDWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListTwoDWordIndexed : Operand { + let ParserMatchClass = VecListTwoDWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// Register list of two Q registers with half-word lane subscripting. +def VecListTwoQHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListTwoQHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListTwoQHWordIndexed : Operand { + let ParserMatchClass = VecListTwoQHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListTwoQWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListTwoQWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListTwoQWordIndexed : Operand { + let ParserMatchClass = VecListTwoQWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} + + +// Register list of three D registers with byte lane subscripting. +def VecListThreeDByteIndexAsmOperand : AsmOperandClass { + let Name = "VecListThreeDByteIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListThreeDByteIndexed : Operand { + let ParserMatchClass = VecListThreeDByteIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with half-word lane subscripting. +def VecListThreeDHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListThreeDHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListThreeDHWordIndexed : Operand { + let ParserMatchClass = VecListThreeDHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListThreeDWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListThreeDWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListThreeDWordIndexed : Operand { + let ParserMatchClass = VecListThreeDWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// Register list of three Q registers with half-word lane subscripting. +def VecListThreeQHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListThreeQHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListThreeQHWordIndexed : Operand { + let ParserMatchClass = VecListThreeQHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListThreeQWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListThreeQWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListThreeQWordIndexed : Operand { + let ParserMatchClass = VecListThreeQWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} + +// Register list of four D registers with byte lane subscripting. 
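By contrast with the all-lanes classes above, the lane-indexed operand classes below carry an explicit element index, matching the single-lane load/store syntax; a hand-written example for orientation:
//   vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r0]   @ four D registers, lane 1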
+def VecListFourDByteIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourDByteIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourDByteIndexed : Operand { + let ParserMatchClass = VecListFourDByteIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with half-word lane subscripting. +def VecListFourDHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourDHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourDHWordIndexed : Operand { + let ParserMatchClass = VecListFourDHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListFourDWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourDWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourDWordIndexed : Operand { + let ParserMatchClass = VecListFourDWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// Register list of four Q registers with half-word lane subscripting. +def VecListFourQHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourQHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourQHWordIndexed : Operand { + let ParserMatchClass = VecListFourQHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListFourQWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourQWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourQWordIndexed : Operand { + let ParserMatchClass = VecListFourQWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} + +def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast(N)->getAlignment() >= 8; +}]>; +def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast(N)->getAlignment() >= 8; +}]>; +def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast(N)->getAlignment() == 4; +}]>; +def word_alignedstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast(N)->getAlignment() == 4; +}]>; +def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast(N)->getAlignment() == 2; +}]>; +def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast(N)->getAlignment() == 2; +}]>; +def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast(N)->getAlignment() == 1; +}]>; +def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast(N)->getAlignment() == 1; +}]>; +def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast(N)->getAlignment() < 4; +}]>; +def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast(N)->getAlignment() < 4; +}]>; + +//===----------------------------------------------------------------------===// +// NEON-specific DAG Nodes. 
+//===----------------------------------------------------------------------===// + +def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>; +def SDTARMVCMPZ : SDTypeProfile<1, 1, []>; + +def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>; +def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>; +def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>; +def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>; +def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>; +def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>; +def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>; +def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>; +def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>; +def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>; +def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>; + +// Types for vector shift by immediates. The "SHX" version is for long and +// narrow operations where the source and destination vectors have different +// types. The "SHINS" version is for shift and insert operations. +def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>]>; +def SDTARMVSHX : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, + SDTCisVT<2, i32>]>; +def SDTARMVSHINS : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>; + +def NEONvshl : SDNode<"ARMISD::VSHL", SDTARMVSH>; +def NEONvshrs : SDNode<"ARMISD::VSHRs", SDTARMVSH>; +def NEONvshru : SDNode<"ARMISD::VSHRu", SDTARMVSH>; +def NEONvshrn : SDNode<"ARMISD::VSHRN", SDTARMVSHX>; + +def NEONvrshrs : SDNode<"ARMISD::VRSHRs", SDTARMVSH>; +def NEONvrshru : SDNode<"ARMISD::VRSHRu", SDTARMVSH>; +def NEONvrshrn : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>; + +def NEONvqshls : SDNode<"ARMISD::VQSHLs", SDTARMVSH>; +def NEONvqshlu : SDNode<"ARMISD::VQSHLu", SDTARMVSH>; +def NEONvqshlsu : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>; +def NEONvqshrns : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>; +def NEONvqshrnu : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>; +def NEONvqshrnsu : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>; + +def NEONvqrshrns : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>; +def NEONvqrshrnu : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>; +def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>; + +def NEONvsli : SDNode<"ARMISD::VSLI", SDTARMVSHINS>; +def NEONvsri : SDNode<"ARMISD::VSRI", SDTARMVSHINS>; + +def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>, + SDTCisVT<2, i32>]>; +def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>; +def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>; + +def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; +def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>; +def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>; +def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>; + +def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>]>; +def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>; +def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>; + +def NEONvbsl : SDNode<"ARMISD::VBSL", + SDTypeProfile<1, 3, [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>]>>; + +def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>; + +// VDUPLANE can produce a quad-register result from a double-register source, +// so the result is not constrained to match the source. 
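Concretely, the lane-duplicate operation exists in both result widths, which is why the profile below leaves the result type independent of the source type (assembly shown for orientation only):
//   vdup.32 d0, d1[1]   @ 64-bit result from a 64-bit source
//   vdup.32 q0, d1[0]   @ 128-bit result from the same 64-bit source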
+def NEONvduplane : SDNode<"ARMISD::VDUPLANE", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisVT<2, i32>]>>; + +def SDTARMVEXT : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>; +def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>; + +def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>; +def NEONvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>; +def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>; +def NEONvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>; + +def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>]>; +def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>; +def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>; +def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>; + +def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, + SDTCisSameAs<1, 2>]>; +def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>; +def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>; + +def SDTARMVTBL1 : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>, + SDTCisVT<2, v8i8>]>; +def SDTARMVTBL2 : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>, + SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>; +def NEONvtbl1 : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>; +def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>; + + +def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{ + ConstantSDNode *ConstVal = cast(N->getOperand(0)); + unsigned EltBits = 0; + uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits); + return (EltBits == 32 && EltVal == 0); +}]>; + +def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{ + ConstantSDNode *ConstVal = cast(N->getOperand(0)); + unsigned EltBits = 0; + uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits); + return (EltBits == 8 && EltVal == 0xff); +}]>; + +//===----------------------------------------------------------------------===// +// NEON load / store instructions +//===----------------------------------------------------------------------===// + +// Use VLDM to load a Q register as a D register pair. +// This is a pseudo instruction that is expanded to VLDMD after reg alloc. +def VLDMQIA + : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn), + IIC_fpLoad_m, "", + [(set DPair:$dst, (v2f64 (word_alignedload GPR:$Rn)))]>; + +// Use VSTM to store a Q register as a D register pair. +// This is a pseudo instruction that is expanded to VSTMD after reg alloc. +def VSTMQIA + : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn), + IIC_fpStore_m, "", + [(word_alignedstore (v2f64 DPair:$src), GPR:$Rn)]>; + +// Classes for VLD* pseudo-instructions with multi-register operands. +// These are expanded to real instructions after register allocation. 
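A hedged note on why these pseudos exist: the QQPR/QQQQPR operands keep a multi-register tuple as a single virtual register until the allocator has assigned consecutive D registers; only then can the pseudo be rewritten (on the C++ side, outside this file) into an encodable VLD. Roughly:
//   VLD1d8QPseudo qqreg, [r0]
//     --(after register allocation, approximately)-->
//   vld1.8 {d0, d1, d2, d3}, [r0]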
+class VLDQPseudo + : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">; +class VLDQWBPseudo + : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), itin, + "$addr.addr = $wb">; +class VLDQWBfixedPseudo + : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), + (ins addrmode6:$addr), itin, + "$addr.addr = $wb">; +class VLDQWBregisterPseudo + : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), + (ins addrmode6:$addr, rGPR:$offset), itin, + "$addr.addr = $wb">; + +class VLDQQPseudo + : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">; +class VLDQQWBPseudo + : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), itin, + "$addr.addr = $wb">; +class VLDQQWBfixedPseudo + : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), + (ins addrmode6:$addr), itin, + "$addr.addr = $wb">; +class VLDQQWBregisterPseudo + : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), + (ins addrmode6:$addr, rGPR:$offset), itin, + "$addr.addr = $wb">; + + +class VLDQQQQPseudo + : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin, + "$src = $dst">; +class VLDQQQQWBPseudo + : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin, + "$addr.addr = $wb, $src = $dst">; + +let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { + +// VLD1 : Vector Load (multiple single elements) +class VLD1D op7_4, string Dt, Operand AddrMode> + : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd), + (ins AddrMode:$Rn), IIC_VLD1, + "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD1]> { + let Rm = 0b1111; + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDST1Instruction"; +} +class VLD1Q op7_4, string Dt, Operand AddrMode> + : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd), + (ins AddrMode:$Rn), IIC_VLD1x2, + "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD2]> { + let Rm = 0b1111; + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDST1Instruction"; +} + +def VLD1d8 : VLD1D<{0,0,0,?}, "8", addrmode6align64>; +def VLD1d16 : VLD1D<{0,1,0,?}, "16", addrmode6align64>; +def VLD1d32 : VLD1D<{1,0,0,?}, "32", addrmode6align64>; +def VLD1d64 : VLD1D<{1,1,0,?}, "64", addrmode6align64>; + +def VLD1q8 : VLD1Q<{0,0,?,?}, "8", addrmode6align64or128>; +def VLD1q16 : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>; +def VLD1q32 : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>; +def VLD1q64 : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>; + +// ...with address register writeback: +multiclass VLD1DWB op7_4, string Dt, Operand AddrMode> { + def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), + (ins AddrMode:$Rn), IIC_VLD1u, + "vld1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDST1Instruction"; + } + def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), + (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u, + "vld1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDST1Instruction"; + } +} +multiclass VLD1QWB op7_4, string Dt, Operand AddrMode> { + def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), + (ins AddrMode:$Rn), IIC_VLD1x2u, + "vld1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
+ let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDST1Instruction"; + } + def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), + (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, + "vld1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDST1Instruction"; + } +} + +defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8", addrmode6align64>; +defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>; +defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>; +defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>; +defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8", addrmode6align64or128>; +defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>; +defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>; +defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>; + +// ...with 3 registers +class VLD1D3 op7_4, string Dt, Operand AddrMode> + : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd), + (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt, + "$Vd, $Rn", "", []>, Sched<[WriteVLD3]> { + let Rm = 0b1111; + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDST1Instruction"; +} +multiclass VLD1D3WB op7_4, string Dt, Operand AddrMode> { + def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb), + (ins AddrMode:$Rn), IIC_VLD1x2u, + "vld1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDST1Instruction"; + } + def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb), + (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, + "vld1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDST1Instruction"; + } +} + +def VLD1d8T : VLD1D3<{0,0,0,?}, "8", addrmode6align64>; +def VLD1d16T : VLD1D3<{0,1,0,?}, "16", addrmode6align64>; +def VLD1d32T : VLD1D3<{1,0,0,?}, "32", addrmode6align64>; +def VLD1d64T : VLD1D3<{1,1,0,?}, "64", addrmode6align64>; + +defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8", addrmode6align64>; +defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>; +defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>; +defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>; + +def VLD1d8TPseudo : VLDQQPseudo, Sched<[WriteVLD3]>; +def VLD1d16TPseudo : VLDQQPseudo, Sched<[WriteVLD3]>; +def VLD1d32TPseudo : VLDQQPseudo, Sched<[WriteVLD3]>; +def VLD1d64TPseudo : VLDQQPseudo, Sched<[WriteVLD3]>; +def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo, Sched<[WriteVLD3]>; +def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo, Sched<[WriteVLD3]>; + +def VLD1q8HighTPseudo : VLDQQQQPseudo, Sched<[WriteVLD3]>; +def VLD1q8LowTPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; +def VLD1q16HighTPseudo : VLDQQQQPseudo, Sched<[WriteVLD3]>; +def VLD1q16LowTPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; +def VLD1q32HighTPseudo : VLDQQQQPseudo, Sched<[WriteVLD3]>; +def VLD1q32LowTPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; +def VLD1q64HighTPseudo : VLDQQQQPseudo, Sched<[WriteVLD3]>; +def VLD1q64LowTPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; + +// ...with 4 registers +class VLD1D4 op7_4, string Dt, Operand AddrMode> + : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd), + (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt, + "$Vd, $Rn", "", []>, Sched<[WriteVLD4]> { + let Rm = 0b1111; + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDST1Instruction"; +} 
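The writeback multiclasses in this section each emit a "_fixed" and a "_register" variant; the convention visible in the Rm assignments is that Rm = 0b1111 means no writeback, Rm = 0b1101 selects post-increment by the transfer size, and any other Rm names a post-increment register. In assembly terms (illustrative only):
//   vld1.8 {d0}, [r0]       @ no writeback               (Rm = 0b1111)
//   vld1.8 {d0}, [r0]!      @ increment by transfer size (_fixed, Rm = 0b1101)
//   vld1.8 {d0}, [r0], r2   @ increment by register      (_register)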
+multiclass VLD1D4WB op7_4, string Dt, Operand AddrMode> { + def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb), + (ins AddrMode:$Rn), IIC_VLD1x2u, + "vld1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDST1Instruction"; + } + def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb), + (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, + "vld1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDST1Instruction"; + } +} + +def VLD1d8Q : VLD1D4<{0,0,?,?}, "8", addrmode6align64or128or256>; +def VLD1d16Q : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>; +def VLD1d32Q : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>; +def VLD1d64Q : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>; + +defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>; +defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>; +defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>; +defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>; + +def VLD1d8QPseudo : VLDQQPseudo, Sched<[WriteVLD4]>; +def VLD1d16QPseudo : VLDQQPseudo, Sched<[WriteVLD4]>; +def VLD1d32QPseudo : VLDQQPseudo, Sched<[WriteVLD4]>; +def VLD1d64QPseudo : VLDQQPseudo, Sched<[WriteVLD4]>; +def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo, Sched<[WriteVLD4]>; +def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo, Sched<[WriteVLD4]>; + +def VLD1q8LowQPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; +def VLD1q8HighQPseudo : VLDQQQQPseudo, Sched<[WriteVLD4]>; +def VLD1q16LowQPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; +def VLD1q16HighQPseudo : VLDQQQQPseudo, Sched<[WriteVLD4]>; +def VLD1q32LowQPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; +def VLD1q32HighQPseudo : VLDQQQQPseudo, Sched<[WriteVLD4]>; +def VLD1q64LowQPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; +def VLD1q64HighQPseudo : VLDQQQQPseudo, Sched<[WriteVLD4]>; + +// VLD2 : Vector Load (multiple 2-element structures) +class VLD2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, + InstrItinClass itin, Operand AddrMode> + : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd), + (ins AddrMode:$Rn), itin, + "vld2", Dt, "$Vd, $Rn", "", []> { + let Rm = 0b1111; + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDST2Instruction"; +} + +def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2, + addrmode6align64or128>, Sched<[WriteVLD2]>; +def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2, + addrmode6align64or128>, Sched<[WriteVLD2]>; +def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2, + addrmode6align64or128>, Sched<[WriteVLD2]>; + +def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2, + addrmode6align64or128or256>, Sched<[WriteVLD4]>; +def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2, + addrmode6align64or128or256>, Sched<[WriteVLD4]>; +def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2, + addrmode6align64or128or256>, Sched<[WriteVLD4]>; + +def VLD2q8Pseudo : VLDQQPseudo, Sched<[WriteVLD4]>; +def VLD2q16Pseudo : VLDQQPseudo, Sched<[WriteVLD4]>; +def VLD2q32Pseudo : VLDQQPseudo, Sched<[WriteVLD4]>; + +// ...with address register writeback: +multiclass VLD2WB op11_8, bits<4> op7_4, string Dt, + RegisterOperand VdTy, InstrItinClass itin, Operand AddrMode> { + def _fixed : 
NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), + (ins AddrMode:$Rn), itin, + "vld2", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDST2Instruction"; + } + def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), + (ins AddrMode:$Rn, rGPR:$Rm), itin, + "vld2", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDST2Instruction"; + } +} + +defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u, + addrmode6align64or128>, Sched<[WriteVLD2]>; +defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u, + addrmode6align64or128>, Sched<[WriteVLD2]>; +defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u, + addrmode6align64or128>, Sched<[WriteVLD2]>; + +defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u, + addrmode6align64or128or256>, Sched<[WriteVLD4]>; +defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u, + addrmode6align64or128or256>, Sched<[WriteVLD4]>; +defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u, + addrmode6align64or128or256>, Sched<[WriteVLD4]>; + +def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo, Sched<[WriteVLD4]>; +def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo, Sched<[WriteVLD4]>; +def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo, Sched<[WriteVLD4]>; +def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo, Sched<[WriteVLD4]>; +def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo, Sched<[WriteVLD4]>; +def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo, Sched<[WriteVLD4]>; + +// ...with double-spaced registers +def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2, + addrmode6align64or128>, Sched<[WriteVLD2]>; +def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2, + addrmode6align64or128>, Sched<[WriteVLD2]>; +def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2, + addrmode6align64or128>, Sched<[WriteVLD2]>; +defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u, + addrmode6align64or128>, Sched<[WriteVLD2]>; +defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u, + addrmode6align64or128>, Sched<[WriteVLD2]>; +defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u, + addrmode6align64or128>, Sched<[WriteVLD2]>; + +// VLD3 : Vector Load (multiple 3-element structures) +class VLD3D op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), + (ins addrmode6:$Rn), IIC_VLD3, + "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []>, Sched<[WriteVLD3]> { + let Rm = 0b1111; + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDST3Instruction"; +} + +def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">; +def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">; +def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">; + +def VLD3d8Pseudo : VLDQQPseudo, Sched<[WriteVLD3]>; +def VLD3d16Pseudo : VLDQQPseudo, Sched<[WriteVLD3]>; +def VLD3d32Pseudo : VLDQQPseudo, Sched<[WriteVLD3]>; + +// ...with address register writeback: +class VLD3DWB op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, + (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u, + "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", + "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> { + let Inst{4} = Rn{4}; + let DecoderMethod = 
"DecodeVLDST3Instruction"; +} + +def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">; +def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">; +def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">; + +def VLD3d8Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD3]>; +def VLD3d16Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD3]>; +def VLD3d32Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD3]>; + +// ...with double-spaced registers: +def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">; +def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">; +def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">; +def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">; +def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">; +def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">; + +def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; +def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; +def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; + +// ...alternate versions to be allocated odd register numbers: +def VLD3q8oddPseudo : VLDQQQQPseudo, Sched<[WriteVLD3]>; +def VLD3q16oddPseudo : VLDQQQQPseudo, Sched<[WriteVLD3]>; +def VLD3q32oddPseudo : VLDQQQQPseudo, Sched<[WriteVLD3]>; + +def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; +def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; +def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; + +// VLD4 : Vector Load (multiple 4-element structures) +class VLD4D op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, + (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), + (ins addrmode6:$Rn), IIC_VLD4, + "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []>, + Sched<[WriteVLD4]> { + let Rm = 0b1111; + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDST4Instruction"; +} + +def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">; +def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">; +def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">; + +def VLD4d8Pseudo : VLDQQPseudo, Sched<[WriteVLD4]>; +def VLD4d16Pseudo : VLDQQPseudo, Sched<[WriteVLD4]>; +def VLD4d32Pseudo : VLDQQPseudo, Sched<[WriteVLD4]>; + +// ...with address register writeback: +class VLD4DWB op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, + (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u, + "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", + "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDST4Instruction"; +} + +def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">; +def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">; +def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">; + +def VLD4d8Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD4]>; +def VLD4d16Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD4]>; +def VLD4d32Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD4]>; + +// ...with double-spaced registers: +def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">; +def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">; +def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">; +def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">; +def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">; +def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">; + +def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; +def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; +def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; + +// ...alternate versions to be allocated odd register numbers: +def VLD4q8oddPseudo : VLDQQQQPseudo, Sched<[WriteVLD4]>; +def VLD4q16oddPseudo : VLDQQQQPseudo, Sched<[WriteVLD4]>; +def VLD4q32oddPseudo : VLDQQQQPseudo, 
Sched<[WriteVLD4]>; + +def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; +def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; +def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; + +} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 + +// Classes for VLD*LN pseudo-instructions with multi-register operands. +// These are expanded to real instructions after register allocation. +class VLDQLNPseudo + : PseudoNLdSt<(outs QPR:$dst), + (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane), + itin, "$src = $dst">; +class VLDQLNWBPseudo + : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QPR:$src, + nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; +class VLDQQLNPseudo + : PseudoNLdSt<(outs QQPR:$dst), + (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane), + itin, "$src = $dst">; +class VLDQQLNWBPseudo + : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QQPR:$src, + nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; +class VLDQQQQLNPseudo + : PseudoNLdSt<(outs QQQQPR:$dst), + (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane), + itin, "$src = $dst">; +class VLDQQQQLNWBPseudo + : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src, + nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; + +// VLD1LN : Vector Load (single element to one lane) +class VLD1LN op11_8, bits<4> op7_4, string Dt, ValueType Ty, + PatFrag LoadOp> + : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd), + (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane), + IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn", + "$src = $Vd", + [(set DPR:$Vd, (vector_insert (Ty DPR:$src), + (i32 (LoadOp addrmode6:$Rn)), + imm:$lane))]> { + let Rm = 0b1111; + let DecoderMethod = "DecodeVLD1LN"; +} +class VLD1LN32 op11_8, bits<4> op7_4, string Dt, ValueType Ty, + PatFrag LoadOp> + : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd), + (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane), + IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn", + "$src = $Vd", + [(set DPR:$Vd, (vector_insert (Ty DPR:$src), + (i32 (LoadOp addrmode6oneL32:$Rn)), + imm:$lane))]>, Sched<[WriteVLD1]> { + let Rm = 0b1111; + let DecoderMethod = "DecodeVLD1LN"; +} +class VLD1QLNPseudo : VLDQLNPseudo, + Sched<[WriteVLD1]> { + let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src), + (i32 (LoadOp addrmode6:$addr)), + imm:$lane))]; +} + +def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> { + let Inst{7-5} = lane{2-0}; +} +def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> { + let Inst{7-6} = lane{1-0}; + let Inst{5-4} = Rn{5-4}; +} +def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> { + let Inst{7} = lane{0}; + let Inst{5-4} = Rn{5-4}; +} + +def VLD1LNq8Pseudo : VLD1QLNPseudo; +def VLD1LNq16Pseudo : VLD1QLNPseudo; +def VLD1LNq32Pseudo : VLD1QLNPseudo; + +def : Pat<(vector_insert (v2f32 DPR:$src), + (f32 (load addrmode6:$addr)), imm:$lane), + (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>; +def : Pat<(vector_insert (v4f32 QPR:$src), + (f32 (load addrmode6:$addr)), imm:$lane), + (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; + +// A 64-bit subvector insert to the first 128-bit vector position +// is a subregister copy that needs no instruction. 
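The patterns that follow state that inserting a 64-bit vector into element 0 of an undefined 128-bit vector is purely a sub-register placement, so no instruction is emitted; restating one of them as a sketch:
//   (insert_subvector undef, (v2i32 DPR:$src), (i32 0))
//     --> (INSERT_SUBREG (v4i32 IMPLICIT_DEF), $src, dsub_0)   @ no code generated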
+def : Pat<(insert_subvector undef, (v1i64 DPR:$src), (i32 0)), + (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; +def : Pat<(insert_subvector undef, (v2i32 DPR:$src), (i32 0)), + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; +def : Pat<(insert_subvector undef, (v2f32 DPR:$src), (i32 0)), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; +def : Pat<(insert_subvector undef, (v4i16 DPR:$src), (i32 0)), + (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; +def : Pat<(insert_subvector undef, (v4f16 DPR:$src), (i32 0)), + (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; +def : Pat<(insert_subvector (v16i8 undef), (v8i8 DPR:$src), (i32 0)), + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; + + +let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { + +// ...with address register writeback: +class VLD1LNWB op11_8, bits<4> op7_4, string Dt> + : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm, + DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt, + "\\{$Vd[$lane]\\}, $Rn$Rm", + "$src = $Vd, $Rn.addr = $wb", []>, Sched<[WriteVLD1]> { + let DecoderMethod = "DecodeVLD1LN"; +} + +def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> { + let Inst{7-5} = lane{2-0}; +} +def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> { + let Inst{7-6} = lane{1-0}; + let Inst{4} = Rn{4}; +} +def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> { + let Inst{7} = lane{0}; + let Inst{5} = Rn{4}; + let Inst{4} = Rn{4}; +} + +def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo, Sched<[WriteVLD1]>; +def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo, Sched<[WriteVLD1]>; +def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo, Sched<[WriteVLD1]>; + +// VLD2LN : Vector Load (single 2-element structure to one lane) +class VLD2LN op11_8, bits<4> op7_4, string Dt> + : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2), + (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane), + IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn", + "$src1 = $Vd, $src2 = $dst2", []>, Sched<[WriteVLD1]> { + let Rm = 0b1111; + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD2LN"; +} + +def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> { + let Inst{7-5} = lane{2-0}; +} +def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> { + let Inst{7} = lane{0}; +} + +def VLD2LNd8Pseudo : VLDQLNPseudo, Sched<[WriteVLD1]>; +def VLD2LNd16Pseudo : VLDQLNPseudo, Sched<[WriteVLD1]>; +def VLD2LNd32Pseudo : VLDQLNPseudo, Sched<[WriteVLD1]>; + +// ...with double-spaced registers: +def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> { + let Inst{7} = lane{0}; +} + +def VLD2LNq16Pseudo : VLDQQLNPseudo, Sched<[WriteVLD1]>; +def VLD2LNq32Pseudo : VLDQQLNPseudo, Sched<[WriteVLD1]>; + +// ...with address register writeback: +class VLD2LNWB op11_8, bits<4> op7_4, string Dt> + : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm, + DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt, + "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm", + "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD2LN"; +} + +def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> { + let Inst{7-5} = lane{2-0}; +} +def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VLD2LNd32_UPD : 
VLD2LNWB<0b1001, {?,0,0,?}, "32"> { + let Inst{7} = lane{0}; +} + +def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo, Sched<[WriteVLD1]>; +def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo, Sched<[WriteVLD1]>; +def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo, Sched<[WriteVLD1]>; + +def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> { + let Inst{7} = lane{0}; +} + +def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD1]>; +def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD1]>; + +// VLD3LN : Vector Load (single 3-element structure to one lane) +class VLD3LN op11_8, bits<4> op7_4, string Dt> + : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), + (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, + nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt, + "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn", + "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []>, Sched<[WriteVLD2]> { + let Rm = 0b1111; + let DecoderMethod = "DecodeVLD3LN"; +} + +def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> { + let Inst{7-5} = lane{2-0}; +} +def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> { + let Inst{7} = lane{0}; +} + +def VLD3LNd8Pseudo : VLDQQLNPseudo, Sched<[WriteVLD2]>; +def VLD3LNd16Pseudo : VLDQQLNPseudo, Sched<[WriteVLD2]>; +def VLD3LNd32Pseudo : VLDQQLNPseudo, Sched<[WriteVLD2]>; + +// ...with double-spaced registers: +def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> { + let Inst{7} = lane{0}; +} + +def VLD3LNq16Pseudo : VLDQQQQLNPseudo, Sched<[WriteVLD2]>; +def VLD3LNq32Pseudo : VLDQQQQLNPseudo, Sched<[WriteVLD2]>; + +// ...with address register writeback: +class VLD3LNWB op11_8, bits<4> op7_4, string Dt> + : NLdStLn<1, 0b10, op11_8, op7_4, + (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm, + DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), + IIC_VLD3lnu, "vld3", Dt, + "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm", + "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb", + []>, Sched<[WriteVLD2]> { + let DecoderMethod = "DecodeVLD3LN"; +} + +def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> { + let Inst{7-5} = lane{2-0}; +} +def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> { + let Inst{7} = lane{0}; +} + +def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD2]>; +def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD2]>; +def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD2]>; + +def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> { + let Inst{7} = lane{0}; +} + +def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo, Sched<[WriteVLD2]>; +def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo, Sched<[WriteVLD2]>; + +// VLD4LN : Vector Load (single 4-element structure to one lane) +class VLD4LN op11_8, bits<4> op7_4, string Dt> + : NLdStLn<1, 0b10, op11_8, op7_4, + (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), + (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, + nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt, + "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn", + "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>, + Sched<[WriteVLD2]> { + let Rm = 0b1111; + let Inst{4} = Rn{4}; + let 
DecoderMethod = "DecodeVLD4LN"; +} + +def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> { + let Inst{7-5} = lane{2-0}; +} +def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> { + let Inst{7} = lane{0}; + let Inst{5} = Rn{5}; +} + +def VLD4LNd8Pseudo : VLDQQLNPseudo, Sched<[WriteVLD2]>; +def VLD4LNd16Pseudo : VLDQQLNPseudo, Sched<[WriteVLD2]>; +def VLD4LNd32Pseudo : VLDQQLNPseudo, Sched<[WriteVLD2]>; + +// ...with double-spaced registers: +def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> { + let Inst{7} = lane{0}; + let Inst{5} = Rn{5}; +} + +def VLD4LNq16Pseudo : VLDQQQQLNPseudo, Sched<[WriteVLD2]>; +def VLD4LNq32Pseudo : VLDQQQQLNPseudo, Sched<[WriteVLD2]>; + +// ...with address register writeback: +class VLD4LNWB op11_8, bits<4> op7_4, string Dt> + : NLdStLn<1, 0b10, op11_8, op7_4, + (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm, + DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), + IIC_VLD4lnu, "vld4", Dt, +"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm", +"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb", + []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD4LN" ; +} + +def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> { + let Inst{7-5} = lane{2-0}; +} +def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> { + let Inst{7} = lane{0}; + let Inst{5} = Rn{5}; +} + +def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD2]>; +def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD2]>; +def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD2]>; + +def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> { + let Inst{7} = lane{0}; + let Inst{5} = Rn{5}; +} + +def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo, Sched<[WriteVLD2]>; +def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo, Sched<[WriteVLD2]>; + +} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 + +// VLD1DUP : Vector Load (single element to all lanes) +class VLD1DUP op7_4, string Dt, ValueType Ty, PatFrag LoadOp, + Operand AddrMode> + : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd), + (ins AddrMode:$Rn), + IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "", + [(set VecListOneDAllLanes:$Vd, + (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]>, + Sched<[WriteVLD2]> { + let Rm = 0b1111; + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; +} +def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8, + addrmode6dupalignNone>; +def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16, + addrmode6dupalign16>; +def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load, + addrmode6dupalign32>; + +def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), + (VLD1DUPd32 addrmode6:$addr)>; + +class VLD1QDUP op7_4, string Dt, ValueType Ty, PatFrag LoadOp, + Operand AddrMode> + : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd), + (ins AddrMode:$Rn), IIC_VLD1dup, + "vld1", Dt, "$Vd, $Rn", "", + [(set VecListDPairAllLanes:$Vd, + (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> { + let Rm = 0b1111; + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; +} + +def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8, + addrmode6dupalignNone>; +def 
VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16, + addrmode6dupalign16>; +def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load, + addrmode6dupalign32>; + +def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), + (VLD1DUPq32 addrmode6:$addr)>; + +let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { +// ...with address register writeback: +multiclass VLD1DUPWB op7_4, string Dt, Operand AddrMode> { + def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, + (outs VecListOneDAllLanes:$Vd, GPR:$wb), + (ins AddrMode:$Rn), IIC_VLD1dupu, + "vld1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; + } + def _register : NLdSt<1, 0b10, 0b1100, op7_4, + (outs VecListOneDAllLanes:$Vd, GPR:$wb), + (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu, + "vld1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; + } +} +multiclass VLD1QDUPWB op7_4, string Dt, Operand AddrMode> { + def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, + (outs VecListDPairAllLanes:$Vd, GPR:$wb), + (ins AddrMode:$Rn), IIC_VLD1dupu, + "vld1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; + } + def _register : NLdSt<1, 0b10, 0b1100, op7_4, + (outs VecListDPairAllLanes:$Vd, GPR:$wb), + (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu, + "vld1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; + } +} + +defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>; +defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>; +defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>; + +defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>; +defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>; +defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>; + +// VLD2DUP : Vector Load (single 2-element structure to all lanes) +class VLD2DUP op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode> + : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd), + (ins AddrMode:$Rn), IIC_VLD2dup, + "vld2", Dt, "$Vd, $Rn", "", []> { + let Rm = 0b1111; + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD2DupInstruction"; +} + +def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes, + addrmode6dupalign16>; +def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes, + addrmode6dupalign32>; +def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes, + addrmode6dupalign64>; + +// HACK this one, VLD2DUPd8x2 must be changed at the same time with VLD2b8 or +// "vld2.8 {d0[], d2[]}, [r4:32]" will become "vld2.8 {d0, d2}, [r4:32]". 
+// ...with double-spaced registers +def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes, + addrmode6dupalign16>; +def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes, + addrmode6dupalign32>; +def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes, + addrmode6dupalign64>; + +def VLD2DUPq8EvenPseudo : VLDQQPseudo, Sched<[WriteVLD2]>; +def VLD2DUPq8OddPseudo : VLDQQPseudo, Sched<[WriteVLD2]>; +def VLD2DUPq16EvenPseudo : VLDQQPseudo, Sched<[WriteVLD2]>; +def VLD2DUPq16OddPseudo : VLDQQPseudo, Sched<[WriteVLD2]>; +def VLD2DUPq32EvenPseudo : VLDQQPseudo, Sched<[WriteVLD2]>; +def VLD2DUPq32OddPseudo : VLDQQPseudo, Sched<[WriteVLD2]>; + +// ...with address register writeback: +multiclass VLD2DUPWB op7_4, string Dt, RegisterOperand VdTy, + Operand AddrMode> { + def _fixed : NLdSt<1, 0b10, 0b1101, op7_4, + (outs VdTy:$Vd, GPR:$wb), + (ins AddrMode:$Rn), IIC_VLD2dupu, + "vld2", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD2DupInstruction"; + } + def _register : NLdSt<1, 0b10, 0b1101, op7_4, + (outs VdTy:$Vd, GPR:$wb), + (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu, + "vld2", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD2DupInstruction"; + } +} + +defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes, + addrmode6dupalign16>; +defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes, + addrmode6dupalign32>; +defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes, + addrmode6dupalign64>; + +defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes, + addrmode6dupalign16>; +defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes, + addrmode6dupalign32>; +defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes, + addrmode6dupalign64>; + +// VLD3DUP : Vector Load (single 3-element structure to all lanes) +class VLD3DUP op7_4, string Dt> + : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), + (ins addrmode6dup:$Rn), IIC_VLD3dup, + "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []>, + Sched<[WriteVLD2]> { + let Rm = 0b1111; + let Inst{4} = 0; + let DecoderMethod = "DecodeVLD3DupInstruction"; +} + +def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">; +def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">; +def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">; + +def VLD3DUPd8Pseudo : VLDQQPseudo, Sched<[WriteVLD2]>; +def VLD3DUPd16Pseudo : VLDQQPseudo, Sched<[WriteVLD2]>; +def VLD3DUPd32Pseudo : VLDQQPseudo, Sched<[WriteVLD2]>; + +// ...with double-spaced registers (not used for codegen): +def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">; +def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">; +def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">; + +def VLD3DUPq8EvenPseudo : VLDQQQQPseudo, Sched<[WriteVLD2]>; +def VLD3DUPq8OddPseudo : VLDQQQQPseudo, Sched<[WriteVLD2]>; +def VLD3DUPq16EvenPseudo : VLDQQQQPseudo, Sched<[WriteVLD2]>; +def VLD3DUPq16OddPseudo : VLDQQQQPseudo, Sched<[WriteVLD2]>; +def VLD3DUPq32EvenPseudo : VLDQQQQPseudo, Sched<[WriteVLD2]>; +def VLD3DUPq32OddPseudo : VLDQQQQPseudo, Sched<[WriteVLD2]>; + +// ...with address register writeback: +class VLD3DUPWB op7_4, string Dt, Operand AddrMode> + : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), + (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu, + "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm", + 
"$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { + let Inst{4} = 0; + let DecoderMethod = "DecodeVLD3DupInstruction"; +} + +def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8", addrmode6dupalign64>; +def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>; +def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>; + +def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>; +def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>; +def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>; + +def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD2]>; +def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD2]>; +def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD2]>; + +// VLD4DUP : Vector Load (single 4-element structure to all lanes) +class VLD4DUP op7_4, string Dt> + : NLdSt<1, 0b10, 0b1111, op7_4, + (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), + (ins addrmode6dup:$Rn), IIC_VLD4dup, + "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> { + let Rm = 0b1111; + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD4DupInstruction"; +} + +def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">; +def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">; +def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } + +def VLD4DUPd8Pseudo : VLDQQPseudo, Sched<[WriteVLD2]>; +def VLD4DUPd16Pseudo : VLDQQPseudo, Sched<[WriteVLD2]>; +def VLD4DUPd32Pseudo : VLDQQPseudo, Sched<[WriteVLD2]>; + +// ...with double-spaced registers (not used for codegen): +def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">; +def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">; +def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } + +def VLD4DUPq8EvenPseudo : VLDQQQQPseudo, Sched<[WriteVLD2]>; +def VLD4DUPq8OddPseudo : VLDQQQQPseudo, Sched<[WriteVLD2]>; +def VLD4DUPq16EvenPseudo : VLDQQQQPseudo, Sched<[WriteVLD2]>; +def VLD4DUPq16OddPseudo : VLDQQQQPseudo, Sched<[WriteVLD2]>; +def VLD4DUPq32EvenPseudo : VLDQQQQPseudo, Sched<[WriteVLD2]>; +def VLD4DUPq32OddPseudo : VLDQQQQPseudo, Sched<[WriteVLD2]>; + +// ...with address register writeback: +class VLD4DUPWB op7_4, string Dt> + : NLdSt<1, 0b10, 0b1111, op7_4, + (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu, + "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm", + "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD4DupInstruction"; +} + +def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">; +def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">; +def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } + +def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">; +def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">; +def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } + +def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD2]>; +def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD2]>; +def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD2]>; + +} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 + +let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in { + +// Classes for VST* pseudo-instructions with multi-register operands. +// These are expanded to real instructions after register allocation. 
+class VSTQPseudo + : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">; +class VSTQWBPseudo + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin, + "$addr.addr = $wb">; +class VSTQWBfixedPseudo + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, QPR:$src), itin, + "$addr.addr = $wb">; +class VSTQWBregisterPseudo + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin, + "$addr.addr = $wb">; +class VSTQQPseudo + : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">; +class VSTQQWBPseudo + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin, + "$addr.addr = $wb">; +class VSTQQWBfixedPseudo + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, QQPR:$src), itin, + "$addr.addr = $wb">; +class VSTQQWBregisterPseudo + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin, + "$addr.addr = $wb">; + +class VSTQQQQPseudo + : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">; +class VSTQQQQWBPseudo + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin, + "$addr.addr = $wb">; + +// VST1 : Vector Store (multiple single elements) +class VST1D op7_4, string Dt, Operand AddrMode> + : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd), + IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST1]> { + let Rm = 0b1111; + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDST1Instruction"; +} +class VST1Q op7_4, string Dt, Operand AddrMode> + : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd), + IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST2]> { + let Rm = 0b1111; + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDST1Instruction"; +} + +def VST1d8 : VST1D<{0,0,0,?}, "8", addrmode6align64>; +def VST1d16 : VST1D<{0,1,0,?}, "16", addrmode6align64>; +def VST1d32 : VST1D<{1,0,0,?}, "32", addrmode6align64>; +def VST1d64 : VST1D<{1,1,0,?}, "64", addrmode6align64>; + +def VST1q8 : VST1Q<{0,0,?,?}, "8", addrmode6align64or128>; +def VST1q16 : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>; +def VST1q32 : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>; +def VST1q64 : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>; + +// ...with address register writeback: +multiclass VST1DWB op7_4, string Dt, Operand AddrMode> { + def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb), + (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u, + "vst1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []>, Sched<[WriteVST1]> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDST1Instruction"; + } + def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb), + (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd), + IIC_VLD1u, + "vst1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []>, Sched<[WriteVST1]> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDST1Instruction"; + } +} +multiclass VST1QWB op7_4, string Dt, Operand AddrMode> { + def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), + (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u, + "vst1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
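+    // (Clarifying note relying on the usual NEON addressing convention,
+    //  not stated in this file: Rm == 0b1111 encodes the no-writeback form,
+    //  Rm == 0b1101 the "$Rn!" form that post-increments by the transfer
+    //  size, and any other value names the post-increment register used by
+    //  the _register variants.)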
+ let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDST1Instruction"; + } + def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), + (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd), + IIC_VLD1x2u, + "vst1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDST1Instruction"; + } +} + +defm VST1d8wb : VST1DWB<{0,0,0,?}, "8", addrmode6align64>; +defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>; +defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>; +defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>; + +defm VST1q8wb : VST1QWB<{0,0,?,?}, "8", addrmode6align64or128>; +defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>; +defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>; +defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>; + +// ...with 3 registers +class VST1D3 op7_4, string Dt, Operand AddrMode> + : NLdSt<0, 0b00, 0b0110, op7_4, (outs), + (ins AddrMode:$Rn, VecListThreeD:$Vd), + IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST3]> { + let Rm = 0b1111; + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDST1Instruction"; +} +multiclass VST1D3WB op7_4, string Dt, Operand AddrMode> { + def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), + (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u, + "vst1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []>, Sched<[WriteVST3]> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDST1Instruction"; + } + def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), + (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd), + IIC_VLD1x3u, + "vst1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []>, Sched<[WriteVST3]> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDST1Instruction"; + } +} + +def VST1d8T : VST1D3<{0,0,0,?}, "8", addrmode6align64>; +def VST1d16T : VST1D3<{0,1,0,?}, "16", addrmode6align64>; +def VST1d32T : VST1D3<{1,0,0,?}, "32", addrmode6align64>; +def VST1d64T : VST1D3<{1,1,0,?}, "64", addrmode6align64>; + +defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8", addrmode6align64>; +defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>; +defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>; +defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>; + +def VST1d8TPseudo : VSTQQPseudo, Sched<[WriteVST3]>; +def VST1d16TPseudo : VSTQQPseudo, Sched<[WriteVST3]>; +def VST1d32TPseudo : VSTQQPseudo, Sched<[WriteVST3]>; +def VST1d64TPseudo : VSTQQPseudo, Sched<[WriteVST3]>; +def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo, Sched<[WriteVST3]>; +def VST1d64TPseudoWB_register : VSTQQWBPseudo, Sched<[WriteVST3]>; + +def VST1q8HighTPseudo : VSTQQQQPseudo, Sched<[WriteVST3]>; +def VST1q8LowTPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; +def VST1q16HighTPseudo : VSTQQQQPseudo, Sched<[WriteVST3]>; +def VST1q16LowTPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; +def VST1q32HighTPseudo : VSTQQQQPseudo, Sched<[WriteVST3]>; +def VST1q32LowTPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; +def VST1q64HighTPseudo : VSTQQQQPseudo, Sched<[WriteVST3]>; +def VST1q64LowTPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; + +// ...with 4 registers +class VST1D4 op7_4, string Dt, Operand AddrMode> + : NLdSt<0, 0b00, 0b0010, op7_4, (outs), + (ins AddrMode:$Rn, VecListFourD:$Vd), + IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "", + []>, Sched<[WriteVST4]> { + let Rm = 0b1111; + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = 
"DecodeVLDST1Instruction"; +} +multiclass VST1D4WB op7_4, string Dt, Operand AddrMode> { + def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), + (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u, + "vst1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDST1Instruction"; + } + def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), + (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd), + IIC_VLD1x4u, + "vst1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDST1Instruction"; + } +} + +def VST1d8Q : VST1D4<{0,0,?,?}, "8", addrmode6align64or128or256>; +def VST1d16Q : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>; +def VST1d32Q : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>; +def VST1d64Q : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>; + +defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>; +defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>; +defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>; +defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>; + +def VST1d8QPseudo : VSTQQPseudo, Sched<[WriteVST4]>; +def VST1d16QPseudo : VSTQQPseudo, Sched<[WriteVST4]>; +def VST1d32QPseudo : VSTQQPseudo, Sched<[WriteVST4]>; +def VST1d64QPseudo : VSTQQPseudo, Sched<[WriteVST4]>; +def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo, Sched<[WriteVST4]>; +def VST1d64QPseudoWB_register : VSTQQWBPseudo, Sched<[WriteVST4]>; + +def VST1q8HighQPseudo : VSTQQQQPseudo, Sched<[WriteVST4]>; +def VST1q8LowQPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; +def VST1q16HighQPseudo : VSTQQQQPseudo, Sched<[WriteVST4]>; +def VST1q16LowQPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; +def VST1q32HighQPseudo : VSTQQQQPseudo, Sched<[WriteVST4]>; +def VST1q32LowQPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; +def VST1q64HighQPseudo : VSTQQQQPseudo, Sched<[WriteVST4]>; +def VST1q64LowQPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; + +// VST2 : Vector Store (multiple 2-element structures) +class VST2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, + InstrItinClass itin, Operand AddrMode> + : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, VdTy:$Vd), + itin, "vst2", Dt, "$Vd, $Rn", "", []> { + let Rm = 0b1111; + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDST2Instruction"; +} + +def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2, + addrmode6align64or128>, Sched<[WriteVST2]>; +def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2, + addrmode6align64or128>, Sched<[WriteVST2]>; +def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2, + addrmode6align64or128>, Sched<[WriteVST2]>; + +def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2, + addrmode6align64or128or256>, Sched<[WriteVST4]>; +def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2, + addrmode6align64or128or256>, Sched<[WriteVST4]>; +def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2, + addrmode6align64or128or256>, Sched<[WriteVST4]>; + +def VST2q8Pseudo : VSTQQPseudo, Sched<[WriteVST4]>; +def VST2q16Pseudo : VSTQQPseudo, Sched<[WriteVST4]>; +def VST2q32Pseudo : VSTQQPseudo, Sched<[WriteVST4]>; + +// ...with address register writeback: +multiclass VST2DWB op11_8, bits<4> op7_4, string Dt, + RegisterOperand VdTy, Operand AddrMode> { + def _fixed : 
NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u, + "vst2", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDST2Instruction"; + } + def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u, + "vst2", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDST2Instruction"; + } +} +multiclass VST2QWB op7_4, string Dt, Operand AddrMode> { + def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), + (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u, + "vst2", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDST2Instruction"; + } + def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), + (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd), + IIC_VLD1u, + "vst2", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDST2Instruction"; + } +} + +defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair, + addrmode6align64or128>; +defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair, + addrmode6align64or128>; +defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair, + addrmode6align64or128>; + +defm VST2q8wb : VST2QWB<{0,0,?,?}, "8", addrmode6align64or128or256>; +defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>; +defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>; + +def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo, Sched<[WriteVST4]>; +def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo, Sched<[WriteVST4]>; +def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo, Sched<[WriteVST4]>; +def VST2q8PseudoWB_register : VSTQQWBregisterPseudo, Sched<[WriteVST4]>; +def VST2q16PseudoWB_register : VSTQQWBregisterPseudo, Sched<[WriteVST4]>; +def VST2q32PseudoWB_register : VSTQQWBregisterPseudo, Sched<[WriteVST4]>; + +// ...with double-spaced registers +def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2, + addrmode6align64or128>; +def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2, + addrmode6align64or128>; +def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2, + addrmode6align64or128>; +defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, + addrmode6align64or128>; +defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, + addrmode6align64or128>; +defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, + addrmode6align64or128>; + +// VST3 : Vector Store (multiple 3-element structures) +class VST3D op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, op11_8, op7_4, (outs), + (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3, + "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []>, Sched<[WriteVST3]> { + let Rm = 0b1111; + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDST3Instruction"; +} + +def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">; +def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">; +def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">; + +def VST3d8Pseudo : VSTQQPseudo, Sched<[WriteVST3]>; +def VST3d16Pseudo : VSTQQPseudo, Sched<[WriteVST3]>; +def VST3d32Pseudo : VSTQQPseudo, Sched<[WriteVST3]>; + +// ...with address register writeback: +class 
VST3DWB op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm, + DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u, + "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm", + "$Rn.addr = $wb", []>, Sched<[WriteVST3]> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDST3Instruction"; +} + +def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">; +def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">; +def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">; + +def VST3d8Pseudo_UPD : VSTQQWBPseudo, Sched<[WriteVST3]>; +def VST3d16Pseudo_UPD : VSTQQWBPseudo, Sched<[WriteVST3]>; +def VST3d32Pseudo_UPD : VSTQQWBPseudo, Sched<[WriteVST3]>; + +// ...with double-spaced registers: +def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">; +def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">; +def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">; +def VST3q8_UPD : VST3DWB<0b0101, {0,0,0,?}, "8">; +def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">; +def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">; + +def VST3q8Pseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; +def VST3q16Pseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; +def VST3q32Pseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; + +// ...alternate versions to be allocated odd register numbers: +def VST3q8oddPseudo : VSTQQQQPseudo, Sched<[WriteVST3]>; +def VST3q16oddPseudo : VSTQQQQPseudo, Sched<[WriteVST3]>; +def VST3q32oddPseudo : VSTQQQQPseudo, Sched<[WriteVST3]>; + +def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; +def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; +def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; + +// VST4 : Vector Store (multiple 4-element structures) +class VST4D op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, op11_8, op7_4, (outs), + (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), + IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", + "", []>, Sched<[WriteVST4]> { + let Rm = 0b1111; + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDST4Instruction"; +} + +def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">; +def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">; +def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">; + +def VST4d8Pseudo : VSTQQPseudo, Sched<[WriteVST4]>; +def VST4d16Pseudo : VSTQQPseudo, Sched<[WriteVST4]>; +def VST4d32Pseudo : VSTQQPseudo, Sched<[WriteVST4]>; + +// ...with address register writeback: +class VST4DWB op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm, + DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u, + "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", + "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDST4Instruction"; +} + +def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">; +def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">; +def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">; + +def VST4d8Pseudo_UPD : VSTQQWBPseudo, Sched<[WriteVST4]>; +def VST4d16Pseudo_UPD : VSTQQWBPseudo, Sched<[WriteVST4]>; +def VST4d32Pseudo_UPD : VSTQQWBPseudo, Sched<[WriteVST4]>; + +// ...with double-spaced registers: +def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">; +def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">; +def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">; +def VST4q8_UPD : VST4DWB<0b0001, {0,0,?,?}, "8">; +def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">; +def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">; + +def VST4q8Pseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; +def VST4q16Pseudo_UPD : VSTQQQQWBPseudo, 
Sched<[WriteVST4]>; +def VST4q32Pseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; + +// ...alternate versions to be allocated odd register numbers: +def VST4q8oddPseudo : VSTQQQQPseudo, Sched<[WriteVST4]>; +def VST4q16oddPseudo : VSTQQQQPseudo, Sched<[WriteVST4]>; +def VST4q32oddPseudo : VSTQQQQPseudo, Sched<[WriteVST4]>; + +def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; +def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; +def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; + +} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 + +// Classes for VST*LN pseudo-instructions with multi-register operands. +// These are expanded to real instructions after register allocation. +class VSTQLNPseudo + : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane), + itin, "">; +class VSTQLNWBPseudo + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QPR:$src, + nohash_imm:$lane), itin, "$addr.addr = $wb">; +class VSTQQLNPseudo + : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane), + itin, "">; +class VSTQQLNWBPseudo + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QQPR:$src, + nohash_imm:$lane), itin, "$addr.addr = $wb">; +class VSTQQQQLNPseudo + : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane), + itin, "">; +class VSTQQQQLNWBPseudo + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src, + nohash_imm:$lane), itin, "$addr.addr = $wb">; + +// VST1LN : Vector Store (single element from one lane) +class VST1LN op11_8, bits<4> op7_4, string Dt, ValueType Ty, + PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode> + : NLdStLn<1, 0b00, op11_8, op7_4, (outs), + (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane), + IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", + [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]>, + Sched<[WriteVST1]> { + let Rm = 0b1111; + let DecoderMethod = "DecodeVST1LN"; +} +class VST1QLNPseudo + : VSTQLNPseudo, Sched<[WriteVST1]> { + let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), + addrmode6:$addr)]; +} + +def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8, + NEONvgetlaneu, addrmode6> { + let Inst{7-5} = lane{2-0}; +} +def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16, + NEONvgetlaneu, addrmode6> { + let Inst{7-6} = lane{1-0}; + let Inst{4} = Rn{4}; +} + +def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt, + addrmode6oneL32> { + let Inst{7} = lane{0}; + let Inst{5-4} = Rn{5-4}; +} + +def VST1LNq8Pseudo : VST1QLNPseudo; +def VST1LNq16Pseudo : VST1QLNPseudo; +def VST1LNq32Pseudo : VST1QLNPseudo; + +def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr), + (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>; +def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr), + (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; + +// ...with address register writeback: +class VST1LNWB op11_8, bits<4> op7_4, string Dt, ValueType Ty, + PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode> + : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins AdrMode:$Rn, am6offset:$Rm, + DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt, + "\\{$Vd[$lane]\\}, $Rn$Rm", + "$Rn.addr = $wb", + [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), + AdrMode:$Rn, am6offset:$Rm))]>, + Sched<[WriteVST1]> { + let DecoderMethod = "DecodeVST1LN"; +} +class VST1QLNWBPseudo + : VSTQLNWBPseudo, 
Sched<[WriteVST1]> { + let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), + addrmode6:$addr, am6offset:$offset))]; +} + +def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8, + NEONvgetlaneu, addrmode6> { + let Inst{7-5} = lane{2-0}; +} +def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16, + NEONvgetlaneu, addrmode6> { + let Inst{7-6} = lane{1-0}; + let Inst{4} = Rn{4}; +} +def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store, + extractelt, addrmode6oneL32> { + let Inst{7} = lane{0}; + let Inst{5-4} = Rn{5-4}; +} + +def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo; +def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo; +def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo; + +let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in { + +// VST2LN : Vector Store (single 2-element structure from one lane) +class VST2LN op11_8, bits<4> op7_4, string Dt> + : NLdStLn<1, 0b00, op11_8, op7_4, (outs), + (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane), + IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn", + "", []>, Sched<[WriteVST1]> { + let Rm = 0b1111; + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVST2LN"; +} + +def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> { + let Inst{7-5} = lane{2-0}; +} +def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> { + let Inst{7} = lane{0}; +} + +def VST2LNd8Pseudo : VSTQLNPseudo, Sched<[WriteVST1]>; +def VST2LNd16Pseudo : VSTQLNPseudo, Sched<[WriteVST1]>; +def VST2LNd32Pseudo : VSTQLNPseudo, Sched<[WriteVST1]>; + +// ...with double-spaced registers: +def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> { + let Inst{7-6} = lane{1-0}; + let Inst{4} = Rn{4}; +} +def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> { + let Inst{7} = lane{0}; + let Inst{4} = Rn{4}; +} + +def VST2LNq16Pseudo : VSTQQLNPseudo, Sched<[WriteVST1]>; +def VST2LNq32Pseudo : VSTQQLNPseudo, Sched<[WriteVST1]>; + +// ...with address register writeback: +class VST2LNWB op11_8, bits<4> op7_4, string Dt> + : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm, + DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt, + "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVST2LN"; +} + +def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> { + let Inst{7-5} = lane{2-0}; +} +def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> { + let Inst{7} = lane{0}; +} + +def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo, Sched<[WriteVST1]>; +def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo, Sched<[WriteVST1]>; +def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo, Sched<[WriteVST1]>; + +def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> { + let Inst{7} = lane{0}; +} + +def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST1]>; +def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST1]>; + +// VST3LN : Vector Store (single 3-element structure from one lane) +class VST3LN op11_8, bits<4> op7_4, string Dt> + : NLdStLn<1, 0b00, op11_8, op7_4, (outs), + (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, + nohash_imm:$lane), IIC_VST3ln, "vst3", Dt, + "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []>, + Sched<[WriteVST2]> { + let Rm = 0b1111; + let DecoderMethod = "DecodeVST3LN"; +} + +def 
VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> { + let Inst{7-5} = lane{2-0}; +} +def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> { + let Inst{7} = lane{0}; +} + +def VST3LNd8Pseudo : VSTQQLNPseudo, Sched<[WriteVST2]>; +def VST3LNd16Pseudo : VSTQQLNPseudo, Sched<[WriteVST2]>; +def VST3LNd32Pseudo : VSTQQLNPseudo, Sched<[WriteVST2]>; + +// ...with double-spaced registers: +def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> { + let Inst{7} = lane{0}; +} + +def VST3LNq16Pseudo : VSTQQQQLNPseudo; +def VST3LNq32Pseudo : VSTQQQQLNPseudo; + +// ...with address register writeback: +class VST3LNWB op11_8, bits<4> op7_4, string Dt> + : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm, + DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane), + IIC_VST3lnu, "vst3", Dt, + "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { + let DecoderMethod = "DecodeVST3LN"; +} + +def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> { + let Inst{7-5} = lane{2-0}; +} +def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> { + let Inst{7} = lane{0}; +} + +def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST2]>; +def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST2]>; +def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST2]>; + +def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> { + let Inst{7} = lane{0}; +} + +def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo, Sched<[WriteVST2]>; +def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo, Sched<[WriteVST2]>; + +// VST4LN : Vector Store (single 4-element structure from one lane) +class VST4LN op11_8, bits<4> op7_4, string Dt> + : NLdStLn<1, 0b00, op11_8, op7_4, (outs), + (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, + nohash_imm:$lane), IIC_VST4ln, "vst4", Dt, + "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn", + "", []>, Sched<[WriteVST2]> { + let Rm = 0b1111; + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVST4LN"; +} + +def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> { + let Inst{7-5} = lane{2-0}; +} +def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> { + let Inst{7} = lane{0}; + let Inst{5} = Rn{5}; +} + +def VST4LNd8Pseudo : VSTQQLNPseudo, Sched<[WriteVST2]>; +def VST4LNd16Pseudo : VSTQQLNPseudo, Sched<[WriteVST2]>; +def VST4LNd32Pseudo : VSTQQLNPseudo, Sched<[WriteVST2]>; + +// ...with double-spaced registers: +def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> { + let Inst{7} = lane{0}; + let Inst{5} = Rn{5}; +} + +def VST4LNq16Pseudo : VSTQQQQLNPseudo, Sched<[WriteVST2]>; +def VST4LNq32Pseudo : VSTQQQQLNPseudo, Sched<[WriteVST2]>; + +// ...with address register writeback: +class VST4LNWB op11_8, bits<4> op7_4, string Dt> + : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm, + DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), + IIC_VST4lnu, "vst4", Dt, + "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVST4LN"; +} + +def VST4LNd8_UPD : 
VST4LNWB<0b0011, {?,?,?,?}, "8"> { + let Inst{7-5} = lane{2-0}; +} +def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> { + let Inst{7} = lane{0}; + let Inst{5} = Rn{5}; +} + +def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST2]>; +def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST2]>; +def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST2]>; + +def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> { + let Inst{7} = lane{0}; + let Inst{5} = Rn{5}; +} + +def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo, Sched<[WriteVST2]>; +def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo, Sched<[WriteVST2]>; + +} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 + +// Use vld1/vst1 for unaligned f64 load / store +def : Pat<(f64 (hword_alignedload addrmode6:$addr)), + (VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>; +def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr), + (VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>; +def : Pat<(f64 (byte_alignedload addrmode6:$addr)), + (VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>; +def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr), + (VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>; +def : Pat<(f64 (non_word_alignedload addrmode6:$addr)), + (VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>; +def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr), + (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>; + +// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64 +// load / store if it's legal. +def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)), + (VLD1q64 addrmode6:$addr)>; +def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), + (VST1q64 addrmode6:$addr, QPR:$value)>; +def : Pat<(v2f64 (word_alignedload addrmode6:$addr)), + (VLD1q32 addrmode6:$addr)>, Requires<[IsLE]>; +def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr), + (VST1q32 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>; +def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)), + (VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>; +def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), + (VST1q16 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>; +def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)), + (VLD1q8 addrmode6:$addr)>, Requires<[IsLE]>; +def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr), + (VST1q8 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>; + +//===----------------------------------------------------------------------===// +// NEON pattern fragments +//===----------------------------------------------------------------------===// + +// Extract D sub-registers of Q registers. +def DSubReg_i8_reg : SDNodeXFormgetTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, SDLoc(N), + MVT::i32); +}]>; +def DSubReg_i16_reg : SDNodeXFormgetTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, SDLoc(N), + MVT::i32); +}]>; +def DSubReg_i32_reg : SDNodeXFormgetTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, SDLoc(N), + MVT::i32); +}]>; +def DSubReg_f64_reg : SDNodeXFormgetTargetConstant(ARM::dsub_0 + N->getZExtValue(), SDLoc(N), + MVT::i32); +}]>; + +// Extract S sub-registers of Q/D registers. +def SSubReg_f32_reg : SDNodeXFormgetTargetConstant(ARM::ssub_0 + N->getZExtValue(), SDLoc(N), + MVT::i32); +}]>; + +// Translate lane numbers from Q registers to D subregs. 
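+// (Worked example added for clarity, not in the original: for lane 5 of a
+//  v8i16 Q register, DSubReg_i16_reg above computes dsub_0 + 5/4 = dsub_1,
+//  while SubReg_i16_lane below computes 5 & 3 = 1, i.e. lane 1 of the odd
+//  D subregister.)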
+def SubReg_i8_lane : SDNodeXFormgetTargetConstant(N->getZExtValue() & 7, SDLoc(N), MVT::i32); +}]>; +def SubReg_i16_lane : SDNodeXFormgetTargetConstant(N->getZExtValue() & 3, SDLoc(N), MVT::i32); +}]>; +def SubReg_i32_lane : SDNodeXFormgetTargetConstant(N->getZExtValue() & 1, SDLoc(N), MVT::i32); +}]>; + +//===----------------------------------------------------------------------===// +// Instruction Classes +//===----------------------------------------------------------------------===// + +// Basic 2-register operations: double- and quad-register. +class N2VD op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> + : N2V; +class N2VQ op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> + : N2V; + +// Basic 2-register intrinsics, both double- and quad-register. +class N2VDInt op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2V; +class N2VQInt op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2V; + +// Same as above, but not predicated. +class N2VDIntnp op19_18, bits<2> op17_16, bits<3> op10_8, bit op7, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2Vnp; + +class N2VQIntnp op19_18, bits<2> op17_16, bits<3> op10_8, bit op7, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2Vnp; + +// Similar to NV2VQIntnp with some more encoding bits exposed (crypto). +class N2VQIntXnp op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, + bit op7, InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2Vnp; + +// Same as N2VQIntXnp but with Vd as a src register. +class N2VQIntX2np op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, + bit op7, InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2Vnp { + let Constraints = "$src = $Vd"; +} + +// Narrow 2-register operations. +class N2VN op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyD, ValueType TyQ, SDNode OpNode> + : N2V; + +// Narrow 2-register intrinsics. +class N2VNInt op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyD, ValueType TyQ, SDPatternOperator IntOp> + : N2V; + +// Long 2-register operations (currently only used for VMOVL). +class N2VL op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode OpNode> + : N2V; + +// Long 2-register intrinsics. 
+class N2VLInt op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDPatternOperator IntOp> + : N2V; + +// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register. +class N2VDShuffle op19_18, bits<5> op11_7, string OpcodeStr, string Dt> + : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm), + (ins DPR:$src1, DPR:$src2), IIC_VPERMD, + OpcodeStr, Dt, "$Vd, $Vm", + "$src1 = $Vd, $src2 = $Vm", []>; +class N2VQShuffle op19_18, bits<5> op11_7, + InstrItinClass itin, string OpcodeStr, string Dt> + : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm), + (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm", + "$src1 = $Vd, $src2 = $Vm", []>; + +// Basic 3-register operations: double- and quad-register. +class N3VD op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> + : N3V { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; + let isCommutable = Commutable; +} +// Same as N3VD but no data type. +class N3VDX op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, + ValueType ResTy, ValueType OpTy, + SDNode OpNode, bit Commutable> + : N3VX{ + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; + let isCommutable = Commutable; +} + +class N3VDSL op21_20, bits<4> op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType Ty, SDNode ShOp> + : N3VLane32<0, 1, op21_20, op11_8, 1, 0, + (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", + [(set (Ty DPR:$Vd), + (Ty (ShOp (Ty DPR:$Vn), + (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; + let isCommutable = 0; +} +class N3VDSL16 op21_20, bits<4> op11_8, + string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> + : N3VLane16<0, 1, op21_20, op11_8, 1, 0, + (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), + NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","", + [(set (Ty DPR:$Vd), + (Ty (ShOp (Ty DPR:$Vn), + (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; + let isCommutable = 0; +} + +class N3VQ op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> + : N3V { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; + let isCommutable = Commutable; +} +class N3VQX op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, + ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> + : N3VX{ + // All of these have a two-operand InstAlias. 
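+  // (Illustrative gloss, assuming the usual effect of this constraint: the
+  //  two-operand alias lets "veor q0, q1" stand for "veor q0, q0, q1", the
+  //  destination doubling as the first source operand.)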
+ let TwoOperandAliasConstraint = "$Vn = $Vd"; + let isCommutable = Commutable; +} +class N3VQSL op21_20, bits<4> op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDNode ShOp> + : N3VLane32<1, 1, op21_20, op11_8, 1, 0, + (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", + [(set (ResTy QPR:$Vd), + (ResTy (ShOp (ResTy QPR:$Vn), + (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm), + imm:$lane)))))]> { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; + let isCommutable = 0; +} +class N3VQSL16 op21_20, bits<4> op11_8, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDNode ShOp> + : N3VLane16<1, 1, op21_20, op11_8, 1, 0, + (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), + NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "", + [(set (ResTy QPR:$Vd), + (ResTy (ShOp (ResTy QPR:$Vn), + (ResTy (NEONvduplane (OpTy DPR_8:$Vm), + imm:$lane)))))]> { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; + let isCommutable = 0; +} + +// Basic 3-register intrinsics, both double- and quad-register. +class N3VDInt op21_20, bits<4> op11_8, bit op4, + Format f, InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable> + : N3V { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; + let isCommutable = Commutable; +} + +class N3VDIntnp op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, Format f, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable> + : N3Vnp; + +class N3VDIntSL op21_20, bits<4> op11_8, InstrItinClass itin, + string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp> + : N3VLane32<0, 1, op21_20, op11_8, 1, 0, + (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", + [(set (Ty DPR:$Vd), + (Ty (IntOp (Ty DPR:$Vn), + (Ty (NEONvduplane (Ty DPR_VFP2:$Vm), + imm:$lane)))))]> { + let isCommutable = 0; +} + +class N3VDIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, + string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp> + : N3VLane16<0, 1, op21_20, op11_8, 1, 0, + (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", + [(set (Ty DPR:$Vd), + (Ty (IntOp (Ty DPR:$Vn), + (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> { + let isCommutable = 0; +} +class N3VDIntSh op21_20, bits<4> op11_8, bit op4, + Format f, InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N3V { + let TwoOperandAliasConstraint = "$Vm = $Vd"; + let isCommutable = 0; +} + +class N3VQInt op21_20, bits<4> op11_8, bit op4, + Format f, InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable> + : N3V { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; + let isCommutable = Commutable; +} + +class N3VQIntnp op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, Format f, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable> + : N3Vnp; + +// Same as N3VQIntnp but with Vd as a src register. 
+class N3VQInt3np op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, Format f, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable> + : N3Vnp { + let Constraints = "$src = $Vd"; +} + +class N3VQIntSL op21_20, bits<4> op11_8, InstrItinClass itin, + string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N3VLane32<1, 1, op21_20, op11_8, 1, 0, + (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", + [(set (ResTy QPR:$Vd), + (ResTy (IntOp (ResTy QPR:$Vn), + (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm), + imm:$lane)))))]> { + let isCommutable = 0; +} +class N3VQIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, + string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N3VLane16<1, 1, op21_20, op11_8, 1, 0, + (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", + [(set (ResTy QPR:$Vd), + (ResTy (IntOp (ResTy QPR:$Vn), + (ResTy (NEONvduplane (OpTy DPR_8:$Vm), + imm:$lane)))))]> { + let isCommutable = 0; +} +class N3VQIntSh op21_20, bits<4> op11_8, bit op4, + Format f, InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N3V { + let TwoOperandAliasConstraint = "$Vm = $Vd"; + let isCommutable = 0; +} + +// Multiply-Add/Sub operations: double- and quad-register. +class N3VDMulOp op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode> + : N3V; + +class N3VDMulOpSL op21_20, bits<4> op11_8, InstrItinClass itin, + string OpcodeStr, string Dt, + ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp> + : N3VLane32<0, 1, op21_20, op11_8, 1, 0, + (outs DPR:$Vd), + (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), + NVMulSLFrm, itin, + OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", + [(set (Ty DPR:$Vd), + (Ty (ShOp (Ty DPR:$src1), + (Ty (MulOp DPR:$Vn, + (Ty (NEONvduplane (Ty DPR_VFP2:$Vm), + imm:$lane)))))))]>; +class N3VDMulOpSL16 op21_20, bits<4> op11_8, InstrItinClass itin, + string OpcodeStr, string Dt, + ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp> + : N3VLane16<0, 1, op21_20, op11_8, 1, 0, + (outs DPR:$Vd), + (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), + NVMulSLFrm, itin, + OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", + [(set (Ty DPR:$Vd), + (Ty (ShOp (Ty DPR:$src1), + (Ty (MulOp DPR:$Vn, + (Ty (NEONvduplane (Ty DPR_8:$Vm), + imm:$lane)))))))]>; + +class N3VQMulOp op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, + SDPatternOperator MulOp, SDPatternOperator OpNode> + : N3V; +class N3VQMulOpSL op21_20, bits<4> op11_8, InstrItinClass itin, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator MulOp, SDPatternOperator ShOp> + : N3VLane32<1, 1, op21_20, op11_8, 1, 0, + (outs QPR:$Vd), + (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), + NVMulSLFrm, itin, + OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", + [(set (ResTy QPR:$Vd), + (ResTy (ShOp (ResTy QPR:$src1), + (ResTy (MulOp QPR:$Vn, + (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm), + imm:$lane)))))))]>; +class N3VQMulOpSL16 op21_20, bits<4> op11_8, InstrItinClass itin, + string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, + 
SDPatternOperator MulOp, SDPatternOperator ShOp> + : N3VLane16<1, 1, op21_20, op11_8, 1, 0, + (outs QPR:$Vd), + (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), + NVMulSLFrm, itin, + OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", + [(set (ResTy QPR:$Vd), + (ResTy (ShOp (ResTy QPR:$src1), + (ResTy (MulOp QPR:$Vn, + (ResTy (NEONvduplane (OpTy DPR_8:$Vm), + imm:$lane)))))))]>; + +// Neon Intrinsic-Op instructions (VABA): double- and quad-register. +class N3VDIntOp op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType Ty, SDPatternOperator IntOp, SDNode OpNode> + : N3V; +class N3VQIntOp op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType Ty, SDPatternOperator IntOp, SDNode OpNode> + : N3V; + +// Neon 3-argument intrinsics, both double- and quad-register. +// The destination register is also used as the first source operand register. +class N3VDInt3 op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N3V; +class N3VQInt3 op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N3V; + +// Long Multiply-Add/Sub operations. +class N3VLMulOp op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> + : N3V; +class N3VLMulOpSL op21_20, bits<4> op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> + : N3VLane32; +class N3VLMulOpSL16 op21_20, bits<4> op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> + : N3VLane16; + +// Long Intrinsic-Op vector operations with explicit extend (VABAL). +class N3VLIntExtOp op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp, + SDNode OpNode> + : N3V; + +// Neon Long 3-argument intrinsic. The destination register is +// a quad-register and is also used as the first source operand register. +class N3VLInt3 op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDPatternOperator IntOp> + : N3V; +class N3VLInt3SL op21_20, bits<4> op11_8, InstrItinClass itin, + string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N3VLane32; +class N3VLInt3SL16 op21_20, bits<4> op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N3VLane16; + +// Narrowing 3-register intrinsics. +class N3VNInt op21_20, bits<4> op11_8, bit op4, + string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ, + SDPatternOperator IntOp, bit Commutable> + : N3V { + let isCommutable = Commutable; +} + +// Long 3-register operations. 
+class N3VL op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable> + : N3V { + let isCommutable = Commutable; +} + +class N3VLSL op21_20, bits<4> op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode OpNode> + : N3VLane32; +class N3VLSL16 op21_20, bits<4> op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode OpNode> + : N3VLane16; + +// Long 3-register operations with explicitly extended operands. +class N3VLExt op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp, + bit Commutable> + : N3V { + let isCommutable = Commutable; +} + +// Long 3-register intrinsics with explicit extend (VABDL). +class N3VLIntExt op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp, + bit Commutable> + : N3V { + let isCommutable = Commutable; +} + +// Long 3-register intrinsics. +class N3VLInt op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable> + : N3V { + let isCommutable = Commutable; +} + +// Same as above, but not predicated. +class N3VLIntnp op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable> + : N3Vnp; + +class N3VLIntSL op21_20, bits<4> op11_8, InstrItinClass itin, + string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N3VLane32; +class N3VLIntSL16 op21_20, bits<4> op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N3VLane16; + +// Wide 3-register operations. +class N3VW op21_20, bits<4> op11_8, bit op4, + string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, + SDNode OpNode, SDNode ExtOp, bit Commutable> + : N3V { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; + let isCommutable = Commutable; +} + +// Pairwise long 2-register intrinsics, both double- and quad-register. +class N2VDPLInt op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op4, + string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2V; +class N2VQPLInt op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op4, + string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2V; + +// Pairwise long 2-register accumulate intrinsics, +// both double- and quad-register. +// The destination register is also used as the first source operand register. +class N2VDPLInt2 op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op4, + string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2V; +class N2VQPLInt2 op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op4, + string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2V; + +// Shift by immediate, +// both double- and quad-register. 
+let TwoOperandAliasConstraint = "$Vm = $Vd" in { +class N2VDSh op11_8, bit op7, bit op4, + Format f, InstrItinClass itin, Operand ImmTy, + string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode> + : N2VImm; +class N2VQSh op11_8, bit op7, bit op4, + Format f, InstrItinClass itin, Operand ImmTy, + string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode> + : N2VImm; +} + +// Long shift by immediate. +class N2VLSh op11_8, bit op7, bit op6, bit op4, + string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, Operand ImmTy, + SDPatternOperator OpNode> + : N2VImm; + +// Narrow shift by immediate. +class N2VNSh op11_8, bit op7, bit op6, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, Operand ImmTy, + SDPatternOperator OpNode> + : N2VImm; + +// Shift right by immediate and accumulate, +// both double- and quad-register. +let TwoOperandAliasConstraint = "$Vm = $Vd" in { +class N2VDShAdd op11_8, bit op7, bit op4, + Operand ImmTy, string OpcodeStr, string Dt, + ValueType Ty, SDNode ShOp> + : N2VImm; +class N2VQShAdd op11_8, bit op7, bit op4, + Operand ImmTy, string OpcodeStr, string Dt, + ValueType Ty, SDNode ShOp> + : N2VImm; +} + +// Shift by immediate and insert, +// both double- and quad-register. +let TwoOperandAliasConstraint = "$Vm = $Vd" in { +class N2VDShIns op11_8, bit op7, bit op4, + Operand ImmTy, Format f, string OpcodeStr, string Dt, + ValueType Ty,SDNode ShOp> + : N2VImm; +class N2VQShIns op11_8, bit op7, bit op4, + Operand ImmTy, Format f, string OpcodeStr, string Dt, + ValueType Ty,SDNode ShOp> + : N2VImm; +} + +// Convert, with fractional bits immediate, +// both double- and quad-register. +class N2VCvtD op11_8, bit op7, bit op4, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp> + : N2VImm; +class N2VCvtQ op11_8, bit op7, bit op4, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp> + : N2VImm; + +//===----------------------------------------------------------------------===// +// Multiclasses +//===----------------------------------------------------------------------===// + +// Abbreviations used in multiclass suffixes: +// Q = quarter int (8 bit) elements +// H = half int (16 bit) elements +// S = single int (32 bit) elements +// D = double int (64 bit) elements + +// Neon 2-register vector operations and intrinsics. + +// Neon 2-register comparisons. +// source operand element sizes of 8, 16 and 32 bits: +multiclass N2V_QHS_cmp op24_23, bits<2> op21_20, bits<2> op17_16, + bits<5> op11_7, bit op4, string opc, string Dt, + string asm, SDNode OpNode> { + // 64-bit vector types. + def v8i8 : N2V; + def v4i16 : N2V; + def v2i32 : N2V; + def v2f32 : N2V { + let Inst{10} = 1; // overwrite F = 1 + } + def v4f16 : N2V, + Requires<[HasNEON,HasFullFP16]> { + let Inst{10} = 1; // overwrite F = 1 + } + + // 128-bit vector types. + def v16i8 : N2V; + def v8i16 : N2V; + def v4i32 : N2V; + def v4f32 : N2V { + let Inst{10} = 1; // overwrite F = 1 + } + def v8f16 : N2V, + Requires<[HasNEON,HasFullFP16]> { + let Inst{10} = 1; // overwrite F = 1 + } +} + + +// Neon 2-register vector intrinsics, +// element sizes of 8, 16 and 32 bits: +multiclass N2VInt_QHS op24_23, bits<2> op21_20, bits<2> op17_16, + bits<5> op11_7, bit op4, + InstrItinClass itinD, InstrItinClass itinQ, + string OpcodeStr, string Dt, SDPatternOperator IntOp> { + // 64-bit vector types. + def v8i8 : N2VDInt; + def v4i16 : N2VDInt; + def v2i32 : N2VDInt; + + // 128-bit vector types. 
+ def v16i8 : N2VQInt; + def v8i16 : N2VQInt; + def v4i32 : N2VQInt; +} + + +// Neon Narrowing 2-register vector operations, +// source operand element sizes of 16, 32 and 64 bits: +multiclass N2VN_HSD op24_23, bits<2> op21_20, bits<2> op17_16, + bits<5> op11_7, bit op6, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + SDNode OpNode> { + def v8i8 : N2VN; + def v4i16 : N2VN; + def v2i32 : N2VN; +} + +// Neon Narrowing 2-register vector intrinsics, +// source operand element sizes of 16, 32 and 64 bits: +multiclass N2VNInt_HSD op24_23, bits<2> op21_20, bits<2> op17_16, + bits<5> op11_7, bit op6, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + SDPatternOperator IntOp> { + def v8i8 : N2VNInt; + def v4i16 : N2VNInt; + def v2i32 : N2VNInt; +} + + +// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL). +// source operand element sizes of 16, 32 and 64 bits: +multiclass N2VL_QHS op24_23, bits<5> op11_7, bit op6, bit op4, + string OpcodeStr, string Dt, SDNode OpNode> { + def v8i16 : N2VL; + def v4i32 : N2VL; + def v2i64 : N2VL; +} + + +// Neon 3-register vector operations. + +// First with only element sizes of 8, 16 and 32 bits: +multiclass N3V_QHS op11_8, bit op4, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, + string OpcodeStr, string Dt, + SDNode OpNode, bit Commutable = 0> { + // 64-bit vector types. + def v8i8 : N3VD; + def v4i16 : N3VD; + def v2i32 : N3VD; + + // 128-bit vector types. + def v16i8 : N3VQ; + def v8i16 : N3VQ; + def v4i32 : N3VQ; +} + +multiclass N3VSL_HS op11_8, string OpcodeStr, SDNode ShOp> { + def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>; + def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>; + def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>; + def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32", + v4i32, v2i32, ShOp>; +} + +// ....then also with element size 64 bits: +multiclass N3V_QHSD op11_8, bit op4, + InstrItinClass itinD, InstrItinClass itinQ, + string OpcodeStr, string Dt, + SDNode OpNode, bit Commutable = 0> + : N3V_QHS { + def v1i64 : N3VD; + def v2i64 : N3VQ; +} + + +// Neon 3-register vector intrinsics. + +// First with only element sizes of 16 and 32 bits: +multiclass N3VInt_HS op11_8, bit op4, Format f, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, + string OpcodeStr, string Dt, + SDPatternOperator IntOp, bit Commutable = 0> { + // 64-bit vector types. + def v4i16 : N3VDInt; + def v2i32 : N3VDInt; + + // 128-bit vector types. + def v8i16 : N3VQInt; + def v4i32 : N3VQInt; +} +multiclass N3VInt_HSSh op11_8, bit op4, Format f, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, + string OpcodeStr, string Dt, + SDPatternOperator IntOp> { + // 64-bit vector types. + def v4i16 : N3VDIntSh; + def v2i32 : N3VDIntSh; + + // 128-bit vector types. 
+ def v8i16 : N3VQIntSh; + def v4i32 : N3VQIntSh; +} + +multiclass N3VIntSL_HS op11_8, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, + string OpcodeStr, string Dt, SDPatternOperator IntOp> { + def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16, + OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>; + def v2i32 : N3VDIntSL<0b10, op11_8, itinD32, + OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>; + def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16, + OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>; + def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32, + OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>; +} + +// ....then also with element size of 8 bits: +multiclass N3VInt_QHS op11_8, bit op4, Format f, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, + string OpcodeStr, string Dt, + SDPatternOperator IntOp, bit Commutable = 0> + : N3VInt_HS { + def v8i8 : N3VDInt; + def v16i8 : N3VQInt; +} +multiclass N3VInt_QHSSh op11_8, bit op4, Format f, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, + string OpcodeStr, string Dt, + SDPatternOperator IntOp> + : N3VInt_HSSh { + def v8i8 : N3VDIntSh; + def v16i8 : N3VQIntSh; +} + + +// ....then also with element size of 64 bits: +multiclass N3VInt_QHSD op11_8, bit op4, Format f, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, + string OpcodeStr, string Dt, + SDPatternOperator IntOp, bit Commutable = 0> + : N3VInt_QHS { + def v1i64 : N3VDInt; + def v2i64 : N3VQInt; +} +multiclass N3VInt_QHSDSh op11_8, bit op4, Format f, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, + string OpcodeStr, string Dt, + SDPatternOperator IntOp> + : N3VInt_QHSSh { + def v1i64 : N3VDIntSh; + def v2i64 : N3VQIntSh; +} + +// Neon Narrowing 3-register vector intrinsics, +// source operand element sizes of 16, 32 and 64 bits: +multiclass N3VNInt_HSD op11_8, bit op4, + string OpcodeStr, string Dt, + SDPatternOperator IntOp, bit Commutable = 0> { + def v8i8 : N3VNInt; + def v4i16 : N3VNInt; + def v2i32 : N3VNInt; +} + + +// Neon Long 3-register vector operations. + +multiclass N3VL_QHS op11_8, bit op4, + InstrItinClass itin16, InstrItinClass itin32, + string OpcodeStr, string Dt, + SDNode OpNode, bit Commutable = 0> { + def v8i16 : N3VL; + def v4i32 : N3VL; + def v2i64 : N3VL; +} + +multiclass N3VLSL_HS op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + SDNode OpNode> { + def v4i16 : N3VLSL16; + def v2i32 : N3VLSL; +} + +multiclass N3VLExt_QHS op11_8, bit op4, + InstrItinClass itin16, InstrItinClass itin32, + string OpcodeStr, string Dt, + SDNode OpNode, SDNode ExtOp, bit Commutable = 0> { + def v8i16 : N3VLExt; + def v4i32 : N3VLExt; + def v2i64 : N3VLExt; +} + +// Neon Long 3-register vector intrinsics. 
+ +// First with only element sizes of 16 and 32 bits: +multiclass N3VLInt_HS op11_8, bit op4, + InstrItinClass itin16, InstrItinClass itin32, + string OpcodeStr, string Dt, + SDPatternOperator IntOp, bit Commutable = 0> { + def v4i32 : N3VLInt; + def v2i64 : N3VLInt; +} + +multiclass N3VLIntSL_HS op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + SDPatternOperator IntOp> { + def v4i16 : N3VLIntSL16; + def v2i32 : N3VLIntSL; +} + +// ....then also with element size of 8 bits: +multiclass N3VLInt_QHS op11_8, bit op4, + InstrItinClass itin16, InstrItinClass itin32, + string OpcodeStr, string Dt, + SDPatternOperator IntOp, bit Commutable = 0> + : N3VLInt_HS { + def v8i16 : N3VLInt; +} + +// ....with explicit extend (VABDL). +multiclass N3VLIntExt_QHS op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + SDPatternOperator IntOp, SDNode ExtOp, bit Commutable = 0> { + def v8i16 : N3VLIntExt; + def v4i32 : N3VLIntExt; + def v2i64 : N3VLIntExt; +} + + +// Neon Wide 3-register vector intrinsics, +// source operand element sizes of 8, 16 and 32 bits: +multiclass N3VW_QHS op11_8, bit op4, + string OpcodeStr, string Dt, + SDNode OpNode, SDNode ExtOp, bit Commutable = 0> { + def v8i16 : N3VW; + def v4i32 : N3VW; + def v2i64 : N3VW; +} + + +// Neon Multiply-Op vector operations, +// element sizes of 8, 16 and 32 bits: +multiclass N3VMulOp_QHS op11_8, bit op4, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, + string OpcodeStr, string Dt, SDNode OpNode> { + // 64-bit vector types. + def v8i8 : N3VDMulOp; + def v4i16 : N3VDMulOp; + def v2i32 : N3VDMulOp; + + // 128-bit vector types. + def v16i8 : N3VQMulOp; + def v8i16 : N3VQMulOp; + def v4i32 : N3VQMulOp; +} + +multiclass N3VMulOpSL_HS op11_8, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, + string OpcodeStr, string Dt, SDPatternOperator ShOp> { + def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16, + OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>; + def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32, + OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>; + def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16, + OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, + mul, ShOp>; + def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32, + OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, + mul, ShOp>; +} + +// Neon Intrinsic-Op vector operations, +// element sizes of 8, 16 and 32 bits: +multiclass N3VIntOp_QHS op11_8, bit op4, + InstrItinClass itinD, InstrItinClass itinQ, + string OpcodeStr, string Dt, SDPatternOperator IntOp, + SDNode OpNode> { + // 64-bit vector types. + def v8i8 : N3VDIntOp; + def v4i16 : N3VDIntOp; + def v2i32 : N3VDIntOp; + + // 128-bit vector types. + def v16i8 : N3VQIntOp; + def v8i16 : N3VQIntOp; + def v4i32 : N3VQIntOp; +} + +// Neon 3-argument intrinsics, +// element sizes of 16 and 32 bits: +multiclass N3VInt3_HS op11_8, bit op4, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, + string OpcodeStr, string Dt, SDPatternOperator IntOp> { + // 64-bit vector types. + def v4i16 : N3VDInt3; + def v2i32 : N3VDInt3; + + // 128-bit vector types. + def v8i16 : N3VQInt3; + def v4i32 : N3VQInt3; +} + +// element sizes of 8, 16 and 32 bits: +multiclass N3VInt3_QHS op11_8, bit op4, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, + string OpcodeStr, string Dt, SDPatternOperator IntOp> + :N3VInt3_HS { + // 64-bit vector types. 
+ def v8i8 : N3VDInt3; + // 128-bit vector types. + def v16i8 : N3VQInt3; +} + +// Neon Long Multiply-Op vector operations, +// element sizes of 8, 16 and 32 bits: +multiclass N3VLMulOp_QHS op11_8, bit op4, + InstrItinClass itin16, InstrItinClass itin32, + string OpcodeStr, string Dt, SDNode MulOp, + SDNode OpNode> { + def v8i16 : N3VLMulOp; + def v4i32 : N3VLMulOp; + def v2i64 : N3VLMulOp; +} + +multiclass N3VLMulOpSL_HS op11_8, string OpcodeStr, + string Dt, SDNode MulOp, SDNode OpNode> { + def v4i16 : N3VLMulOpSL16; + def v2i32 : N3VLMulOpSL; +} + + +// Neon Long 3-argument intrinsics. + +// First with only element sizes of 16 and 32 bits: +multiclass N3VLInt3_HS op11_8, bit op4, + InstrItinClass itin16, InstrItinClass itin32, + string OpcodeStr, string Dt, SDPatternOperator IntOp> { + def v4i32 : N3VLInt3; + def v2i64 : N3VLInt3; +} + +multiclass N3VLInt3SL_HS op11_8, + string OpcodeStr, string Dt, SDPatternOperator IntOp> { + def v4i16 : N3VLInt3SL16; + def v2i32 : N3VLInt3SL; +} + +// ....then also with element size of 8 bits: +multiclass N3VLInt3_QHS op11_8, bit op4, + InstrItinClass itin16, InstrItinClass itin32, + string OpcodeStr, string Dt, SDPatternOperator IntOp> + : N3VLInt3_HS { + def v8i16 : N3VLInt3; +} + +// ....with explicit extend (VABAL). +multiclass N3VLIntExtOp_QHS op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> { + def v8i16 : N3VLIntExtOp; + def v4i32 : N3VLIntExtOp; + def v2i64 : N3VLIntExtOp; +} + + +// Neon Pairwise long 2-register intrinsics, +// element sizes of 8, 16 and 32 bits: +multiclass N2VPLInt_QHS op24_23, bits<2> op21_20, bits<2> op17_16, + bits<5> op11_7, bit op4, + string OpcodeStr, string Dt, SDPatternOperator IntOp> { + // 64-bit vector types. + def v8i8 : N2VDPLInt; + def v4i16 : N2VDPLInt; + def v2i32 : N2VDPLInt; + + // 128-bit vector types. + def v16i8 : N2VQPLInt; + def v8i16 : N2VQPLInt; + def v4i32 : N2VQPLInt; +} + + +// Neon Pairwise long 2-register accumulate intrinsics, +// element sizes of 8, 16 and 32 bits: +multiclass N2VPLInt2_QHS op24_23, bits<2> op21_20, bits<2> op17_16, + bits<5> op11_7, bit op4, + string OpcodeStr, string Dt, SDPatternOperator IntOp> { + // 64-bit vector types. + def v8i8 : N2VDPLInt2; + def v4i16 : N2VDPLInt2; + def v2i32 : N2VDPLInt2; + + // 128-bit vector types. + def v16i8 : N2VQPLInt2; + def v8i16 : N2VQPLInt2; + def v4i32 : N2VQPLInt2; +} + + +// Neon 2-register vector shift by immediate, +// with f of either N2RegVShLFrm or N2RegVShRFrm +// element sizes of 8, 16, 32 and 64 bits: +multiclass N2VShL_QHSD op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + SDNode OpNode> { + // 64-bit vector types. + def v8i8 : N2VDSh { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i16 : N2VDSh { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i32 : N2VDSh { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v1i64 : N2VDSh; + // imm6 = xxxxxx + + // 128-bit vector types. + def v16i8 : N2VQSh { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v8i16 : N2VQSh { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v4i32 : N2VQSh { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v2i64 : N2VQSh; + // imm6 = xxxxxx +} +multiclass N2VShR_QHSD op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + string baseOpc, SDNode OpNode> { + // 64-bit vector types. 
+ def v8i8 : N2VDSh { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i16 : N2VDSh { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i32 : N2VDSh { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v1i64 : N2VDSh; + // imm6 = xxxxxx + + // 128-bit vector types. + def v16i8 : N2VQSh { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v8i16 : N2VQSh { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v4i32 : N2VQSh { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v2i64 : N2VQSh; + // imm6 = xxxxxx +} + +// Neon Shift-Accumulate vector operations, +// element sizes of 8, 16, 32 and 64 bits: +multiclass N2VShAdd_QHSD op11_8, bit op4, + string OpcodeStr, string Dt, SDNode ShOp> { + // 64-bit vector types. + def v8i8 : N2VDShAdd { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i16 : N2VDShAdd { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i32 : N2VDShAdd { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v1i64 : N2VDShAdd; + // imm6 = xxxxxx + + // 128-bit vector types. + def v16i8 : N2VQShAdd { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v8i16 : N2VQShAdd { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v4i32 : N2VQShAdd { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v2i64 : N2VQShAdd; + // imm6 = xxxxxx +} + +// Neon Shift-Insert vector operations, +// with f of either N2RegVShLFrm or N2RegVShRFrm +// element sizes of 8, 16, 32 and 64 bits: +multiclass N2VShInsL_QHSD op11_8, bit op4, + string OpcodeStr> { + // 64-bit vector types. + def v8i8 : N2VDShIns { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i16 : N2VDShIns { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i32 : N2VDShIns { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v1i64 : N2VDShIns; + // imm6 = xxxxxx + + // 128-bit vector types. + def v16i8 : N2VQShIns { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v8i16 : N2VQShIns { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v4i32 : N2VQShIns { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v2i64 : N2VQShIns; + // imm6 = xxxxxx +} +multiclass N2VShInsR_QHSD op11_8, bit op4, + string OpcodeStr> { + // 64-bit vector types. + def v8i8 : N2VDShIns { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i16 : N2VDShIns { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i32 : N2VDShIns { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v1i64 : N2VDShIns; + // imm6 = xxxxxx + + // 128-bit vector types. 
+ def v16i8 : N2VQShIns { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v8i16 : N2VQShIns { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v4i32 : N2VQShIns { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v2i64 : N2VQShIns; + // imm6 = xxxxxx +} + +// Neon Shift Long operations, +// element sizes of 8, 16, 32 bits: +multiclass N2VLSh_QHS op11_8, bit op7, bit op6, + bit op4, string OpcodeStr, string Dt, + SDPatternOperator OpNode> { + def v8i16 : N2VLSh { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i32 : N2VLSh { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i64 : N2VLSh { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } +} + +// Neon Shift Narrow operations, +// element sizes of 16, 32, 64 bits: +multiclass N2VNSh_HSD op11_8, bit op7, bit op6, + bit op4, InstrItinClass itin, string OpcodeStr, string Dt, + SDPatternOperator OpNode> { + def v8i8 : N2VNSh { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i16 : N2VNSh { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i32 : N2VNSh { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } +} + +//===----------------------------------------------------------------------===// +// Instruction Definitions. +//===----------------------------------------------------------------------===// + +// Vector Add Operations. + +// VADD : Vector Add (integer and floating-point) +defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i", + add, 1>; +def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32", + v2f32, v2f32, fadd, 1>; +def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32", + v4f32, v4f32, fadd, 1>; +def VADDhd : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16", + v4f16, v4f16, fadd, 1>, + Requires<[HasNEON,HasFullFP16]>; +def VADDhq : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16", + v8f16, v8f16, fadd, 1>, + Requires<[HasNEON,HasFullFP16]>; +// VADDL : Vector Add Long (Q = D + D) +defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, + "vaddl", "s", add, sext, 1>; +defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, + "vaddl", "u", add, zext, 1>; +// VADDW : Vector Add Wide (Q = Q + D) +defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>; +defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>; +// VHADD : Vector Halving Add +defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + "vhadd", "s", int_arm_neon_vhadds, 1>; +defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + "vhadd", "u", int_arm_neon_vhaddu, 1>; +// VRHADD : Vector Rounding Halving Add +defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + "vrhadd", "s", int_arm_neon_vrhadds, 1>; +defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + "vrhadd", "u", int_arm_neon_vrhaddu, 1>; +// VQADD : Vector Saturating Add +defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + "vqadd", "s", int_arm_neon_vqadds, 1>; +defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + "vqadd", "u", int_arm_neon_vqaddu, 1>; +// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q) +defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>; +// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q) +defm VRADDHN : 
N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i", + int_arm_neon_vraddhn, 1>; + +def : Pat<(v8i8 (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))), + (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>; +def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))), + (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>; +def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))), + (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>; + +// Vector Multiply Operations. + +// VMUL : Vector Multiply (integer, polynomial and floating-point) +defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D, + IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>; +def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul", + "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>; +def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul", + "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>; +def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32", + v2f32, v2f32, fmul, 1>; +def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32", + v4f32, v4f32, fmul, 1>; +def VMULhd : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16", + v4f16, v4f16, fmul, 1>, + Requires<[HasNEON,HasFullFP16]>; +def VMULhq : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16", + v8f16, v8f16, fmul, 1>, + Requires<[HasNEON,HasFullFP16]>; +defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>; +def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>; +def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32, + v2f32, fmul>; +def VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>, + Requires<[HasNEON,HasFullFP16]>; +def VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16, + v4f16, fmul>, + Requires<[HasNEON,HasFullFP16]>; + +def : Pat<(v8i16 (mul (v8i16 QPR:$src1), + (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))), + (v8i16 (VMULslv8i16 (v8i16 QPR:$src1), + (v4i16 (EXTRACT_SUBREG QPR:$src2, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane)))>; +def : Pat<(v4i32 (mul (v4i32 QPR:$src1), + (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))), + (v4i32 (VMULslv4i32 (v4i32 QPR:$src1), + (v2i32 (EXTRACT_SUBREG QPR:$src2, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; +def : Pat<(v4f32 (fmul (v4f32 QPR:$src1), + (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))), + (v4f32 (VMULslfq (v4f32 QPR:$src1), + (v2f32 (EXTRACT_SUBREG QPR:$src2, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; + + +def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))), + (VMULslfd DPR:$Rn, + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0), + (i32 0))>; +def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))), + (VMULslfq QPR:$Rn, + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0), + (i32 0))>; + + +// VQDMULH : Vector Saturating Doubling Multiply Returning High Half +defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D, + IIC_VMULi16Q, IIC_VMULi32Q, + "vqdmulh", "s", int_arm_neon_vqdmulh, 1>; +defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D, + IIC_VMULi16Q, IIC_VMULi32Q, + "vqdmulh", "s", int_arm_neon_vqdmulh>; +def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1), + (v8i16 (NEONvduplane (v8i16 QPR:$src2), + imm:$lane)))), + (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1), + (v4i16 (EXTRACT_SUBREG QPR:$src2, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane)))>; +def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1), + (v4i32 (NEONvduplane (v4i32 QPR:$src2), + imm:$lane)))), + (v4i32 (VQDMULHslv4i32 
(v4i32 QPR:$src1), + (v2i32 (EXTRACT_SUBREG QPR:$src2, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; + +// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half +defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm, + IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q, + "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>; +defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D, + IIC_VMULi16Q, IIC_VMULi32Q, + "vqrdmulh", "s", int_arm_neon_vqrdmulh>; +def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1), + (v8i16 (NEONvduplane (v8i16 QPR:$src2), + imm:$lane)))), + (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1), + (v4i16 (EXTRACT_SUBREG QPR:$src2, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane)))>; +def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1), + (v4i32 (NEONvduplane (v4i32 QPR:$src2), + imm:$lane)))), + (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1), + (v2i32 (EXTRACT_SUBREG QPR:$src2, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; + +// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) +let PostEncoderMethod = "NEONThumb2DataIPostEncoder", + DecoderNamespace = "NEONData" in { + defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "s", NEONvmulls, 1>; + defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "u", NEONvmullu, 1>; + def VMULLp8 : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", + v8i16, v8i8, int_arm_neon_vmullp, 1>; + def VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary, + "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>, + Requires<[HasV8, HasCrypto]>; +} +defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>; +defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>; + +// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D) +defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D, + "vqdmull", "s", int_arm_neon_vqdmull, 1>; +defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, + "vqdmull", "s", int_arm_neon_vqdmull>; + +// Vector Multiply-Accumulate and Multiply-Subtract Operations. 
+ +// VMLA : Vector Multiply Accumulate (integer and floating-point) +defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; +def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", + v2f32, fmul_su, fadd_mlx>, + Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; +def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32", + v4f32, fmul_su, fadd_mlx>, + Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; +def VMLAhd : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16", + v4f16, fmul_su, fadd_mlx>, + Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>; +def VMLAhq : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16", + v8f16, fmul_su, fadd_mlx>, + Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>; +defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; +def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32", + v2f32, fmul_su, fadd_mlx>, + Requires<[HasNEON, UseFPVMLx]>; +def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32", + v4f32, v2f32, fmul_su, fadd_mlx>, + Requires<[HasNEON, UseFPVMLx]>; +def VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16", + v4f16, fmul, fadd>, + Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; +def VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16", + v8f16, v4f16, fmul, fadd>, + Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; + +def : Pat<(v8i16 (add (v8i16 QPR:$src1), + (mul (v8i16 QPR:$src2), + (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))), + (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), + (v4i16 (EXTRACT_SUBREG QPR:$src3, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane)))>; + +def : Pat<(v4i32 (add (v4i32 QPR:$src1), + (mul (v4i32 QPR:$src2), + (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))), + (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), + (v2i32 (EXTRACT_SUBREG QPR:$src3, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; + +def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1), + (fmul_su (v4f32 QPR:$src2), + (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), + (v4f32 (VMLAslfq (v4f32 QPR:$src1), + (v4f32 QPR:$src2), + (v2f32 (EXTRACT_SUBREG QPR:$src3, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>, + Requires<[HasNEON, UseFPVMLx]>; + +// VMLAL : Vector Multiply Accumulate Long (Q += D * D) +defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlal", "s", NEONvmulls, add>; +defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlal", "u", NEONvmullu, add>; + +defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>; +defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>; + +let Predicates = [HasNEON, HasV8_1a] in { + // v8.1a Neon Rounding Double Multiply-Op vector operations, + // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate Long + // (Q += D * D) + defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s", + null_frag>; + def : Pat<(v4i16 (int_arm_neon_vqadds + (v4i16 DPR:$src1), + (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn), + (v4i16 DPR:$Vm))))), + (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>; + def : Pat<(v2i32 (int_arm_neon_vqadds + (v2i32 DPR:$src1), + (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), + (v2i32 DPR:$Vm))))), + (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>; + def : 
Pat<(v8i16 (int_arm_neon_vqadds + (v8i16 QPR:$src1), + (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn), + (v8i16 QPR:$Vm))))), + (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>; + def : Pat<(v4i32 (int_arm_neon_vqadds + (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn), + (v4i32 QPR:$Vm))))), + (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>; + + defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s", + null_frag>; + def : Pat<(v4i16 (int_arm_neon_vqadds + (v4i16 DPR:$src1), + (v4i16 (int_arm_neon_vqrdmulh + (v4i16 DPR:$Vn), + (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), + imm:$lane)))))), + (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, + imm:$lane))>; + def : Pat<(v2i32 (int_arm_neon_vqadds + (v2i32 DPR:$src1), + (v2i32 (int_arm_neon_vqrdmulh + (v2i32 DPR:$Vn), + (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), + imm:$lane)))))), + (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, + imm:$lane))>; + def : Pat<(v8i16 (int_arm_neon_vqadds + (v8i16 QPR:$src1), + (v8i16 (int_arm_neon_vqrdmulh + (v8i16 QPR:$src2), + (v8i16 (NEONvduplane (v8i16 QPR:$src3), + imm:$lane)))))), + (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1), + (v8i16 QPR:$src2), + (v4i16 (EXTRACT_SUBREG + QPR:$src3, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane)))>; + def : Pat<(v4i32 (int_arm_neon_vqadds + (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqrdmulh + (v4i32 QPR:$src2), + (v4i32 (NEONvduplane (v4i32 QPR:$src3), + imm:$lane)))))), + (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1), + (v4i32 QPR:$src2), + (v2i32 (EXTRACT_SUBREG + QPR:$src3, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; + + // VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract Long + // (Q -= D * D) + defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s", + null_frag>; + def : Pat<(v4i16 (int_arm_neon_vqsubs + (v4i16 DPR:$src1), + (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn), + (v4i16 DPR:$Vm))))), + (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>; + def : Pat<(v2i32 (int_arm_neon_vqsubs + (v2i32 DPR:$src1), + (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), + (v2i32 DPR:$Vm))))), + (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>; + def : Pat<(v8i16 (int_arm_neon_vqsubs + (v8i16 QPR:$src1), + (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn), + (v8i16 QPR:$Vm))))), + (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>; + def : Pat<(v4i32 (int_arm_neon_vqsubs + (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn), + (v4i32 QPR:$Vm))))), + (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>; + + defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s", + null_frag>; + def : Pat<(v4i16 (int_arm_neon_vqsubs + (v4i16 DPR:$src1), + (v4i16 (int_arm_neon_vqrdmulh + (v4i16 DPR:$Vn), + (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), + imm:$lane)))))), + (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>; + def : Pat<(v2i32 (int_arm_neon_vqsubs + (v2i32 DPR:$src1), + (v2i32 (int_arm_neon_vqrdmulh + (v2i32 DPR:$Vn), + (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), + imm:$lane)))))), + (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, + imm:$lane))>; + def : Pat<(v8i16 (int_arm_neon_vqsubs + (v8i16 QPR:$src1), + (v8i16 (int_arm_neon_vqrdmulh + (v8i16 QPR:$src2), + (v8i16 (NEONvduplane (v8i16 QPR:$src3), + imm:$lane)))))), + (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1), + (v8i16 QPR:$src2), + 
(v4i16 (EXTRACT_SUBREG + QPR:$src3, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane)))>; + def : Pat<(v4i32 (int_arm_neon_vqsubs + (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqrdmulh + (v4i32 QPR:$src2), + (v4i32 (NEONvduplane (v4i32 QPR:$src3), + imm:$lane)))))), + (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1), + (v4i32 QPR:$src2), + (v2i32 (EXTRACT_SUBREG + QPR:$src3, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; +} +// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D) +defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, + "vqdmlal", "s", null_frag>; +defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>; + +def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), + (v4i16 DPR:$Vm))))), + (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; +def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1), + (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), + (v2i32 DPR:$Vm))))), + (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; +def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), + (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), + imm:$lane)))))), + (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; +def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1), + (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), + (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), + imm:$lane)))))), + (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>; + +// VMLS : Vector Multiply Subtract (integer and floating-point) +defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; +def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", + v2f32, fmul_su, fsub_mlx>, + Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; +def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32", + v4f32, fmul_su, fsub_mlx>, + Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; +def VMLShd : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16", + v4f16, fmul, fsub>, + Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>; +def VMLShq : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16", + v8f16, fmul, fsub>, + Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>; +defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; +def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32", + v2f32, fmul_su, fsub_mlx>, + Requires<[HasNEON, UseFPVMLx]>; +def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32", + v4f32, v2f32, fmul_su, fsub_mlx>, + Requires<[HasNEON, UseFPVMLx]>; +def VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16", + v4f16, fmul, fsub>, + Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; +def VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16", + v8f16, v4f16, fmul, fsub>, + Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; + +def : Pat<(v8i16 (sub (v8i16 QPR:$src1), + (mul (v8i16 QPR:$src2), + (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))), + (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), + (v4i16 (EXTRACT_SUBREG QPR:$src3, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane)))>; + +def : Pat<(v4i32 (sub (v4i32 QPR:$src1), + (mul (v4i32 QPR:$src2), + (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))), + (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), + (v2i32 (EXTRACT_SUBREG QPR:$src3, + (DSubReg_i32_reg imm:$lane))), + 
(SubReg_i32_lane imm:$lane)))>; + +def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1), + (fmul_su (v4f32 QPR:$src2), + (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), + (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2), + (v2f32 (EXTRACT_SUBREG QPR:$src3, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>, + Requires<[HasNEON, UseFPVMLx]>; + +// VMLSL : Vector Multiply Subtract Long (Q -= D * D) +defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlsl", "s", NEONvmulls, sub>; +defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlsl", "u", NEONvmullu, sub>; + +defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>; +defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>; + +// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) +defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, + "vqdmlsl", "s", null_frag>; +defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>; + +def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), + (v4i16 DPR:$Vm))))), + (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; +def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1), + (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), + (v2i32 DPR:$Vm))))), + (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; +def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), + (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), + imm:$lane)))))), + (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; +def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1), + (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), + (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), + imm:$lane)))))), + (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>; + +// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations. 
+def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32", + v2f32, fmul_su, fadd_mlx>, + Requires<[HasNEON,HasVFP4,UseFusedMAC]>; + +def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32", + v4f32, fmul_su, fadd_mlx>, + Requires<[HasNEON,HasVFP4,UseFusedMAC]>; +def VFMAhd : N3VDMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACD, "vfma", "f16", + v4f16, fmul, fadd>, + Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; + +def VFMAhq : N3VQMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACQ, "vfma", "f16", + v8f16, fmul, fadd>, + Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; + +// Fused Vector Multiply Subtract (floating-point) +def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32", + v2f32, fmul_su, fsub_mlx>, + Requires<[HasNEON,HasVFP4,UseFusedMAC]>; +def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32", + v4f32, fmul_su, fsub_mlx>, + Requires<[HasNEON,HasVFP4,UseFusedMAC]>; +def VFMShd : N3VDMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACD, "vfms", "f16", + v4f16, fmul, fsub>, + Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; +def VFMShq : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16", + v8f16, fmul, fsub>, + Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; + +// Match @llvm.fma.* intrinsics +def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)), + (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasVFP4]>; +def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)), + (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasVFP4]>; +def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)), + (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasVFP4]>; +def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)), + (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasVFP4]>; + +// ARMv8.2a dot product instructions. +// We put them in the VFPV8 decoder namespace because the ARM and Thumb +// encodings are the same and thus no further bit twiddling is necessary +// in the disassembler. 
+class VDOT : + N3Vnp<0b11000, 0b10, 0b1101, op6, op4, (outs RegTy:$dst), + (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm), N3RegFrm, IIC_VDOTPROD, + Asm, AsmTy, + [(set (AccumTy RegTy:$dst), + (OpNode (AccumTy RegTy:$Vd), + (InputTy RegTy:$Vn), + (InputTy RegTy:$Vm)))]> { + let Predicates = [HasDotProd]; + let DecoderNamespace = "VFPV8"; + let Constraints = "$dst = $Vd"; +} + +def VUDOTD : VDOT<0, 1, DPR, "vudot", "u8", v2i32, v8i8, int_arm_neon_udot>; +def VSDOTD : VDOT<0, 0, DPR, "vsdot", "s8", v2i32, v8i8, int_arm_neon_sdot>; +def VUDOTQ : VDOT<1, 1, QPR, "vudot", "u8", v4i32, v16i8, int_arm_neon_udot>; +def VSDOTQ : VDOT<1, 0, QPR, "vsdot", "s8", v4i32, v16i8, int_arm_neon_sdot>; + +// Indexed dot product instructions: +multiclass DOTI { + def "" : N3Vnp<0b11100, 0b10, 0b1101, Q, U, (outs Ty:$dst), + (ins Ty:$Vd, Ty:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), + N3RegFrm, IIC_VDOTPROD, opc, dt, []> { + bit lane; + let Inst{5} = lane; + let AsmString = !strconcat(opc, ".", dt, "\t$Vd, $Vn, $Vm$lane"); + let Constraints = "$dst = $Vd"; + let Predicates = [HasDotProd]; + let DecoderNamespace = "VFPV8"; + } + + def : Pat< + (AccumType (OpNode (AccumType Ty:$Vd), + (InputType Ty:$Vn), + (InputType (bitconvert (AccumType + (NEONvduplane (AccumType Ty:$Vm), + VectorIndex32:$lane)))))), + (!cast(NAME) Ty:$Vd, Ty:$Vn, RHS, VectorIndex32:$lane)>; +} + +defm VUDOTDI : DOTI<"vudot", "u8", 0b0, 0b1, DPR, v2i32, v8i8, + int_arm_neon_udot, (v2i32 DPR_VFP2:$Vm)>; +defm VSDOTDI : DOTI<"vsdot", "s8", 0b0, 0b0, DPR, v2i32, v8i8, + int_arm_neon_sdot, (v2i32 DPR_VFP2:$Vm)>; +defm VUDOTQI : DOTI<"vudot", "u8", 0b1, 0b1, QPR, v4i32, v16i8, + int_arm_neon_udot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>; +defm VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR, v4i32, v16i8, + int_arm_neon_sdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>; + + +// ARMv8.3 complex operations +class BaseN3VCP8ComplexTied pattern> + : N3VCP8<{?,?}, {op21,s}, q, op4, oops, + iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "$src1 = $Vd", pattern>{ + bits<2> rot; + let Inst{24-23} = rot; +} + +class BaseN3VCP8ComplexOdd pattern> + : N3VCP8<{?,op23}, {op21,s}, q, op4, oops, + iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "", pattern> { + bits<1> rot; + let Inst{24} = rot; +} + +class BaseN3VCP8ComplexTiedLane32 pattern> + : N3VLaneCP8 { + bits<2> rot; + bit lane; + + let Inst{21-20} = rot; + let Inst{5} = lane; +} + +class BaseN3VCP8ComplexTiedLane64 pattern> + : N3VLaneCP8 { + bits<2> rot; + bit lane; + + let Inst{21-20} = rot; + let Inst{5} = Vm{4}; + // This is needed because the lane operand does not have any bits in the + // encoding (it only has one possible value), so we need to manually set it + // to it's default value. + let DecoderMethod = "DecodeNEONComplexLane64Instruction"; +} + +multiclass N3VCP8ComplexTied { + let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { + def v4f16 : BaseN3VCP8ComplexTied; + def v8f16 : BaseN3VCP8ComplexTied; + } + let Predicates = [HasNEON,HasV8_3a] in { + def v2f32 : BaseN3VCP8ComplexTied; + def v4f32 : BaseN3VCP8ComplexTied; + } +} + +multiclass N3VCP8ComplexOdd { + let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { + def v4f16 : BaseN3VCP8ComplexOdd; + def v8f16 : BaseN3VCP8ComplexOdd; + } + let Predicates = [HasNEON,HasV8_3a] in { + def v2f32 : BaseN3VCP8ComplexOdd; + def v4f32 : BaseN3VCP8ComplexOdd; + } +} + +// These instructions index by pairs of lanes, so the VectorIndexes are twice +// as wide as the data types. 
+multiclass N3VCP8ComplexTiedLane { + let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { + def v4f16_indexed : BaseN3VCP8ComplexTiedLane32; + def v8f16_indexed : BaseN3VCP8ComplexTiedLane32; + } + let Predicates = [HasNEON,HasV8_3a] in { + def v2f32_indexed : BaseN3VCP8ComplexTiedLane64; + def v4f32_indexed : BaseN3VCP8ComplexTiedLane64; + } +} + +defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla", null_frag>; +defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd", null_frag>; +defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla", null_frag>; + +// Vector Subtract Operations. + +// VSUB : Vector Subtract (integer and floating-point) +defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ, + "vsub", "i", sub, 0>; +def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32", + v2f32, v2f32, fsub, 0>; +def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32", + v4f32, v4f32, fsub, 0>; +def VSUBhd : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND, "vsub", "f16", + v4f16, v4f16, fsub, 0>, + Requires<[HasNEON,HasFullFP16]>; +def VSUBhq : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16", + v8f16, v8f16, fsub, 0>, + Requires<[HasNEON,HasFullFP16]>; +// VSUBL : Vector Subtract Long (Q = D - D) +defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, + "vsubl", "s", sub, sext, 0>; +defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, + "vsubl", "u", sub, zext, 0>; +// VSUBW : Vector Subtract Wide (Q = Q - D) +defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>; +defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>; +// VHSUB : Vector Halving Subtract +defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, + "vhsub", "s", int_arm_neon_vhsubs, 0>; +defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, + "vhsub", "u", int_arm_neon_vhsubu, 0>; +// VQSUB : Vector Saturing Subtract +defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, + "vqsub", "s", int_arm_neon_vqsubs, 0>; +defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, + "vqsub", "u", int_arm_neon_vqsubu, 0>; +// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q) +defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>; +// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q) +defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i", + int_arm_neon_vrsubhn, 0>; + +def : Pat<(v8i8 (trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))), + (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>; +def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))), + (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>; +def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))), + (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>; + +// Vector Comparisons. 
+ +// VCEQ : Vector Compare Equal +defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>; +def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, + NEONvceq, 1>; +def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, + NEONvceq, 1>; +def VCEQhd : N3VD<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16, + NEONvceq, 1>, + Requires<[HasNEON, HasFullFP16]>; +def VCEQhq : N3VQ<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16, + NEONvceq, 1>, + Requires<[HasNEON, HasFullFP16]>; + +let TwoOperandAliasConstraint = "$Vm = $Vd" in +defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", + "$Vd, $Vm, #0", NEONvceqz>; + +// VCGE : Vector Compare Greater Than or Equal +defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>; +defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>; +def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, + NEONvcge, 0>; +def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, + NEONvcge, 0>; +def VCGEhd : N3VD<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16, + NEONvcge, 0>, + Requires<[HasNEON, HasFullFP16]>; +def VCGEhq : N3VQ<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16, + NEONvcge, 0>, + Requires<[HasNEON, HasFullFP16]>; + +let TwoOperandAliasConstraint = "$Vm = $Vd" in { +defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s", + "$Vd, $Vm, #0", NEONvcgez>; +defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", + "$Vd, $Vm, #0", NEONvclez>; +} + +// VCGT : Vector Compare Greater Than +defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>; +defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>; +def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, + NEONvcgt, 0>; +def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, + NEONvcgt, 0>; +def VCGThd : N3VD<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16, + NEONvcgt, 0>, + Requires<[HasNEON, HasFullFP16]>; +def VCGThq : N3VQ<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16, + NEONvcgt, 0>, + Requires<[HasNEON, HasFullFP16]>; + +let TwoOperandAliasConstraint = "$Vm = $Vd" in { +defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s", + "$Vd, $Vm, #0", NEONvcgtz>; +defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s", + "$Vd, $Vm, #0", NEONvcltz>; +} + +// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) +def VACGEfd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge", + "f32", v2i32, v2f32, int_arm_neon_vacge, 0>; +def VACGEfq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge", + "f32", v4i32, v4f32, int_arm_neon_vacge, 0>; +def VACGEhd : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge", + "f16", v4i16, v4f16, int_arm_neon_vacge, 0>, + Requires<[HasNEON, HasFullFP16]>; +def VACGEhq : N3VQInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge", + "f16", v8i16, v8f16, int_arm_neon_vacge, 0>, + Requires<[HasNEON, HasFullFP16]>; +// VACGT : Vector Absolute Compare Greater Than (aka VCAGT) +def VACGTfd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt", + "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>; +def 
VACGTfq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt", + "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>; +def VACGThd : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt", + "f16", v4i16, v4f16, int_arm_neon_vacgt, 0>, + Requires<[HasNEON, HasFullFP16]>; +def VACGThq : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt", + "f16", v8f16, v8f16, int_arm_neon_vacgt, 0>, + Requires<[HasNEON, HasFullFP16]>; +// VTST : Vector Test Bits +defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vtst", "", NEONvtst, 1>; + +def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm", + (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; +def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm", + (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm", + (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm", + (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; +let Predicates = [HasNEON, HasFullFP16] in { +def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm", + (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; +def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm", + (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm", + (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm", + (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; +} + +def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm", + (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; +def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm", + (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm", + (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm", + (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; +let Predicates = [HasNEON, HasFullFP16] in { +def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm", + (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; +def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm", + (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm", + (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm", + (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; +} + +// Vector Bitwise Operations. 
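The bitwise records below (VAND/VEOR/VORR, their immediate forms, VBIC/VORN/VMVN, and the select VBSL) map one-to-one onto <arm_neon.h> intrinsics. Same assumptions as the previous sketch; vbic is a & ~b, vorn is a | ~b, and vbsl keeps bits of the second operand where the mask bit is 1 and of the third where it is 0:

#include <arm_neon.h>

uint32x4_t bitwise_demo(uint32x4_t a, uint32x4_t b,
                        uint32x4_t mask, uint32x4_t x, uint32x4_t y)
{
    uint32x4_t t = veorq_u32(vandq_u32(a, b), vorrq_u32(a, b)); /* vand/vorr/veor */
    t = vbicq_u32(t, b);                        /* vbic: t & ~b                   */
    t = vornq_u32(t, a);                        /* vorn: t | ~a                   */
    t = vmvnq_u32(t);                           /* vmvn: ~t                       */
    return veorq_u32(t, vbslq_u32(mask, x, y)); /* vbsl bitwise select            */
}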
+ +def vnotd : PatFrag<(ops node:$in), + (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>; +def vnotq : PatFrag<(ops node:$in), + (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>; + + +// VAND : Vector Bitwise AND +def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand", + v2i32, v2i32, and, 1>; +def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand", + v4i32, v4i32, and, 1>; + +// VEOR : Vector Bitwise Exclusive OR +def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor", + v2i32, v2i32, xor, 1>; +def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor", + v4i32, v4i32, xor, 1>; + +// VORR : Vector Bitwise OR +def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr", + v2i32, v2i32, or, 1>; +def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr", + v4i32, v4i32, or, 1>; + +def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1, + (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src), + IIC_VMOVImm, + "vorr", "i16", "$Vd, $SIMM", "$src = $Vd", + [(set DPR:$Vd, + (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> { + let Inst{9} = SIMM{9}; +} + +def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1, + (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src), + IIC_VMOVImm, + "vorr", "i32", "$Vd, $SIMM", "$src = $Vd", + [(set DPR:$Vd, + (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> { + let Inst{10-9} = SIMM{10-9}; +} + +def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1, + (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src), + IIC_VMOVImm, + "vorr", "i16", "$Vd, $SIMM", "$src = $Vd", + [(set QPR:$Vd, + (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> { + let Inst{9} = SIMM{9}; +} + +def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1, + (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src), + IIC_VMOVImm, + "vorr", "i32", "$Vd, $SIMM", "$src = $Vd", + [(set QPR:$Vd, + (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> { + let Inst{10-9} = SIMM{10-9}; +} + + +// VBIC : Vector Bitwise Bit Clear (AND NOT) +let TwoOperandAliasConstraint = "$Vn = $Vd" in { +def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), + (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD, + "vbic", "$Vd, $Vn, $Vm", "", + [(set DPR:$Vd, (v2i32 (and DPR:$Vn, + (vnotd DPR:$Vm))))]>; +def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), + (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ, + "vbic", "$Vd, $Vn, $Vm", "", + [(set QPR:$Vd, (v4i32 (and QPR:$Vn, + (vnotq QPR:$Vm))))]>; +} + +def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1, + (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src), + IIC_VMOVImm, + "vbic", "i16", "$Vd, $SIMM", "$src = $Vd", + [(set DPR:$Vd, + (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> { + let Inst{9} = SIMM{9}; +} + +def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1, + (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src), + IIC_VMOVImm, + "vbic", "i32", "$Vd, $SIMM", "$src = $Vd", + [(set DPR:$Vd, + (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> { + let Inst{10-9} = SIMM{10-9}; +} + +def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1, + (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src), + IIC_VMOVImm, + "vbic", "i16", "$Vd, $SIMM", "$src = $Vd", + [(set QPR:$Vd, + (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> { + let Inst{9} = SIMM{9}; +} + +def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1, + (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src), + IIC_VMOVImm, + "vbic", "i32", "$Vd, $SIMM", "$src = $Vd", + [(set QPR:$Vd, + (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> { + let Inst{10-9} = SIMM{10-9}; +} + +// VORN : Vector Bitwise OR NOT +def 
VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd), + (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD, + "vorn", "$Vd, $Vn, $Vm", "", + [(set DPR:$Vd, (v2i32 (or DPR:$Vn, + (vnotd DPR:$Vm))))]>; +def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd), + (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ, + "vorn", "$Vd, $Vn, $Vm", "", + [(set QPR:$Vd, (v4i32 (or QPR:$Vn, + (vnotq QPR:$Vm))))]>; + +// VMVN : Vector Bitwise NOT (Immediate) + +let isReMaterializable = 1 in { + +def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd), + (ins nImmSplatI16:$SIMM), IIC_VMOVImm, + "vmvn", "i16", "$Vd, $SIMM", "", + [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> { + let Inst{9} = SIMM{9}; +} + +def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd), + (ins nImmSplatI16:$SIMM), IIC_VMOVImm, + "vmvn", "i16", "$Vd, $SIMM", "", + [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> { + let Inst{9} = SIMM{9}; +} + +def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd), + (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, + "vmvn", "i32", "$Vd, $SIMM", "", + [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> { + let Inst{11-8} = SIMM{11-8}; +} + +def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd), + (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, + "vmvn", "i32", "$Vd, $SIMM", "", + [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> { + let Inst{11-8} = SIMM{11-8}; +} +} + +// VMVN : Vector Bitwise NOT +def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0, + (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD, + "vmvn", "$Vd, $Vm", "", + [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>; +def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0, + (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD, + "vmvn", "$Vd, $Vm", "", + [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>; +def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>; +def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>; + +// VBSL : Vector Bitwise Select +def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), + (ins DPR:$src1, DPR:$Vn, DPR:$Vm), + N3RegFrm, IIC_VCNTiD, + "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", + [(set DPR:$Vd, + (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>; +def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1), + (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1), + (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1), + (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1), + (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1), + (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; + +def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd), + (and DPR:$Vm, (vnotd DPR:$Vd)))), + (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; + +def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd), + (and DPR:$Vm, (vnotd DPR:$Vd)))), + (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; + +def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), + (ins QPR:$src1, QPR:$Vn, QPR:$Vm), + N3RegFrm, IIC_VCNTiQ, + "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", + [(set QPR:$Vd, + (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>; + +def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1), + 
(v16i8 QPR:$Vn), (v16i8 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1), + (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1), + (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1), + (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1), + (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; + +def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd), + (and QPR:$Vm, (vnotq QPR:$Vd)))), + (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd), + (and QPR:$Vm, (vnotq QPR:$Vd)))), + (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; + +// VBIF : Vector Bitwise Insert if False +// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst", +// FIXME: This instruction's encoding MAY NOT BE correct. +def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1, + (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), + N3RegFrm, IIC_VBINiD, + "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd", + []>; +def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1, + (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), + N3RegFrm, IIC_VBINiQ, + "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd", + []>; + +// VBIT : Vector Bitwise Insert if True +// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst", +// FIXME: This instruction's encoding MAY NOT BE correct. +def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1, + (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), + N3RegFrm, IIC_VBINiD, + "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd", + []>; +def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1, + (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), + N3RegFrm, IIC_VBINiQ, + "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd", + []>; + +// VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking +// for equivalent operations with different register constraints; it just +// inserts copies. + +// Vector Absolute Differences. 
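VABD, its widening form VABDL, and the accumulating VABA/VABAL defined below are the building blocks of sum-of-absolute-differences kernels. A short sketch under the same <arm_neon.h> assumptions:

#include <arm_neon.h>

uint16x8_t sad_step(uint16x8_t acc, uint8x8_t a, uint8x8_t b)
{
    uint8x8_t  d  = vabd_u8(a, b);    /* vabd.u8  d,d,d : |a - b|             */
    uint16x8_t dl = vabdl_u8(a, b);   /* vabdl.u8 q,d,d : widened |a - b|     */
    (void)d;
    return vaddq_u16(acc, dl);        /* equivalently vabal_u8(acc, a, b)     */
}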
+ +// VABD : Vector Absolute Difference +defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, + "vabd", "s", int_arm_neon_vabds, 1>; +defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, + "vabd", "u", int_arm_neon_vabdu, 1>; +def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND, + "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>; +def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ, + "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>; +def VABDhd : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBIND, + "vabd", "f16", v4f16, v4f16, int_arm_neon_vabds, 1>, + Requires<[HasNEON, HasFullFP16]>; +def VABDhq : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ, + "vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>, + Requires<[HasNEON, HasFullFP16]>; + +// VABDL : Vector Absolute Difference Long (Q = | D - D |) +defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, + "vabdl", "s", int_arm_neon_vabds, zext, 1>; +defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, + "vabdl", "u", int_arm_neon_vabdu, zext, 1>; + +def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)), (zext (v8i8 DPR:$opB))))), + (VABDLuv8i16 DPR:$opA, DPR:$opB)>; +def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))), + (VABDLuv4i32 DPR:$opA, DPR:$opB)>; + +// ISD::ABS is not legal for v2i64, so VABDL needs to be matched from the +// shift/xor pattern for ABS. + +def abd_shr : + PatFrag<(ops node:$in1, node:$in2, node:$shift), + (NEONvshrs (sub (zext node:$in1), + (zext node:$in2)), (i32 $shift))>; + +def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))), + (v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)), + (zext (v2i32 DPR:$opB))), + (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))))), + (VABDLuv2i64 DPR:$opA, DPR:$opB)>; + +// VABA : Vector Absolute Difference and Accumulate +defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ, + "vaba", "s", int_arm_neon_vabds, add>; +defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ, + "vaba", "u", int_arm_neon_vabdu, add>; + +// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |) +defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD, + "vabal", "s", int_arm_neon_vabds, zext, add>; +defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD, + "vabal", "u", int_arm_neon_vabdu, zext, add>; + +// Vector Maximum and Minimum. 
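The records below select per-lane maxima and minima; the base f32 forms use the fmaxnan/fminnan selectors (NaN-propagating), while the VMAXNM/VMINNM records added for ARMv8 follow IEEE maxNum/minNum. A tiny clamp under the same assumptions:

#include <arm_neon.h>

/* vmax.f32 followed by vmin.f32; NaN lanes propagate with these base forms. */
float32x4_t clamp_f32(float32x4_t v, float32x4_t lo, float32x4_t hi)
{
    return vminq_f32(vmaxq_f32(v, lo), hi);
}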
+ +// VMAX : Vector Maximum +defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, + "vmax", "s", smax, 1>; +defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, + "vmax", "u", umax, 1>; +def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND, + "vmax", "f32", + v2f32, v2f32, fmaxnan, 1>; +def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ, + "vmax", "f32", + v4f32, v4f32, fmaxnan, 1>; +def VMAXhd : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND, + "vmax", "f16", + v4f16, v4f16, fmaxnan, 1>, + Requires<[HasNEON, HasFullFP16]>; +def VMAXhq : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ, + "vmax", "f16", + v8f16, v8f16, fmaxnan, 1>, + Requires<[HasNEON, HasFullFP16]>; + +// VMAXNM +let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { + def VMAXNMNDf : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1, + N3RegFrm, NoItinerary, "vmaxnm", "f32", + v2f32, v2f32, fmaxnum, 1>, + Requires<[HasV8, HasNEON]>; + def VMAXNMNQf : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1, + N3RegFrm, NoItinerary, "vmaxnm", "f32", + v4f32, v4f32, fmaxnum, 1>, + Requires<[HasV8, HasNEON]>; + def VMAXNMNDh : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1, + N3RegFrm, NoItinerary, "vmaxnm", "f16", + v4f16, v4f16, fmaxnum, 1>, + Requires<[HasV8, HasNEON, HasFullFP16]>; + def VMAXNMNQh : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1, + N3RegFrm, NoItinerary, "vmaxnm", "f16", + v8f16, v8f16, fmaxnum, 1>, + Requires<[HasV8, HasNEON, HasFullFP16]>; +} + +// VMIN : Vector Minimum +defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, + "vmin", "s", smin, 1>; +defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, + "vmin", "u", umin, 1>; +def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND, + "vmin", "f32", + v2f32, v2f32, fminnan, 1>; +def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ, + "vmin", "f32", + v4f32, v4f32, fminnan, 1>; +def VMINhd : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND, + "vmin", "f16", + v4f16, v4f16, fminnan, 1>, + Requires<[HasNEON, HasFullFP16]>; +def VMINhq : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ, + "vmin", "f16", + v8f16, v8f16, fminnan, 1>, + Requires<[HasNEON, HasFullFP16]>; + +// VMINNM +let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { + def VMINNMNDf : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1, + N3RegFrm, NoItinerary, "vminnm", "f32", + v2f32, v2f32, fminnum, 1>, + Requires<[HasV8, HasNEON]>; + def VMINNMNQf : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1, + N3RegFrm, NoItinerary, "vminnm", "f32", + v4f32, v4f32, fminnum, 1>, + Requires<[HasV8, HasNEON]>; + def VMINNMNDh : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1, + N3RegFrm, NoItinerary, "vminnm", "f16", + v4f16, v4f16, fminnum, 1>, + Requires<[HasV8, HasNEON, HasFullFP16]>; + def VMINNMNQh : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1, + N3RegFrm, NoItinerary, "vminnm", "f16", + v8f16, v8f16, fminnum, 1>, + Requires<[HasV8, HasNEON, HasFullFP16]>; +} + +// Vector Pairwise Operations. 
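Pairwise ops act on adjacent lane pairs of D registers (VPADD), optionally widening (VPADDL) or accumulating (VPADAL); chaining VPADD is the usual way to build a horizontal reduction. Sketch under the same assumptions:

#include <arm_neon.h>

float hsum_f32(float32x4_t v)
{
    float32x2_t lo = vget_low_f32(v);
    float32x2_t hi = vget_high_f32(v);
    float32x2_t s  = vpadd_f32(lo, hi);  /* vpadd.f32 d,d,d : adds lane pairs  */
    s = vpadd_f32(s, s);                 /* second step finishes the reduction */
    return vget_lane_f32(s, 0);
}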
+ +// VPADD : Vector Pairwise Add +def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD, + "vpadd", "i8", + v8i8, v8i8, int_arm_neon_vpadd, 0>; +def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD, + "vpadd", "i16", + v4i16, v4i16, int_arm_neon_vpadd, 0>; +def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD, + "vpadd", "i32", + v2i32, v2i32, int_arm_neon_vpadd, 0>; +def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, + IIC_VPBIND, "vpadd", "f32", + v2f32, v2f32, int_arm_neon_vpadd, 0>; +def VPADDh : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm, + IIC_VPBIND, "vpadd", "f16", + v4f16, v4f16, int_arm_neon_vpadd, 0>, + Requires<[HasNEON, HasFullFP16]>; + +// VPADDL : Vector Pairwise Add Long +defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s", + int_arm_neon_vpaddls>; +defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u", + int_arm_neon_vpaddlu>; + +// VPADAL : Vector Pairwise Add and Accumulate Long +defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s", + int_arm_neon_vpadals>; +defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u", + int_arm_neon_vpadalu>; + +// VPMAX : Vector Pairwise Maximum +def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", + "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>; +def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", + "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>; +def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", + "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>; +def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", + "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>; +def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", + "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>; +def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", + "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>; +def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax", + "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>; +def VPMAXh : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax", + "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>, + Requires<[HasNEON, HasFullFP16]>; + +// VPMIN : Vector Pairwise Minimum +def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", + "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>; +def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", + "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>; +def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", + "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>; +def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", + "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>; +def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", + "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>; +def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", + "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>; +def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin", + "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>; +def VPMINh : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin", + "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>, + Requires<[HasNEON, HasFullFP16]>; + +// Vector Reciprocal and Reciprocal Square Root Estimate and Step. 
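VRECPE/VRSQRTE below return low-precision estimates (on the order of 8 significant bits), and VRECPS/VRSQRTS are the matching Newton-Raphson step instructions (VRECPS computes 2 - a*b), so a refined reciprocal is estimate times step, repeated. Sketch under the same assumptions:

#include <arm_neon.h>

float32x4_t approx_div(float32x4_t num, float32x4_t den)
{
    float32x4_t r = vrecpeq_f32(den);         /* vrecpe.f32 : initial estimate */
    r = vmulq_f32(r, vrecpsq_f32(den, r));    /* one Newton-Raphson refinement */
    r = vmulq_f32(r, vrecpsq_f32(den, r));    /* second refinement             */
    return vmulq_f32(num, r);                 /* num * (1/den), approximately  */
}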
+ +// VRECPE : Vector Reciprocal Estimate +def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, + IIC_VUNAD, "vrecpe", "u32", + v2i32, v2i32, int_arm_neon_vrecpe>; +def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, + IIC_VUNAQ, "vrecpe", "u32", + v4i32, v4i32, int_arm_neon_vrecpe>; +def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, + IIC_VUNAD, "vrecpe", "f32", + v2f32, v2f32, int_arm_neon_vrecpe>; +def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, + IIC_VUNAQ, "vrecpe", "f32", + v4f32, v4f32, int_arm_neon_vrecpe>; +def VRECPEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0, + IIC_VUNAD, "vrecpe", "f16", + v4f16, v4f16, int_arm_neon_vrecpe>, + Requires<[HasNEON, HasFullFP16]>; +def VRECPEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0, + IIC_VUNAQ, "vrecpe", "f16", + v8f16, v8f16, int_arm_neon_vrecpe>, + Requires<[HasNEON, HasFullFP16]>; + +// VRECPS : Vector Reciprocal Step +def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, + IIC_VRECSD, "vrecps", "f32", + v2f32, v2f32, int_arm_neon_vrecps, 1>; +def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, + IIC_VRECSQ, "vrecps", "f32", + v4f32, v4f32, int_arm_neon_vrecps, 1>; +def VRECPShd : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm, + IIC_VRECSD, "vrecps", "f16", + v4f16, v4f16, int_arm_neon_vrecps, 1>, + Requires<[HasNEON, HasFullFP16]>; +def VRECPShq : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm, + IIC_VRECSQ, "vrecps", "f16", + v8f16, v8f16, int_arm_neon_vrecps, 1>, + Requires<[HasNEON, HasFullFP16]>; + +// VRSQRTE : Vector Reciprocal Square Root Estimate +def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, + IIC_VUNAD, "vrsqrte", "u32", + v2i32, v2i32, int_arm_neon_vrsqrte>; +def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, + IIC_VUNAQ, "vrsqrte", "u32", + v4i32, v4i32, int_arm_neon_vrsqrte>; +def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, + IIC_VUNAD, "vrsqrte", "f32", + v2f32, v2f32, int_arm_neon_vrsqrte>; +def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, + IIC_VUNAQ, "vrsqrte", "f32", + v4f32, v4f32, int_arm_neon_vrsqrte>; +def VRSQRTEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0, + IIC_VUNAD, "vrsqrte", "f16", + v4f16, v4f16, int_arm_neon_vrsqrte>, + Requires<[HasNEON, HasFullFP16]>; +def VRSQRTEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0, + IIC_VUNAQ, "vrsqrte", "f16", + v8f16, v8f16, int_arm_neon_vrsqrte>, + Requires<[HasNEON, HasFullFP16]>; + +// VRSQRTS : Vector Reciprocal Square Root Step +def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, + IIC_VRECSD, "vrsqrts", "f32", + v2f32, v2f32, int_arm_neon_vrsqrts, 1>; +def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, + IIC_VRECSQ, "vrsqrts", "f32", + v4f32, v4f32, int_arm_neon_vrsqrts, 1>; +def VRSQRTShd : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm, + IIC_VRECSD, "vrsqrts", "f16", + v4f16, v4f16, int_arm_neon_vrsqrts, 1>, + Requires<[HasNEON, HasFullFP16]>; +def VRSQRTShq : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm, + IIC_VRECSQ, "vrsqrts", "f16", + v8f16, v8f16, int_arm_neon_vrsqrts, 1>, + Requires<[HasNEON, HasFullFP16]>; + +// Vector Shifts. 
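The shift records below cover immediate shifts (VSHL/VSHR), register shifts where negative lane values shift right, widening and narrowing shifts (VSHLL/VSHRN), the rounding and saturating variants, and shift-and-accumulate (VSRA). Sketch under the same assumptions:

#include <arm_neon.h>

uint8x8_t shift_demo(uint16x8_t a, uint16x8_t acc)
{
    uint16x8_t l = vshlq_n_u16(a, 4);   /* vshl.i16 q,q,#4                    */
    uint16x8_t r = vshrq_n_u16(a, 4);   /* vshr.u16 q,q,#4                    */
    acc = vsraq_n_u16(acc, a, 1);       /* vsra.u16: acc += a >> 1            */
    (void)l; (void)acc;
    return vshrn_n_u16(r, 8);           /* vshrn.i16: shift right and narrow  */
}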
+ +// VSHL : Vector Shift +defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm, + IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ, + "vshl", "s", int_arm_neon_vshifts>; +defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm, + IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ, + "vshl", "u", int_arm_neon_vshiftu>; + +// VSHL : Vector Shift Left (Immediate) +defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>; + +// VSHR : Vector Shift Right (Immediate) +defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs", + NEONvshrs>; +defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu", + NEONvshru>; + +// VSHLL : Vector Shift Left Long +defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", + PatFrag<(ops node:$LHS, node:$RHS), (NEONvshl (sext node:$LHS), node:$RHS)>>; +defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", + PatFrag<(ops node:$LHS, node:$RHS), (NEONvshl (zext node:$LHS), node:$RHS)>>; + +// VSHLL : Vector Shift Left Long (with maximum shift count) +class N2VLShMax op21_16, bits<4> op11_8, bit op7, + bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy, + ValueType OpTy, Operand ImmTy> + : N2VLSh { + let Inst{21-16} = op21_16; + let DecoderMethod = "DecodeVSHLMaxInstruction"; +} +def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8", + v8i16, v8i8, imm8>; +def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16", + v4i32, v4i16, imm16>; +def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32", + v2i64, v2i32, imm32>; + +def : Pat<(v8i16 (NEONvshl (zext (v8i8 DPR:$Rn)), (i32 8))), + (VSHLLi8 DPR:$Rn, 8)>; +def : Pat<(v4i32 (NEONvshl (zext (v4i16 DPR:$Rn)), (i32 16))), + (VSHLLi16 DPR:$Rn, 16)>; +def : Pat<(v2i64 (NEONvshl (zext (v2i32 DPR:$Rn)), (i32 32))), + (VSHLLi32 DPR:$Rn, 32)>; +def : Pat<(v8i16 (NEONvshl (sext (v8i8 DPR:$Rn)), (i32 8))), + (VSHLLi8 DPR:$Rn, 8)>; +def : Pat<(v4i32 (NEONvshl (sext (v4i16 DPR:$Rn)), (i32 16))), + (VSHLLi16 DPR:$Rn, 16)>; +def : Pat<(v2i64 (NEONvshl (sext (v2i32 DPR:$Rn)), (i32 32))), + (VSHLLi32 DPR:$Rn, 32)>; +def : Pat<(v8i16 (NEONvshl (anyext (v8i8 DPR:$Rn)), (i32 8))), + (VSHLLi8 DPR:$Rn, 8)>; +def : Pat<(v4i32 (NEONvshl (anyext (v4i16 DPR:$Rn)), (i32 16))), + (VSHLLi16 DPR:$Rn, 16)>; +def : Pat<(v2i64 (NEONvshl (anyext (v2i32 DPR:$Rn)), (i32 32))), + (VSHLLi32 DPR:$Rn, 32)>; + +// VSHRN : Vector Shift Right and Narrow +defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i", + PatFrag<(ops node:$Rn, node:$amt), + (trunc (NEONvshrs node:$Rn, node:$amt))>>; + +def : Pat<(v8i8 (trunc (NEONvshru (v8i16 QPR:$Vn), shr_imm8:$amt))), + (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>; +def : Pat<(v4i16 (trunc (NEONvshru (v4i32 QPR:$Vn), shr_imm16:$amt))), + (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>; +def : Pat<(v2i32 (trunc (NEONvshru (v2i64 QPR:$Vn), shr_imm32:$amt))), + (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>; + +// VRSHL : Vector Rounding Shift +defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm, + IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, + "vrshl", "s", int_arm_neon_vrshifts>; +defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm, + IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, + "vrshl", "u", int_arm_neon_vrshiftu>; +// VRSHR : Vector Rounding Shift Right +defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs", + NEONvrshrs>; +defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu", + NEONvrshru>; + +// VRSHRN : Vector Rounding Shift Right and 
Narrow +defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i", + NEONvrshrn>; + +// VQSHL : Vector Saturating Shift +defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm, + IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, + "vqshl", "s", int_arm_neon_vqshifts>; +defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm, + IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, + "vqshl", "u", int_arm_neon_vqshiftu>; +// VQSHL : Vector Saturating Shift Left (Immediate) +defm VQSHLsi : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>; +defm VQSHLui : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>; + +// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned) +defm VQSHLsu : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>; + +// VQSHRN : Vector Saturating Shift Right and Narrow +defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s", + NEONvqshrns>; +defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u", + NEONvqshrnu>; + +// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned) +defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s", + NEONvqshrnsu>; + +// VQRSHL : Vector Saturating Rounding Shift +defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm, + IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, + "vqrshl", "s", int_arm_neon_vqrshifts>; +defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm, + IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, + "vqrshl", "u", int_arm_neon_vqrshiftu>; + +// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow +defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s", + NEONvqrshrns>; +defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u", + NEONvqrshrnu>; + +// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned) +defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s", + NEONvqrshrnsu>; + +// VSRA : Vector Shift Right and Accumulate +defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>; +defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>; +// VRSRA : Vector Rounding Shift Right and Accumulate +defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>; +defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>; + +// VSLI : Vector Shift Left and Insert +defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">; + +// VSRI : Vector Shift Right and Insert +defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">; + +// Vector Absolute and Saturating Absolute. + +// VABS : Vector Absolute Value +defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, + IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", abs>; +def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0, + "vabs", "f32", + v2f32, v2f32, fabs>; +def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0, + "vabs", "f32", + v4f32, v4f32, fabs>; +def VABShd : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0, + "vabs", "f16", + v4f16, v4f16, fabs>, + Requires<[HasNEON, HasFullFP16]>; +def VABShq : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0, + "vabs", "f16", + v8f16, v8f16, fabs>, + Requires<[HasNEON, HasFullFP16]>; + +// VQABS : Vector Saturating Absolute Value +defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, + IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s", + int_arm_neon_vqabs>; + +// Vector Negate. 
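The next block covers negation (VNEG/VQNEG, where the saturating form pins INT_MIN instead of wrapping) together with the bit-counting ops VCLS/VCLZ/VCNT. Sketch under the same assumptions:

#include <arm_neon.h>

int32x4_t neg_demo(int32x4_t a)
{
    int32x4_t n  = vnegq_s32(a);    /* vneg.s32 q,q                       */
    int32x4_t qn = vqnegq_s32(a);   /* vqneg.s32: saturates INT32_MIN     */
    int32x4_t lz = vclzq_s32(a);    /* vclz.i32: leading zeros per lane   */
    return vaddq_s32(vaddq_s32(n, qn), lz);
}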
+ +def vnegd : PatFrag<(ops node:$in), + (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>; +def vnegq : PatFrag<(ops node:$in), + (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>; + +class VNEGD size, string OpcodeStr, string Dt, ValueType Ty> + : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm), + IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "", + [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>; +class VNEGQ size, string OpcodeStr, string Dt, ValueType Ty> + : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm), + IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "", + [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>; + +// VNEG : Vector Negate (integer) +def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>; +def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>; +def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>; +def VNEGs8q : VNEGQ<0b00, "vneg", "s8", v16i8>; +def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>; +def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>; + +// VNEG : Vector Negate (floating-point) +def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, + (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD, + "vneg", "f32", "$Vd, $Vm", "", + [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>; +def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0, + (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ, + "vneg", "f32", "$Vd, $Vm", "", + [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>; +def VNEGhd : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0, + (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD, + "vneg", "f16", "$Vd, $Vm", "", + [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>, + Requires<[HasNEON, HasFullFP16]>; +def VNEGhq : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0, + (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ, + "vneg", "f16", "$Vd, $Vm", "", + [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>, + Requires<[HasNEON, HasFullFP16]>; + +def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>; +def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>; +def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>; +def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>; +def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>; +def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>; + +// VQNEG : Vector Saturating Negate +defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0, + IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s", + int_arm_neon_vqneg>; + +// Vector Bit Counting Operations. + +// VCLS : Vector Count Leading Sign Bits +defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0, + IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s", + int_arm_neon_vcls>; +// VCLZ : Vector Count Leading Zeros +defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0, + IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i", + ctlz>; +// VCNT : Vector Count One Bits +def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, + IIC_VCNTiD, "vcnt", "8", + v8i8, v8i8, ctpop>; +def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, + IIC_VCNTiQ, "vcnt", "8", + v16i8, v16i8, ctpop>; + +// Vector Swap +def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0, + (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2), + NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm", + []>; +def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0, + (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2), + NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm", + []>; + +// Vector Move Operations. 
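The move records below make register-to-register VMOV an alias of VORR with identical sources, define the encodable immediate splats, and, further on, the lane get/set forms. Sketch under the same assumptions; which encoding the compiler actually picks for the splat depends on whether the constant fits a VMOV immediate:

#include <arm_neon.h>

int32x4_t move_demo(int32x4_t v, int32_t x)
{
    int32x4_t copy  = v;                         /* vorr/vmov q,q (register move) */
    int32x4_t splat = vdupq_n_s32(0x01010101);   /* vmov.i8 q,#1 if encodable     */
    int32_t   lane  = vgetq_lane_s32(v, 2);      /* vmov r, d[...] (get lane)     */
    copy = vsetq_lane_s32(x + lane, copy, 0);    /* vmov d[...], r (set lane)     */
    return vaddq_s32(copy, splat);
}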
+ +// VMOV : Vector Move (Register) +def : NEONInstAlias<"vmov${p} $Vd, $Vm", + (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmov${p} $Vd, $Vm", + (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; + +// VMOV : Vector Move (Immediate) + +// Although VMOVs are not strictly speaking cheap, they are as expensive +// as their copies counterpart (VORR), so we should prefer rematerialization +// over splitting when it applies. +let isReMaterializable = 1, isAsCheapAsAMove=1 in { +def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd), + (ins nImmSplatI8:$SIMM), IIC_VMOVImm, + "vmov", "i8", "$Vd, $SIMM", "", + [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>; +def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd), + (ins nImmSplatI8:$SIMM), IIC_VMOVImm, + "vmov", "i8", "$Vd, $SIMM", "", + [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>; + +def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd), + (ins nImmSplatI16:$SIMM), IIC_VMOVImm, + "vmov", "i16", "$Vd, $SIMM", "", + [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> { + let Inst{9} = SIMM{9}; +} + +def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd), + (ins nImmSplatI16:$SIMM), IIC_VMOVImm, + "vmov", "i16", "$Vd, $SIMM", "", + [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> { + let Inst{9} = SIMM{9}; +} + +def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd), + (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, + "vmov", "i32", "$Vd, $SIMM", "", + [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> { + let Inst{11-8} = SIMM{11-8}; +} + +def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd), + (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, + "vmov", "i32", "$Vd, $SIMM", "", + [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> { + let Inst{11-8} = SIMM{11-8}; +} + +def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd), + (ins nImmSplatI64:$SIMM), IIC_VMOVImm, + "vmov", "i64", "$Vd, $SIMM", "", + [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>; +def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd), + (ins nImmSplatI64:$SIMM), IIC_VMOVImm, + "vmov", "i64", "$Vd, $SIMM", "", + [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>; + +def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd), + (ins nImmVMOVF32:$SIMM), IIC_VMOVImm, + "vmov", "f32", "$Vd, $SIMM", "", + [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>; +def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd), + (ins nImmVMOVF32:$SIMM), IIC_VMOVImm, + "vmov", "f32", "$Vd, $SIMM", "", + [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>; +} // isReMaterializable, isAsCheapAsAMove + +// Add support for bytes replication feature, so it could be GAS compatible. +multiclass NEONImmReplicateI8InstAlias { + // E.g. instructions below: + // "vmov.i32 d0, #0xffffffff" + // "vmov.i32 d0, #0xabababab" + // "vmov.i16 d0, #0xabab" + // are incorrect, but we could deal with such cases. + // For last two instructions, for example, it should emit: + // "vmov.i8 d0, #0xab" + def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm", + (VMOVv8i8 DPR:$Vd, nImmVMOVIReplicate:$Vm, pred:$p)>; + def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm", + (VMOVv16i8 QPR:$Vd, nImmVMOVIReplicate:$Vm, pred:$p)>; + // Also add same support for VMVN instructions. 
So instruction: + // "vmvn.i32 d0, #0xabababab" + // actually means: + // "vmov.i8 d0, #0x54" + def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm", + (VMOVv8i8 DPR:$Vd, nImmVINVIReplicate:$Vm, pred:$p)>; + def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm", + (VMOVv16i8 QPR:$Vd, nImmVINVIReplicate:$Vm, pred:$p)>; +} + +defm : NEONImmReplicateI8InstAlias; +defm : NEONImmReplicateI8InstAlias; +defm : NEONImmReplicateI8InstAlias; + +// Similar to above for types other than i8, e.g.: +// "vmov.i32 d0, #0xab00ab00" -> "vmov.i16 d0, #0xab00" +// "vmvn.i64 q0, #0xab000000ab000000" -> "vmvn.i32 q0, #0xab000000" +// In this case we do not canonicalize VMVN to VMOV +multiclass NEONImmReplicateInstAlias { + def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm", + (V8 DPR:$Vd, nImmVMOVIReplicate:$Vm, pred:$p)>; + def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm", + (V16 QPR:$Vd, nImmVMOVIReplicate:$Vm, pred:$p)>; + def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm", + (NV8 DPR:$Vd, nImmVMOVIReplicate:$Vm, pred:$p)>; + def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm", + (NV16 QPR:$Vd, nImmVMOVIReplicate:$Vm, pred:$p)>; +} + +defm : NEONImmReplicateInstAlias; +defm : NEONImmReplicateInstAlias; +defm : NEONImmReplicateInstAlias; +// TODO: add "VMOV <-> VMVN" conversion for cases like +// "vmov.i32 d0, #0xffaaffaa" -> "vmvn.i16 d0, #0x55" +// "vmvn.i32 d0, #0xaaffaaff" -> "vmov.i16 d0, #0xff00" + +// On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0" +// require zero cycles to execute so they should be used wherever possible for +// setting a register to zero. + +// Even without these pseudo-insts we would probably end up with the correct +// instruction, but we could not mark the general ones with "isAsCheapAsAMove" +// since they are sometimes rather expensive (in general). 
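From C, the zero idiom discussed above is just a zero splat; on HasZCZ cores the resulting vmov.i32 qD, #0 is modeled as free, so rematerializing it is preferable to spilling the register. A one-line sketch under the same assumptions:

#include <arm_neon.h>

uint32x4_t zero_vec(void)
{
    return vdupq_n_u32(0);   /* expected to lower to vmov.i32 q, #0 */
}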
+ +let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in { + def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm, + [(set DPR:$Vd, (v2i32 NEONimmAllZerosV))], + (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>, + Requires<[HasZCZ]>; + def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm, + [(set QPR:$Vd, (v4i32 NEONimmAllZerosV))], + (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>, + Requires<[HasZCZ]>; +} + +// VMOV : Vector Get Lane (move scalar to ARM core register) + +def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?}, + (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane), + IIC_VMOVSI, "vmov", "s8", "$R, $V$lane", + [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V), + imm:$lane))]> { + let Inst{21} = lane{2}; + let Inst{6-5} = lane{1-0}; +} +def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1}, + (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane), + IIC_VMOVSI, "vmov", "s16", "$R, $V$lane", + [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V), + imm:$lane))]> { + let Inst{21} = lane{1}; + let Inst{6} = lane{0}; +} +def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?}, + (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane), + IIC_VMOVSI, "vmov", "u8", "$R, $V$lane", + [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V), + imm:$lane))]> { + let Inst{21} = lane{2}; + let Inst{6-5} = lane{1-0}; +} +def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1}, + (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane), + IIC_VMOVSI, "vmov", "u16", "$R, $V$lane", + [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V), + imm:$lane))]> { + let Inst{21} = lane{1}; + let Inst{6} = lane{0}; +} +def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00, + (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane), + IIC_VMOVSI, "vmov", "32", "$R, $V$lane", + [(set GPR:$R, (extractelt (v2i32 DPR:$V), + imm:$lane))]>, + Requires<[HasVFP2, HasFastVGETLNi32]> { + let Inst{21} = lane{0}; +} +// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td +def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane), + (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src, + (DSubReg_i8_reg imm:$lane))), + (SubReg_i8_lane imm:$lane))>; +def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane), + (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane))>; +def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane), + (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src, + (DSubReg_i8_reg imm:$lane))), + (SubReg_i8_lane imm:$lane))>; +def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane), + (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane))>; +def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), + (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane))>, + Requires<[HasNEON, HasFastVGETLNi32]>; +def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane), + (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>, + Requires<[HasNEON, HasSlowVGETLNi32]>; +def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), + (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>, + Requires<[HasNEON, HasSlowVGETLNi32]>; +def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2), + (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)), + (SSubReg_f32_reg imm:$src2))>; +def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2), + (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)), + (SSubReg_f32_reg imm:$src2))>; +//def : Pat<(extractelt 
(v2i64 QPR:$src1), imm:$src2), +// (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>; +def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2), + (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>; + + +// VMOV : Vector Set Lane (move ARM core register to scalar) + +let Constraints = "$src1 = $V" in { +def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V), + (ins DPR:$src1, GPR:$R, VectorIndex8:$lane), + IIC_VMOVISL, "vmov", "8", "$V$lane, $R", + [(set DPR:$V, (vector_insert (v8i8 DPR:$src1), + GPR:$R, imm:$lane))]> { + let Inst{21} = lane{2}; + let Inst{6-5} = lane{1-0}; +} +def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V), + (ins DPR:$src1, GPR:$R, VectorIndex16:$lane), + IIC_VMOVISL, "vmov", "16", "$V$lane, $R", + [(set DPR:$V, (vector_insert (v4i16 DPR:$src1), + GPR:$R, imm:$lane))]> { + let Inst{21} = lane{1}; + let Inst{6} = lane{0}; +} +def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V), + (ins DPR:$src1, GPR:$R, VectorIndex32:$lane), + IIC_VMOVISL, "vmov", "32", "$V$lane, $R", + [(set DPR:$V, (insertelt (v2i32 DPR:$src1), + GPR:$R, imm:$lane))]>, + Requires<[HasVFP2]> { + let Inst{21} = lane{0}; + // This instruction is equivalent as + // $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm) + let isInsertSubreg = 1; +} +} +def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane), + (v16i8 (INSERT_SUBREG QPR:$src1, + (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1, + (DSubReg_i8_reg imm:$lane))), + GPR:$src2, (SubReg_i8_lane imm:$lane))), + (DSubReg_i8_reg imm:$lane)))>; +def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane), + (v8i16 (INSERT_SUBREG QPR:$src1, + (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1, + (DSubReg_i16_reg imm:$lane))), + GPR:$src2, (SubReg_i16_lane imm:$lane))), + (DSubReg_i16_reg imm:$lane)))>; +def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane), + (v4i32 (INSERT_SUBREG QPR:$src1, + (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1, + (DSubReg_i32_reg imm:$lane))), + GPR:$src2, (SubReg_i32_lane imm:$lane))), + (DSubReg_i32_reg imm:$lane)))>; + +def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)), + (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)), + SPR:$src2, (SSubReg_f32_reg imm:$src3))>; +def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)), + (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)), + SPR:$src2, (SSubReg_f32_reg imm:$src3))>; + +//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)), +// (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>; +def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)), + (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>; + +def : Pat<(v2f32 (scalar_to_vector SPR:$src)), + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>; +def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))), + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; +def : Pat<(v4f32 (scalar_to_vector SPR:$src)), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>; + +def : Pat<(v8i8 (scalar_to_vector GPR:$src)), + (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>; +def : Pat<(v4i16 (scalar_to_vector GPR:$src)), + (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>; +def : Pat<(v2i32 (scalar_to_vector GPR:$src)), + (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>; + +def : Pat<(v16i8 (scalar_to_vector GPR:$src)), + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)), + dsub_0)>; 
+def : Pat<(v8i16 (scalar_to_vector GPR:$src)), + (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), + (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)), + dsub_0)>; +def : Pat<(v4i32 (scalar_to_vector GPR:$src)), + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), + (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)), + dsub_0)>; + +// VDUP : Vector Duplicate (from ARM core register to all elements) + +class VDUPD opcod1, bits<2> opcod3, string Dt, ValueType Ty> + : NVDup; +class VDUPQ opcod1, bits<2> opcod3, string Dt, ValueType Ty> + : NVDup; + +def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>; +def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>; +def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>, + Requires<[HasNEON, HasFastVDUP32]>; +def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>; +def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>; +def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>; + +// NEONvdup patterns for uarchs with fast VDUP.32. +def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>, + Requires<[HasNEON,HasFastVDUP32]>; +def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>; + +// NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead. +def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>, + Requires<[HasNEON,HasSlowVDUP32]>; +def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>, + Requires<[HasNEON,HasSlowVDUP32]>; + +// VDUP : Vector Duplicate Lane (from scalar to all elements) + +class VDUPLND op19_16, string OpcodeStr, string Dt, + ValueType Ty, Operand IdxTy> + : NVDupLane; + +class VDUPLNQ op19_16, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, Operand IdxTy> + : NVDupLane; + +// Inst{19-16} is partially specified depending on the element size. 
+ +def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> { + bits<3> lane; + let Inst{19-17} = lane{2-0}; +} +def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> { + bits<2> lane; + let Inst{19-18} = lane{1-0}; +} +def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> { + bits<1> lane; + let Inst{19} = lane{0}; +} +def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> { + bits<3> lane; + let Inst{19-17} = lane{2-0}; +} +def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> { + bits<2> lane; + let Inst{19-18} = lane{1-0}; +} +def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> { + bits<1> lane; + let Inst{19} = lane{0}; +} + +def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)), + (VDUPLN32d DPR:$Vm, imm:$lane)>; + +def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)), + (VDUPLN32q DPR:$Vm, imm:$lane)>; + +def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)), + (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src, + (DSubReg_i8_reg imm:$lane))), + (SubReg_i8_lane imm:$lane)))>; +def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)), + (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane)))>; +def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)), + (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; +def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)), + (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; + +def : Pat<(v2f32 (NEONvdup (f32 SPR:$src))), + (v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$src, ssub_0), (i32 0)))>; +def : Pat<(v4f32 (NEONvdup (f32 SPR:$src))), + (v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$src, ssub_0), (i32 0)))>; + +// VMOVN : Vector Narrowing Move +defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN, + "vmovn", "i", trunc>; +// VQMOVN : Vector Saturating Narrowing Move +defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD, + "vqmovn", "s", int_arm_neon_vqmovns>; +defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD, + "vqmovn", "u", int_arm_neon_vqmovnu>; +defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD, + "vqmovun", "s", int_arm_neon_vqmovnsu>; +// VMOVL : Vector Lengthening Move +defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>; +defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>; +def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>; +def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>; +def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>; + +// Vector Conversions. 
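The conversion records below handle f32 to and from s32/u32 (and the f16 forms under HasFullFP16), plus the fixed-point variants whose #n operand gives the number of fraction bits. Sketch under the same assumptions:

#include <arm_neon.h>

float32x4_t cvt_demo(float32x4_t f, int32x4_t q16)
{
    int32x4_t   i  = vcvtq_s32_f32(f);         /* vcvt.s32.f32 (truncating)      */
    float32x4_t fi = vcvtq_f32_s32(i);         /* vcvt.f32.s32                   */
    float32x4_t fx = vcvtq_n_f32_s32(q16, 16); /* vcvt.f32.s32 ..., #16 (Q16.16) */
    return vaddq_f32(fi, fx);
}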
+ +// VCVT : Vector Convert Between Floating-Point and Integers +def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", + v2i32, v2f32, fp_to_sint>; +def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", + v2i32, v2f32, fp_to_uint>; +def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", + v2f32, v2i32, sint_to_fp>; +def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", + v2f32, v2i32, uint_to_fp>; + +def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", + v4i32, v4f32, fp_to_sint>; +def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", + v4i32, v4f32, fp_to_uint>; +def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", + v4f32, v4i32, sint_to_fp>; +def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", + v4f32, v4i32, uint_to_fp>; + +def VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16", + v4i16, v4f16, fp_to_sint>, + Requires<[HasNEON, HasFullFP16]>; +def VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16", + v4i16, v4f16, fp_to_uint>, + Requires<[HasNEON, HasFullFP16]>; +def VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16", + v4f16, v4i16, sint_to_fp>, + Requires<[HasNEON, HasFullFP16]>; +def VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16", + v4f16, v4i16, uint_to_fp>, + Requires<[HasNEON, HasFullFP16]>; + +def VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16", + v8i16, v8f16, fp_to_sint>, + Requires<[HasNEON, HasFullFP16]>; +def VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16", + v8i16, v8f16, fp_to_uint>, + Requires<[HasNEON, HasFullFP16]>; +def VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16", + v8f16, v8i16, sint_to_fp>, + Requires<[HasNEON, HasFullFP16]>; +def VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16", + v8f16, v8i16, uint_to_fp>, + Requires<[HasNEON, HasFullFP16]>; + +// VCVT{A, N, P, M} +multiclass VCVT_FPI op10_8, SDPatternOperator IntS, + SDPatternOperator IntU> { + let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { + def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), + "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>; + def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), + "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>; + def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), + "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>; + def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), + "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>; + def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), + "s16.f16", v4i16, v4f16, IntS>, + Requires<[HasV8, HasNEON, HasFullFP16]>; + def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), + "s16.f16", v8i16, v8f16, IntS>, + Requires<[HasV8, HasNEON, HasFullFP16]>; + def UDh : N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), + "u16.f16", v4i16, v4f16, IntU>, + Requires<[HasV8, HasNEON, HasFullFP16]>; + def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), + "u16.f16", v8i16, v8f16, IntU>, + Requires<[HasV8, HasNEON, HasFullFP16]>; + } +} + +defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>; +defm VCVTNN : 
VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>; +defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>; +defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>; + +// VCVT : Vector Convert Between Floating-Point and Fixed-Point. +let DecoderMethod = "DecodeVCVTD" in { +def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", + v2i32, v2f32, int_arm_neon_vcvtfp2fxs>; +def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32", + v2i32, v2f32, int_arm_neon_vcvtfp2fxu>; +def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32", + v2f32, v2i32, int_arm_neon_vcvtfxs2fp>; +def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", + v2f32, v2i32, int_arm_neon_vcvtfxu2fp>; +let Predicates = [HasNEON, HasFullFP16] in { +def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16", + v4i16, v4f16, int_arm_neon_vcvtfp2fxs>; +def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16", + v4i16, v4f16, int_arm_neon_vcvtfp2fxu>; +def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16", + v4f16, v4i16, int_arm_neon_vcvtfxs2fp>; +def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16", + v4f16, v4i16, int_arm_neon_vcvtfxu2fp>; +} // Predicates = [HasNEON, HasFullFP16] +} + +let DecoderMethod = "DecodeVCVTQ" in { +def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", + v4i32, v4f32, int_arm_neon_vcvtfp2fxs>; +def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32", + v4i32, v4f32, int_arm_neon_vcvtfp2fxu>; +def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32", + v4f32, v4i32, int_arm_neon_vcvtfxs2fp>; +def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", + v4f32, v4i32, int_arm_neon_vcvtfxu2fp>; +let Predicates = [HasNEON, HasFullFP16] in { +def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16", + v8i16, v8f16, int_arm_neon_vcvtfp2fxs>; +def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16", + v8i16, v8f16, int_arm_neon_vcvtfp2fxu>; +def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16", + v8f16, v8i16, int_arm_neon_vcvtfxs2fp>; +def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16", + v8f16, v8i16, int_arm_neon_vcvtfxu2fp>; +} // Predicates = [HasNEON, HasFullFP16] +} + +def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0", + (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0", + (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0", + (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0", + (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>; + +def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0", + (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0", + (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0", + (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0", + (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>; + +def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0", + (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0", + (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0", + (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0", + (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>; + +def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0", + (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0", + 
(VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0",
+                    (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0",
+                    (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
+
+
+// VCVT : Vector Convert Between Half-Precision and Single-Precision.
+def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
+                      IIC_VUNAQ, "vcvt", "f16.f32",
+                      v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
+              Requires<[HasNEON, HasFP16]>;
+def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
+                      IIC_VUNAQ, "vcvt", "f32.f16",
+                      v4f32, v4i16, int_arm_neon_vcvthf2fp>,
+              Requires<[HasNEON, HasFP16]>;
+
+// Vector Reverse.
+
+// VREV64 : Vector Reverse elements within 64-bit doublewords
+
+class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
+        (ins DPR:$Vm), IIC_VMOVD,
+        OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
+class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
+        (ins QPR:$Vm), IIC_VMOVQ,
+        OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;
+
+def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>;
+def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
+def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
+def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
+
+def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>;
+def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
+def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
+def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;
+
+// VREV32 : Vector Reverse elements within 32-bit words
+
+class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
+        (ins DPR:$Vm), IIC_VMOVD,
+        OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
+class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
+        (ins QPR:$Vm), IIC_VMOVQ,
+        OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;
+
+def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>;
+def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
+
+def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>;
+def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
+
+// VREV16 : Vector Reverse elements within 16-bit halfwords
+
+class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
+        (ins DPR:$Vm), IIC_VMOVD,
+        OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
+class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
+        (ins QPR:$Vm), IIC_VMOVQ,
+        OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;
+
+def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
+def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;
+
+// Other Vector Shuffles.
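+// Illustration: "vext.8 d0, d1, d2, #3" produces d1 bytes 3..7 followed by
+// d2 bytes 0..2, i.e. an 8-byte window selected #3 bytes into the {d1,d2} pair.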
+
+// Aligned extractions: really just dropping registers
+
+class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
+  : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
+        (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;
+
+def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;
+
+def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;
+
+def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;
+
+def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
+
+def : AlignedVEXTq<v4f16, v8f16, DSubReg_i16_reg>;
+
+
+// VEXT : Vector Extract
+
+
+// All of these have a two-operand InstAlias.
+let TwoOperandAliasConstraint = "$Vn = $Vd" in {
+class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
+  : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
+        (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
+        IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
+        [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
+                                     (Ty DPR:$Vm), imm:$index)))]> {
+  bits<3> index;
+  let Inst{11} = 0b0;
+  let Inst{10-8} = index{2-0};
+}
+
+class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
+  : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
+        (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
+        IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
+        [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
+                                     (Ty QPR:$Vm), imm:$index)))]> {
+  bits<4> index;
+  let Inst{11-8} = index{3-0};
+}
+}
+
+def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
+  let Inst{10-8} = index{2-0};
+}
+def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
+  let Inst{10-9} = index{1-0};
+  let Inst{8} = 0b0;
+}
+def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
+  let Inst{10} = index{0};
+  let Inst{9-8} = 0b00;
+}
+def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
+                           (v2f32 DPR:$Vm),
+                           (i32 imm:$index))),
+          (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
+
+def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
+  let Inst{11-8} = index{3-0};
+}
+def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
+  let Inst{11-9} = index{2-0};
+  let Inst{8} = 0b0;
+}
+def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
+  let Inst{11-10} = index{1-0};
+  let Inst{9-8} = 0b00;
+}
+def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
+  let Inst{11} = index{0};
+  let Inst{10-8} = 0b000;
+}
+def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
+                           (v4f32 QPR:$Vm),
+                           (i32 imm:$index))),
+          (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
+
+// VTRN : Vector Transpose
+
+def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
+def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
+def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;
+
+def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
+def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
+def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
+
+// VUZP : Vector Unzip (Deinterleave)
+
+def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
+def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
+// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
+def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
+                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
+
+def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
+def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
+def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
+
+// VZIP : Vector Zip (Interleave)
+
+def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
+def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
+// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
+def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
+                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
+
+def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
+def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
+def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
+
+// Vector Table Lookup and Table Extension.
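+// Illustration: "vtbl.8 d0, {d1,d2}, d3" looks up each byte index in d3 within
+// the 16-byte table {d1,d2}; out-of-range indices produce 0, while the vtbx
+// forms leave the corresponding destination byte unchanged.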
+ +// VTBL : Vector Table Lookup +let DecoderMethod = "DecodeTBLInstruction" in { +def VTBL1 + : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd), + (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1, + "vtbl", "8", "$Vd, $Vn, $Vm", "", + [(set DPR:$Vd, (v8i8 (NEONvtbl1 VecListOneD:$Vn, DPR:$Vm)))]>; + +let hasExtraSrcRegAllocReq = 1 in { +def VTBL2 + : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd), + (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2, + "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; +def VTBL3 + : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd), + (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3, + "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; +def VTBL4 + : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd), + (ins VecListFourD:$Vn, DPR:$Vm), + NVTBLFrm, IIC_VTB4, + "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; +} // hasExtraSrcRegAllocReq = 1 + +def VTBL3Pseudo + : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>; +def VTBL4Pseudo + : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>; + +// VTBX : Vector Table Extension +def VTBX1 + : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd), + (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1, + "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", + [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1 + DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>; +let hasExtraSrcRegAllocReq = 1 in { +def VTBX2 + : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd), + (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2, + "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>; +def VTBX3 + : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd), + (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm), + NVTBLFrm, IIC_VTBX3, + "vtbx", "8", "$Vd, $Vn, $Vm", + "$orig = $Vd", []>; +def VTBX4 + : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), + (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4, + "vtbx", "8", "$Vd, $Vn, $Vm", + "$orig = $Vd", []>; +} // hasExtraSrcRegAllocReq = 1 + +def VTBX3Pseudo + : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src), + IIC_VTBX3, "$orig = $dst", []>; +def VTBX4Pseudo + : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src), + IIC_VTBX4, "$orig = $dst", []>; +} // DecoderMethod = "DecodeTBLInstruction" + +def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)), + (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0, + v8i8:$Vn1, dsub_1), + v8i8:$Vm))>; +def : Pat<(v8i8 (int_arm_neon_vtbx2 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1, + v8i8:$Vm)), + (v8i8 (VTBX2 v8i8:$orig, + (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0, + v8i8:$Vn1, dsub_1), + v8i8:$Vm))>; + +def : Pat<(v8i8 (int_arm_neon_vtbl3 v8i8:$Vn0, v8i8:$Vn1, + v8i8:$Vn2, v8i8:$Vm)), + (v8i8 (VTBL3Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0, + v8i8:$Vn1, dsub_1, + v8i8:$Vn2, dsub_2, + (v8i8 (IMPLICIT_DEF)), dsub_3), + v8i8:$Vm))>; +def : Pat<(v8i8 (int_arm_neon_vtbx3 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1, + v8i8:$Vn2, v8i8:$Vm)), + (v8i8 (VTBX3Pseudo v8i8:$orig, + (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0, + v8i8:$Vn1, dsub_1, + v8i8:$Vn2, dsub_2, + (v8i8 (IMPLICIT_DEF)), dsub_3), + v8i8:$Vm))>; + +def : Pat<(v8i8 (int_arm_neon_vtbl4 v8i8:$Vn0, v8i8:$Vn1, + v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)), + (v8i8 (VTBL4Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0, + v8i8:$Vn1, dsub_1, + v8i8:$Vn2, dsub_2, + v8i8:$Vn3, dsub_3), + v8i8:$Vm))>; +def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1, + v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)), + (v8i8 (VTBX4Pseudo v8i8:$orig, + (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0, + v8i8:$Vn1, dsub_1, + v8i8:$Vn2, dsub_2, + v8i8:$Vn3, dsub_3), + v8i8:$Vm))>; + +// 
VRINT : Vector Rounding
+multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
+  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
+    def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
+                       !strconcat("vrint", op), "f32",
+                       v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
+      let Inst{9-7} = op9_7;
+    }
+    def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
+                       !strconcat("vrint", op), "f32",
+                       v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
+      let Inst{9-7} = op9_7;
+    }
+    def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
+                       !strconcat("vrint", op), "f16",
+                       v4f16, v4f16, Int>,
+             Requires<[HasV8, HasNEON, HasFullFP16]> {
+      let Inst{9-7} = op9_7;
+    }
+    def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
+                       !strconcat("vrint", op), "f16",
+                       v8f16, v8f16, Int>,
+             Requires<[HasV8, HasNEON, HasFullFP16]> {
+      let Inst{9-7} = op9_7;
+    }
+  }
+
+  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
+                      (!cast<Instruction>(NAME#"Df") DPR:$Dd, DPR:$Dm)>;
+  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
+                      (!cast<Instruction>(NAME#"Qf") QPR:$Qd, QPR:$Qm)>;
+  let Predicates = [HasNEON, HasFullFP16] in {
+  def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Dd, $Dm"),
+                      (!cast<Instruction>(NAME#"Dh") DPR:$Dd, DPR:$Dm)>;
+  def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Qd, $Qm"),
+                      (!cast<Instruction>(NAME#"Qh") QPR:$Qd, QPR:$Qm)>;
+  }
+}
+
+defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
+defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
+defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
+defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
+defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
+defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;
+
+// Cryptography instructions
+let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
+    DecoderNamespace = "v8Crypto", hasSideEffects = 0 in {
+  class AES<string op, bit op7, bit op6, SDPatternOperator Int>
+    : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
+                 !strconcat("aes", op), "8", v16i8, v16i8, Int>,
+      Requires<[HasV8, HasCrypto]>;
+  class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
+    : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
+                  !strconcat("aes", op), "8", v16i8, v16i8, Int>,
+      Requires<[HasV8, HasCrypto]>;
+  class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
+              SDPatternOperator Int>
+    : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
+                 !strconcat("sha", op), "32", v4i32, v4i32, Int>,
+      Requires<[HasV8, HasCrypto]>;
+  class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
+                 SDPatternOperator Int>
+    : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
+                  !strconcat("sha", op), "32", v4i32, v4i32, Int>,
+      Requires<[HasV8, HasCrypto]>;
+  class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20,
+                 SDPatternOperator Int>
+    : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
+                 !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>,
+      Requires<[HasV8, HasCrypto]>;
+}
+
+def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
+def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>;
+def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
+def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>;
+
+def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>;
+def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
+def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
+def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>;
+def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>;
+def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>;
+def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
+def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
+def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
+def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;
+
+def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
+          (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG
+              (SHA1H (SUBREG_TO_REG (i64 0),
+                                    (f32 (COPY_TO_REGCLASS
i32:$Rn, SPR)), + ssub_0)), + ssub_0)), GPR)>; + +def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)), + (SHA1C v4i32:$hash_abcd, + (SUBREG_TO_REG (i64 0), + (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)), + ssub_0), + v4i32:$wk)>; + +def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)), + (SHA1M v4i32:$hash_abcd, + (SUBREG_TO_REG (i64 0), + (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)), + ssub_0), + v4i32:$wk)>; + +def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)), + (SHA1P v4i32:$hash_abcd, + (SUBREG_TO_REG (i64 0), + (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)), + ssub_0), + v4i32:$wk)>; + +//===----------------------------------------------------------------------===// +// NEON instructions for single-precision FP math +//===----------------------------------------------------------------------===// + +class N2VSPat + : NEONFPPat<(f32 (OpNode SPR:$a)), + (EXTRACT_SUBREG + (v2f32 (COPY_TO_REGCLASS (Inst + (INSERT_SUBREG + (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), + SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>; + +class N3VSPat + : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)), + (EXTRACT_SUBREG + (v2f32 (COPY_TO_REGCLASS (Inst + (INSERT_SUBREG + (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), + SPR:$a, ssub_0), + (INSERT_SUBREG + (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), + SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>; + +class N3VSPatFP16 + : NEONFPPat<(f16 (OpNode HPR:$a, HPR:$b)), + (EXTRACT_SUBREG + (v4f16 (COPY_TO_REGCLASS (Inst + (INSERT_SUBREG + (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)), + HPR:$a, ssub_0), + (INSERT_SUBREG + (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)), + HPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>; + +class N3VSMulOpPat + : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))), + (EXTRACT_SUBREG + (v2f32 (COPY_TO_REGCLASS (Inst + (INSERT_SUBREG + (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), + SPR:$acc, ssub_0), + (INSERT_SUBREG + (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), + SPR:$a, ssub_0), + (INSERT_SUBREG + (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), + SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>; + +class NVCVTIFPat + : NEONFPPat<(f32 (OpNode GPR:$a)), + (f32 (EXTRACT_SUBREG + (v2f32 (Inst + (INSERT_SUBREG + (v2f32 (IMPLICIT_DEF)), + (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))), + ssub_0))>; +class NVCVTFIPat + : NEONFPPat<(i32 (OpNode SPR:$a)), + (i32 (EXTRACT_SUBREG + (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$a, ssub_0))), + ssub_0))>; + +def : N3VSPat; +def : N3VSPat; +def : N3VSPat; +def : N3VSMulOpPat, + Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>; +def : N3VSMulOpPat, + Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>; +def : N3VSMulOpPat, + Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>; +def : N3VSMulOpPat, + Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>; +def : N2VSPat; +def : N2VSPat; +def : N3VSPatFP16, Requires<[HasFullFP16]>; +def : N3VSPatFP16, Requires<[HasFullFP16]>; +def : N3VSPat, Requires<[HasNEON]>; +def : N3VSPat, Requires<[HasNEON]>; +def : NVCVTFIPat; +def : NVCVTFIPat; +def : NVCVTIFPat; +def : NVCVTIFPat; + +// NEON doesn't have any f64 conversions, so provide patterns to make +// sure the VFP conversions match when extracting from a vector. 
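+// Illustration: with the VFPPat records below, a node like
+// (f64 (sint_to_fp (extractelt (v2i32 DPR:$src), 1))) selects to a single
+// VSITOD on the S-register overlapping that lane, instead of bouncing the
+// element through a GPR.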
+def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))), + (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>; +def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))), + (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>; +def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))), + (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>; +def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))), + (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>; + + +// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers. +def : Pat<(f32 (bitconvert GPR:$a)), + (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>, + Requires<[HasNEON, DontUseVMOVSR]>; +def : Pat<(arm_vmovsr GPR:$a), + (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>, + Requires<[HasNEON, DontUseVMOVSR]>; + +//===----------------------------------------------------------------------===// +// Non-Instruction Patterns +//===----------------------------------------------------------------------===// + +// bit_convert +let Predicates = [IsLE] in { + def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>; + def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>; + def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>; +} +def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>; + def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>; + def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>; + def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>; + def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>; +} +def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>; +} +def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>; + def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>; + def : Pat<(f64 (bitconvert (v4f16 DPR:$src))), (f64 DPR:$src)>; + def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>; + def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>; + def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>; + def : Pat<(v4f16 (bitconvert (f64 DPR:$src))), (v4f16 DPR:$src)>; + def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>; +} +def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>; + def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>; +} + +let Predicates = [IsLE] in { + def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), 
(v2i64 QPR:$src)>; + def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>; + def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>; +} +def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>; +} +def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>; + def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (v8f16 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>; + def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>; +} +def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>; + def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>; + def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>; +} +def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; +} + +let Predicates = [IsBE] in { + // 64 bit conversions + def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>; + def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>; + def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>; + def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>; + def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (VREV64d8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (VREV32d8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (VREV16d8 DPR:$src)>; + def : 
Pat<(v8i8 (bitconvert (f64 DPR:$src))), (VREV64d8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (VREV32d8 DPR:$src)>; + def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(f64 (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>; + def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>; + def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>; + def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>; + def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>; + + // 128 bit conversions + def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>; + def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>; + def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>; + def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>; + def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8 QPR:$src)>; + def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>; + def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>; + def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>; + def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>; + def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>; +} + +// Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian +def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)), + (VREV64q8 (VLD1q8 addrmode6:$addr))>, Requires<[IsBE]>; +def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr), + (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>, Requires<[IsBE]>; +def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)), + (VREV64q16 (VLD1q16 addrmode6:$addr))>, Requires<[IsBE]>; +def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), + (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>, Requires<[IsBE]>; + +// Fold 
extracting an element out of a v2i32 into a vfp register. +def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))), + (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>; + +// Vector lengthening move with load, matching extending loads. + +// extload, zextload and sextload for a standard lengthening load. Example: +// Lengthen_Single<"8", "i16", "8"> = +// Pat<(v8i16 (extloadvi8 addrmode6:$addr)) +// (VMOVLuv8i16 (VLD1d8 addrmode6:$addr, +// (f64 (IMPLICIT_DEF)), (i32 0)))>; +multiclass Lengthen_Single { + let AddedComplexity = 10 in { + def _Any : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("extloadvi" # SrcTy) addrmode6:$addr)), + (!cast("VMOVLuv" # DestLanes # DestTy) + (!cast("VLD1d" # SrcTy) addrmode6:$addr))>; + + def _Z : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("zextloadvi" # SrcTy) addrmode6:$addr)), + (!cast("VMOVLuv" # DestLanes # DestTy) + (!cast("VLD1d" # SrcTy) addrmode6:$addr))>; + + def _S : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("sextloadvi" # SrcTy) addrmode6:$addr)), + (!cast("VMOVLsv" # DestLanes # DestTy) + (!cast("VLD1d" # SrcTy) addrmode6:$addr))>; + } +} + +// extload, zextload and sextload for a lengthening load which only uses +// half the lanes available. Example: +// Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> = +// Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)), +// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr, +// (f64 (IMPLICIT_DEF)), (i32 0))), +// dsub_0)>; +multiclass Lengthen_HalfSingle { + def _Any : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("extloadv" # SrcTy) addrmode6oneL32:$addr)), + (EXTRACT_SUBREG (!cast("VMOVLuv" # InsnLanes # InsnTy) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0)>; + def _Z : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("zextloadv" # SrcTy) addrmode6oneL32:$addr)), + (EXTRACT_SUBREG (!cast("VMOVLuv" # InsnLanes # InsnTy) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0)>; + def _S : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("sextloadv" # SrcTy) addrmode6oneL32:$addr)), + (EXTRACT_SUBREG (!cast("VMOVLsv" # InsnLanes # InsnTy) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0)>; +} + +// The following class definition is basically a copy of the +// Lengthen_HalfSingle definition above, however with an additional parameter +// "RevLanes" to select the correct VREV32dXX instruction. This is to convert +// data loaded by VLD1LN into proper vector format in big endian mode. +multiclass Lengthen_HalfSingle_Big_Endian { + def _Any : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("extloadv" # SrcTy) addrmode6oneL32:$addr)), + (EXTRACT_SUBREG (!cast("VMOVLuv" # InsnLanes # InsnTy) + (!cast("VREV32d" # RevLanes) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0)>; + def _Z : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("zextloadv" # SrcTy) addrmode6oneL32:$addr)), + (EXTRACT_SUBREG (!cast("VMOVLuv" # InsnLanes # InsnTy) + (!cast("VREV32d" # RevLanes) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0)>; + def _S : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("sextloadv" # SrcTy) addrmode6oneL32:$addr)), + (EXTRACT_SUBREG (!cast("VMOVLsv" # InsnLanes # InsnTy) + (!cast("VREV32d" # RevLanes) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0)>; +} + +// extload, zextload and sextload for a lengthening load followed by another +// lengthening load, to quadruple the initial length. 
+// +// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> = +// Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr)) +// (EXTRACT_SUBREG (VMOVLuv4i32 +// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr, +// (f64 (IMPLICIT_DEF)), +// (i32 0))), +// dsub_0)), +// dsub_0)>; +multiclass Lengthen_Double { + def _Any : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("extloadv" # SrcTy) addrmode6oneL32:$addr)), + (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0))>; + def _Z : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("zextloadv" # SrcTy) addrmode6oneL32:$addr)), + (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0))>; + def _S : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("sextloadv" # SrcTy) addrmode6oneL32:$addr)), + (!cast("VMOVLsv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast("VMOVLsv" # Insn1Lanes # Insn1Ty) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0))>; +} + +// The following class definition is basically a copy of the +// Lengthen_Double definition above, however with an additional parameter +// "RevLanes" to select the correct VREV32dXX instruction. This is to convert +// data loaded by VLD1LN into proper vector format in big endian mode. +multiclass Lengthen_Double_Big_Endian { + def _Any : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("extloadv" # SrcTy) addrmode6oneL32:$addr)), + (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) + (!cast("VREV32d" # RevLanes) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0))>; + def _Z : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("zextloadv" # SrcTy) addrmode6oneL32:$addr)), + (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) + (!cast("VREV32d" # RevLanes) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0))>; + def _S : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("sextloadv" # SrcTy) addrmode6oneL32:$addr)), + (!cast("VMOVLsv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast("VMOVLsv" # Insn1Lanes # Insn1Ty) + (!cast("VREV32d" # RevLanes) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0))>; +} + +// extload, zextload and sextload for a lengthening load followed by another +// lengthening load, to quadruple the initial length, but which ends up only +// requiring half the available lanes (a 64-bit outcome instead of a 128-bit). 
+// +// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> = +// Pat<(v2i32 (extloadvi8 addrmode6:$addr)) +// (EXTRACT_SUBREG (VMOVLuv4i32 +// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr, +// (f64 (IMPLICIT_DEF)), (i32 0))), +// dsub_0)), +// dsub_0)>; +multiclass Lengthen_HalfDouble { + def _Any : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("extloadv" # SrcTy) addrmode6:$addr)), + (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0)), + dsub_0)>; + def _Z : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("zextloadv" # SrcTy) addrmode6:$addr)), + (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0)), + dsub_0)>; + def _S : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("sextloadv" # SrcTy) addrmode6:$addr)), + (EXTRACT_SUBREG (!cast("VMOVLsv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast("VMOVLsv" # Insn1Lanes # Insn1Ty) + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0)), + dsub_0)>; +} + +// The following class definition is basically a copy of the +// Lengthen_HalfDouble definition above, however with an additional VREV16d8 +// instruction to convert data loaded by VLD1LN into proper vector format +// in big endian mode. +multiclass Lengthen_HalfDouble_Big_Endian { + def _Any : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("extloadv" # SrcTy) addrmode6:$addr)), + (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) + (!cast("VREV16d8") + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0)), + dsub_0)>; + def _Z : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("zextloadv" # SrcTy) addrmode6:$addr)), + (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) + (!cast("VREV16d8") + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0)), + dsub_0)>; + def _S : Pat<(!cast("v" # DestLanes # DestTy) + (!cast("sextloadv" # SrcTy) addrmode6:$addr)), + (EXTRACT_SUBREG (!cast("VMOVLsv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast("VMOVLsv" # Insn1Lanes # Insn1Ty) + (!cast("VREV16d8") + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0)), + dsub_0)>; +} + +defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16 +defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32 +defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64 + +let Predicates = [IsLE] in { + defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16 + defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32 + + // Double lengthening - v4i8 -> v4i16 -> v4i32 + defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">; + // v2i8 -> v2i16 -> v2i32 + defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">; + // v2i16 -> v2i32 -> v2i64 + defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">; +} + +let Predicates = [IsBE] in { + defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16 + defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32 + + // Double lengthening - v4i8 -> v4i16 -> v4i32 + defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">; + // v2i8 -> v2i16 -> 
v2i32
+  defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">;
+  // v2i16 -> v2i32 -> v2i64
+  defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">;
+}
+
+// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
+let Predicates = [IsLE] in {
+  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
+        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+          (VLD1LNd16 addrmode6:$addr,
+                     (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
+        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+          (VLD1LNd16 addrmode6:$addr,
+                     (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
+        (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
+          (VLD1LNd16 addrmode6:$addr,
+                     (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+}
+// The following patterns are basically a copy of the patterns above,
+// however with an additional VREV16d instruction to convert data
+// loaded by VLD1LN into proper vector format in big endian mode.
+let Predicates = [IsBE] in {
+  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
+        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+          (!cast<Instruction>("VREV16d8")
+            (VLD1LNd16 addrmode6:$addr,
+                       (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
+  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
+        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+          (!cast<Instruction>("VREV16d8")
+            (VLD1LNd16 addrmode6:$addr,
+                       (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
+  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
+        (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
+          (!cast<Instruction>("VREV16d8")
+            (VLD1LNd16 addrmode6:$addr,
+                       (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
+}
+
+def : Pat<(v2i64 (concat_vectors DPR:$Dn, DPR:$Dm)),
+          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+def : Pat<(v4i32 (concat_vectors DPR:$Dn, DPR:$Dm)),
+          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+def : Pat<(v8i16 (concat_vectors DPR:$Dn, DPR:$Dm)),
+          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+def : Pat<(v16i8 (concat_vectors DPR:$Dn, DPR:$Dm)),
+          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)),
+          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+
+//===----------------------------------------------------------------------===//
+// Assembler aliases
+//
+
+def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
+                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
+def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
+                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;
+
+// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
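+// Illustration: with these aliases "vand.i32 q0, q1, q2", "vand.8 q0, q1, q2"
+// and the untyped "vand q0, q1, q2" all assemble to the same VANDq encoding;
+// the data type suffix is accepted purely for source compatibility.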
+defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", + (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", + (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", + (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", + (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", + (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", + (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", + (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", + (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +// ... two-operand aliases +defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", + (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", + (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", + (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", + (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", + (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", + (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +// ... immediates +def : NEONInstAlias<"vand${p}.i16 $Vd, $imm", + (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>; +def : NEONInstAlias<"vand${p}.i32 $Vd, $imm", + (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>; +def : NEONInstAlias<"vand${p}.i16 $Vd, $imm", + (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>; +def : NEONInstAlias<"vand${p}.i32 $Vd, $imm", + (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>; + + +// VLD1 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
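+// Illustration (rough description of the flow): "vld1.8 {d0[3]}, [r0]" first
+// matches VLD1LNdAsm_8 below, and the asm parser later rewrites it to the real
+// lane-indexed VLD1 instruction once the lane and alignment have been checked.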
+def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr", + (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; +def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr", + (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, + pred:$p)>; +def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr", + (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; + +def VLD1LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!", + (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; +def VLD1LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!", + (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, + pred:$p)>; +def VLD1LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!", + (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; +def VLD1LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm", + (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, + rGPR:$Rm, pred:$p)>; +def VLD1LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm", + (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, + rGPR:$Rm, pred:$p)>; +def VLD1LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm", + (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, + rGPR:$Rm, pred:$p)>; + + +// VST1 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. +def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr", + (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; +def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr", + (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, + pred:$p)>; +def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr", + (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; + +def VST1LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!", + (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; +def VST1LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!", + (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, + pred:$p)>; +def VST1LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!", + (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; +def VST1LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm", + (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, + rGPR:$Rm, pred:$p)>; +def VST1LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm", + (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, + rGPR:$Rm, pred:$p)>; +def VST1LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm", + (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, + rGPR:$Rm, pred:$p)>; + +// VLD2 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr", + (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, + pred:$p)>; +def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", + (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; +def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", + (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>; +def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", + (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; +def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", + (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; + +def VLD2LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!", + (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, + pred:$p)>; +def VLD2LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", + (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; +def VLD2LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", + (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; +def VLD2LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", + (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; +def VLD2LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", + (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; +def VLD2LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm", + (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, + rGPR:$Rm, pred:$p)>; +def VLD2LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", + (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, + rGPR:$Rm, pred:$p)>; +def VLD2LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", + (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, + rGPR:$Rm, pred:$p)>; +def VLD2LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", + (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, + rGPR:$Rm, pred:$p)>; +def VLD2LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", + (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, + rGPR:$Rm, pred:$p)>; + + +// VST2 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr", + (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, + pred:$p)>; +def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr", + (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; +def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr", + (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; +def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr", + (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; +def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr", + (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; + +def VST2LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!", + (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, + pred:$p)>; +def VST2LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!", + (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; +def VST2LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!", + (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; +def VST2LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!", + (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; +def VST2LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!", + (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; +def VST2LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm", + (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, + rGPR:$Rm, pred:$p)>; +def VST2LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm", + (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, + rGPR:$Rm, pred:$p)>; +def VST2LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm", + (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, + rGPR:$Rm, pred:$p)>; +def VST2LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm", + (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, + rGPR:$Rm, pred:$p)>; +def VST2LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm", + (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, + rGPR:$Rm, pred:$p)>; + +// VLD3 all-lanes pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
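+// Illustration: the all-lanes syntax "vld3.16 {d0[], d1[], d2[]}, [r0]" is the
+// kind of form these VLD3DUP pseudos exist to accept.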
+def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; +def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; +def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; +def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; +def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; +def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; + +def VLD3DUPdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; +def VLD3DUPdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; +def VLD3DUPdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; +def VLD3DUPqWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; +def VLD3DUPqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; +def VLD3DUPqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; +def VLD3DUPdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPqWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, + rGPR:$Rm, pred:$p)>; + + +// VLD3 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; +def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; +def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; +def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; +def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; + +def VLD3LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; +def VLD3LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; +def VLD3LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; +def VLD3LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; +def VLD3LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; +def VLD3LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeDHWordIndexed:$list, + addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; +def VLD3LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQHWordIndexed:$list, + addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; +def VLD3LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, + rGPR:$Rm, pred:$p)>; + +// VLD3 multiple structure pseudo-instructions. These need special handling for +// the vector operands that the normal instructions don't yet model. +// FIXME: Remove these when the register classes and instructions are updated. 
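+// Illustrative forms accepted through these pseudos (examples, not new defs):
+//     vld3.8  {d0, d1, d2}, [r0:64]     @ only 64-bit alignment is permitted
+//     vld3.16 {d0, d2, d4}, [r1]!
+//     vld3.32 {d0, d1, d2}, [r2], r4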
+def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; +def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; +def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; +def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; +def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; +def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; + +def VLD3dWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; +def VLD3dWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; +def VLD3dWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; +def VLD3qWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; +def VLD3qWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; +def VLD3qWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; +def VLD3dWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6align64:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3dWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6align64:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3dWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6align64:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3qWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6align64:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3qWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6align64:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3qWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6align64:$addr, + rGPR:$Rm, pred:$p)>; + +// VST3 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
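+// e.g. (examples only) stores of one element from each of three registers:
+//     vst3.8  {d0[7], d1[7], d2[7]}, [r0]
+//     vst3.32 {d16[1], d18[1], d20[1]}, [r1]!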
+def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", + (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; +def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", + (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; +def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", + (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; +def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", + (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; +def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", + (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; + +def VST3LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", + (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; +def VST3LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", + (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; +def VST3LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", + (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; +def VST3LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", + (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; +def VST3LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", + (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; +def VST3LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, + rGPR:$Rm, pred:$p)>; +def VST3LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeDHWordIndexed:$list, + addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; +def VST3LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, + rGPR:$Rm, pred:$p)>; +def VST3LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQHWordIndexed:$list, + addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; +def VST3LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, + rGPR:$Rm, pred:$p)>; + + +// VST3 multiple structure pseudo-instructions. These need special handling for +// the vector operands that the normal instructions don't yet model. +// FIXME: Remove these when the register classes and instructions are updated. 
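+// Example forms (illustrative only), showing the two writeback flavours that
+// the _fixed and _register pseudos below distinguish:
+//     vst3.16 {d0, d1, d2}, [r0:64]!       @ post-increment by 24 bytes
+//     vst3.16 {d0, d1, d2}, [r0:64], r2    @ post-increment by r2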
+def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; +def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; +def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; +def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; +def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; +def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; + +def VST3dWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; +def VST3dWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; +def VST3dWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; +def VST3qWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; +def VST3qWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; +def VST3qWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; +def VST3dWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6align64:$addr, + rGPR:$Rm, pred:$p)>; +def VST3dWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6align64:$addr, + rGPR:$Rm, pred:$p)>; +def VST3dWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6align64:$addr, + rGPR:$Rm, pred:$p)>; +def VST3qWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6align64:$addr, + rGPR:$Rm, pred:$p)>; +def VST3qWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6align64:$addr, + rGPR:$Rm, pred:$p)>; +def VST3qWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6align64:$addr, + rGPR:$Rm, pred:$p)>; + +// VLD4 all-lanes pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
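+// For illustration, the alignment choices below track the element size
+// (register numbers are arbitrary examples):
+//     vld4.8  {d0[], d1[], d2[], d3[]}, [r0:32]
+//     vld4.16 {d0[], d1[], d2[], d3[]}, [r1:64]
+//     vld4.32 {d0[], d2[], d4[], d6[]}, [r2:128]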
+def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr, + pred:$p)>; +def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr, + pred:$p)>; +def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr, + pred:$p)>; +def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr, + pred:$p)>; +def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr, + pred:$p)>; +def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr, + pred:$p)>; + +def VLD4DUPdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr, + pred:$p)>; +def VLD4DUPdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr, + pred:$p)>; +def VLD4DUPdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr, + pred:$p)>; +def VLD4DUPqWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr, + pred:$p)>; +def VLD4DUPqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr, + pred:$p)>; +def VLD4DUPqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr, + pred:$p)>; +def VLD4DUPdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourDAllLanes:$list, + addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>; +def VLD4DUPqWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourQAllLanes:$list, + addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>; + + +// VLD4 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
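+// e.g. (examples only):
+//     vld4.8  {d0[3], d1[3], d2[3], d3[3]}, [r0:32]
+//     vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [r1:128], r2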
+def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, + pred:$p)>; +def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; +def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; +def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; +def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; + +def VLD4LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, + pred:$p)>; +def VLD4LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; +def VLD4LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; +def VLD4LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; +def VLD4LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; +def VLD4LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourDWordIndexed:$list, + addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>; +def VLD4LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourQWordIndexed:$list, + addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>; + + + +// VLD4 multiple structure pseudo-instructions. These need special handling for +// the vector operands that the normal instructions don't yet model. +// FIXME: Remove these when the register classes and instructions are updated. 
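+// Illustrative examples; the four-register loads allow up to 256-bit alignment:
+//     vld4.16 {d0, d1, d2, d3}, [r0:256]!
+//     vld4.32 {d0, d2, d4, d6}, [r1], r2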
+def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; +def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; +def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; +def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; +def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; +def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; + +def VLD4dWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; +def VLD4dWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; +def VLD4dWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; +def VLD4qWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; +def VLD4qWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; +def VLD4qWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; +def VLD4dWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4dWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4dWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4qWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4qWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4qWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + rGPR:$Rm, pred:$p)>; + +// VST4 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
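+// e.g. (illustrative only):
+//     vst4.16 {d0[2], d1[2], d2[2], d3[2]}, [r0:64]
+//     vst4.32 {d0[0], d2[0], d4[0], d6[0]}, [r1:128]!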
+def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", + (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, + pred:$p)>; +def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", + (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; +def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", + (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; +def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", + (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; +def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", + (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; + +def VST4LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", + (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, + pred:$p)>; +def VST4LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", + (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; +def VST4LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", + (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; +def VST4LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", + (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; +def VST4LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", + (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; +def VST4LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, + rGPR:$Rm, pred:$p)>; +def VST4LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, + rGPR:$Rm, pred:$p)>; +def VST4LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourDWordIndexed:$list, + addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>; +def VST4LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, + rGPR:$Rm, pred:$p)>; +def VST4LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourQWordIndexed:$list, + addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>; + + +// VST4 multiple structure pseudo-instructions. These need special handling for +// the vector operands that the normal instructions don't yet model. +// FIXME: Remove these when the register classes and instructions are updated. 
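+// Example syntax covered by these pseudos (register choices arbitrary):
+//     vst4.8 {d16, d17, d18, d19}, [r1:64]
+//     vst4.8 {d16, d18, d20, d22}, [r1:256], r2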
+def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
+               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+                    pred:$p)>;
+def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
+               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+                    pred:$p)>;
+def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
+               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+                    pred:$p)>;
+def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
+               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+                    pred:$p)>;
+def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
+               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+                    pred:$p)>;
+def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
+               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+                    pred:$p)>;
+
+def VST4dWB_fixed_Asm_8 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
+               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+                    pred:$p)>;
+def VST4dWB_fixed_Asm_16 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
+               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+                    pred:$p)>;
+def VST4dWB_fixed_Asm_32 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
+               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+                    pred:$p)>;
+def VST4qWB_fixed_Asm_8 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
+               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+                    pred:$p)>;
+def VST4qWB_fixed_Asm_16 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
+               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+                    pred:$p)>;
+def VST4qWB_fixed_Asm_32 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
+               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+                    pred:$p)>;
+def VST4dWB_register_Asm_8 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
+               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+                    rGPR:$Rm, pred:$p)>;
+def VST4dWB_register_Asm_16 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
+               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+                    rGPR:$Rm, pred:$p)>;
+def VST4dWB_register_Asm_32 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
+               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+                    rGPR:$Rm, pred:$p)>;
+def VST4qWB_register_Asm_8 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
+               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+                    rGPR:$Rm, pred:$p)>;
+def VST4qWB_register_Asm_16 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
+               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+                    rGPR:$Rm, pred:$p)>;
+def VST4qWB_register_Asm_32 :
+        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
+               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+                    rGPR:$Rm, pred:$p)>;
+
+// VMOV/VMVN takes an optional datatype suffix
+defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
+                         (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
+                         (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
+
+defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
+                         (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
+                         (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;
+
+// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
+// D-register versions.
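+// (Illustration: "vcle.s8 d0, d1, d2", i.e. d0 = (d1 <= d2), assembles to the
+// same encoding as "vcge.s8 d0, d2, d1"; the aliases below just swap Dn/Dm.)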
+def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm", + (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm", + (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm", + (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm", + (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm", + (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm", + (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm", + (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +let Predicates = [HasNEON, HasFullFP16] in +def : NEONInstAlias<"vcle${p}.f16 $Dd, $Dn, $Dm", + (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +// Q-register versions. +def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm", + (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm", + (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm", + (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm", + (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm", + (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm", + (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm", + (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +let Predicates = [HasNEON, HasFullFP16] in +def : NEONInstAlias<"vcle${p}.f16 $Qd, $Qn, $Qm", + (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; + +// VCLT (register) is an assembler alias for VCGT w/ the operands reversed. +// D-register versions. +def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm", + (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm", + (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm", + (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm", + (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm", + (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm", + (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm", + (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +let Predicates = [HasNEON, HasFullFP16] in +def : NEONInstAlias<"vclt${p}.f16 $Dd, $Dn, $Dm", + (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +// Q-register versions. 
+def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm", + (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm", + (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm", + (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm", + (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm", + (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm", + (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm", + (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +let Predicates = [HasNEON, HasFullFP16] in +def : NEONInstAlias<"vclt${p}.f16 $Qd, $Qn, $Qm", + (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; + +// VSWP allows, but does not require, a type suffix. +defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", + (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", + (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>; + +// VBIF, VBIT, and VBSL allow, but do not require, a type suffix. +defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm", + (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm", + (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm", + (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm", + (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm", + (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm", + (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; + +// "vmov Rd, #-imm" can be handled via "vmvn". +def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm", + (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; +def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm", + (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; +def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm", + (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; +def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm", + (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; + +// 'gas' compatibility aliases for quad-word instructions. Strictly speaking, +// these should restrict to just the Q register variants, but the register +// classes are enough to match correctly regardless, so we keep it simple +// and just use MnemonicAlias. +def : NEONMnemonicAlias<"vbicq", "vbic">; +def : NEONMnemonicAlias<"vandq", "vand">; +def : NEONMnemonicAlias<"veorq", "veor">; +def : NEONMnemonicAlias<"vorrq", "vorr">; + +def : NEONMnemonicAlias<"vmovq", "vmov">; +def : NEONMnemonicAlias<"vmvnq", "vmvn">; +// Explicit versions for floating point so that the FPImm variants get +// handled early. The parser gets confused otherwise. 
+def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">; +def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">; + +def : NEONMnemonicAlias<"vaddq", "vadd">; +def : NEONMnemonicAlias<"vsubq", "vsub">; + +def : NEONMnemonicAlias<"vminq", "vmin">; +def : NEONMnemonicAlias<"vmaxq", "vmax">; + +def : NEONMnemonicAlias<"vmulq", "vmul">; + +def : NEONMnemonicAlias<"vabsq", "vabs">; + +def : NEONMnemonicAlias<"vshlq", "vshl">; +def : NEONMnemonicAlias<"vshrq", "vshr">; + +def : NEONMnemonicAlias<"vcvtq", "vcvt">; + +def : NEONMnemonicAlias<"vcleq", "vcle">; +def : NEONMnemonicAlias<"vceqq", "vceq">; + +def : NEONMnemonicAlias<"vzipq", "vzip">; +def : NEONMnemonicAlias<"vswpq", "vswp">; + +def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">; +def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">; + + +// Alias for loading floating point immediates that aren't representable +// using the vmov.f32 encoding but the bitpattern is representable using +// the .i32 encoding. +def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm", + (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>; +def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm", + (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>; diff --git a/capstone/suite/synctools/tablegen/ARM/ARMInstrThumb.td b/capstone/suite/synctools/tablegen/ARM/ARMInstrThumb.td new file mode 100644 index 000000000..88aab47a7 --- /dev/null +++ b/capstone/suite/synctools/tablegen/ARM/ARMInstrThumb.td @@ -0,0 +1,1707 @@ +//===-- ARMInstrThumb.td - Thumb support for ARM -----------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Thumb instruction set. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Thumb specific DAG Nodes. +// + +def imm_sr_XFORM: SDNodeXFormgetZExtValue(); + return CurDAG->getTargetConstant((Imm == 32 ? 0 : Imm), SDLoc(N), MVT::i32); +}]>; +def ThumbSRImmAsmOperand: ImmAsmOperand<1,32> { let Name = "ImmThumbSR"; } +def imm_sr : Operand, PatLeaf<(imm), [{ + uint64_t Imm = N->getZExtValue(); + return Imm > 0 && Imm <= 32; +}], imm_sr_XFORM> { + let PrintMethod = "printThumbSRImm"; + let ParserMatchClass = ThumbSRImmAsmOperand; +} + +def imm0_7_neg : PatLeaf<(i32 imm), [{ + return (uint32_t)-N->getZExtValue() < 8; +}], imm_neg_XFORM>; + +def ThumbModImmNeg1_7AsmOperand : AsmOperandClass { let Name = "ThumbModImmNeg1_7"; } +def mod_imm1_7_neg : Operand, PatLeaf<(imm), [{ + unsigned Value = -(unsigned)N->getZExtValue(); + return 0 < Value && Value < 8; + }], imm_neg_XFORM> { + let ParserMatchClass = ThumbModImmNeg1_7AsmOperand; +} + +def ThumbModImmNeg8_255AsmOperand : AsmOperandClass { let Name = "ThumbModImmNeg8_255"; } +def mod_imm8_255_neg : Operand, PatLeaf<(imm), [{ + unsigned Value = -(unsigned)N->getZExtValue(); + return 7 < Value && Value < 256; + }], imm_neg_XFORM> { + let ParserMatchClass = ThumbModImmNeg8_255AsmOperand; +} + + +def imm0_255_comp : PatLeaf<(i32 imm), [{ + return ~((uint32_t)N->getZExtValue()) < 256; +}]>; + +def imm8_255_neg : PatLeaf<(i32 imm), [{ + unsigned Val = -N->getZExtValue(); + return Val >= 8 && Val < 256; +}], imm_neg_XFORM>; + +// Break imm's up into two pieces: an immediate + a left shift. 
+// thumb_immshifted to match and thumb_immshifted_val and thumb_immshifted_shamt
+// to get the val/shift pieces.
+def thumb_immshifted : PatLeaf<(imm), [{
+  return ARM_AM::isThumbImmShiftedVal((unsigned)N->getZExtValue());
+}]>;
+
+def thumb_immshifted_val : SDNodeXForm<imm, [{
+  unsigned V = ARM_AM::getThumbImmNonShiftedVal((unsigned)N->getZExtValue());
+  return CurDAG->getTargetConstant(V, SDLoc(N), MVT::i32);
+}]>;
+
+def thumb_immshifted_shamt : SDNodeXForm<imm, [{
+  unsigned V = ARM_AM::getThumbImmValShift((unsigned)N->getZExtValue());
+  return CurDAG->getTargetConstant(V, SDLoc(N), MVT::i32);
+}]>;
+
+def imm256_510 : ImmLeaf<i32, [{
+  return Imm >= 256 && Imm < 511;
+}]>;
+
+def thumb_imm256_510_addend : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(N->getZExtValue() - 255, SDLoc(N), MVT::i32);
+}]>;
+
+// Scaled 4 immediate.
+def t_imm0_1020s4_asmoperand: AsmOperandClass { let Name = "Imm0_1020s4"; }
+def t_imm0_1020s4 : Operand<i32> {
+  let PrintMethod = "printThumbS4ImmOperand";
+  let ParserMatchClass = t_imm0_1020s4_asmoperand;
+  let OperandType = "OPERAND_IMMEDIATE";
+}
+
+def t_imm0_508s4_asmoperand: AsmOperandClass { let Name = "Imm0_508s4"; }
+def t_imm0_508s4 : Operand<i32> {
+  let PrintMethod = "printThumbS4ImmOperand";
+  let ParserMatchClass = t_imm0_508s4_asmoperand;
+  let OperandType = "OPERAND_IMMEDIATE";
+}
+// Alias use only, so no printer is necessary.
+def t_imm0_508s4_neg_asmoperand: AsmOperandClass { let Name = "Imm0_508s4Neg"; }
+def t_imm0_508s4_neg : Operand<i32> {
+  let ParserMatchClass = t_imm0_508s4_neg_asmoperand;
+  let OperandType = "OPERAND_IMMEDIATE";
+}
+
+// Define Thumb specific addressing modes.
+
+// unsigned 8-bit, 2-scaled memory offset
+class OperandUnsignedOffset_b8s2 : AsmOperandClass {
+  let Name = "UnsignedOffset_b8s2";
+  let PredicateMethod = "isUnsignedOffset<8, 2>";
+}
+
+def UnsignedOffset_b8s2 : OperandUnsignedOffset_b8s2;
+
+// thumb style PC relative operand. signed, 8 bits magnitude,
+// two bits shift. can be represented as either [pc, #imm], #imm,
+// or relocatable expression...
+def ThumbMemPC : AsmOperandClass {
+  let Name = "ThumbMemPC";
+}
+
+let OperandType = "OPERAND_PCREL" in {
+def t_brtarget : Operand<OtherVT> {
+  let EncoderMethod = "getThumbBRTargetOpValue";
+  let DecoderMethod = "DecodeThumbBROperand";
+}
+
+// ADR instruction labels.
+def t_adrlabel : Operand<i32> {
+  let EncoderMethod = "getThumbAdrLabelOpValue";
+  let PrintMethod = "printAdrLabelOperand<2>";
+  let ParserMatchClass = UnsignedOffset_b8s2;
+}
+
+
+def thumb_br_target : Operand<OtherVT> {
+  let ParserMatchClass = ThumbBranchTarget;
+  let EncoderMethod = "getThumbBranchTargetOpValue";
+  let OperandType = "OPERAND_PCREL";
+}
+
+def thumb_bl_target : Operand<i32> {
+  let ParserMatchClass = ThumbBranchTarget;
+  let EncoderMethod = "getThumbBLTargetOpValue";
+  let DecoderMethod = "DecodeThumbBLTargetOperand";
+}
+
+// Target for BLX *from* thumb mode.
+def thumb_blx_target : Operand<i32> {
+  let ParserMatchClass = ARMBranchTarget;
+  let EncoderMethod = "getThumbBLXTargetOpValue";
+  let DecoderMethod = "DecodeThumbBLXOffset";
+}
+
+def thumb_bcc_target : Operand<OtherVT> {
+  let ParserMatchClass = ThumbBranchTarget;
+  let EncoderMethod = "getThumbBCCTargetOpValue";
+  let DecoderMethod = "DecodeThumbBCCTargetOperand";
+}
+
+def thumb_cb_target : Operand<OtherVT> {
+  let ParserMatchClass = ThumbBranchTarget;
+  let EncoderMethod = "getThumbCBTargetOpValue";
+  let DecoderMethod = "DecodeThumbCmpBROperand";
+}
+
+// t_addrmode_pc :=