Diffstat (limited to 'capstone/suite/synctools/tablegen/AArch64')
-rw-r--r--  capstone/suite/synctools/tablegen/AArch64/AArch64.td  579
-rw-r--r--  capstone/suite/synctools/tablegen/AArch64/AArch64CallingConvention.td  366
-rw-r--r--  capstone/suite/synctools/tablegen/AArch64/AArch64InstrAtomics.td  426
-rw-r--r--  capstone/suite/synctools/tablegen/AArch64/AArch64InstrFormats.td  10402
-rw-r--r--  capstone/suite/synctools/tablegen/AArch64/AArch64InstrInfo.td  6494
-rw-r--r--  capstone/suite/synctools/tablegen/AArch64/AArch64RegisterBanks.td  20
-rw-r--r--  capstone/suite/synctools/tablegen/AArch64/AArch64RegisterInfo.td  1113
-rw-r--r--  capstone/suite/synctools/tablegen/AArch64/AArch64SVEInstrInfo.td  1024
-rw-r--r--  capstone/suite/synctools/tablegen/AArch64/AArch64SchedA53.td  295
-rw-r--r--  capstone/suite/synctools/tablegen/AArch64/AArch64SchedA57.td  668
-rw-r--r--  capstone/suite/synctools/tablegen/AArch64/AArch64SchedA57WriteRes.td  544
-rw-r--r--  capstone/suite/synctools/tablegen/AArch64/AArch64SchedCyclone.td  871
-rw-r--r--  capstone/suite/synctools/tablegen/AArch64/AArch64SchedExynosM1.td  847
-rw-r--r--  capstone/suite/synctools/tablegen/AArch64/AArch64SchedExynosM3.td  860
-rw-r--r--  capstone/suite/synctools/tablegen/AArch64/AArch64SchedFalkor.td  119
-rw-r--r--  capstone/suite/synctools/tablegen/AArch64/AArch64SchedFalkorDetails.td  1292
-rw-r--r--  capstone/suite/synctools/tablegen/AArch64/AArch64SchedKryo.td  138
-rw-r--r--  capstone/suite/synctools/tablegen/AArch64/AArch64SchedKryoDetails.td  2378
-rw-r--r--  capstone/suite/synctools/tablegen/AArch64/AArch64SchedThunderX.td  357
-rw-r--r--  capstone/suite/synctools/tablegen/AArch64/AArch64SchedThunderX2T99.td  1880
-rw-r--r--  capstone/suite/synctools/tablegen/AArch64/AArch64Schedule.td  106
-rw-r--r--  capstone/suite/synctools/tablegen/AArch64/AArch64SystemOperands.td  1332
-rw-r--r--  capstone/suite/synctools/tablegen/AArch64/SVEInstrFormats.td  4456
23 files changed, 36567 insertions, 0 deletions
diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64.td b/capstone/suite/synctools/tablegen/AArch64/AArch64.td
new file mode 100644
index 000000000..a69d38144
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/AArch64/AArch64.td
@@ -0,0 +1,579 @@
+//=- AArch64.td - Describe the AArch64 Target Machine --------*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing.
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// AArch64 Subtarget features.
+//
+
+def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true",
+ "Enable ARMv8 FP">;
+
+def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
+ "Enable Advanced SIMD instructions", [FeatureFPARMv8]>;
+
+def FeatureSM4 : SubtargetFeature<
+ "sm4", "HasSM4", "true",
+ "Enable SM3 and SM4 support", [FeatureNEON]>;
+
+def FeatureSHA2 : SubtargetFeature<
+ "sha2", "HasSHA2", "true",
+ "Enable SHA1 and SHA256 support", [FeatureNEON]>;
+
+def FeatureSHA3 : SubtargetFeature<
+ "sha3", "HasSHA3", "true",
+ "Enable SHA512 and SHA3 support", [FeatureNEON, FeatureSHA2]>;
+
+def FeatureAES : SubtargetFeature<
+ "aes", "HasAES", "true",
+ "Enable AES support", [FeatureNEON]>;
+
+// Crypto has been split up and any combination is now valid (see the
+// crypto definitions above). Also, crypto is now context-sensitive:
+// it has a different meaning for e.g. Armv8.4 than it has for Armv8.2.
+// Therefore, we rely on Clang, the user-interfacing tool, to pass on the
+// appropriate crypto options. But here in the backend, crypto has very little
+// meaning anymore. We kept the Crypto definition here for backward
+// compatibility; it now implies features SHA2 and AES, which was the
+// "traditional" meaning of Crypto.
+def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
+ "Enable cryptographic instructions", [FeatureNEON, FeatureSHA2, FeatureAES]>;
+
+def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
+ "Enable ARMv8 CRC-32 checksum instructions">;
+
+def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true",
+ "Enable ARMv8 Reliability, Availability and Serviceability Extensions">;
+
+def FeatureLSE : SubtargetFeature<"lse", "HasLSE", "true",
+ "Enable ARMv8.1 Large System Extension (LSE) atomic instructions">;
+
+def FeatureRDM : SubtargetFeature<"rdm", "HasRDM", "true",
+ "Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions">;
+
+def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
+ "Enable ARMv8 PMUv3 Performance Monitors extension">;
+
+def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
+ "Full FP16", [FeatureFPARMv8]>;
+
+def FeatureSPE : SubtargetFeature<"spe", "HasSPE", "true",
+ "Enable Statistical Profiling extension">;
+
+def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true",
+ "Enable Scalable Vector Extension (SVE) instructions">;
+
+/// Cyclone has register move instructions which are "free".
+def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
+ "Has zero-cycle register moves">;
+
+/// Cyclone has instructions which zero registers for "free".
+def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
+ "Has zero-cycle zeroing instructions">;
+
+/// ... but the floating-point version doesn't quite work in rare cases on older
+/// CPUs.
+def FeatureZCZeroingFPWorkaround : SubtargetFeature<"zcz-fp-workaround",
+ "HasZeroCycleZeroingFPWorkaround", "true",
+ "The zero-cycle floating-point zeroing instruction has a bug">;
+
+def FeatureStrictAlign : SubtargetFeature<"strict-align",
+ "StrictAlign", "true",
+ "Disallow all unaligned memory "
+ "access">;
+
+def FeatureReserveX18 : SubtargetFeature<"reserve-x18", "ReserveX18", "true",
+ "Reserve X18, making it unavailable "
+ "as a GPR">;
+
+def FeatureReserveX20 : SubtargetFeature<"reserve-x20", "ReserveX20", "true",
+ "Reserve X20, making it unavailable "
+ "as a GPR">;
+
+def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
+ "Use alias analysis during codegen">;
+
+def FeatureBalanceFPOps : SubtargetFeature<"balance-fp-ops", "BalanceFPOps",
+ "true",
+ "balance mix of odd and even D-registers for fp multiply(-accumulate) ops">;
+
+def FeaturePredictableSelectIsExpensive : SubtargetFeature<
+ "predictable-select-expensive", "PredictableSelectIsExpensive", "true",
+ "Prefer likely predicted branches over selects">;
+
+def FeatureCustomCheapAsMoveHandling : SubtargetFeature<"custom-cheap-as-move",
+ "CustomAsCheapAsMove", "true",
+ "Use custom code for TargetInstrInfo::isAsCheapAsAMove()">;
+
+def FeatureExynosCheapAsMoveHandling : SubtargetFeature<"exynos-cheap-as-move",
+ "ExynosAsCheapAsMove", "true",
+ "Use Exynos specific code in TargetInstrInfo::isAsCheapAsAMove()",
+ [FeatureCustomCheapAsMoveHandling]>;
+
+def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
+ "UsePostRAScheduler", "true", "Schedule again after register allocation">;
+
+def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store",
+ "Misaligned128StoreIsSlow", "true", "Misaligned 128 bit stores are slow">;
+
+def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128",
+ "Paired128IsSlow", "true", "Paired 128 bit loads and stores are slow">;
+
+def FeatureSlowSTRQro : SubtargetFeature<"slow-strqro-store", "STRQroIsSlow",
+ "true", "STR of Q register with register offset is slow">;
+
+def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature<
+ "alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern",
+ "true", "Use alternative pattern for sextload convert to f32">;
+
+def FeatureArithmeticBccFusion : SubtargetFeature<
+ "arith-bcc-fusion", "HasArithmeticBccFusion", "true",
+ "CPU fuses arithmetic+bcc operations">;
+
+def FeatureArithmeticCbzFusion : SubtargetFeature<
+ "arith-cbz-fusion", "HasArithmeticCbzFusion", "true",
+ "CPU fuses arithmetic + cbz/cbnz operations">;
+
+def FeatureFuseAddress : SubtargetFeature<
+ "fuse-address", "HasFuseAddress", "true",
+ "CPU fuses address generation and memory operations">;
+
+def FeatureFuseAES : SubtargetFeature<
+ "fuse-aes", "HasFuseAES", "true",
+ "CPU fuses AES crypto operations">;
+
+def FeatureFuseCCSelect : SubtargetFeature<
+ "fuse-csel", "HasFuseCCSelect", "true",
+ "CPU fuses conditional select operations">;
+
+def FeatureFuseLiterals : SubtargetFeature<
+ "fuse-literals", "HasFuseLiterals", "true",
+ "CPU fuses literal generation operations">;
+
+def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
+ "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
+ "Disable latency scheduling heuristic">;
+
+def FeatureRCPC : SubtargetFeature<"rcpc", "HasRCPC", "true",
+ "Enable support for RCPC extension">;
+
+def FeatureUseRSqrt : SubtargetFeature<
+ "use-reciprocal-square-root", "UseRSqrt", "true",
+ "Use the reciprocal square root approximation">;
+
+def FeatureDotProd : SubtargetFeature<
+ "dotprod", "HasDotProd", "true",
+ "Enable dot product support">;
+
+def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates",
+ "NegativeImmediates", "false",
+ "Convert immediates and instructions "
+ "to their negated or complemented "
+ "equivalent when the immediate does "
+ "not fit in the encoding.">;
+
+def FeatureLSLFast : SubtargetFeature<
+ "lsl-fast", "HasLSLFast", "true",
+ "CPU has a fastpath logical shift of up to 3 places">;
+
+def FeatureAggressiveFMA :
+ SubtargetFeature<"aggressive-fma",
+ "HasAggressiveFMA",
+ "true",
+ "Enable Aggressive FMA for floating-point.">;
+
+//===----------------------------------------------------------------------===//
+// Architectures.
+//
+
+def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
+ "Support ARM v8.1a instructions", [FeatureCRC, FeatureLSE, FeatureRDM]>;
+
+def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
+ "Support ARM v8.2a instructions", [HasV8_1aOps, FeatureRAS]>;
+
+def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true",
+ "Support ARM v8.3a instructions", [HasV8_2aOps, FeatureRCPC]>;
+
+def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true",
+ "Support ARM v8.4a instructions", [HasV8_3aOps, FeatureDotProd]>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "AArch64RegisterInfo.td"
+include "AArch64RegisterBanks.td"
+include "AArch64CallingConvention.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "AArch64Schedule.td"
+include "AArch64InstrInfo.td"
+
+def AArch64InstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Named operands for MRS/MSR/TLBI/...
+//===----------------------------------------------------------------------===//
+
+include "AArch64SystemOperands.td"
+
+//===----------------------------------------------------------------------===//
+// AArch64 Processors supported.
+//
+include "AArch64SchedA53.td"
+include "AArch64SchedA57.td"
+include "AArch64SchedCyclone.td"
+include "AArch64SchedFalkor.td"
+include "AArch64SchedKryo.td"
+include "AArch64SchedExynosM1.td"
+include "AArch64SchedExynosM3.td"
+include "AArch64SchedThunderX.td"
+include "AArch64SchedThunderX2T99.td"
+
+def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
+ "Cortex-A35 ARM processors", [
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeaturePerfMon
+ ]>;
+
+def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
+ "Cortex-A53 ARM processors", [
+ FeatureBalanceFPOps,
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureCustomCheapAsMoveHandling,
+ FeatureFPARMv8,
+ FeatureFuseAES,
+ FeatureNEON,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeatureUseAA
+ ]>;
+
+def ProcA55 : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55",
+ "Cortex-A55 ARM processors", [
+ HasV8_2aOps,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeatureFuseAES,
+ FeatureNEON,
+ FeatureFullFP16,
+ FeatureDotProd,
+ FeatureRCPC,
+ FeaturePerfMon
+ ]>;
+
+def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
+ "Cortex-A57 ARM processors", [
+ FeatureBalanceFPOps,
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureCustomCheapAsMoveHandling,
+ FeatureFPARMv8,
+ FeatureFuseAES,
+ FeatureFuseLiterals,
+ FeatureNEON,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive
+ ]>;
+
+def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72",
+ "Cortex-A72 ARM processors", [
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeatureFuseAES,
+ FeatureNEON,
+ FeaturePerfMon
+ ]>;
+
+def ProcA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73",
+ "Cortex-A73 ARM processors", [
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeatureFuseAES,
+ FeatureNEON,
+ FeaturePerfMon
+ ]>;
+
+def ProcA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75",
+ "Cortex-A75 ARM processors", [
+ HasV8_2aOps,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeatureFuseAES,
+ FeatureNEON,
+ FeatureFullFP16,
+ FeatureDotProd,
+ FeatureRCPC,
+ FeaturePerfMon
+ ]>;
+
+// Note that Cyclone does not fuse AES instructions, but newer Apple chips do
+// perform the fusion, and Cyclone is used by default when targeting Apple OSes.
+def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
+ "Cyclone", [
+ FeatureAlternateSExtLoadCVTF32Pattern,
+ FeatureArithmeticBccFusion,
+ FeatureArithmeticCbzFusion,
+ FeatureCrypto,
+ FeatureDisableLatencySchedHeuristic,
+ FeatureFPARMv8,
+ FeatureFuseAES,
+ FeatureNEON,
+ FeaturePerfMon,
+ FeatureZCRegMove,
+ FeatureZCZeroing,
+ FeatureZCZeroingFPWorkaround
+ ]>;
+
+def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
+ "Samsung Exynos-M1 processors",
+ [FeatureSlowPaired128,
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureExynosCheapAsMoveHandling,
+ FeatureFPARMv8,
+ FeatureFuseAES,
+ FeatureNEON,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeatureSlowMisaligned128Store,
+ FeatureUseRSqrt,
+ FeatureZCZeroing]>;
+
+def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1",
+ "Samsung Exynos-M2 processors",
+ [FeatureSlowPaired128,
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureExynosCheapAsMoveHandling,
+ FeatureFPARMv8,
+ FeatureFuseAES,
+ FeatureNEON,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeatureSlowMisaligned128Store,
+ FeatureZCZeroing]>;
+
+def ProcExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
+ "Samsung Exynos-M3 processors",
+ [FeatureCRC,
+ FeatureCrypto,
+ FeatureExynosCheapAsMoveHandling,
+ FeatureFPARMv8,
+ FeatureFuseAddress,
+ FeatureFuseAES,
+ FeatureFuseCCSelect,
+ FeatureFuseLiterals,
+ FeatureLSLFast,
+ FeatureNEON,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive,
+ FeatureZCZeroing]>;
+
+def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
+ "Qualcomm Kryo processors", [
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureCustomCheapAsMoveHandling,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive,
+ FeatureZCZeroing,
+ FeatureLSLFast
+ ]>;
+
+def ProcFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor",
+ "Qualcomm Falkor processors", [
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureCustomCheapAsMoveHandling,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive,
+ FeatureRDM,
+ FeatureZCZeroing,
+ FeatureLSLFast,
+ FeatureSlowSTRQro
+ ]>;
+
+def ProcSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira",
+ "Qualcomm Saphira processors", [
+ FeatureCrypto,
+ FeatureCustomCheapAsMoveHandling,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureSPE,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive,
+ FeatureZCZeroing,
+ FeatureLSLFast,
+ HasV8_3aOps]>;
+
+def ProcThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily",
+ "ThunderX2T99",
+ "Cavium ThunderX2 processors", [
+ FeatureAggressiveFMA,
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeatureArithmeticBccFusion,
+ FeatureNEON,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive,
+ FeatureLSE,
+ HasV8_1aOps]>;
+
+def ProcThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX",
+ "Cavium ThunderX processors", [
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive,
+ FeatureNEON]>;
+
+def ProcThunderXT88 : SubtargetFeature<"thunderxt88", "ARMProcFamily",
+ "ThunderXT88",
+ "Cavium ThunderX processors", [
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive,
+ FeatureNEON]>;
+
+def ProcThunderXT81 : SubtargetFeature<"thunderxt81", "ARMProcFamily",
+ "ThunderXT81",
+ "Cavium ThunderX processors", [
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive,
+ FeatureNEON]>;
+
+def ProcThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily",
+ "ThunderXT83",
+ "Cavium ThunderX processors", [
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive,
+ FeatureNEON]>;
+
+def : ProcessorModel<"generic", NoSchedModel, [
+ FeatureFPARMv8,
+ FeatureFuseAES,
+ FeatureNEON,
+ FeaturePerfMon,
+ FeaturePostRAScheduler
+ ]>;
+
+// FIXME: Cortex-A35 and Cortex-A55 are currently modeled as a Cortex-A53.
+def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>;
+def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
+def : ProcessorModel<"cortex-a55", CortexA53Model, [ProcA55]>;
+def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
+// FIXME: Cortex-A72, Cortex-A73 and Cortex-A75 are currently modeled as a Cortex-A57.
+def : ProcessorModel<"cortex-a72", CortexA57Model, [ProcA72]>;
+def : ProcessorModel<"cortex-a73", CortexA57Model, [ProcA73]>;
+def : ProcessorModel<"cortex-a75", CortexA57Model, [ProcA75]>;
+def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
+def : ProcessorModel<"exynos-m1", ExynosM1Model, [ProcExynosM1]>;
+def : ProcessorModel<"exynos-m2", ExynosM1Model, [ProcExynosM2]>;
+def : ProcessorModel<"exynos-m3", ExynosM3Model, [ProcExynosM3]>;
+def : ProcessorModel<"exynos-m4", ExynosM3Model, [ProcExynosM3]>;
+def : ProcessorModel<"falkor", FalkorModel, [ProcFalkor]>;
+def : ProcessorModel<"saphira", FalkorModel, [ProcSaphira]>;
+def : ProcessorModel<"kryo", KryoModel, [ProcKryo]>;
+// Cavium ThunderX/ThunderX T8X Processors
+def : ProcessorModel<"thunderx", ThunderXT8XModel, [ProcThunderX]>;
+def : ProcessorModel<"thunderxt88", ThunderXT8XModel, [ProcThunderXT88]>;
+def : ProcessorModel<"thunderxt81", ThunderXT8XModel, [ProcThunderXT81]>;
+def : ProcessorModel<"thunderxt83", ThunderXT8XModel, [ProcThunderXT83]>;
+// Cavium ThunderX2T9X Processors. Formerly Broadcom Vulcan.
+def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, [ProcThunderX2T99]>;
+
+//===----------------------------------------------------------------------===//
+// Assembly parser
+//===----------------------------------------------------------------------===//
+
+def GenericAsmParserVariant : AsmParserVariant {
+ int Variant = 0;
+ string Name = "generic";
+ string BreakCharacters = ".";
+ string TokenizingCharacters = "[]*!/";
+}
+
+def AppleAsmParserVariant : AsmParserVariant {
+ int Variant = 1;
+ string Name = "apple-neon";
+ string BreakCharacters = ".";
+ string TokenizingCharacters = "[]*!/";
+}
+
+//===----------------------------------------------------------------------===//
+// Assembly printer
+//===----------------------------------------------------------------------===//
+// AArch64 Uses the MC printer for asm output, so make sure the TableGen
+// AsmWriter bits get associated with the correct class.
+def GenericAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ int PassSubtarget = 1;
+ int Variant = 0;
+ bit isMCAsmWriter = 1;
+}
+
+def AppleAsmWriter : AsmWriter {
+ let AsmWriterClassName = "AppleInstPrinter";
+ int PassSubtarget = 1;
+ int Variant = 1;
+ int isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Target Declaration
+//===----------------------------------------------------------------------===//
+
+def AArch64 : Target {
+ let InstructionSet = AArch64InstrInfo;
+ let AssemblyParserVariants = [GenericAsmParserVariant, AppleAsmParserVariant];
+ let AssemblyWriters = [GenericAsmWriter, AppleAsmWriter];
+ let AllowRegisterRenaming = 1;
+}
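For orientation, AArch64.td registers a CPU in two steps: a Proc* SubtargetFeature fixes the ARMProcFamily value and lists the architectural features, and a ProcessorModel binds that feature set to a scheduling model and a CPU name. A minimal sketch of that shape, reusing only features and models defined above; ProcBar and "bar" are hypothetical names, not part of this diff:

    // Hypothetical CPU definition following the pattern of ProcA53 and friends.
    def ProcBar : SubtargetFeature<"bar", "ARMProcFamily", "CortexA53",
                                   "Hypothetical example processor", [
                                   HasV8_2aOps,
                                   FeatureFPARMv8,
                                   FeatureNEON,
                                   FeatureSHA2,   // crypto requested via the split features
                                   FeatureAES,    // rather than legacy FeatureCrypto
                                   FeaturePerfMon
                                   ]>;

    // Bind the feature set to an existing scheduling model; "bar" becomes a
    // selectable CPU name.
    def : ProcessorModel<"bar", CortexA53Model, [ProcBar]>;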
diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64CallingConvention.td b/capstone/suite/synctools/tablegen/AArch64/AArch64CallingConvention.td
new file mode 100644
index 000000000..30492003d
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/AArch64/AArch64CallingConvention.td
@@ -0,0 +1,366 @@
+//=- AArch64CallingConv.td - Calling Conventions for AArch64 -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for AArch64 architecture.
+//
+//===----------------------------------------------------------------------===//
+
+/// CCIfAlign - Match if the original alignment of the argument is Align.
+class CCIfAlign<string Align, CCAction A> :
+ CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
+/// CCIfBigEndian - Match only if we're in big endian mode.
+class CCIfBigEndian<CCAction A> :
+ CCIf<"State.getMachineFunction().getDataLayout().isBigEndian()", A>;
+
+//===----------------------------------------------------------------------===//
+// ARM AAPCS64 Calling Convention
+//===----------------------------------------------------------------------===//
+
+def CC_AArch64_AAPCS : CallingConv<[
+ CCIfType<[iPTR], CCBitConvertToType<i64>>,
+ CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
+ CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>,
+
+ // Big endian vectors must be passed as if they were 1-element vectors so that
+ // their lanes are in a consistent order.
+ CCIfBigEndian<CCIfType<[v2i32, v2f32, v4i16, v4f16, v8i8],
+ CCBitConvertToType<f64>>>,
+ CCIfBigEndian<CCIfType<[v2i64, v2f64, v4i32, v4f32, v8i16, v8f16, v16i8],
+ CCBitConvertToType<f128>>>,
+
+ // An SRet is passed in X8, not X0 like a normal pointer parameter.
+ CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X8], [W8]>>>,
+
+ // Put ByVal arguments directly on the stack. Minimum size and alignment of a
+ // slot is 64-bit.
+ CCIfByVal<CCPassByVal<8, 8>>,
+
+ // The 'nest' parameter, if any, is passed in X18.
+ // Darwin uses X18 as the platform register and hence 'nest' isn't currently
+ // supported there.
+ CCIfNest<CCAssignToReg<[X18]>>,
+
+ // Pass SwiftSelf in a callee saved register.
+ CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>,
+
+ // A SwiftError is passed in X21.
+ CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
+
+ CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
+
+ // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
+ // up to eight each of GPR and FPR.
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+ CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
+ [X0, X1, X2, X3, X4, X5, X6, X7]>>,
+ // i128 is split into two i64s; we can't fit half into register X7.
+ CCIfType<[i64], CCIfSplit<CCAssignToRegWithShadow<[X0, X2, X4, X6],
+ [X0, X1, X3, X5]>>>,
+
+ // i128 is split to two i64s, and its stack alignment is 16 bytes.
+ CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [X7]>>>,
+
+ CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
+ [W0, W1, W2, W3, W4, W5, W6, W7]>>,
+ CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
+ [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+ CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
+ [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+ CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
+ [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
+ CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
+ [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+ CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
+ CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+
+ // If more than will fit in registers, pass them on the stack instead.
+ CCIfType<[i1, i8, i16, f16], CCAssignToStack<8, 8>>,
+ CCIfType<[i32, f32], CCAssignToStack<8, 8>>,
+ CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16],
+ CCAssignToStack<8, 8>>,
+ CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
+ CCAssignToStack<16, 16>>
+]>;
+
+def RetCC_AArch64_AAPCS : CallingConv<[
+ CCIfType<[iPTR], CCBitConvertToType<i64>>,
+ CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
+ CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>,
+
+ CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
+
+ // Big endian vectors must be passed as if they were 1-element vectors so that
+ // their lanes are in a consistent order.
+ CCIfBigEndian<CCIfType<[v2i32, v2f32, v4i16, v4f16, v8i8],
+ CCBitConvertToType<f64>>>,
+ CCIfBigEndian<CCIfType<[v2i64, v2f64, v4i32, v4f32, v8i16, v8f16, v16i8],
+ CCBitConvertToType<f128>>>,
+
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+ CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
+ [X0, X1, X2, X3, X4, X5, X6, X7]>>,
+ CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
+ [W0, W1, W2, W3, W4, W5, W6, W7]>>,
+ CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
+ [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+ CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
+ [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+ CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
+ [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
+ CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
+ [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+ CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
+ CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
+]>;
+
+// Vararg functions on windows pass floats in integer registers
+def CC_AArch64_Win64_VarArg : CallingConv<[
+ CCIfType<[f16, f32], CCPromoteToType<f64>>,
+ CCIfType<[f64], CCBitConvertToType<i64>>,
+ CCDelegateTo<CC_AArch64_AAPCS>
+]>;
+
+
+// Darwin uses a calling convention which differs in only two ways
+// from the standard one at this level:
+// + i128s (i.e. split i64s) don't need even registers.
+// + Stack slots are sized as needed rather than being at least 64-bit.
+def CC_AArch64_DarwinPCS : CallingConv<[
+ CCIfType<[iPTR], CCBitConvertToType<i64>>,
+ CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
+ CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
+
+ // An SRet is passed in X8, not X0 like a normal pointer parameter.
+ CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X8], [W8]>>>,
+
+ // Put ByVal arguments directly on the stack. Minimum size and alignment of a
+ // slot is 64-bit.
+ CCIfByVal<CCPassByVal<8, 8>>,
+
+ // Pass SwiftSelf in a callee saved register.
+ CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>,
+
+ // A SwiftError is passed in X21.
+ CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
+
+ CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
+
+ // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
+ // up to eight each of GPR and FPR.
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+ CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
+ [X0, X1, X2, X3, X4, X5, X6, X7]>>,
+ // i128 is split into two i64s; we can't fit half into register X7.
+ CCIfType<[i64],
+ CCIfSplit<CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6],
+ [W0, W1, W2, W3, W4, W5, W6]>>>,
+ // i128 is split to two i64s, and its stack alignment is 16 bytes.
+ CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [X7]>>>,
+
+ CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
+ [W0, W1, W2, W3, W4, W5, W6, W7]>>,
+ CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
+ [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+ CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
+ [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+ CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
+ [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
+ CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
+ [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
+ CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+
+ // If more than will fit in registers, pass them on the stack instead.
+ CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>,
+ CCIf<"ValVT == MVT::i16 || ValVT == MVT::f16", CCAssignToStack<2, 2>>,
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+ CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16],
+ CCAssignToStack<8, 8>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
+ CCAssignToStack<16, 16>>
+]>;
+
+def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
+ CCIfType<[iPTR], CCBitConvertToType<i64>>,
+ CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
+ CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
+
+ CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Stack_Block">>,
+
+ // Handle all scalar types as either i64 or f64.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+ CCIfType<[f16, f32], CCPromoteToType<f64>>,
+
+ // Everything is on the stack.
+ // i128 is split to two i64s, and its stack alignment is 16 bytes.
+ CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
+ CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
+ CCAssignToStack<8, 8>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
+ CCAssignToStack<16, 16>>
+]>;
+
+// The WebKit_JS calling convention only passes the first argument (the callee)
+// in a register and the remaining arguments on the stack. We allow 32-bit stack
+// slots so that WebKit can write partial values in the stack and define the
+// other 32-bit quantity as undef.
+def CC_AArch64_WebKit_JS : CallingConv<[
+ // Handle i1, i8, i16, i32, and i64 passing in register X0 (W0).
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+ CCIfType<[i32], CCAssignToRegWithShadow<[W0], [X0]>>,
+ CCIfType<[i64], CCAssignToRegWithShadow<[X0], [W0]>>,
+
+ // Pass the remaining arguments on the stack instead.
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+ CCIfType<[i64, f64], CCAssignToStack<8, 8>>
+]>;
+
+def RetCC_AArch64_WebKit_JS : CallingConv<[
+ CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
+ [X0, X1, X2, X3, X4, X5, X6, X7]>>,
+ CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
+ [W0, W1, W2, W3, W4, W5, W6, W7]>>,
+ CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
+ [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+ CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
+ [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM64 Calling Convention for GHC
+//===----------------------------------------------------------------------===//
+
+// This calling convention is specific to the Glasgow Haskell Compiler.
+// The only documentation is the GHC source code, specifically the C header
+// file:
+//
+// https://github.com/ghc/ghc/blob/master/includes/stg/MachRegs.h
+//
+// which defines the registers for the Spineless Tagless G-Machine (STG) that
+// GHC uses to implement lazy evaluation. The generic STG machine has a set of
+// registers which are mapped to appropriate set of architecture specific
+// registers for each CPU architecture.
+//
+// The STG Machine is documented here:
+//
+// https://ghc.haskell.org/trac/ghc/wiki/Commentary/Compiler/GeneratedCode
+//
+// The AArch64 register mapping is under the heading "The ARMv8/AArch64 ABI
+// register mapping".
+
+def CC_AArch64_GHC : CallingConv<[
+ CCIfType<[iPTR], CCBitConvertToType<i64>>,
+
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, f128], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>,
+ CCIfType<[f32], CCAssignToReg<[S8, S9, S10, S11]>>,
+ CCIfType<[f64], CCAssignToReg<[D12, D13, D14, D15]>>,
+
+ // Promote i8/i16/i32 arguments to i64.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, SpLim
+ CCIfType<[i64], CCAssignToReg<[X19, X20, X21, X22, X23, X24, X25, X26, X27, X28]>>
+]>;
+
+// FIXME: LR is only callee-saved in the sense that *we* preserve it and are
+// presumably a callee to someone. External functions may not do so, but this
+// is currently safe since BL has LR as an implicit-def and what happens after a
+// tail call doesn't matter.
+//
+// It would be better to model its preservation semantics properly (create a
+// vreg on entry, use it in RET & tail call generation; make that vreg def if we
+// end up saving LR as part of a call frame). Watch this space...
+def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
+ X23, X24, X25, X26, X27, X28,
+ D8, D9, D10, D11,
+ D12, D13, D14, D15)>;
+
+// Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since
+// 'this' and the pointer return value are both passed in X0 in these cases,
+// this can be partially modelled by treating X0 as a callee-saved register;
+// only the resulting RegMask is used; the SaveList is ignored
+//
+// (For generic ARM 64-bit ABI code, clang will not generate constructors or
+// destructors with 'this' returns, so this RegMask will not be used in that
+// case)
+def CSR_AArch64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>;
+
+def CSR_AArch64_AAPCS_SwiftError
+ : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X21)>;
+
+// The function used by Darwin to obtain the address of a thread-local variable
+// guarantees more than a normal AAPCS function. X16 and X17 are used on the
+// fast path for calculation, but all other registers, except X0 (argument/return)
+// and LR (it is a call, after all), are preserved.
+def CSR_AArch64_TLS_Darwin
+ : CalleeSavedRegs<(add (sub (sequence "X%u", 1, 28), X16, X17),
+ FP,
+ (sequence "Q%u", 0, 31))>;
+
+// We can only handle a register pair made of adjacent registers, and the pair
+// must belong to the same register class as well. Since the access function on
+// the fast path calls a function that follows CSR_AArch64_TLS_Darwin,
+// CSR_AArch64_CXX_TLS_Darwin should be a subset of CSR_AArch64_TLS_Darwin.
+def CSR_AArch64_CXX_TLS_Darwin
+ : CalleeSavedRegs<(add CSR_AArch64_AAPCS,
+ (sub (sequence "X%u", 1, 28), X15, X16, X17, X18),
+ (sequence "D%u", 0, 31))>;
+
+// CSRs that are handled by prologue, epilogue.
+def CSR_AArch64_CXX_TLS_Darwin_PE
+ : CalleeSavedRegs<(add LR, FP)>;
+
+// CSRs that are handled explicitly via copies.
+def CSR_AArch64_CXX_TLS_Darwin_ViaCopy
+ : CalleeSavedRegs<(sub CSR_AArch64_CXX_TLS_Darwin, LR, FP)>;
+
+// The ELF stub used for TLS-descriptor access saves every feasible
+// register. Only X0 and LR are clobbered.
+def CSR_AArch64_TLS_ELF
+ : CalleeSavedRegs<(add (sequence "X%u", 1, 28), FP,
+ (sequence "Q%u", 0, 31))>;
+
+def CSR_AArch64_AllRegs
+ : CalleeSavedRegs<(add (sequence "W%u", 0, 30), WSP,
+ (sequence "X%u", 0, 28), FP, LR, SP,
+ (sequence "B%u", 0, 31), (sequence "H%u", 0, 31),
+ (sequence "S%u", 0, 31), (sequence "D%u", 0, 31),
+ (sequence "Q%u", 0, 31))>;
+
+def CSR_AArch64_NoRegs : CalleeSavedRegs<(add)>;
+
+def CSR_AArch64_RT_MostRegs : CalleeSavedRegs<(add CSR_AArch64_AAPCS,
+ (sequence "X%u", 9, 15))>;
+
+def CSR_AArch64_StackProbe_Windows
+ : CalleeSavedRegs<(add (sequence "X%u", 0, 15),
+ (sequence "X%u", 18, 28), FP, SP,
+ (sequence "Q%u", 0, 31))>;
+
+// Variants of the standard calling conventions for shadow call stack.
+// These all preserve x18 in addition to any other registers.
+def CSR_AArch64_NoRegs_SCS
+ : CalleeSavedRegs<(add CSR_AArch64_NoRegs, X18)>;
+def CSR_AArch64_AllRegs_SCS
+ : CalleeSavedRegs<(add CSR_AArch64_AllRegs, X18)>;
+def CSR_AArch64_CXX_TLS_Darwin_SCS
+ : CalleeSavedRegs<(add CSR_AArch64_CXX_TLS_Darwin, X18)>;
+def CSR_AArch64_AAPCS_SwiftError_SCS
+ : CalleeSavedRegs<(add CSR_AArch64_AAPCS_SwiftError, X18)>;
+def CSR_AArch64_RT_MostRegs_SCS
+ : CalleeSavedRegs<(add CSR_AArch64_RT_MostRegs, X18)>;
+def CSR_AArch64_AAPCS_SCS
+ : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X18)>;
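The calling-convention records above are all composed from a small set of CC actions (CCIfType, CCPromoteToType, CCAssignToReg / CCAssignToRegWithShadow, CCAssignToStack, CCDelegateTo, ...), from which TableGen generates the CC_AArch64_* and RetCC_AArch64_* argument-assignment functions. A rough sketch of how a new convention would be expressed with the same constructs; CC_AArch64_Example is a hypothetical name, not part of this diff:

    def CC_AArch64_Example : CallingConv<[
      // Widen small integers before assignment.
      CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
      // First four arguments in registers; the W-regs shadow their X-regs.
      CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3],
                                              [X0, X1, X2, X3]>>,
      CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3]>>,
      // Spill anything else of these types to 8-byte, 8-aligned stack slots.
      CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
      // Fall back to the standard AAPCS rules for everything not handled.
      CCDelegateTo<CC_AArch64_AAPCS>
    ]>;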
diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64InstrAtomics.td b/capstone/suite/synctools/tablegen/AArch64/AArch64InstrAtomics.td
new file mode 100644
index 000000000..35cd7735c
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/AArch64/AArch64InstrAtomics.td
@@ -0,0 +1,426 @@
+//=- AArch64InstrAtomics.td - AArch64 Atomic codegen support -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// AArch64 Atomic operand code-gen constructs.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------
+// Atomic fences
+//===----------------------------------
+let AddedComplexity = 15, Size = 0 in
+def CompilerBarrier : Pseudo<(outs), (ins i32imm:$ordering),
+ [(atomic_fence imm:$ordering, 0)]>, Sched<[]>;
+def : Pat<(atomic_fence (i64 4), (imm)), (DMB (i32 0x9))>;
+def : Pat<(atomic_fence (imm), (imm)), (DMB (i32 0xb))>;
+
+//===----------------------------------
+// Atomic loads
+//===----------------------------------
+
+// When they're actually atomic, only one addressing mode (GPR64sp) is
+// supported, but when they're relaxed and anything can be used, all the
+// standard modes would be valid and may give efficiency gains.
+
+// An atomic load operation that actually needs acquire semantics.
+class acquiring_load<PatFrag base>
+ : PatFrag<(ops node:$ptr), (base node:$ptr)> {
+ let IsAtomic = 1;
+ let IsAtomicOrderingAcquireOrStronger = 1;
+}
+
+// An atomic load operation that does not need either acquire or release
+// semantics.
+class relaxed_load<PatFrag base>
+ : PatFrag<(ops node:$ptr), (base node:$ptr)> {
+ let IsAtomic = 1;
+ let IsAtomicOrderingAcquireOrStronger = 0;
+}
+
+// 8-bit loads
+def : Pat<(acquiring_load<atomic_load_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
+def : Pat<(relaxed_load<atomic_load_8> (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend8:$offset)),
+ (LDRBBroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$offset)>;
+def : Pat<(relaxed_load<atomic_load_8> (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend8:$offset)),
+ (LDRBBroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$offset)>;
+def : Pat<(relaxed_load<atomic_load_8> (am_indexed8 GPR64sp:$Rn,
+ uimm12s1:$offset)),
+ (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
+def : Pat<(relaxed_load<atomic_load_8>
+ (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
+ (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
+
+// 16-bit loads
+def : Pat<(acquiring_load<atomic_load_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
+def : Pat<(relaxed_load<atomic_load_16> (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend)),
+ (LDRHHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend)>;
+def : Pat<(relaxed_load<atomic_load_16> (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend)),
+ (LDRHHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend)>;
+def : Pat<(relaxed_load<atomic_load_16> (am_indexed16 GPR64sp:$Rn,
+ uimm12s2:$offset)),
+ (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>;
+def : Pat<(relaxed_load<atomic_load_16>
+ (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
+ (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
+
+// 32-bit loads
+def : Pat<(acquiring_load<atomic_load_32> GPR64sp:$ptr), (LDARW GPR64sp:$ptr)>;
+def : Pat<(relaxed_load<atomic_load_32> (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend32:$extend)),
+ (LDRWroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>;
+def : Pat<(relaxed_load<atomic_load_32> (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend32:$extend)),
+ (LDRWroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>;
+def : Pat<(relaxed_load<atomic_load_32> (am_indexed32 GPR64sp:$Rn,
+ uimm12s4:$offset)),
+ (LDRWui GPR64sp:$Rn, uimm12s4:$offset)>;
+def : Pat<(relaxed_load<atomic_load_32>
+ (am_unscaled32 GPR64sp:$Rn, simm9:$offset)),
+ (LDURWi GPR64sp:$Rn, simm9:$offset)>;
+
+// 64-bit loads
+def : Pat<(acquiring_load<atomic_load_64> GPR64sp:$ptr), (LDARX GPR64sp:$ptr)>;
+def : Pat<(relaxed_load<atomic_load_64> (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend64:$extend)),
+ (LDRXroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
+def : Pat<(relaxed_load<atomic_load_64> (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend64:$extend)),
+ (LDRXroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
+def : Pat<(relaxed_load<atomic_load_64> (am_indexed64 GPR64sp:$Rn,
+ uimm12s8:$offset)),
+ (LDRXui GPR64sp:$Rn, uimm12s8:$offset)>;
+def : Pat<(relaxed_load<atomic_load_64>
+ (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
+ (LDURXi GPR64sp:$Rn, simm9:$offset)>;
+
+//===----------------------------------
+// Atomic stores
+//===----------------------------------
+
+// When they're actually atomic, only one addressing mode (GPR64sp) is
+// supported, but when they're relaxed and anything can be used, all the
+// standard modes would be valid and may give efficiency gains.
+
+// A store operation that actually needs release semantics.
+class releasing_store<PatFrag base>
+ : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val)> {
+ let IsAtomic = 1;
+ let IsAtomicOrderingReleaseOrStronger = 1;
+}
+
+// An atomic store operation that doesn't actually need to be atomic on AArch64.
+class relaxed_store<PatFrag base>
+ : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val)> {
+ let IsAtomic = 1;
+ let IsAtomicOrderingReleaseOrStronger = 0;
+}
+
+// 8-bit stores
+def : Pat<(releasing_store<atomic_store_8> GPR64sp:$ptr, GPR32:$val),
+ (STLRB GPR32:$val, GPR64sp:$ptr)>;
+def : Pat<(relaxed_store<atomic_store_8>
+ (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$extend),
+ GPR32:$val),
+ (STRBBroW GPR32:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$extend)>;
+def : Pat<(relaxed_store<atomic_store_8>
+ (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$extend),
+ GPR32:$val),
+ (STRBBroX GPR32:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$extend)>;
+def : Pat<(relaxed_store<atomic_store_8>
+ (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset), GPR32:$val),
+ (STRBBui GPR32:$val, GPR64sp:$Rn, uimm12s1:$offset)>;
+def : Pat<(relaxed_store<atomic_store_8>
+ (am_unscaled8 GPR64sp:$Rn, simm9:$offset), GPR32:$val),
+ (STURBBi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>;
+
+// 16-bit stores
+def : Pat<(releasing_store<atomic_store_16> GPR64sp:$ptr, GPR32:$val),
+ (STLRH GPR32:$val, GPR64sp:$ptr)>;
+def : Pat<(relaxed_store<atomic_store_16> (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend),
+ GPR32:$val),
+ (STRHHroW GPR32:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend)>;
+def : Pat<(relaxed_store<atomic_store_16> (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend),
+ GPR32:$val),
+ (STRHHroX GPR32:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend)>;
+def : Pat<(relaxed_store<atomic_store_16>
+ (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset), GPR32:$val),
+ (STRHHui GPR32:$val, GPR64sp:$Rn, uimm12s2:$offset)>;
+def : Pat<(relaxed_store<atomic_store_16>
+ (am_unscaled16 GPR64sp:$Rn, simm9:$offset), GPR32:$val),
+ (STURHHi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>;
+
+// 32-bit stores
+def : Pat<(releasing_store<atomic_store_32> GPR64sp:$ptr, GPR32:$val),
+ (STLRW GPR32:$val, GPR64sp:$ptr)>;
+def : Pat<(relaxed_store<atomic_store_32> (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend32:$extend),
+ GPR32:$val),
+ (STRWroW GPR32:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>;
+def : Pat<(relaxed_store<atomic_store_32> (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend32:$extend),
+ GPR32:$val),
+ (STRWroX GPR32:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>;
+def : Pat<(relaxed_store<atomic_store_32>
+ (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset), GPR32:$val),
+ (STRWui GPR32:$val, GPR64sp:$Rn, uimm12s4:$offset)>;
+def : Pat<(relaxed_store<atomic_store_32>
+ (am_unscaled32 GPR64sp:$Rn, simm9:$offset), GPR32:$val),
+ (STURWi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>;
+
+// 64-bit stores
+def : Pat<(releasing_store<atomic_store_64> GPR64sp:$ptr, GPR64:$val),
+ (STLRX GPR64:$val, GPR64sp:$ptr)>;
+def : Pat<(relaxed_store<atomic_store_64> (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend64:$extend),
+ GPR64:$val),
+ (STRXroW GPR64:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
+def : Pat<(relaxed_store<atomic_store_64> (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend64:$extend),
+ GPR64:$val),
+ (STRXroX GPR64:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
+def : Pat<(relaxed_store<atomic_store_64>
+ (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset), GPR64:$val),
+ (STRXui GPR64:$val, GPR64sp:$Rn, uimm12s8:$offset)>;
+def : Pat<(relaxed_store<atomic_store_64>
+ (am_unscaled64 GPR64sp:$Rn, simm9:$offset), GPR64:$val),
+ (STURXi GPR64:$val, GPR64sp:$Rn, simm9:$offset)>;
+
+//===----------------------------------
+// Low-level exclusive operations
+//===----------------------------------
+
+// Load-exclusives.
+
+def ldxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+
+def ldxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+
+def ldxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+def ldxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
+def : Pat<(ldxr_1 GPR64sp:$addr),
+ (SUBREG_TO_REG (i64 0), (LDXRB GPR64sp:$addr), sub_32)>;
+def : Pat<(ldxr_2 GPR64sp:$addr),
+ (SUBREG_TO_REG (i64 0), (LDXRH GPR64sp:$addr), sub_32)>;
+def : Pat<(ldxr_4 GPR64sp:$addr),
+ (SUBREG_TO_REG (i64 0), (LDXRW GPR64sp:$addr), sub_32)>;
+def : Pat<(ldxr_8 GPR64sp:$addr), (LDXRX GPR64sp:$addr)>;
+
+def : Pat<(and (ldxr_1 GPR64sp:$addr), 0xff),
+ (SUBREG_TO_REG (i64 0), (LDXRB GPR64sp:$addr), sub_32)>;
+def : Pat<(and (ldxr_2 GPR64sp:$addr), 0xffff),
+ (SUBREG_TO_REG (i64 0), (LDXRH GPR64sp:$addr), sub_32)>;
+def : Pat<(and (ldxr_4 GPR64sp:$addr), 0xffffffff),
+ (SUBREG_TO_REG (i64 0), (LDXRW GPR64sp:$addr), sub_32)>;
+
+// Load-acquire-exclusives.
+
+def ldaxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+
+def ldaxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+
+def ldaxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+def ldaxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
+def : Pat<(ldaxr_1 GPR64sp:$addr),
+ (SUBREG_TO_REG (i64 0), (LDAXRB GPR64sp:$addr), sub_32)>;
+def : Pat<(ldaxr_2 GPR64sp:$addr),
+ (SUBREG_TO_REG (i64 0), (LDAXRH GPR64sp:$addr), sub_32)>;
+def : Pat<(ldaxr_4 GPR64sp:$addr),
+ (SUBREG_TO_REG (i64 0), (LDAXRW GPR64sp:$addr), sub_32)>;
+def : Pat<(ldaxr_8 GPR64sp:$addr), (LDAXRX GPR64sp:$addr)>;
+
+def : Pat<(and (ldaxr_1 GPR64sp:$addr), 0xff),
+ (SUBREG_TO_REG (i64 0), (LDAXRB GPR64sp:$addr), sub_32)>;
+def : Pat<(and (ldaxr_2 GPR64sp:$addr), 0xffff),
+ (SUBREG_TO_REG (i64 0), (LDAXRH GPR64sp:$addr), sub_32)>;
+def : Pat<(and (ldaxr_4 GPR64sp:$addr), 0xffffffff),
+ (SUBREG_TO_REG (i64 0), (LDAXRW GPR64sp:$addr), sub_32)>;
+
+// Store-exclusives.
+
+def stxr_1 : PatFrag<(ops node:$val, node:$ptr),
+ (int_aarch64_stxr node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+
+def stxr_2 : PatFrag<(ops node:$val, node:$ptr),
+ (int_aarch64_stxr node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+
+def stxr_4 : PatFrag<(ops node:$val, node:$ptr),
+ (int_aarch64_stxr node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+def stxr_8 : PatFrag<(ops node:$val, node:$ptr),
+ (int_aarch64_stxr node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
+
+def : Pat<(stxr_1 GPR64:$val, GPR64sp:$addr),
+ (STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+def : Pat<(stxr_2 GPR64:$val, GPR64sp:$addr),
+ (STXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+def : Pat<(stxr_4 GPR64:$val, GPR64sp:$addr),
+ (STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+def : Pat<(stxr_8 GPR64:$val, GPR64sp:$addr),
+ (STXRX GPR64:$val, GPR64sp:$addr)>;
+
+def : Pat<(stxr_1 (zext (and GPR32:$val, 0xff)), GPR64sp:$addr),
+ (STXRB GPR32:$val, GPR64sp:$addr)>;
+def : Pat<(stxr_2 (zext (and GPR32:$val, 0xffff)), GPR64sp:$addr),
+ (STXRH GPR32:$val, GPR64sp:$addr)>;
+def : Pat<(stxr_4 (zext GPR32:$val), GPR64sp:$addr),
+ (STXRW GPR32:$val, GPR64sp:$addr)>;
+
+def : Pat<(stxr_1 (and GPR64:$val, 0xff), GPR64sp:$addr),
+ (STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+def : Pat<(stxr_2 (and GPR64:$val, 0xffff), GPR64sp:$addr),
+ (STXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+def : Pat<(stxr_4 (and GPR64:$val, 0xffffffff), GPR64sp:$addr),
+ (STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+
+// Store-release-exclusives.
+
+def stlxr_1 : PatFrag<(ops node:$val, node:$ptr),
+ (int_aarch64_stlxr node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+
+def stlxr_2 : PatFrag<(ops node:$val, node:$ptr),
+ (int_aarch64_stlxr node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+
+def stlxr_4 : PatFrag<(ops node:$val, node:$ptr),
+ (int_aarch64_stlxr node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+def stlxr_8 : PatFrag<(ops node:$val, node:$ptr),
+ (int_aarch64_stlxr node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
+
+def : Pat<(stlxr_1 GPR64:$val, GPR64sp:$addr),
+ (STLXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+def : Pat<(stlxr_2 GPR64:$val, GPR64sp:$addr),
+ (STLXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+def : Pat<(stlxr_4 GPR64:$val, GPR64sp:$addr),
+ (STLXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+def : Pat<(stlxr_8 GPR64:$val, GPR64sp:$addr),
+ (STLXRX GPR64:$val, GPR64sp:$addr)>;
+
+def : Pat<(stlxr_1 (zext (and GPR32:$val, 0xff)), GPR64sp:$addr),
+ (STLXRB GPR32:$val, GPR64sp:$addr)>;
+def : Pat<(stlxr_2 (zext (and GPR32:$val, 0xffff)), GPR64sp:$addr),
+ (STLXRH GPR32:$val, GPR64sp:$addr)>;
+def : Pat<(stlxr_4 (zext GPR32:$val), GPR64sp:$addr),
+ (STLXRW GPR32:$val, GPR64sp:$addr)>;
+
+def : Pat<(stlxr_1 (and GPR64:$val, 0xff), GPR64sp:$addr),
+ (STLXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+def : Pat<(stlxr_2 (and GPR64:$val, 0xffff), GPR64sp:$addr),
+ (STLXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+def : Pat<(stlxr_4 (and GPR64:$val, 0xffffffff), GPR64sp:$addr),
+ (STLXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+
+
+// And clear exclusive.
+
+def : Pat<(int_aarch64_clrex), (CLREX 0xf)>;
+
+//===----------------------------------
+// Atomic cmpxchg for -O0
+//===----------------------------------
+
+// The fast register allocator used during -O0 inserts spills to cover any VRegs
+// live across basic block boundaries. When this happens between an LDXR and an
+// STXR it can clear the exclusive monitor, causing all cmpxchg attempts to
+// fail.
+
+// Unfortunately, this means we have to have an alternative (expanded
+// post-regalloc) path for -O0 compilations. Fortunately this path can be
+// significantly more naive than the standard expansion: we conservatively
+// assume seq_cst, strong cmpxchg and omit clrex on failure.
+
+let Constraints = "@earlyclobber $Rd,@earlyclobber $scratch",
+ mayLoad = 1, mayStore = 1 in {
+def CMP_SWAP_8 : Pseudo<(outs GPR32:$Rd, GPR32:$scratch),
+ (ins GPR64:$addr, GPR32:$desired, GPR32:$new), []>,
+ Sched<[WriteAtomic]>;
+
+def CMP_SWAP_16 : Pseudo<(outs GPR32:$Rd, GPR32:$scratch),
+ (ins GPR64:$addr, GPR32:$desired, GPR32:$new), []>,
+ Sched<[WriteAtomic]>;
+
+def CMP_SWAP_32 : Pseudo<(outs GPR32:$Rd, GPR32:$scratch),
+ (ins GPR64:$addr, GPR32:$desired, GPR32:$new), []>,
+ Sched<[WriteAtomic]>;
+
+def CMP_SWAP_64 : Pseudo<(outs GPR64:$Rd, GPR32:$scratch),
+ (ins GPR64:$addr, GPR64:$desired, GPR64:$new), []>,
+ Sched<[WriteAtomic]>;
+}
+
+let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi,@earlyclobber $scratch",
+ mayLoad = 1, mayStore = 1 in
+def CMP_SWAP_128 : Pseudo<(outs GPR64:$RdLo, GPR64:$RdHi, GPR32:$scratch),
+ (ins GPR64:$addr, GPR64:$desiredLo, GPR64:$desiredHi,
+ GPR64:$newLo, GPR64:$newHi), []>,
+ Sched<[WriteAtomic]>;
+
+// v8.1 Atomic instructions:
+let Predicates = [HasLSE] in {
+ defm : LDOPregister_patterns<"LDADD", "atomic_load_add">;
+ defm : LDOPregister_patterns<"LDSET", "atomic_load_or">;
+ defm : LDOPregister_patterns<"LDEOR", "atomic_load_xor">;
+ defm : LDOPregister_patterns<"LDCLR", "atomic_load_clr">;
+ defm : LDOPregister_patterns<"LDSMAX", "atomic_load_max">;
+ defm : LDOPregister_patterns<"LDSMIN", "atomic_load_min">;
+ defm : LDOPregister_patterns<"LDUMAX", "atomic_load_umax">;
+ defm : LDOPregister_patterns<"LDUMIN", "atomic_load_umin">;
+ defm : LDOPregister_patterns<"SWP", "atomic_swap">;
+ defm : CASregister_patterns<"CAS", "atomic_cmp_swap">;
+
+ // These two patterns are only needed for GlobalISel; SelectionDAG ISel
+ // converts an atomic load-sub into a sub plus an atomic load-add, and
+ // likewise converts an atomic load-and into a clr.
+ defm : LDOPregister_patterns_mod<"LDADD", "atomic_load_sub", "SUB">;
+ defm : LDOPregister_patterns_mod<"LDCLR", "atomic_load_and", "ORN">;
+}
+
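The ordered/relaxed split above is driven entirely by the IsAtomicOrdering* flags on the PatFrag wrappers: acquiring_load / releasing_store pin acquire- or release-or-stronger accesses to the single LDAR*/STLR* addressing mode, while relaxed_load / relaxed_store let every normal addressing mode through. A hedged sketch of one more wrapper in the same style, assuming the matching ordering flag exists alongside the ones used above; seq_cst_load is hypothetical and not part of this diff:

    // Hypothetical wrapper matching only sequentially-consistent atomic loads,
    // written in the same style as acquiring_load / relaxed_load.
    class seq_cst_load<PatFrag base>
      : PatFrag<(ops node:$ptr), (base node:$ptr)> {
      let IsAtomic = 1;
      let IsAtomicOrderingSequentiallyConsistent = 1; // assumed flag name
    }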
diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64InstrFormats.td b/capstone/suite/synctools/tablegen/AArch64/AArch64InstrFormats.td
new file mode 100644
index 000000000..7caf32dbd
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/AArch64/AArch64InstrFormats.td
@@ -0,0 +1,10402 @@
+//===- AArch64InstrFormats.td - AArch64 Instruction Formats --*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe the AArch64 instruction formats here
+//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<2> val> {
+ bits<2> Value = val;
+}
+
+def PseudoFrm : Format<0>;
+def NormalFrm : Format<1>; // Do we need any others?
+
+// AArch64 Instruction Format
+class AArch64Inst<Format f, string cstr> : Instruction {
+ field bits<32> Inst; // Instruction encoding.
+ // Mask of bits that cause an encoding to be UNPREDICTABLE.
+ // If a bit is set, then if the corresponding bit in the
+ // target encoding differs from its value in the "Inst" field,
+ // the instruction is UNPREDICTABLE (SoftFail in abstract parlance).
+ field bits<32> Unpredictable = 0;
+ // SoftFail is the generic name for this field, but we alias it so
+ // as to make it more obvious what it means in ARM-land.
+ field bits<32> SoftFail = Unpredictable;
+ let Namespace = "AArch64";
+ Format F = f;
+ bits<2> Form = F.Value;
+ let Pattern = [];
+ let Constraints = cstr;
+}
+
+class InstSubst<string Asm, dag Result, bit EmitPriority = 0>
+ : InstAlias<Asm, Result, EmitPriority>, Requires<[UseNegativeImmediates]>;
+
+// Pseudo instructions (don't have encoding information)
+class Pseudo<dag oops, dag iops, list<dag> pattern, string cstr = "">
+ : AArch64Inst<PseudoFrm, cstr> {
+ dag OutOperandList = oops;
+ dag InOperandList = iops;
+ let Pattern = pattern;
+ let isCodeGenOnly = 1;
+}
+
+// Real instructions (have encoding information)
+class EncodedI<string cstr, list<dag> pattern> : AArch64Inst<NormalFrm, cstr> {
+ let Pattern = pattern;
+ let Size = 4;
+}
+
+// Enum describing whether an instruction is
+// destructive in its first source operand.
+class DestructiveInstTypeEnum<bits<1> val> {
+ bits<1> Value = val;
+}
+def NotDestructive : DestructiveInstTypeEnum<0>;
+def Destructive : DestructiveInstTypeEnum<1>;
+
+// Normal instructions
+class I<dag oops, dag iops, string asm, string operands, string cstr,
+ list<dag> pattern>
+ : EncodedI<cstr, pattern> {
+ dag OutOperandList = oops;
+ dag InOperandList = iops;
+ let AsmString = !strconcat(asm, operands);
+
+ // Destructive operations (SVE)
+ DestructiveInstTypeEnum DestructiveInstType = NotDestructive;
+ ElementSizeEnum ElementSize = ElementSizeB;
+
+ let TSFlags{3} = DestructiveInstType.Value;
+ let TSFlags{2-0} = ElementSize.Value;
+}
+
+class TriOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$MHS, node:$RHS), res>;
+class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
+class UnOpFrag<dag res> : PatFrag<(ops node:$LHS), res>;
+
+// Helper fragment for an extract of the high portion of a 128-bit vector.
+def extract_high_v16i8 :
+ UnOpFrag<(extract_subvector (v16i8 node:$LHS), (i64 8))>;
+def extract_high_v8i16 :
+ UnOpFrag<(extract_subvector (v8i16 node:$LHS), (i64 4))>;
+def extract_high_v4i32 :
+ UnOpFrag<(extract_subvector (v4i32 node:$LHS), (i64 2))>;
+def extract_high_v2i64 :
+ UnOpFrag<(extract_subvector (v2i64 node:$LHS), (i64 1))>;
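+
+// These feed the "2" (second-half) variants of the long/wide NEON multiclasses
+// later in this file; for example, smull2 v0.8h, v1.16b, v2.16b multiplies
+// the lanes selected by extract_high_v16i8.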
+
+//===----------------------------------------------------------------------===//
+// Asm Operand Classes.
+//
+
+// Shifter operand for arithmetic shifted encodings.
+def ShifterOperand : AsmOperandClass {
+ let Name = "Shifter";
+}
+
+// Shifter operand for mov immediate encodings.
+def MovImm32ShifterOperand : AsmOperandClass {
+ let SuperClasses = [ShifterOperand];
+ let Name = "MovImm32Shifter";
+ let RenderMethod = "addShifterOperands";
+ let DiagnosticType = "InvalidMovImm32Shift";
+}
+def MovImm64ShifterOperand : AsmOperandClass {
+ let SuperClasses = [ShifterOperand];
+ let Name = "MovImm64Shifter";
+ let RenderMethod = "addShifterOperands";
+ let DiagnosticType = "InvalidMovImm64Shift";
+}
+
+// Shifter operand for arithmetic register shifted encodings.
+class ArithmeticShifterOperand<int width> : AsmOperandClass {
+ let SuperClasses = [ShifterOperand];
+ let Name = "ArithmeticShifter" # width;
+ let PredicateMethod = "isArithmeticShifter<" # width # ">";
+ let RenderMethod = "addShifterOperands";
+ let DiagnosticType = "AddSubRegShift" # width;
+}
+
+def ArithmeticShifterOperand32 : ArithmeticShifterOperand<32>;
+def ArithmeticShifterOperand64 : ArithmeticShifterOperand<64>;
+
+// Shifter operand for logical register shifted encodings.
+class LogicalShifterOperand<int width> : AsmOperandClass {
+ let SuperClasses = [ShifterOperand];
+ let Name = "LogicalShifter" # width;
+ let PredicateMethod = "isLogicalShifter<" # width # ">";
+ let RenderMethod = "addShifterOperands";
+ let DiagnosticType = "AddSubRegShift" # width;
+}
+
+def LogicalShifterOperand32 : LogicalShifterOperand<32>;
+def LogicalShifterOperand64 : LogicalShifterOperand<64>;
+
+// Shifter operand for logical vector 128/64-bit shifted encodings.
+def LogicalVecShifterOperand : AsmOperandClass {
+ let SuperClasses = [ShifterOperand];
+ let Name = "LogicalVecShifter";
+ let RenderMethod = "addShifterOperands";
+}
+def LogicalVecHalfWordShifterOperand : AsmOperandClass {
+ let SuperClasses = [LogicalVecShifterOperand];
+ let Name = "LogicalVecHalfWordShifter";
+ let RenderMethod = "addShifterOperands";
+}
+
+// The "MSL" shifter on the vector MOVI instruction.
+def MoveVecShifterOperand : AsmOperandClass {
+ let SuperClasses = [ShifterOperand];
+ let Name = "MoveVecShifter";
+ let RenderMethod = "addShifterOperands";
+}
+
+// Extend operand for arithmetic encodings.
+def ExtendOperand : AsmOperandClass {
+ let Name = "Extend";
+ let DiagnosticType = "AddSubRegExtendLarge";
+}
+def ExtendOperand64 : AsmOperandClass {
+ let SuperClasses = [ExtendOperand];
+ let Name = "Extend64";
+ let DiagnosticType = "AddSubRegExtendSmall";
+}
+// 'extend' that's an lsl of a 64-bit register.
+def ExtendOperandLSL64 : AsmOperandClass {
+ let SuperClasses = [ExtendOperand];
+ let Name = "ExtendLSL64";
+ let RenderMethod = "addExtend64Operands";
+ let DiagnosticType = "AddSubRegExtendLarge";
+}
+
+// 8-bit floating-point immediate encodings.
+def FPImmOperand : AsmOperandClass {
+ let Name = "FPImm";
+ let ParserMethod = "tryParseFPImm<true>";
+ let DiagnosticType = "InvalidFPImm";
+}
+
+def CondCode : AsmOperandClass {
+ let Name = "CondCode";
+ let DiagnosticType = "InvalidCondCode";
+}
+
+// A 32-bit register parsed as 64-bit
+def GPR32as64Operand : AsmOperandClass {
+ let Name = "GPR32as64";
+ let ParserMethod =
+ "tryParseGPROperand<false, RegConstraintEqualityTy::EqualsSubReg>";
+}
+def GPR32as64 : RegisterOperand<GPR32> {
+ let ParserMatchClass = GPR32as64Operand;
+}
+
+// A 64-bit register parsed as 32-bit
+def GPR64as32Operand : AsmOperandClass {
+ let Name = "GPR64as32";
+ let ParserMethod =
+ "tryParseGPROperand<false, RegConstraintEqualityTy::EqualsSuperReg>";
+}
+def GPR64as32 : RegisterOperand<GPR64, "printGPR64as32"> {
+ let ParserMatchClass = GPR64as32Operand;
+}
+
+// 8-bit immediate for AdvSIMD where 64-bit values of the form:
+// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh
+// are encoded as the eight bit value 'abcdefgh'.
+def SIMDImmType10Operand : AsmOperandClass { let Name = "SIMDImmType10"; }
+
+class UImmScaledMemoryIndexed<int Width, int Scale> : AsmOperandClass {
+ let Name = "UImm" # Width # "s" # Scale;
+ let DiagnosticType = "InvalidMemoryIndexed" # Scale # "UImm" # Width;
+ let RenderMethod = "addImmScaledOperands<" # Scale # ">";
+ let PredicateMethod = "isUImmScaled<" # Width # ", " # Scale # ">";
+}
+
+class SImmScaledMemoryIndexed<int Width, int Scale> : AsmOperandClass {
+ let Name = "SImm" # Width # "s" # Scale;
+ let DiagnosticType = "InvalidMemoryIndexed" # Scale # "SImm" # Width;
+ let RenderMethod = "addImmScaledOperands<" # Scale # ">";
+ let PredicateMethod = "isSImmScaled<" # Width # ", " # Scale # ">";
+}
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions.
+//
+
+// ADR[P] instruction labels.
+def AdrpOperand : AsmOperandClass {
+ let Name = "AdrpLabel";
+ let ParserMethod = "tryParseAdrpLabel";
+ let DiagnosticType = "InvalidLabel";
+}
+def adrplabel : Operand<i64> {
+ let EncoderMethod = "getAdrLabelOpValue";
+ let PrintMethod = "printAdrpLabel";
+ let ParserMatchClass = AdrpOperand;
+}
+
+def AdrOperand : AsmOperandClass {
+ let Name = "AdrLabel";
+ let ParserMethod = "tryParseAdrLabel";
+ let DiagnosticType = "InvalidLabel";
+}
+def adrlabel : Operand<i64> {
+ let EncoderMethod = "getAdrLabelOpValue";
+ let ParserMatchClass = AdrOperand;
+}
+
+class SImmOperand<int width> : AsmOperandClass {
+ let Name = "SImm" # width;
+ let DiagnosticType = "InvalidMemoryIndexedSImm" # width;
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isSImm<" # width # ">";
+}
+
+// Authenticated loads for v8.3 can have scaled 10-bit immediate offsets.
+def SImm10s8Operand : SImmScaledMemoryIndexed<10, 8>;
+def simm10Scaled : Operand<i64> {
+ let ParserMatchClass = SImm10s8Operand;
+ let DecoderMethod = "DecodeSImm<10>";
+ let PrintMethod = "printImmScale<8>";
+}
+
+// uimm6 predicate - True if the immediate is in the range [0, 63].
+def UImm6Operand : AsmOperandClass {
+ let Name = "UImm6";
+ let DiagnosticType = "InvalidImm0_63";
+}
+
+def uimm6 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm < 64; }]> {
+ let ParserMatchClass = UImm6Operand;
+}
+
+def SImm9Operand : SImmOperand<9>;
+def simm9 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -256 && Imm < 256; }]> {
+ let ParserMatchClass = SImm9Operand;
+ let DecoderMethod = "DecodeSImm<9>";
+}
+
+def SImm8Operand : SImmOperand<8>;
+def simm8 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -128 && Imm < 128; }]> {
+ let ParserMatchClass = SImm8Operand;
+ let DecoderMethod = "DecodeSImm<8>";
+}
+
+def SImm6Operand : SImmOperand<6>;
+def simm6_32b : Operand<i32>, ImmLeaf<i32, [{ return Imm >= -32 && Imm < 32; }]> {
+ let ParserMatchClass = SImm6Operand;
+ let DecoderMethod = "DecodeSImm<6>";
+}
+
+def SImm5Operand : SImmOperand<5>;
+def simm5_64b : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -16 && Imm < 16; }]> {
+ let ParserMatchClass = SImm5Operand;
+ let DecoderMethod = "DecodeSImm<5>";
+}
+
+def simm5_32b : Operand<i32>, ImmLeaf<i32, [{ return Imm >= -16 && Imm < 16; }]> {
+ let ParserMatchClass = SImm5Operand;
+ let DecoderMethod = "DecodeSImm<5>";
+}
+
+// simm7sN predicate - True if the immediate is a multiple of N in the range
+// [-64 * N, 63 * N].
+
+def SImm7s4Operand : SImmScaledMemoryIndexed<7, 4>;
+def SImm7s8Operand : SImmScaledMemoryIndexed<7, 8>;
+def SImm7s16Operand : SImmScaledMemoryIndexed<7, 16>;
+
+def simm7s4 : Operand<i32> {
+ let ParserMatchClass = SImm7s4Operand;
+ let PrintMethod = "printImmScale<4>";
+}
+
+def simm7s8 : Operand<i32> {
+ let ParserMatchClass = SImm7s8Operand;
+ let PrintMethod = "printImmScale<8>";
+}
+
+def simm7s16 : Operand<i32> {
+ let ParserMatchClass = SImm7s16Operand;
+ let PrintMethod = "printImmScale<16>";
+}
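+
+// Worked example: simm7s4 is the load/store-pair offset for 32-bit registers,
+// so ldp w0, w1, [x2, #-256] through [x2, #252] in steps of 4 are accepted,
+// with the imm7 field holding offset/4.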
+
+def am_indexed7s8 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S8", []>;
+def am_indexed7s16 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S16", []>;
+def am_indexed7s32 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S32", []>;
+def am_indexed7s64 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S64", []>;
+def am_indexed7s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S128", []>;
+
+// uimm5sN predicate - True if the immediate is a multiple of N in the range
+// [0 * N, 31 * N].
+def UImm5s2Operand : UImmScaledMemoryIndexed<5, 2>;
+def UImm5s4Operand : UImmScaledMemoryIndexed<5, 4>;
+def UImm5s8Operand : UImmScaledMemoryIndexed<5, 8>;
+
+def uimm5s2 : Operand<i64>, ImmLeaf<i64,
+ [{ return Imm >= 0 && Imm < (32*2) && ((Imm % 2) == 0); }]> {
+ let ParserMatchClass = UImm5s2Operand;
+ let PrintMethod = "printImmScale<2>";
+}
+def uimm5s4 : Operand<i64>, ImmLeaf<i64,
+ [{ return Imm >= 0 && Imm < (32*4) && ((Imm % 4) == 0); }]> {
+ let ParserMatchClass = UImm5s4Operand;
+ let PrintMethod = "printImmScale<4>";
+}
+def uimm5s8 : Operand<i64>, ImmLeaf<i64,
+ [{ return Imm >= 0 && Imm < (32*8) && ((Imm % 8) == 0); }]> {
+ let ParserMatchClass = UImm5s8Operand;
+ let PrintMethod = "printImmScale<8>";
+}
+
+// uimm6sN predicate - True if the immediate is a multiple of N in the range
+// [0 * N, 63 * N].
+def UImm6s1Operand : UImmScaledMemoryIndexed<6, 1>;
+def UImm6s2Operand : UImmScaledMemoryIndexed<6, 2>;
+def UImm6s4Operand : UImmScaledMemoryIndexed<6, 4>;
+def UImm6s8Operand : UImmScaledMemoryIndexed<6, 8>;
+
+def uimm6s1 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm < 64; }]> {
+ let ParserMatchClass = UImm6s1Operand;
+}
+def uimm6s2 : Operand<i64>, ImmLeaf<i64,
+[{ return Imm >= 0 && Imm < (64*2) && ((Imm % 2) == 0); }]> {
+ let PrintMethod = "printImmScale<2>";
+ let ParserMatchClass = UImm6s2Operand;
+}
+def uimm6s4 : Operand<i64>, ImmLeaf<i64,
+[{ return Imm >= 0 && Imm < (64*4) && ((Imm % 4) == 0); }]> {
+ let PrintMethod = "printImmScale<4>";
+ let ParserMatchClass = UImm6s4Operand;
+}
+def uimm6s8 : Operand<i64>, ImmLeaf<i64,
+[{ return Imm >= 0 && Imm < (64*8) && ((Imm % 8) == 0); }]> {
+ let PrintMethod = "printImmScale<8>";
+ let ParserMatchClass = UImm6s8Operand;
+}
+
+// simm6sN predicate - True if the immediate is a multiple of N in the range
+// [-32 * N, 31 * N].
+def SImm6s1Operand : SImmScaledMemoryIndexed<6, 1>;
+def simm6s1 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -32 && Imm < 32; }]> {
+ let ParserMatchClass = SImm6s1Operand;
+ let DecoderMethod = "DecodeSImm<6>";
+}
+
+// simm4sN predicate - True if the immediate is a multiple of N in the range
+// [-8 * N, 7 * N].
+def SImm4s1Operand : SImmScaledMemoryIndexed<4, 1>;
+def SImm4s2Operand : SImmScaledMemoryIndexed<4, 2>;
+def SImm4s3Operand : SImmScaledMemoryIndexed<4, 3>;
+def SImm4s4Operand : SImmScaledMemoryIndexed<4, 4>;
+def SImm4s16Operand : SImmScaledMemoryIndexed<4, 16>;
+
+def simm4s1 : Operand<i64>, ImmLeaf<i64,
+[{ return Imm >=-8 && Imm <= 7; }]> {
+ let ParserMatchClass = SImm4s1Operand;
+ let DecoderMethod = "DecodeSImm<4>";
+}
+
+def simm4s2 : Operand<i64>, ImmLeaf<i64,
+[{ return Imm >=-16 && Imm <= 14 && (Imm % 2) == 0x0; }]> {
+ let PrintMethod = "printImmScale<2>";
+ let ParserMatchClass = SImm4s2Operand;
+ let DecoderMethod = "DecodeSImm<4>";
+}
+
+def simm4s3 : Operand<i64>, ImmLeaf<i64,
+[{ return Imm >=-24 && Imm <= 21 && (Imm % 3) == 0x0; }]> {
+ let PrintMethod = "printImmScale<3>";
+ let ParserMatchClass = SImm4s3Operand;
+ let DecoderMethod = "DecodeSImm<4>";
+}
+
+def simm4s4 : Operand<i64>, ImmLeaf<i64,
+[{ return Imm >=-32 && Imm <= 28 && (Imm % 4) == 0x0; }]> {
+ let PrintMethod = "printImmScale<4>";
+ let ParserMatchClass = SImm4s4Operand;
+ let DecoderMethod = "DecodeSImm<4>";
+}
+def simm4s16 : Operand<i64>, ImmLeaf<i64,
+[{ return Imm >=-128 && Imm <= 112 && (Imm % 16) == 0x0; }]> {
+ let PrintMethod = "printImmScale<16>";
+ let ParserMatchClass = SImm4s16Operand;
+ let DecoderMethod = "DecodeSImm<4>";
+}
+
+class AsmImmRange<int Low, int High> : AsmOperandClass {
+ let Name = "Imm" # Low # "_" # High;
+ let DiagnosticType = "InvalidImm" # Low # "_" # High;
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isImmInRange<" # Low # "," # High # ">";
+}
+
+def Imm1_8Operand : AsmImmRange<1, 8>;
+def Imm1_16Operand : AsmImmRange<1, 16>;
+def Imm1_32Operand : AsmImmRange<1, 32>;
+def Imm1_64Operand : AsmImmRange<1, 64>;
+
+class BranchTarget<int N> : AsmOperandClass {
+ let Name = "BranchTarget" # N;
+ let DiagnosticType = "InvalidLabel";
+ let PredicateMethod = "isBranchTarget<" # N # ">";
+}
+
+class PCRelLabel<int N> : BranchTarget<N> {
+ let Name = "PCRelLabel" # N;
+}
+
+def BranchTarget14Operand : BranchTarget<14>;
+def BranchTarget26Operand : BranchTarget<26>;
+def PCRelLabel19Operand : PCRelLabel<19>;
+
+def MovZSymbolG3AsmOperand : AsmOperandClass {
+ let Name = "MovZSymbolG3";
+ let RenderMethod = "addImmOperands";
+}
+
+def movz_symbol_g3 : Operand<i32> {
+ let ParserMatchClass = MovZSymbolG3AsmOperand;
+}
+
+def MovZSymbolG2AsmOperand : AsmOperandClass {
+ let Name = "MovZSymbolG2";
+ let RenderMethod = "addImmOperands";
+}
+
+def movz_symbol_g2 : Operand<i32> {
+ let ParserMatchClass = MovZSymbolG2AsmOperand;
+}
+
+def MovZSymbolG1AsmOperand : AsmOperandClass {
+ let Name = "MovZSymbolG1";
+ let RenderMethod = "addImmOperands";
+}
+
+def movz_symbol_g1 : Operand<i32> {
+ let ParserMatchClass = MovZSymbolG1AsmOperand;
+}
+
+def MovZSymbolG0AsmOperand : AsmOperandClass {
+ let Name = "MovZSymbolG0";
+ let RenderMethod = "addImmOperands";
+}
+
+def movz_symbol_g0 : Operand<i32> {
+ let ParserMatchClass = MovZSymbolG0AsmOperand;
+}
+
+def MovKSymbolG3AsmOperand : AsmOperandClass {
+ let Name = "MovKSymbolG3";
+ let RenderMethod = "addImmOperands";
+}
+
+def movk_symbol_g3 : Operand<i32> {
+ let ParserMatchClass = MovKSymbolG3AsmOperand;
+}
+
+def MovKSymbolG2AsmOperand : AsmOperandClass {
+ let Name = "MovKSymbolG2";
+ let RenderMethod = "addImmOperands";
+}
+
+def movk_symbol_g2 : Operand<i32> {
+ let ParserMatchClass = MovKSymbolG2AsmOperand;
+}
+
+def MovKSymbolG1AsmOperand : AsmOperandClass {
+ let Name = "MovKSymbolG1";
+ let RenderMethod = "addImmOperands";
+}
+
+def movk_symbol_g1 : Operand<i32> {
+ let ParserMatchClass = MovKSymbolG1AsmOperand;
+}
+
+def MovKSymbolG0AsmOperand : AsmOperandClass {
+ let Name = "MovKSymbolG0";
+ let RenderMethod = "addImmOperands";
+}
+
+def movk_symbol_g0 : Operand<i32> {
+ let ParserMatchClass = MovKSymbolG0AsmOperand;
+}
+
+class fixedpoint_i32<ValueType FloatVT>
+ : Operand<FloatVT>,
+ ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<32>", [fpimm, ld]> {
+ let EncoderMethod = "getFixedPointScaleOpValue";
+ let DecoderMethod = "DecodeFixedPointScaleImm32";
+ let ParserMatchClass = Imm1_32Operand;
+}
+
+class fixedpoint_i64<ValueType FloatVT>
+ : Operand<FloatVT>,
+ ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<64>", [fpimm, ld]> {
+ let EncoderMethod = "getFixedPointScaleOpValue";
+ let DecoderMethod = "DecodeFixedPointScaleImm64";
+ let ParserMatchClass = Imm1_64Operand;
+}
+
+def fixedpoint_f16_i32 : fixedpoint_i32<f16>;
+def fixedpoint_f32_i32 : fixedpoint_i32<f32>;
+def fixedpoint_f64_i32 : fixedpoint_i32<f64>;
+
+def fixedpoint_f16_i64 : fixedpoint_i64<f16>;
+def fixedpoint_f32_i64 : fixedpoint_i64<f32>;
+def fixedpoint_f64_i64 : fixedpoint_i64<f64>;
+
+def vecshiftR8 : Operand<i32>, ImmLeaf<i32, [{
+ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9);
+}]> {
+ let EncoderMethod = "getVecShiftR8OpValue";
+ let DecoderMethod = "DecodeVecShiftR8Imm";
+ let ParserMatchClass = Imm1_8Operand;
+}
+def vecshiftR16 : Operand<i32>, ImmLeaf<i32, [{
+ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17);
+}]> {
+ let EncoderMethod = "getVecShiftR16OpValue";
+ let DecoderMethod = "DecodeVecShiftR16Imm";
+ let ParserMatchClass = Imm1_16Operand;
+}
+def vecshiftR16Narrow : Operand<i32>, ImmLeaf<i32, [{
+ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9);
+}]> {
+ let EncoderMethod = "getVecShiftR16OpValue";
+ let DecoderMethod = "DecodeVecShiftR16ImmNarrow";
+ let ParserMatchClass = Imm1_8Operand;
+}
+def vecshiftR32 : Operand<i32>, ImmLeaf<i32, [{
+ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33);
+}]> {
+ let EncoderMethod = "getVecShiftR32OpValue";
+ let DecoderMethod = "DecodeVecShiftR32Imm";
+ let ParserMatchClass = Imm1_32Operand;
+}
+def vecshiftR32Narrow : Operand<i32>, ImmLeaf<i32, [{
+ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17);
+}]> {
+ let EncoderMethod = "getVecShiftR32OpValue";
+ let DecoderMethod = "DecodeVecShiftR32ImmNarrow";
+ let ParserMatchClass = Imm1_16Operand;
+}
+def vecshiftR64 : Operand<i32>, ImmLeaf<i32, [{
+ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 65);
+}]> {
+ let EncoderMethod = "getVecShiftR64OpValue";
+ let DecoderMethod = "DecodeVecShiftR64Imm";
+ let ParserMatchClass = Imm1_64Operand;
+}
+def vecshiftR64Narrow : Operand<i32>, ImmLeaf<i32, [{
+ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33);
+}]> {
+ let EncoderMethod = "getVecShiftR64OpValue";
+ let DecoderMethod = "DecodeVecShiftR64ImmNarrow";
+ let ParserMatchClass = Imm1_32Operand;
+}
+
+def Imm0_1Operand : AsmImmRange<0, 1>;
+def Imm0_7Operand : AsmImmRange<0, 7>;
+def Imm0_15Operand : AsmImmRange<0, 15>;
+def Imm0_31Operand : AsmImmRange<0, 31>;
+def Imm0_63Operand : AsmImmRange<0, 63>;
+
+def vecshiftL8 : Operand<i32>, ImmLeaf<i32, [{
+ return (((uint32_t)Imm) < 8);
+}]> {
+ let EncoderMethod = "getVecShiftL8OpValue";
+ let DecoderMethod = "DecodeVecShiftL8Imm";
+ let ParserMatchClass = Imm0_7Operand;
+}
+def vecshiftL16 : Operand<i32>, ImmLeaf<i32, [{
+ return (((uint32_t)Imm) < 16);
+}]> {
+ let EncoderMethod = "getVecShiftL16OpValue";
+ let DecoderMethod = "DecodeVecShiftL16Imm";
+ let ParserMatchClass = Imm0_15Operand;
+}
+def vecshiftL32 : Operand<i32>, ImmLeaf<i32, [{
+ return (((uint32_t)Imm) < 32);
+}]> {
+ let EncoderMethod = "getVecShiftL32OpValue";
+ let DecoderMethod = "DecodeVecShiftL32Imm";
+ let ParserMatchClass = Imm0_31Operand;
+}
+def vecshiftL64 : Operand<i32>, ImmLeaf<i32, [{
+ return (((uint32_t)Imm) < 64);
+}]> {
+ let EncoderMethod = "getVecShiftL64OpValue";
+ let DecoderMethod = "DecodeVecShiftL64Imm";
+ let ParserMatchClass = Imm0_63Operand;
+}
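+
+// Per the A64 immh:immb scheme these shift amounts are not stored directly:
+// a right shift by N on esize-bit elements is encoded as 2*esize - N, and a
+// left shift by N as esize + N, which is what the encoder/decoder methods
+// named above implement.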
+
+
+// Crazy immediate formats used by 32-bit and 64-bit logical immediate
+// instructions for splatting repeating bit patterns across the immediate.
+def logical_imm32_XFORM : SDNodeXForm<imm, [{
+ uint64_t enc = AArch64_AM::encodeLogicalImmediate(N->getZExtValue(), 32);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
+}]>;
+def logical_imm64_XFORM : SDNodeXForm<imm, [{
+ uint64_t enc = AArch64_AM::encodeLogicalImmediate(N->getZExtValue(), 64);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
+}]>;
+
+let DiagnosticType = "LogicalSecondSource" in {
+ def LogicalImm32Operand : AsmOperandClass {
+ let Name = "LogicalImm32";
+ let PredicateMethod = "isLogicalImm<int32_t>";
+ let RenderMethod = "addLogicalImmOperands<int32_t>";
+ }
+ def LogicalImm64Operand : AsmOperandClass {
+ let Name = "LogicalImm64";
+ let PredicateMethod = "isLogicalImm<int64_t>";
+ let RenderMethod = "addLogicalImmOperands<int64_t>";
+ }
+ def LogicalImm32NotOperand : AsmOperandClass {
+ let Name = "LogicalImm32Not";
+ let PredicateMethod = "isLogicalImm<int32_t>";
+ let RenderMethod = "addLogicalImmNotOperands<int32_t>";
+ }
+ def LogicalImm64NotOperand : AsmOperandClass {
+ let Name = "LogicalImm64Not";
+ let PredicateMethod = "isLogicalImm<int64_t>";
+ let RenderMethod = "addLogicalImmNotOperands<int64_t>";
+ }
+}
+def logical_imm32 : Operand<i32>, IntImmLeaf<i32, [{
+ return AArch64_AM::isLogicalImmediate(Imm.getZExtValue(), 32);
+}], logical_imm32_XFORM> {
+ let PrintMethod = "printLogicalImm<int32_t>";
+ let ParserMatchClass = LogicalImm32Operand;
+}
+def logical_imm64 : Operand<i64>, IntImmLeaf<i64, [{
+ return AArch64_AM::isLogicalImmediate(Imm.getZExtValue(), 64);
+}], logical_imm64_XFORM> {
+ let PrintMethod = "printLogicalImm<int64_t>";
+ let ParserMatchClass = LogicalImm64Operand;
+}
+def logical_imm32_not : Operand<i32> {
+ let ParserMatchClass = LogicalImm32NotOperand;
+}
+def logical_imm64_not : Operand<i64> {
+ let ParserMatchClass = LogicalImm64NotOperand;
+}
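+
+// Worked example: 0x00ff00ff00ff00ff is a valid 64-bit logical immediate (a
+// 16-bit element with a run of eight ones, replicated), so
+//   and x0, x1, #0x00ff00ff00ff00ff
+// assembles directly, with encodeLogicalImmediate packing it into the 13-bit
+// N:immr:imms field; a value like 0x12345678 has no such repeating pattern
+// and must be materialized in a register first.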
+
+// imm0_65535 predicate - True if the immediate is in the range [0,65535].
+def Imm0_65535Operand : AsmImmRange<0, 65535>;
+def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
+ return ((uint32_t)Imm) < 65536;
+}]> {
+ let ParserMatchClass = Imm0_65535Operand;
+ let PrintMethod = "printImmHex";
+}
+
+// imm0_255 predicate - True if the immediate is in the range [0,255].
+def Imm0_255Operand : AsmImmRange<0,255>;
+
+def imm0_255 : Operand<i32>, ImmLeaf<i32, [{
+ return ((uint32_t)Imm) < 256;
+}]> {
+ let ParserMatchClass = Imm0_255Operand;
+ let PrintMethod = "printImm";
+}
+
+// imm0_127 predicate - True if the immediate is in the range [0,127]
+def Imm0_127Operand : AsmImmRange<0, 127>;
+def imm0_127 : Operand<i32>, ImmLeaf<i32, [{
+ return ((uint32_t)Imm) < 128;
+}]> {
+ let ParserMatchClass = Imm0_127Operand;
+ let PrintMethod = "printImm";
+}
+
+// NOTE: These imm0_N operands have to be of type i64 because i64 is the size
+// for all shift-amounts.
+
+// imm0_63 predicate - True if the immediate is in the range [0,63]
+def imm0_63 : Operand<i64>, ImmLeaf<i64, [{
+ return ((uint64_t)Imm) < 64;
+}]> {
+ let ParserMatchClass = Imm0_63Operand;
+}
+
+// imm0_31 predicate - True if the immediate is in the range [0,31]
+def imm0_31 : Operand<i64>, ImmLeaf<i64, [{
+ return ((uint64_t)Imm) < 32;
+}]> {
+ let ParserMatchClass = Imm0_31Operand;
+}
+
+// True if the 32-bit immediate is in the range [0,31]
+def imm32_0_31 : Operand<i32>, ImmLeaf<i32, [{
+ return ((uint64_t)Imm) < 32;
+}]> {
+ let ParserMatchClass = Imm0_31Operand;
+}
+
+// imm0_1 predicate - True if the immediate is in the range [0,1]
+def imm0_1 : Operand<i64>, ImmLeaf<i64, [{
+ return ((uint64_t)Imm) < 2;
+}]> {
+ let ParserMatchClass = Imm0_1Operand;
+}
+
+// imm0_15 predicate - True if the immediate is in the range [0,15]
+def imm0_15 : Operand<i64>, ImmLeaf<i64, [{
+ return ((uint64_t)Imm) < 16;
+}]> {
+ let ParserMatchClass = Imm0_15Operand;
+}
+
+// imm0_7 predicate - True if the immediate is in the range [0,7]
+def imm0_7 : Operand<i64>, ImmLeaf<i64, [{
+ return ((uint64_t)Imm) < 8;
+}]> {
+ let ParserMatchClass = Imm0_7Operand;
+}
+
+// imm32_0_15 predicate - True if the 32-bit immediate is in the range [0,15]
+def imm32_0_15 : Operand<i32>, ImmLeaf<i32, [{
+ return ((uint32_t)Imm) < 16;
+}]> {
+ let ParserMatchClass = Imm0_15Operand;
+}
+
+// An arithmetic shifter operand:
+// {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr
+// {5-0} - imm6
+class arith_shift<ValueType Ty, int width> : Operand<Ty> {
+ let PrintMethod = "printShifter";
+ let ParserMatchClass = !cast<AsmOperandClass>(
+ "ArithmeticShifterOperand" # width);
+}
+
+def arith_shift32 : arith_shift<i32, 32>;
+def arith_shift64 : arith_shift<i64, 64>;
+
+class arith_shifted_reg<ValueType Ty, RegisterClass regclass, int width>
+ : Operand<Ty>,
+ ComplexPattern<Ty, 2, "SelectArithShiftedRegister", []> {
+ let PrintMethod = "printShiftedRegister";
+ let MIOperandInfo = (ops regclass, !cast<Operand>("arith_shift" # width));
+}
+
+def arith_shifted_reg32 : arith_shifted_reg<i32, GPR32, 32>;
+def arith_shifted_reg64 : arith_shifted_reg<i64, GPR64, 64>;
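+
+// For example, add w0, w1, w2, lsl #4 matches arith_shifted_reg32: the second
+// source is the pair (w2, lsl #4), with the shift amount carried in the imm6
+// field described above.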
+
+// A logical shifter operand:
+// {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr, 11 = ror
+// {5-0} - imm6
+class logical_shift<int width> : Operand<i32> {
+ let PrintMethod = "printShifter";
+ let ParserMatchClass = !cast<AsmOperandClass>(
+ "LogicalShifterOperand" # width);
+}
+
+def logical_shift32 : logical_shift<32>;
+def logical_shift64 : logical_shift<64>;
+
+class logical_shifted_reg<ValueType Ty, RegisterClass regclass, Operand shiftop>
+ : Operand<Ty>,
+ ComplexPattern<Ty, 2, "SelectLogicalShiftedRegister", []> {
+ let PrintMethod = "printShiftedRegister";
+ let MIOperandInfo = (ops regclass, shiftop);
+}
+
+def logical_shifted_reg32 : logical_shifted_reg<i32, GPR32, logical_shift32>;
+def logical_shifted_reg64 : logical_shifted_reg<i64, GPR64, logical_shift64>;
+
+// A logical vector shifter operand:
+// {7-6} - shift type: 00 = lsl
+// {5-0} - imm6: #0, #8, #16, or #24
+def logical_vec_shift : Operand<i32> {
+ let PrintMethod = "printShifter";
+ let EncoderMethod = "getVecShifterOpValue";
+ let ParserMatchClass = LogicalVecShifterOperand;
+}
+
+// A logical vector half-word shifter operand:
+// {7-6} - shift type: 00 = lsl
+// {5-0} - imm6: #0 or #8
+def logical_vec_hw_shift : Operand<i32> {
+ let PrintMethod = "printShifter";
+ let EncoderMethod = "getVecShifterOpValue";
+ let ParserMatchClass = LogicalVecHalfWordShifterOperand;
+}
+
+// A vector move shifter operand:
+// {0} - imm1: #8 or #16
+def move_vec_shift : Operand<i32> {
+ let PrintMethod = "printShifter";
+ let EncoderMethod = "getMoveVecShifterOpValue";
+ let ParserMatchClass = MoveVecShifterOperand;
+}
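+
+// For example, movi v0.2s, #0xab, msl #8 produces 0x0000abff in each lane:
+// MSL shifts ones (not zeroes) into the low bits, which is why it gets its
+// own operand class rather than reusing the lsl shifters.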
+
+let DiagnosticType = "AddSubSecondSource" in {
+ def AddSubImmOperand : AsmOperandClass {
+ let Name = "AddSubImm";
+ let ParserMethod = "tryParseImmWithOptionalShift";
+ let RenderMethod = "addImmWithOptionalShiftOperands<12>";
+ }
+ def AddSubImmNegOperand : AsmOperandClass {
+ let Name = "AddSubImmNeg";
+ let ParserMethod = "tryParseImmWithOptionalShift";
+ let RenderMethod = "addImmNegWithOptionalShiftOperands<12>";
+ }
+}
+// An ADD/SUB immediate shifter operand:
+// second operand:
+// {7-6} - shift type: 00 = lsl
+// {5-0} - imm6: #0 or #12
+class addsub_shifted_imm<ValueType Ty>
+ : Operand<Ty>, ComplexPattern<Ty, 2, "SelectArithImmed", [imm]> {
+ let PrintMethod = "printAddSubImm";
+ let EncoderMethod = "getAddSubImmOpValue";
+ let ParserMatchClass = AddSubImmOperand;
+ let MIOperandInfo = (ops i32imm, i32imm);
+}
+
+class addsub_shifted_imm_neg<ValueType Ty>
+ : Operand<Ty> {
+ let EncoderMethod = "getAddSubImmOpValue";
+ let ParserMatchClass = AddSubImmNegOperand;
+ let MIOperandInfo = (ops i32imm, i32imm);
+}
+
+def addsub_shifted_imm32 : addsub_shifted_imm<i32>;
+def addsub_shifted_imm64 : addsub_shifted_imm<i64>;
+def addsub_shifted_imm32_neg : addsub_shifted_imm_neg<i32>;
+def addsub_shifted_imm64_neg : addsub_shifted_imm_neg<i64>;
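+
+// For example, add x0, x1, #0x456000 is encodable because 0x456000 is a
+// 12-bit value shifted left by 12; the two MI operands hold (0x456, lsl #12).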
+
+def gi_addsub_shifted_imm32 :
+ GIComplexOperandMatcher<s32, "selectArithImmed">,
+ GIComplexPatternEquiv<addsub_shifted_imm32>;
+
+def gi_addsub_shifted_imm64 :
+ GIComplexOperandMatcher<s64, "selectArithImmed">,
+ GIComplexPatternEquiv<addsub_shifted_imm64>;
+
+class neg_addsub_shifted_imm<ValueType Ty>
+ : Operand<Ty>, ComplexPattern<Ty, 2, "SelectNegArithImmed", [imm]> {
+ let PrintMethod = "printAddSubImm";
+ let EncoderMethod = "getAddSubImmOpValue";
+ let ParserMatchClass = AddSubImmOperand;
+ let MIOperandInfo = (ops i32imm, i32imm);
+}
+
+def neg_addsub_shifted_imm32 : neg_addsub_shifted_imm<i32>;
+def neg_addsub_shifted_imm64 : neg_addsub_shifted_imm<i64>;
+
+// An extend operand:
+// {5-3} - extend type
+// {2-0} - imm3
+def arith_extend : Operand<i32> {
+ let PrintMethod = "printArithExtend";
+ let ParserMatchClass = ExtendOperand;
+}
+def arith_extend64 : Operand<i32> {
+ let PrintMethod = "printArithExtend";
+ let ParserMatchClass = ExtendOperand64;
+}
+
+// 'extend' that's an lsl of a 64-bit register.
+def arith_extendlsl64 : Operand<i32> {
+ let PrintMethod = "printArithExtend";
+ let ParserMatchClass = ExtendOperandLSL64;
+}
+
+class arith_extended_reg32<ValueType Ty> : Operand<Ty>,
+ ComplexPattern<Ty, 2, "SelectArithExtendedRegister", []> {
+ let PrintMethod = "printExtendedRegister";
+ let MIOperandInfo = (ops GPR32, arith_extend);
+}
+
+class arith_extended_reg32to64<ValueType Ty> : Operand<Ty>,
+ ComplexPattern<Ty, 2, "SelectArithExtendedRegister", []> {
+ let PrintMethod = "printExtendedRegister";
+ let MIOperandInfo = (ops GPR32, arith_extend64);
+}
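+
+// For example, add x0, x1, w2, uxtw #2 matches arith_extended_reg32to64: the
+// 32-bit source is zero-extended to 64 bits and then shifted left by the
+// imm3 amount before the addition.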
+
+// Floating-point immediate.
+def fpimm16 : Operand<f16>,
+ FPImmLeaf<f16, [{
+ return AArch64_AM::getFP16Imm(Imm) != -1;
+ }], SDNodeXForm<fpimm, [{
+ APFloat InVal = N->getValueAPF();
+ uint32_t enc = AArch64_AM::getFP16Imm(InVal);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
+ }]>> {
+ let ParserMatchClass = FPImmOperand;
+ let PrintMethod = "printFPImmOperand";
+}
+def fpimm32 : Operand<f32>,
+ FPImmLeaf<f32, [{
+ return AArch64_AM::getFP32Imm(Imm) != -1;
+ }], SDNodeXForm<fpimm, [{
+ APFloat InVal = N->getValueAPF();
+ uint32_t enc = AArch64_AM::getFP32Imm(InVal);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
+ }]>> {
+ let ParserMatchClass = FPImmOperand;
+ let PrintMethod = "printFPImmOperand";
+}
+def fpimm64 : Operand<f64>,
+ FPImmLeaf<f64, [{
+ return AArch64_AM::getFP64Imm(Imm) != -1;
+ }], SDNodeXForm<fpimm, [{
+ APFloat InVal = N->getValueAPF();
+ uint32_t enc = AArch64_AM::getFP64Imm(InVal);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
+ }]>> {
+ let ParserMatchClass = FPImmOperand;
+ let PrintMethod = "printFPImmOperand";
+}
+
+def fpimm8 : Operand<i32> {
+ let ParserMatchClass = FPImmOperand;
+ let PrintMethod = "printFPImmOperand";
+}
+
+def fpimm0 : FPImmLeaf<fAny, [{
+ return Imm.isExactlyValue(+0.0);
+}]>;
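+
+// The 8-bit FP immediate can only represent values of the form
+// +/- (16..31)/16 * 2^r with r in [-3, 4] (e.g. #1.0, #0.5, #31.0); +0.0 is
+// not encodable, which is why fpimm0 is matched separately (typically via a
+// move from the zero register or a compare against #0.0).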
+
+// Vector lane operands
+class AsmVectorIndex<int Min, int Max, string NamePrefix=""> : AsmOperandClass {
+ let Name = NamePrefix # "IndexRange" # Min # "_" # Max;
+ let DiagnosticType = "Invalid" # Name;
+ let PredicateMethod = "isVectorIndex<" # Min # ", " # Max # ">";
+ let RenderMethod = "addVectorIndexOperands";
+}
+
+class AsmVectorIndexOpnd<AsmOperandClass mc, code pred>
+ : Operand<i64>, ImmLeaf<i64, pred> {
+ let ParserMatchClass = mc;
+ let PrintMethod = "printVectorIndex";
+}
+
+def VectorIndex1Operand : AsmVectorIndex<1, 1>;
+def VectorIndexBOperand : AsmVectorIndex<0, 15>;
+def VectorIndexHOperand : AsmVectorIndex<0, 7>;
+def VectorIndexSOperand : AsmVectorIndex<0, 3>;
+def VectorIndexDOperand : AsmVectorIndex<0, 1>;
+
+def VectorIndex1 : AsmVectorIndexOpnd<VectorIndex1Operand, [{ return ((uint64_t)Imm) == 1; }]>;
+def VectorIndexB : AsmVectorIndexOpnd<VectorIndexBOperand, [{ return ((uint64_t)Imm) < 16; }]>;
+def VectorIndexH : AsmVectorIndexOpnd<VectorIndexHOperand, [{ return ((uint64_t)Imm) < 8; }]>;
+def VectorIndexS : AsmVectorIndexOpnd<VectorIndexSOperand, [{ return ((uint64_t)Imm) < 4; }]>;
+def VectorIndexD : AsmVectorIndexOpnd<VectorIndexDOperand, [{ return ((uint64_t)Imm) < 2; }]>;
+
+def SVEVectorIndexExtDupBOperand : AsmVectorIndex<0, 63, "SVE">;
+def SVEVectorIndexExtDupHOperand : AsmVectorIndex<0, 31, "SVE">;
+def SVEVectorIndexExtDupSOperand : AsmVectorIndex<0, 15, "SVE">;
+def SVEVectorIndexExtDupDOperand : AsmVectorIndex<0, 7, "SVE">;
+def SVEVectorIndexExtDupQOperand : AsmVectorIndex<0, 3, "SVE">;
+
+def sve_elm_idx_extdup_b
+ : AsmVectorIndexOpnd<SVEVectorIndexExtDupBOperand, [{ return ((uint64_t)Imm) < 64; }]>;
+def sve_elm_idx_extdup_h
+ : AsmVectorIndexOpnd<SVEVectorIndexExtDupHOperand, [{ return ((uint64_t)Imm) < 32; }]>;
+def sve_elm_idx_extdup_s
+ : AsmVectorIndexOpnd<SVEVectorIndexExtDupSOperand, [{ return ((uint64_t)Imm) < 16; }]>;
+def sve_elm_idx_extdup_d
+ : AsmVectorIndexOpnd<SVEVectorIndexExtDupDOperand, [{ return ((uint64_t)Imm) < 8; }]>;
+def sve_elm_idx_extdup_q
+ : AsmVectorIndexOpnd<SVEVectorIndexExtDupQOperand, [{ return ((uint64_t)Imm) < 4; }]>;
+
+// 8-bit immediate for AdvSIMD where 64-bit values of the form:
+// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh
+// are encoded as the eight bit value 'abcdefgh'.
+def simdimmtype10 : Operand<i32>,
+ FPImmLeaf<f64, [{
+ return AArch64_AM::isAdvSIMDModImmType10(
+ Imm.bitcastToAPInt().getZExtValue());
+ }], SDNodeXForm<fpimm, [{
+ APFloat InVal = N->getValueAPF();
+ uint32_t enc = AArch64_AM::encodeAdvSIMDModImmType10(N->getValueAPF()
+ .bitcastToAPInt()
+ .getZExtValue());
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
+ }]>> {
+ let ParserMatchClass = SIMDImmType10Operand;
+ let PrintMethod = "printSIMDType10Operand";
+}
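+
+// Worked example: imm8 = 0b10100101 (abcdefgh = 1,0,1,0,0,1,0,1) expands each
+// bit to a full byte, giving the 64-bit value 0xff00ff0000ff00ff.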
+
+
+//---
+// System management
+//---
+
+// Base encoding for system instruction operands.
+let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
+class BaseSystemI<bit L, dag oops, dag iops, string asm, string operands,
+ list<dag> pattern = []>
+ : I<oops, iops, asm, operands, "", pattern> {
+ let Inst{31-22} = 0b1101010100;
+ let Inst{21} = L;
+}
+
+// System instructions which do not have an Rt register.
+class SimpleSystemI<bit L, dag iops, string asm, string operands,
+ list<dag> pattern = []>
+ : BaseSystemI<L, (outs), iops, asm, operands, pattern> {
+ let Inst{4-0} = 0b11111;
+}
+
+// System instructions which have an Rt register.
+class RtSystemI<bit L, dag oops, dag iops, string asm, string operands>
+ : BaseSystemI<L, oops, iops, asm, operands>,
+ Sched<[WriteSys]> {
+ bits<5> Rt;
+ let Inst{4-0} = Rt;
+}
+
+// Hint instructions that take both a CRm and a 3-bit immediate.
+// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot
+// model patterns with sufficiently fine granularity
+let mayStore = 1, mayLoad = 1, hasSideEffects = 1 in
+ class HintI<string mnemonic>
+ : SimpleSystemI<0, (ins imm0_127:$imm), mnemonic#"\t$imm", "",
+ [(int_aarch64_hint imm0_127:$imm)]>,
+ Sched<[WriteHint]> {
+ bits <7> imm;
+ let Inst{20-12} = 0b000110010;
+ let Inst{11-5} = imm;
+ }
+
+// System instructions taking a single literal operand which encodes into
+// CRm. op2 differentiates the opcodes.
+def BarrierAsmOperand : AsmOperandClass {
+ let Name = "Barrier";
+ let ParserMethod = "tryParseBarrierOperand";
+}
+def barrier_op : Operand<i32> {
+ let PrintMethod = "printBarrierOption";
+ let ParserMatchClass = BarrierAsmOperand;
+}
+class CRmSystemI<Operand crmtype, bits<3> opc, string asm,
+ list<dag> pattern = []>
+ : SimpleSystemI<0, (ins crmtype:$CRm), asm, "\t$CRm", pattern>,
+ Sched<[WriteBarrier]> {
+ bits<4> CRm;
+ let Inst{20-12} = 0b000110011;
+ let Inst{11-8} = CRm;
+ let Inst{7-5} = opc;
+}
+
+class SystemNoOperands<bits<3> op2, string asm, list<dag> pattern = []>
+ : SimpleSystemI<0, (ins), asm, "", pattern>,
+ Sched<[]> {
+ bits<4> CRm;
+ let CRm = 0b0011;
+ let Inst{31-12} = 0b11010101000000110010;
+ let Inst{11-8} = CRm;
+ let Inst{7-5} = op2;
+ let Inst{4-0} = 0b11111;
+}
+
+// MRS/MSR system instructions. These have different operand classes because
+// a different subset of registers can be accessed through each instruction.
+def MRSSystemRegisterOperand : AsmOperandClass {
+ let Name = "MRSSystemRegister";
+ let ParserMethod = "tryParseSysReg";
+ let DiagnosticType = "MRS";
+}
+// concatenation of op0, op1, CRn, CRm, op2. 16-bit immediate.
+def mrs_sysreg_op : Operand<i32> {
+ let ParserMatchClass = MRSSystemRegisterOperand;
+ let DecoderMethod = "DecodeMRSSystemRegister";
+ let PrintMethod = "printMRSSystemRegister";
+}
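+
+// Worked example: TPIDR_EL0 is op0=3, op1=3, CRn=13, CRm=0, op2=2, so the
+// 16-bit systemreg field is 0b1101111010000010 (0xde82) and
+// mrs x0, tpidr_el0 assembles to 0xd53bd040.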
+
+def MSRSystemRegisterOperand : AsmOperandClass {
+ let Name = "MSRSystemRegister";
+ let ParserMethod = "tryParseSysReg";
+ let DiagnosticType = "MSR";
+}
+def msr_sysreg_op : Operand<i32> {
+ let ParserMatchClass = MSRSystemRegisterOperand;
+ let DecoderMethod = "DecodeMSRSystemRegister";
+ let PrintMethod = "printMSRSystemRegister";
+}
+
+def PSBHintOperand : AsmOperandClass {
+ let Name = "PSBHint";
+ let ParserMethod = "tryParsePSBHint";
+}
+def psbhint_op : Operand<i32> {
+ let ParserMatchClass = PSBHintOperand;
+ let PrintMethod = "printPSBHintOp";
+ let MCOperandPredicate = [{
+    // Check if the operand is valid, to fix exhaustive aliasing in disassembly.
+ // "psb" is an alias to "hint" only for certain values of CRm:Op2 fields.
+ if (!MCOp.isImm())
+ return false;
+ return AArch64PSBHint::lookupPSBByEncoding(MCOp.getImm()) != nullptr;
+ }];
+}
+
+class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins mrs_sysreg_op:$systemreg),
+ "mrs", "\t$Rt, $systemreg"> {
+ bits<16> systemreg;
+ let Inst{20-5} = systemreg;
+}
+
+// FIXME: Some of these def NZCV, others don't. Best way to model that?
+// Explicitly modeling each of the system registers as a register class
+// would do it, but feels like overkill at this point.
+class MSRI : RtSystemI<0, (outs), (ins msr_sysreg_op:$systemreg, GPR64:$Rt),
+ "msr", "\t$systemreg, $Rt"> {
+ bits<16> systemreg;
+ let Inst{20-5} = systemreg;
+}
+
+def SystemPStateFieldWithImm0_15Operand : AsmOperandClass {
+ let Name = "SystemPStateFieldWithImm0_15";
+ let ParserMethod = "tryParseSysReg";
+}
+def pstatefield4_op : Operand<i32> {
+ let ParserMatchClass = SystemPStateFieldWithImm0_15Operand;
+ let PrintMethod = "printSystemPStateField";
+}
+
+let Defs = [NZCV] in
+class MSRpstateImm0_15
+ : SimpleSystemI<0, (ins pstatefield4_op:$pstatefield, imm0_15:$imm),
+ "msr", "\t$pstatefield, $imm">,
+ Sched<[WriteSys]> {
+ bits<6> pstatefield;
+ bits<4> imm;
+ let Inst{20-19} = 0b00;
+ let Inst{18-16} = pstatefield{5-3};
+ let Inst{15-12} = 0b0100;
+ let Inst{11-8} = imm;
+ let Inst{7-5} = pstatefield{2-0};
+
+ let DecoderMethod = "DecodeSystemPStateInstruction";
+ // MSRpstateI aliases with MSRI. When the MSRpstateI decoder method returns
+  // Fail, the decoder should attempt to decode the instruction as MSRI.
+ let hasCompleteDecoder = 0;
+}
+
+def SystemPStateFieldWithImm0_1Operand : AsmOperandClass {
+ let Name = "SystemPStateFieldWithImm0_1";
+ let ParserMethod = "tryParseSysReg";
+}
+def pstatefield1_op : Operand<i32> {
+ let ParserMatchClass = SystemPStateFieldWithImm0_1Operand;
+ let PrintMethod = "printSystemPStateField";
+}
+
+let Defs = [NZCV] in
+class MSRpstateImm0_1
+ : SimpleSystemI<0, (ins pstatefield1_op:$pstatefield, imm0_1:$imm),
+ "msr", "\t$pstatefield, $imm">,
+ Sched<[WriteSys]> {
+ bits<6> pstatefield;
+ bit imm;
+ let Inst{20-19} = 0b00;
+ let Inst{18-16} = pstatefield{5-3};
+ let Inst{15-9} = 0b0100000;
+ let Inst{8} = imm;
+ let Inst{7-5} = pstatefield{2-0};
+
+ let DecoderMethod = "DecodeSystemPStateInstruction";
+ // MSRpstateI aliases with MSRI. When the MSRpstateI decoder method returns
+  // Fail, the decoder should attempt to decode the instruction as MSRI.
+ let hasCompleteDecoder = 0;
+}
+
+// SYS and SYSL generic system instructions.
+def SysCRAsmOperand : AsmOperandClass {
+ let Name = "SysCR";
+ let ParserMethod = "tryParseSysCROperand";
+}
+
+def sys_cr_op : Operand<i32> {
+ let PrintMethod = "printSysCROperand";
+ let ParserMatchClass = SysCRAsmOperand;
+}
+
+class SystemXtI<bit L, string asm>
+ : RtSystemI<L, (outs),
+ (ins imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, GPR64:$Rt),
+ asm, "\t$op1, $Cn, $Cm, $op2, $Rt"> {
+ bits<3> op1;
+ bits<4> Cn;
+ bits<4> Cm;
+ bits<3> op2;
+ let Inst{20-19} = 0b01;
+ let Inst{18-16} = op1;
+ let Inst{15-12} = Cn;
+ let Inst{11-8} = Cm;
+ let Inst{7-5} = op2;
+}
+
+class SystemLXtI<bit L, string asm>
+ : RtSystemI<L, (outs),
+ (ins GPR64:$Rt, imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2),
+ asm, "\t$Rt, $op1, $Cn, $Cm, $op2"> {
+ bits<3> op1;
+ bits<4> Cn;
+ bits<4> Cm;
+ bits<3> op2;
+ let Inst{20-19} = 0b01;
+ let Inst{18-16} = op1;
+ let Inst{15-12} = Cn;
+ let Inst{11-8} = Cm;
+ let Inst{7-5} = op2;
+}
+
+
+// Branch (register) instructions:
+//
+// case opc of
+// 0001 blr
+// 0000 br
+// 0101 dret
+// 0100 eret
+// 0010 ret
+// otherwise UNDEFINED
+class BaseBranchReg<bits<4> opc, dag oops, dag iops, string asm,
+ string operands, list<dag> pattern>
+ : I<oops, iops, asm, operands, "", pattern>, Sched<[WriteBrReg]> {
+ let Inst{31-25} = 0b1101011;
+ let Inst{24-21} = opc;
+ let Inst{20-16} = 0b11111;
+ let Inst{15-10} = 0b000000;
+ let Inst{4-0} = 0b00000;
+}
+
+class BranchReg<bits<4> opc, string asm, list<dag> pattern>
+ : BaseBranchReg<opc, (outs), (ins GPR64:$Rn), asm, "\t$Rn", pattern> {
+ bits<5> Rn;
+ let Inst{9-5} = Rn;
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 1, isReturn = 1 in
+class SpecialReturn<bits<4> opc, string asm>
+ : BaseBranchReg<opc, (outs), (ins), asm, "", []> {
+ let Inst{9-5} = 0b11111;
+}
+
+let mayLoad = 1 in
+class RCPCLoad<bits<2> sz, string asm, RegisterClass RC>
+ : I<(outs RC:$Rt), (ins GPR64sp0:$Rn), asm, "\t$Rt, [$Rn]", "", []>,
+ Sched<[]> {
+ bits<5> Rn;
+ bits<5> Rt;
+ let Inst{31-30} = sz;
+ let Inst{29-10} = 0b11100010111111110000;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+}
+
+class AuthBase<bits<1> M, dag oops, dag iops, string asm, string operands,
+ list<dag> pattern>
+ : I<oops, iops, asm, operands, "", pattern>, Sched<[]> {
+ let Inst{31-25} = 0b1101011;
+ let Inst{20-11} = 0b1111100001;
+ let Inst{10} = M;
+ let Inst{4-0} = 0b11111;
+}
+
+class AuthBranchTwoOperands<bits<1> op, bits<1> M, string asm>
+ : AuthBase<M, (outs), (ins GPR64:$Rn, GPR64sp:$Rm), asm, "\t$Rn, $Rm", []> {
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{24-22} = 0b100;
+ let Inst{21} = op;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rm;
+}
+
+class AuthOneOperand<bits<3> opc, bits<1> M, string asm>
+ : AuthBase<M, (outs), (ins GPR64:$Rn), asm, "\t$Rn", []> {
+ bits<5> Rn;
+ let Inst{24} = 0;
+ let Inst{23-21} = opc;
+ let Inst{9-5} = Rn;
+}
+
+class AuthReturn<bits<3> op, bits<1> M, string asm>
+ : AuthBase<M, (outs), (ins), asm, "", []> {
+ let Inst{24} = 0;
+ let Inst{23-21} = op;
+ let Inst{9-0} = 0b1111111111;
+}
+
+let mayLoad = 1 in
+class BaseAuthLoad<bit M, bit W, dag oops, dag iops, string asm,
+ string operands, string cstr, Operand opr>
+ : I<oops, iops, asm, operands, cstr, []>, Sched<[]> {
+ bits<10> offset;
+ bits<5> Rn;
+ bits<5> Rt;
+ let Inst{31-24} = 0b11111000;
+ let Inst{23} = M;
+ let Inst{22} = offset{9};
+ let Inst{21} = 1;
+ let Inst{20-12} = offset{8-0};
+ let Inst{11} = W;
+ let Inst{10} = 1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+}
+
+multiclass AuthLoad<bit M, string asm, Operand opr> {
+ def indexed : BaseAuthLoad<M, 0, (outs GPR64:$Rt),
+ (ins GPR64sp:$Rn, opr:$offset),
+ asm, "\t$Rt, [$Rn, $offset]", "", opr>;
+ def writeback : BaseAuthLoad<M, 1, (outs GPR64sp:$wback, GPR64:$Rt),
+ (ins GPR64sp:$Rn, opr:$offset),
+ asm, "\t$Rt, [$Rn, $offset]!",
+ "$Rn = $wback,@earlyclobber $wback", opr>;
+
+ def : InstAlias<asm # "\t$Rt, [$Rn]",
+ (!cast<Instruction>(NAME # "indexed") GPR64:$Rt, GPR64sp:$Rn, 0)>;
+}
+
+//---
+// Conditional branch instruction.
+//---
+
+// Condition code.
+// 4-bit immediate. Pretty-printed as <cc>
+def ccode : Operand<i32> {
+ let PrintMethod = "printCondCode";
+ let ParserMatchClass = CondCode;
+}
+def inv_ccode : Operand<i32> {
+ // AL and NV are invalid in the aliases which use inv_ccode
+ let PrintMethod = "printInverseCondCode";
+ let ParserMatchClass = CondCode;
+ let MCOperandPredicate = [{
+ return MCOp.isImm() &&
+ MCOp.getImm() != AArch64CC::AL &&
+ MCOp.getImm() != AArch64CC::NV;
+ }];
+}
+
+// Conditional branch target. 19-bit immediate. The low two bits of the target
+// offset are implied zero and so are not part of the immediate.
+def am_brcond : Operand<OtherVT> {
+ let EncoderMethod = "getCondBranchTargetOpValue";
+ let DecoderMethod = "DecodePCRelLabel19";
+ let PrintMethod = "printAlignedLabel";
+ let ParserMatchClass = PCRelLabel19Operand;
+ let OperandType = "OPERAND_PCREL";
+}
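+
+// With the low two bits implied zero, the 19-bit field covers byte offsets of
+// roughly +/-1MiB from the branch.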
+
+class BranchCond : I<(outs), (ins ccode:$cond, am_brcond:$target),
+ "b", ".$cond\t$target", "",
+ [(AArch64brcond bb:$target, imm:$cond, NZCV)]>,
+ Sched<[WriteBr]> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let Uses = [NZCV];
+
+ bits<4> cond;
+ bits<19> target;
+ let Inst{31-24} = 0b01010100;
+ let Inst{23-5} = target;
+ let Inst{4} = 0;
+ let Inst{3-0} = cond;
+}
+
+//---
+// Compare-and-branch instructions.
+//---
+class BaseCmpBranch<RegisterClass regtype, bit op, string asm, SDNode node>
+ : I<(outs), (ins regtype:$Rt, am_brcond:$target),
+ asm, "\t$Rt, $target", "",
+ [(node regtype:$Rt, bb:$target)]>,
+ Sched<[WriteBr]> {
+ let isBranch = 1;
+ let isTerminator = 1;
+
+ bits<5> Rt;
+ bits<19> target;
+ let Inst{30-25} = 0b011010;
+ let Inst{24} = op;
+ let Inst{23-5} = target;
+ let Inst{4-0} = Rt;
+}
+
+multiclass CmpBranch<bit op, string asm, SDNode node> {
+ def W : BaseCmpBranch<GPR32, op, asm, node> {
+ let Inst{31} = 0;
+ }
+ def X : BaseCmpBranch<GPR64, op, asm, node> {
+ let Inst{31} = 1;
+ }
+}
+
+//---
+// Test-bit-and-branch instructions.
+//---
+// Test-and-branch target. 14-bit sign-extended immediate. The low two bits of
+// the target offset are implied zero and so are not part of the immediate.
+def am_tbrcond : Operand<OtherVT> {
+ let EncoderMethod = "getTestBranchTargetOpValue";
+ let PrintMethod = "printAlignedLabel";
+ let ParserMatchClass = BranchTarget14Operand;
+ let OperandType = "OPERAND_PCREL";
+}
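+
+// 14 bits with two implied zero bits gives TBZ/TBNZ a range of roughly
+// +/-32KiB.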
+
+// AsmOperand classes to emit (or not) special diagnostics
+def TBZImm0_31Operand : AsmOperandClass {
+ let Name = "TBZImm0_31";
+ let PredicateMethod = "isImmInRange<0,31>";
+ let RenderMethod = "addImmOperands";
+}
+def TBZImm32_63Operand : AsmOperandClass {
+ let Name = "Imm32_63";
+ let PredicateMethod = "isImmInRange<32,63>";
+ let DiagnosticType = "InvalidImm0_63";
+ let RenderMethod = "addImmOperands";
+}
+
+class tbz_imm0_31<AsmOperandClass matcher> : Operand<i64>, ImmLeaf<i64, [{
+ return (((uint32_t)Imm) < 32);
+}]> {
+ let ParserMatchClass = matcher;
+}
+
+def tbz_imm0_31_diag : tbz_imm0_31<Imm0_31Operand>;
+def tbz_imm0_31_nodiag : tbz_imm0_31<TBZImm0_31Operand>;
+
+def tbz_imm32_63 : Operand<i64>, ImmLeaf<i64, [{
+ return (((uint32_t)Imm) > 31) && (((uint32_t)Imm) < 64);
+}]> {
+ let ParserMatchClass = TBZImm32_63Operand;
+}
+
+class BaseTestBranch<RegisterClass regtype, Operand immtype,
+ bit op, string asm, SDNode node>
+ : I<(outs), (ins regtype:$Rt, immtype:$bit_off, am_tbrcond:$target),
+ asm, "\t$Rt, $bit_off, $target", "",
+ [(node regtype:$Rt, immtype:$bit_off, bb:$target)]>,
+ Sched<[WriteBr]> {
+ let isBranch = 1;
+ let isTerminator = 1;
+
+ bits<5> Rt;
+ bits<6> bit_off;
+ bits<14> target;
+
+ let Inst{30-25} = 0b011011;
+ let Inst{24} = op;
+ let Inst{23-19} = bit_off{4-0};
+ let Inst{18-5} = target;
+ let Inst{4-0} = Rt;
+
+ let DecoderMethod = "DecodeTestAndBranch";
+}
+
+multiclass TestBranch<bit op, string asm, SDNode node> {
+ def W : BaseTestBranch<GPR32, tbz_imm0_31_diag, op, asm, node> {
+ let Inst{31} = 0;
+ }
+
+ def X : BaseTestBranch<GPR64, tbz_imm32_63, op, asm, node> {
+ let Inst{31} = 1;
+ }
+
+ // Alias X-reg with 0-31 imm to W-Reg.
+ def : InstAlias<asm # "\t$Rd, $imm, $target",
+ (!cast<Instruction>(NAME#"W") GPR32as64:$Rd,
+ tbz_imm0_31_nodiag:$imm, am_tbrcond:$target), 0>;
+ def : Pat<(node GPR64:$Rn, tbz_imm0_31_diag:$imm, bb:$target),
+ (!cast<Instruction>(NAME#"W") (EXTRACT_SUBREG GPR64:$Rn, sub_32),
+ tbz_imm0_31_diag:$imm, bb:$target)>;
+}
+
+//---
+// Unconditional branch (immediate) instructions.
+//---
+def am_b_target : Operand<OtherVT> {
+ let EncoderMethod = "getBranchTargetOpValue";
+ let PrintMethod = "printAlignedLabel";
+ let ParserMatchClass = BranchTarget26Operand;
+ let OperandType = "OPERAND_PCREL";
+}
+def am_bl_target : Operand<i64> {
+ let EncoderMethod = "getBranchTargetOpValue";
+ let PrintMethod = "printAlignedLabel";
+ let ParserMatchClass = BranchTarget26Operand;
+ let OperandType = "OPERAND_PCREL";
+}
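+
+// The 26-bit field (again with two implied zero bits) gives B and BL a range
+// of roughly +/-128MiB.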
+
+class BImm<bit op, dag iops, string asm, list<dag> pattern>
+ : I<(outs), iops, asm, "\t$addr", "", pattern>, Sched<[WriteBr]> {
+ bits<26> addr;
+ let Inst{31} = op;
+ let Inst{30-26} = 0b00101;
+ let Inst{25-0} = addr;
+
+ let DecoderMethod = "DecodeUnconditionalBranch";
+}
+
+class BranchImm<bit op, string asm, list<dag> pattern>
+ : BImm<op, (ins am_b_target:$addr), asm, pattern>;
+class CallImm<bit op, string asm, list<dag> pattern>
+ : BImm<op, (ins am_bl_target:$addr), asm, pattern>;
+
+//---
+// Basic one-operand data processing instructions.
+//---
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseOneOperandData<bits<3> opc, RegisterClass regtype, string asm,
+ SDPatternOperator node>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "\t$Rd, $Rn", "",
+ [(set regtype:$Rd, (node regtype:$Rn))]>,
+ Sched<[WriteI, ReadI]> {
+ bits<5> Rd;
+ bits<5> Rn;
+
+ let Inst{30-13} = 0b101101011000000000;
+ let Inst{12-10} = opc;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+multiclass OneOperandData<bits<3> opc, string asm,
+ SDPatternOperator node = null_frag> {
+ def Wr : BaseOneOperandData<opc, GPR32, asm, node> {
+ let Inst{31} = 0;
+ }
+
+ def Xr : BaseOneOperandData<opc, GPR64, asm, node> {
+ let Inst{31} = 1;
+ }
+}
+
+class OneWRegData<bits<3> opc, string asm, SDPatternOperator node>
+ : BaseOneOperandData<opc, GPR32, asm, node> {
+ let Inst{31} = 0;
+}
+
+class OneXRegData<bits<3> opc, string asm, SDPatternOperator node>
+ : BaseOneOperandData<opc, GPR64, asm, node> {
+ let Inst{31} = 1;
+}
+
+class SignAuthOneData<bits<3> opcode_prefix, bits<2> opcode, string asm>
+ : I<(outs GPR64:$Rd), (ins GPR64sp:$Rn), asm, "\t$Rd, $Rn", "",
+ []>,
+ Sched<[WriteI, ReadI]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31-15} = 0b11011010110000010;
+ let Inst{14-12} = opcode_prefix;
+ let Inst{11-10} = opcode;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+class SignAuthZero<bits<3> opcode_prefix, bits<2> opcode, string asm>
+ : I<(outs GPR64:$Rd), (ins), asm, "\t$Rd", "", []>, Sched<[]> {
+ bits<5> Rd;
+ let Inst{31-15} = 0b11011010110000010;
+ let Inst{14-12} = opcode_prefix;
+ let Inst{11-10} = opcode;
+ let Inst{9-5} = 0b11111;
+ let Inst{4-0} = Rd;
+}
+
+class SignAuthTwoOperand<bits<4> opc, string asm,
+ SDPatternOperator OpNode>
+ : I<(outs GPR64:$Rd), (ins GPR64:$Rn, GPR64sp:$Rm),
+ asm, "\t$Rd, $Rn, $Rm", "",
+ [(set GPR64:$Rd, (OpNode GPR64:$Rn, GPR64sp:$Rm))]>,
+ Sched<[WriteI, ReadI, ReadI]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31-21} = 0b10011010110;
+ let Inst{20-16} = Rm;
+ let Inst{15-14} = 0b00;
+ let Inst{13-10} = opc;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+// Base class for the Armv8.4-A 8 and 16-bit flag manipulation instructions
+class BaseFlagManipulation<bit sf, bit sz, dag iops, string asm, string ops>
+ : I<(outs), iops, asm, ops, "", []>,
+ Sched<[WriteI, ReadI, ReadI]> {
+ let Uses = [NZCV];
+ bits<5> Rn;
+ let Inst{31} = sf;
+ let Inst{30-15} = 0b0111010000000000;
+ let Inst{14} = sz;
+ let Inst{13-10} = 0b0010;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = 0b01101;
+}
+
+class FlagRotate<dag iops, string asm, string ops>
+ : BaseFlagManipulation<0b1, 0b0, iops, asm, ops> {
+ bits<6> imm;
+ bits<4> mask;
+ let Inst{20-15} = imm;
+ let Inst{13-10} = 0b0001;
+ let Inst{4} = 0b0;
+ let Inst{3-0} = mask;
+}
+
+//---
+// Basic two-operand data processing instructions.
+//---
+class BaseBaseAddSubCarry<bit isSub, RegisterClass regtype, string asm,
+ list<dag> pattern>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm),
+ asm, "\t$Rd, $Rn, $Rm", "", pattern>,
+ Sched<[WriteI, ReadI, ReadI]> {
+ let Uses = [NZCV];
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{30} = isSub;
+ let Inst{28-21} = 0b11010000;
+ let Inst{20-16} = Rm;
+ let Inst{15-10} = 0;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+class BaseAddSubCarry<bit isSub, RegisterClass regtype, string asm,
+ SDNode OpNode>
+ : BaseBaseAddSubCarry<isSub, regtype, asm,
+ [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm, NZCV))]>;
+
+class BaseAddSubCarrySetFlags<bit isSub, RegisterClass regtype, string asm,
+ SDNode OpNode>
+ : BaseBaseAddSubCarry<isSub, regtype, asm,
+ [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm, NZCV)),
+ (implicit NZCV)]> {
+ let Defs = [NZCV];
+}
+
+multiclass AddSubCarry<bit isSub, string asm, string asm_setflags,
+ SDNode OpNode, SDNode OpNode_setflags> {
+ def Wr : BaseAddSubCarry<isSub, GPR32, asm, OpNode> {
+ let Inst{31} = 0;
+ let Inst{29} = 0;
+ }
+ def Xr : BaseAddSubCarry<isSub, GPR64, asm, OpNode> {
+ let Inst{31} = 1;
+ let Inst{29} = 0;
+ }
+
+ // Sets flags.
+ def SWr : BaseAddSubCarrySetFlags<isSub, GPR32, asm_setflags,
+ OpNode_setflags> {
+ let Inst{31} = 0;
+ let Inst{29} = 1;
+ }
+ def SXr : BaseAddSubCarrySetFlags<isSub, GPR64, asm_setflags,
+ OpNode_setflags> {
+ let Inst{31} = 1;
+ let Inst{29} = 1;
+ }
+}
+
+class BaseTwoOperand<bits<4> opc, RegisterClass regtype, string asm,
+ SDPatternOperator OpNode>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm),
+ asm, "\t$Rd, $Rn, $Rm", "",
+ [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{30-21} = 0b0011010110;
+ let Inst{20-16} = Rm;
+ let Inst{15-14} = 0b00;
+ let Inst{13-10} = opc;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+class BaseDiv<bit isSigned, RegisterClass regtype, string asm,
+ SDPatternOperator OpNode>
+ : BaseTwoOperand<{0,0,1,?}, regtype, asm, OpNode> {
+ let Inst{10} = isSigned;
+}
+
+multiclass Div<bit isSigned, string asm, SDPatternOperator OpNode> {
+ def Wr : BaseDiv<isSigned, GPR32, asm, OpNode>,
+ Sched<[WriteID32, ReadID, ReadID]> {
+ let Inst{31} = 0;
+ }
+ def Xr : BaseDiv<isSigned, GPR64, asm, OpNode>,
+ Sched<[WriteID64, ReadID, ReadID]> {
+ let Inst{31} = 1;
+ }
+}
+
+class BaseShift<bits<2> shift_type, RegisterClass regtype, string asm,
+ SDPatternOperator OpNode = null_frag>
+ : BaseTwoOperand<{1,0,?,?}, regtype, asm, OpNode>,
+ Sched<[WriteIS, ReadI]> {
+ let Inst{11-10} = shift_type;
+}
+
+multiclass Shift<bits<2> shift_type, string asm, SDNode OpNode> {
+ def Wr : BaseShift<shift_type, GPR32, asm> {
+ let Inst{31} = 0;
+ }
+
+ def Xr : BaseShift<shift_type, GPR64, asm, OpNode> {
+ let Inst{31} = 1;
+ }
+
+ def : Pat<(i32 (OpNode GPR32:$Rn, i64:$Rm)),
+ (!cast<Instruction>(NAME # "Wr") GPR32:$Rn,
+ (EXTRACT_SUBREG i64:$Rm, sub_32))>;
+
+ def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (zext GPR32:$Rm)))),
+ (!cast<Instruction>(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>;
+
+ def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (anyext GPR32:$Rm)))),
+ (!cast<Instruction>(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>;
+
+ def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (sext GPR32:$Rm)))),
+ (!cast<Instruction>(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>;
+}
+
+class ShiftAlias<string asm, Instruction inst, RegisterClass regtype>
+ : InstAlias<asm#"\t$dst, $src1, $src2",
+ (inst regtype:$dst, regtype:$src1, regtype:$src2), 0>;
+
+class BaseMulAccum<bit isSub, bits<3> opc, RegisterClass multype,
+ RegisterClass addtype, string asm,
+ list<dag> pattern>
+ : I<(outs addtype:$Rd), (ins multype:$Rn, multype:$Rm, addtype:$Ra),
+ asm, "\t$Rd, $Rn, $Rm, $Ra", "", pattern> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<5> Ra;
+ let Inst{30-24} = 0b0011011;
+ let Inst{23-21} = opc;
+ let Inst{20-16} = Rm;
+ let Inst{15} = isSub;
+ let Inst{14-10} = Ra;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass MulAccum<bit isSub, string asm, SDNode AccNode> {
+ // MADD/MSUB generation is decided by MachineCombiner.cpp
+ def Wrrr : BaseMulAccum<isSub, 0b000, GPR32, GPR32, asm,
+ [/*(set GPR32:$Rd, (AccNode GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm)))*/]>,
+ Sched<[WriteIM32, ReadIM, ReadIM, ReadIMA]> {
+ let Inst{31} = 0;
+ }
+
+ def Xrrr : BaseMulAccum<isSub, 0b000, GPR64, GPR64, asm,
+ [/*(set GPR64:$Rd, (AccNode GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm)))*/]>,
+ Sched<[WriteIM64, ReadIM, ReadIM, ReadIMA]> {
+ let Inst{31} = 1;
+ }
+}
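+
+// Illustrative instantiation (assumed, matching the usual names in
+// AArch64InstrInfo.td):
+//   defm MADD : MulAccum<0, "madd", add>;
+//   defm MSUB : MulAccum<1, "msub", sub>;
+// The ISel patterns are intentionally commented out above; MachineCombiner
+// decides when to fuse a separate mul and add/sub into MADD/MSUB.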
+
+class WideMulAccum<bit isSub, bits<3> opc, string asm,
+ SDNode AccNode, SDNode ExtNode>
+ : BaseMulAccum<isSub, opc, GPR32, GPR64, asm,
+ [(set GPR64:$Rd, (AccNode GPR64:$Ra,
+ (mul (ExtNode GPR32:$Rn), (ExtNode GPR32:$Rm))))]>,
+ Sched<[WriteIM32, ReadIM, ReadIM, ReadIMA]> {
+ let Inst{31} = 1;
+}
+
+class MulHi<bits<3> opc, string asm, SDNode OpNode>
+ : I<(outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm),
+ asm, "\t$Rd, $Rn, $Rm", "",
+ [(set GPR64:$Rd, (OpNode GPR64:$Rn, GPR64:$Rm))]>,
+ Sched<[WriteIM64, ReadIM, ReadIM]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31-24} = 0b10011011;
+ let Inst{23-21} = opc;
+ let Inst{20-16} = Rm;
+ let Inst{15} = 0;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+
+ // The Ra field of SMULH and UMULH is unused: it should be assembled as 31
+ // (i.e. all bits 1) but is ignored by the processor.
+ let PostEncoderMethod = "fixMulHigh";
+}
+
+class MulAccumWAlias<string asm, Instruction inst>
+ : InstAlias<asm#"\t$dst, $src1, $src2",
+ (inst GPR32:$dst, GPR32:$src1, GPR32:$src2, WZR)>;
+class MulAccumXAlias<string asm, Instruction inst>
+ : InstAlias<asm#"\t$dst, $src1, $src2",
+ (inst GPR64:$dst, GPR64:$src1, GPR64:$src2, XZR)>;
+class WideMulAccumAlias<string asm, Instruction inst>
+ : InstAlias<asm#"\t$dst, $src1, $src2",
+ (inst GPR64:$dst, GPR32:$src1, GPR32:$src2, XZR)>;
+
+class BaseCRC32<bit sf, bits<2> sz, bit C, RegisterClass StreamReg,
+ SDPatternOperator OpNode, string asm>
+ : I<(outs GPR32:$Rd), (ins GPR32:$Rn, StreamReg:$Rm),
+ asm, "\t$Rd, $Rn, $Rm", "",
+ [(set GPR32:$Rd, (OpNode GPR32:$Rn, StreamReg:$Rm))]>,
+ Sched<[WriteISReg, ReadI, ReadISReg]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+
+ let Inst{31} = sf;
+ let Inst{30-21} = 0b0011010110;
+ let Inst{20-16} = Rm;
+ let Inst{15-13} = 0b010;
+ let Inst{12} = C;
+ let Inst{11-10} = sz;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+ let Predicates = [HasCRC];
+}
+
+//---
+// Address generation.
+//---
+
+class ADRI<bit page, string asm, Operand adr, list<dag> pattern>
+ : I<(outs GPR64:$Xd), (ins adr:$label), asm, "\t$Xd, $label", "",
+ pattern>,
+ Sched<[WriteI]> {
+ bits<5> Xd;
+ bits<21> label;
+ let Inst{31} = page;
+ let Inst{30-29} = label{1-0};
+ let Inst{28-24} = 0b10000;
+ let Inst{23-5} = label{20-2};
+ let Inst{4-0} = Xd;
+
+ let DecoderMethod = "DecodeAdrInstruction";
+}
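+
+// Illustrative use (assumed): ADRI covers both PC-relative address forms,
+// roughly
+//   def ADR  : ADRI<0, "adr",  adrlabel,  [...]>;   // byte offset, +/-1MiB
+//   def ADRP : ADRI<1, "adrp", adrplabel, [...]>;   // 4KiB-page offset, +/-4GiB
+// with the page bit selecting Inst{31} and the 21-bit label split across
+// Inst{30-29} (low two bits) and Inst{23-5} (upper bits) as encoded above.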
+
+//---
+// Move immediate.
+//---
+
+def movimm32_imm : Operand<i32> {
+ let ParserMatchClass = Imm0_65535Operand;
+ let EncoderMethod = "getMoveWideImmOpValue";
+ let PrintMethod = "printImm";
+}
+def movimm32_shift : Operand<i32> {
+ let PrintMethod = "printShifter";
+ let ParserMatchClass = MovImm32ShifterOperand;
+}
+def movimm64_shift : Operand<i32> {
+ let PrintMethod = "printShifter";
+ let ParserMatchClass = MovImm64ShifterOperand;
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseMoveImmediate<bits<2> opc, RegisterClass regtype, Operand shifter,
+ string asm>
+ : I<(outs regtype:$Rd), (ins movimm32_imm:$imm, shifter:$shift),
+ asm, "\t$Rd, $imm$shift", "", []>,
+ Sched<[WriteImm]> {
+ bits<5> Rd;
+ bits<16> imm;
+ bits<6> shift;
+ let Inst{30-29} = opc;
+ let Inst{28-23} = 0b100101;
+ let Inst{22-21} = shift{5-4};
+ let Inst{20-5} = imm;
+ let Inst{4-0} = Rd;
+
+ let DecoderMethod = "DecodeMoveImmInstruction";
+}
+
+multiclass MoveImmediate<bits<2> opc, string asm> {
+ def Wi : BaseMoveImmediate<opc, GPR32, movimm32_shift, asm> {
+ let Inst{31} = 0;
+ }
+
+ def Xi : BaseMoveImmediate<opc, GPR64, movimm64_shift, asm> {
+ let Inst{31} = 1;
+ }
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseInsertImmediate<bits<2> opc, RegisterClass regtype, Operand shifter,
+ string asm>
+ : I<(outs regtype:$Rd),
+ (ins regtype:$src, movimm32_imm:$imm, shifter:$shift),
+ asm, "\t$Rd, $imm$shift", "$src = $Rd", []>,
+ Sched<[WriteI, ReadI]> {
+ bits<5> Rd;
+ bits<16> imm;
+ bits<6> shift;
+ let Inst{30-29} = opc;
+ let Inst{28-23} = 0b100101;
+ let Inst{22-21} = shift{5-4};
+ let Inst{20-5} = imm;
+ let Inst{4-0} = Rd;
+
+ let DecoderMethod = "DecodeMoveImmInstruction";
+}
+
+multiclass InsertImmediate<bits<2> opc, string asm> {
+ def Wi : BaseInsertImmediate<opc, GPR32, movimm32_shift, asm> {
+ let Inst{31} = 0;
+ }
+
+ def Xi : BaseInsertImmediate<opc, GPR64, movimm64_shift, asm> {
+ let Inst{31} = 1;
+ }
+}
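+
+// Illustrative use (assumed; the concrete defs are in AArch64InstrInfo.td):
+//   defm MOVN : MoveImmediate<0b00, "movn">;
+//   defm MOVZ : MoveImmediate<0b10, "movz">;
+//   defm MOVK : InsertImmediate<0b11, "movk">;
+// MoveImmediate writes the whole register, while InsertImmediate ties $src to
+// $Rd so MOVK only replaces the selected 16-bit chunk, e.g.
+//   movz x0, #0x1234, lsl #16
+//   movk x0, #0x5678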
+
+//---
+// Add/Subtract
+//---
+
+class BaseAddSubImm<bit isSub, bit setFlags, RegisterClass dstRegtype,
+ RegisterClass srcRegtype, addsub_shifted_imm immtype,
+ string asm, SDPatternOperator OpNode>
+ : I<(outs dstRegtype:$Rd), (ins srcRegtype:$Rn, immtype:$imm),
+ asm, "\t$Rd, $Rn, $imm", "",
+ [(set dstRegtype:$Rd, (OpNode srcRegtype:$Rn, immtype:$imm))]>,
+ Sched<[WriteI, ReadI]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<14> imm;
+ let Inst{30} = isSub;
+ let Inst{29} = setFlags;
+ let Inst{28-24} = 0b10001;
+ let Inst{23-22} = imm{13-12}; // '00' => lsl #0, '01' => lsl #12
+ let Inst{21-10} = imm{11-0};
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+ let DecoderMethod = "DecodeBaseAddSubImm";
+}
+
+class BaseAddSubRegPseudo<RegisterClass regtype,
+ SDPatternOperator OpNode>
+ : Pseudo<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm),
+ [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]>,
+ Sched<[WriteI, ReadI, ReadI]>;
+
+class BaseAddSubSReg<bit isSub, bit setFlags, RegisterClass regtype,
+ arith_shifted_reg shifted_regtype, string asm,
+ SDPatternOperator OpNode>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, shifted_regtype:$Rm),
+ asm, "\t$Rd, $Rn, $Rm", "",
+ [(set regtype:$Rd, (OpNode regtype:$Rn, shifted_regtype:$Rm))]>,
+ Sched<[WriteISReg, ReadI, ReadISReg]> {
+ // The operands are in order to match the 'addr' MI operands, so we
+ // don't need an encoder method and by-name matching. Just use the default
+ // in-order handling. Since we're using by-order, make sure the names
+ // do not match.
+ bits<5> dst;
+ bits<5> src1;
+ bits<5> src2;
+ bits<8> shift;
+ let Inst{30} = isSub;
+ let Inst{29} = setFlags;
+ let Inst{28-24} = 0b01011;
+ let Inst{23-22} = shift{7-6};
+ let Inst{21} = 0;
+ let Inst{20-16} = src2;
+ let Inst{15-10} = shift{5-0};
+ let Inst{9-5} = src1;
+ let Inst{4-0} = dst;
+
+ let DecoderMethod = "DecodeThreeAddrSRegInstruction";
+}
+
+class BaseAddSubEReg<bit isSub, bit setFlags, RegisterClass dstRegtype,
+ RegisterClass src1Regtype, Operand src2Regtype,
+ string asm, SDPatternOperator OpNode>
+ : I<(outs dstRegtype:$R1),
+ (ins src1Regtype:$R2, src2Regtype:$R3),
+ asm, "\t$R1, $R2, $R3", "",
+ [(set dstRegtype:$R1, (OpNode src1Regtype:$R2, src2Regtype:$R3))]>,
+ Sched<[WriteIEReg, ReadI, ReadIEReg]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<6> ext;
+ let Inst{30} = isSub;
+ let Inst{29} = setFlags;
+ let Inst{28-24} = 0b01011;
+ let Inst{23-21} = 0b001;
+ let Inst{20-16} = Rm;
+ let Inst{15-13} = ext{5-3};
+ let Inst{12-10} = ext{2-0};
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+
+ let DecoderMethod = "DecodeAddSubERegInstruction";
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseAddSubEReg64<bit isSub, bit setFlags, RegisterClass dstRegtype,
+ RegisterClass src1Regtype, RegisterClass src2Regtype,
+ Operand ext_op, string asm>
+ : I<(outs dstRegtype:$Rd),
+ (ins src1Regtype:$Rn, src2Regtype:$Rm, ext_op:$ext),
+ asm, "\t$Rd, $Rn, $Rm$ext", "", []>,
+ Sched<[WriteIEReg, ReadI, ReadIEReg]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<6> ext;
+ let Inst{30} = isSub;
+ let Inst{29} = setFlags;
+ let Inst{28-24} = 0b01011;
+ let Inst{23-21} = 0b001;
+ let Inst{20-16} = Rm;
+ let Inst{15} = ext{5};
+ let Inst{12-10} = ext{2-0};
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+
+ let DecoderMethod = "DecodeAddSubERegInstruction";
+}
+
+// Aliases for register+register add/subtract.
+class AddSubRegAlias<string asm, Instruction inst, RegisterClass dstRegtype,
+ RegisterClass src1Regtype, RegisterClass src2Regtype,
+ int shiftExt>
+ : InstAlias<asm#"\t$dst, $src1, $src2",
+ (inst dstRegtype:$dst, src1Regtype:$src1, src2Regtype:$src2,
+ shiftExt)>;
+
+multiclass AddSub<bit isSub, string mnemonic, string alias,
+ SDPatternOperator OpNode = null_frag> {
+ let hasSideEffects = 0, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+ // Add/Subtract immediate
+ // Increase the weight of the immediate variant to try to match it before
+ // the extended register variant.
+ // We used to match the register variant before the immediate when the
+ // register argument could be implicitly zero-extended.
+ let AddedComplexity = 6 in
+ def Wri : BaseAddSubImm<isSub, 0, GPR32sp, GPR32sp, addsub_shifted_imm32,
+ mnemonic, OpNode> {
+ let Inst{31} = 0;
+ }
+ let AddedComplexity = 6 in
+ def Xri : BaseAddSubImm<isSub, 0, GPR64sp, GPR64sp, addsub_shifted_imm64,
+ mnemonic, OpNode> {
+ let Inst{31} = 1;
+ }
+
+ // Add/Subtract register - Only used for CodeGen
+ def Wrr : BaseAddSubRegPseudo<GPR32, OpNode>;
+ def Xrr : BaseAddSubRegPseudo<GPR64, OpNode>;
+
+ // Add/Subtract shifted register
+ def Wrs : BaseAddSubSReg<isSub, 0, GPR32, arith_shifted_reg32, mnemonic,
+ OpNode> {
+ let Inst{31} = 0;
+ }
+ def Xrs : BaseAddSubSReg<isSub, 0, GPR64, arith_shifted_reg64, mnemonic,
+ OpNode> {
+ let Inst{31} = 1;
+ }
+ }
+
+ // Add/Subtract extended register
+ let AddedComplexity = 1, hasSideEffects = 0 in {
+ def Wrx : BaseAddSubEReg<isSub, 0, GPR32sp, GPR32sp,
+ arith_extended_reg32<i32>, mnemonic, OpNode> {
+ let Inst{31} = 0;
+ }
+ def Xrx : BaseAddSubEReg<isSub, 0, GPR64sp, GPR64sp,
+ arith_extended_reg32to64<i64>, mnemonic, OpNode> {
+ let Inst{31} = 1;
+ }
+ }
+
+ def Xrx64 : BaseAddSubEReg64<isSub, 0, GPR64sp, GPR64sp, GPR64,
+ arith_extendlsl64, mnemonic> {
+ // UXTX and SXTX only.
+ let Inst{14-13} = 0b11;
+ let Inst{31} = 1;
+ }
+
+  // add Rd, Rn, -imm -> sub Rd, Rn, imm
+ def : InstSubst<alias#"\t$Rd, $Rn, $imm",
+ (!cast<Instruction>(NAME # "Wri") GPR32sp:$Rd, GPR32sp:$Rn,
+ addsub_shifted_imm32_neg:$imm), 0>;
+ def : InstSubst<alias#"\t$Rd, $Rn, $imm",
+ (!cast<Instruction>(NAME # "Xri") GPR64sp:$Rd, GPR64sp:$Rn,
+ addsub_shifted_imm64_neg:$imm), 0>;
+
+ // Register/register aliases with no shift when SP is not used.
+ def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrs"),
+ GPR32, GPR32, GPR32, 0>;
+ def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Xrs"),
+ GPR64, GPR64, GPR64, 0>;
+
+ // Register/register aliases with no shift when either the destination or
+ // first source register is SP.
+ def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrx"),
+ GPR32sponly, GPR32sp, GPR32, 16>; // UXTW #0
+ def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrx"),
+ GPR32sp, GPR32sponly, GPR32, 16>; // UXTW #0
+ def : AddSubRegAlias<mnemonic,
+ !cast<Instruction>(NAME#"Xrx64"),
+ GPR64sponly, GPR64sp, GPR64, 24>; // UXTX #0
+ def : AddSubRegAlias<mnemonic,
+ !cast<Instruction>(NAME#"Xrx64"),
+ GPR64sp, GPR64sponly, GPR64, 24>; // UXTX #0
+}
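+
+// Illustrative use (assumed): the base add/subtract family is expected to be
+// instantiated as
+//   defm ADD : AddSub<0, "add", "sub", add>;
+//   defm SUB : AddSub<1, "sub", "add">;
+// The alias string is the opposite mnemonic so a negative immediate can be
+// rewritten, e.g. "add w0, w1, #-8" is accepted and encoded as
+// "sub w0, w1, #8" via the InstSubst defs above.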
+
+multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp,
+ string alias, string cmpAlias> {
+ let isCompare = 1, Defs = [NZCV] in {
+ // Add/Subtract immediate
+ def Wri : BaseAddSubImm<isSub, 1, GPR32, GPR32sp, addsub_shifted_imm32,
+ mnemonic, OpNode> {
+ let Inst{31} = 0;
+ }
+ def Xri : BaseAddSubImm<isSub, 1, GPR64, GPR64sp, addsub_shifted_imm64,
+ mnemonic, OpNode> {
+ let Inst{31} = 1;
+ }
+
+ // Add/Subtract register
+ def Wrr : BaseAddSubRegPseudo<GPR32, OpNode>;
+ def Xrr : BaseAddSubRegPseudo<GPR64, OpNode>;
+
+ // Add/Subtract shifted register
+ def Wrs : BaseAddSubSReg<isSub, 1, GPR32, arith_shifted_reg32, mnemonic,
+ OpNode> {
+ let Inst{31} = 0;
+ }
+ def Xrs : BaseAddSubSReg<isSub, 1, GPR64, arith_shifted_reg64, mnemonic,
+ OpNode> {
+ let Inst{31} = 1;
+ }
+
+ // Add/Subtract extended register
+ let AddedComplexity = 1 in {
+ def Wrx : BaseAddSubEReg<isSub, 1, GPR32, GPR32sp,
+ arith_extended_reg32<i32>, mnemonic, OpNode> {
+ let Inst{31} = 0;
+ }
+ def Xrx : BaseAddSubEReg<isSub, 1, GPR64, GPR64sp,
+ arith_extended_reg32<i64>, mnemonic, OpNode> {
+ let Inst{31} = 1;
+ }
+ }
+
+ def Xrx64 : BaseAddSubEReg64<isSub, 1, GPR64, GPR64sp, GPR64,
+ arith_extendlsl64, mnemonic> {
+ // UXTX and SXTX only.
+ let Inst{14-13} = 0b11;
+ let Inst{31} = 1;
+ }
+ } // Defs = [NZCV]
+
+ // Support negative immediates, e.g. adds Rd, Rn, -imm -> subs Rd, Rn, imm
+ def : InstSubst<alias#"\t$Rd, $Rn, $imm",
+ (!cast<Instruction>(NAME # "Wri") GPR32:$Rd, GPR32sp:$Rn,
+ addsub_shifted_imm32_neg:$imm), 0>;
+ def : InstSubst<alias#"\t$Rd, $Rn, $imm",
+ (!cast<Instruction>(NAME # "Xri") GPR64:$Rd, GPR64sp:$Rn,
+ addsub_shifted_imm64_neg:$imm), 0>;
+
+ // Compare aliases
+ def : InstAlias<cmp#"\t$src, $imm", (!cast<Instruction>(NAME#"Wri")
+ WZR, GPR32sp:$src, addsub_shifted_imm32:$imm), 5>;
+ def : InstAlias<cmp#"\t$src, $imm", (!cast<Instruction>(NAME#"Xri")
+ XZR, GPR64sp:$src, addsub_shifted_imm64:$imm), 5>;
+ def : InstAlias<cmp#"\t$src1, $src2$sh", (!cast<Instruction>(NAME#"Wrx")
+ WZR, GPR32sp:$src1, GPR32:$src2, arith_extend:$sh), 4>;
+ def : InstAlias<cmp#"\t$src1, $src2$sh", (!cast<Instruction>(NAME#"Xrx")
+ XZR, GPR64sp:$src1, GPR32:$src2, arith_extend:$sh), 4>;
+ def : InstAlias<cmp#"\t$src1, $src2$sh", (!cast<Instruction>(NAME#"Xrx64")
+ XZR, GPR64sp:$src1, GPR64:$src2, arith_extendlsl64:$sh), 4>;
+ def : InstAlias<cmp#"\t$src1, $src2$sh", (!cast<Instruction>(NAME#"Wrs")
+ WZR, GPR32:$src1, GPR32:$src2, arith_shift32:$sh), 4>;
+ def : InstAlias<cmp#"\t$src1, $src2$sh", (!cast<Instruction>(NAME#"Xrs")
+ XZR, GPR64:$src1, GPR64:$src2, arith_shift64:$sh), 4>;
+
+ // Support negative immediates, e.g. cmp Rn, -imm -> cmn Rn, imm
+ def : InstSubst<cmpAlias#"\t$src, $imm", (!cast<Instruction>(NAME#"Wri")
+ WZR, GPR32sp:$src, addsub_shifted_imm32_neg:$imm), 0>;
+ def : InstSubst<cmpAlias#"\t$src, $imm", (!cast<Instruction>(NAME#"Xri")
+ XZR, GPR64sp:$src, addsub_shifted_imm64_neg:$imm), 0>;
+
+ // Compare shorthands
+ def : InstAlias<cmp#"\t$src1, $src2", (!cast<Instruction>(NAME#"Wrs")
+ WZR, GPR32:$src1, GPR32:$src2, 0), 5>;
+ def : InstAlias<cmp#"\t$src1, $src2", (!cast<Instruction>(NAME#"Xrs")
+ XZR, GPR64:$src1, GPR64:$src2, 0), 5>;
+ def : InstAlias<cmp#"\t$src1, $src2", (!cast<Instruction>(NAME#"Wrx")
+ WZR, GPR32sponly:$src1, GPR32:$src2, 16), 5>;
+ def : InstAlias<cmp#"\t$src1, $src2", (!cast<Instruction>(NAME#"Xrx64")
+ XZR, GPR64sponly:$src1, GPR64:$src2, 24), 5>;
+
+ // Register/register aliases with no shift when SP is not used.
+ def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrs"),
+ GPR32, GPR32, GPR32, 0>;
+ def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Xrs"),
+ GPR64, GPR64, GPR64, 0>;
+
+ // Register/register aliases with no shift when the first source register
+ // is SP.
+ def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrx"),
+ GPR32, GPR32sponly, GPR32, 16>; // UXTW #0
+ def : AddSubRegAlias<mnemonic,
+ !cast<Instruction>(NAME#"Xrx64"),
+ GPR64, GPR64sponly, GPR64, 24>; // UXTX #0
+}
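+
+// Illustrative use (assumed): the flag-setting variants also provide the
+// CMP/CMN shorthands, roughly
+//   defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">;
+//   defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">;
+// so "cmp x0, #4" assembles as "subs xzr, x0, #4" and "cmn x0, x1" as
+// "adds xzr, x0, x1" through the InstAlias defs above.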
+
+//---
+// Extract
+//---
+def SDTA64EXTR : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisPtrTy<3>]>;
+def AArch64Extr : SDNode<"AArch64ISD::EXTR", SDTA64EXTR>;
+
+class BaseExtractImm<RegisterClass regtype, Operand imm_type, string asm,
+ list<dag> patterns>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, imm_type:$imm),
+ asm, "\t$Rd, $Rn, $Rm, $imm", "", patterns>,
+ Sched<[WriteExtr, ReadExtrHi]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<6> imm;
+
+ let Inst{30-23} = 0b00100111;
+ let Inst{21} = 0;
+ let Inst{20-16} = Rm;
+ let Inst{15-10} = imm;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass ExtractImm<string asm> {
+ def Wrri : BaseExtractImm<GPR32, imm0_31, asm,
+ [(set GPR32:$Rd,
+ (AArch64Extr GPR32:$Rn, GPR32:$Rm, imm0_31:$imm))]> {
+ let Inst{31} = 0;
+ let Inst{22} = 0;
+ // imm<5> must be zero.
+ let imm{5} = 0;
+ }
+ def Xrri : BaseExtractImm<GPR64, imm0_63, asm,
+ [(set GPR64:$Rd,
+ (AArch64Extr GPR64:$Rn, GPR64:$Rm, imm0_63:$imm))]> {
+
+ let Inst{31} = 1;
+ let Inst{22} = 1;
+ }
+}
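+
+// Illustrative use (assumed): a single instantiation,
+//   defm EXTR : ExtractImm<"extr">;
+// gives EXTRWrri/EXTRXrri. With both source registers equal, EXTR performs a
+// rotate right, which is how "ror Rd, Rn, #imm" is usually aliased, e.g.
+//   extr w0, w1, w1, #8    // same result as: ror w0, w1, #8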
+
+//---
+// Bitfield
+//---
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseBitfieldImm<bits<2> opc,
+ RegisterClass regtype, Operand imm_type, string asm>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, imm_type:$immr, imm_type:$imms),
+ asm, "\t$Rd, $Rn, $immr, $imms", "", []>,
+ Sched<[WriteIS, ReadI]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<6> immr;
+ bits<6> imms;
+
+ let Inst{30-29} = opc;
+ let Inst{28-23} = 0b100110;
+ let Inst{21-16} = immr;
+ let Inst{15-10} = imms;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass BitfieldImm<bits<2> opc, string asm> {
+ def Wri : BaseBitfieldImm<opc, GPR32, imm0_31, asm> {
+ let Inst{31} = 0;
+ let Inst{22} = 0;
+ // imms<5> and immr<5> must be zero, else ReservedValue().
+ let Inst{21} = 0;
+ let Inst{15} = 0;
+ }
+ def Xri : BaseBitfieldImm<opc, GPR64, imm0_63, asm> {
+ let Inst{31} = 1;
+ let Inst{22} = 1;
+ }
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseBitfieldImmWith2RegArgs<bits<2> opc,
+ RegisterClass regtype, Operand imm_type, string asm>
+ : I<(outs regtype:$Rd), (ins regtype:$src, regtype:$Rn, imm_type:$immr,
+ imm_type:$imms),
+ asm, "\t$Rd, $Rn, $immr, $imms", "$src = $Rd", []>,
+ Sched<[WriteIS, ReadI]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<6> immr;
+ bits<6> imms;
+
+ let Inst{30-29} = opc;
+ let Inst{28-23} = 0b100110;
+ let Inst{21-16} = immr;
+ let Inst{15-10} = imms;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass BitfieldImmWith2RegArgs<bits<2> opc, string asm> {
+ def Wri : BaseBitfieldImmWith2RegArgs<opc, GPR32, imm0_31, asm> {
+ let Inst{31} = 0;
+ let Inst{22} = 0;
+ // imms<5> and immr<5> must be zero, else ReservedValue().
+ let Inst{21} = 0;
+ let Inst{15} = 0;
+ }
+ def Xri : BaseBitfieldImmWith2RegArgs<opc, GPR64, imm0_63, asm> {
+ let Inst{31} = 1;
+ let Inst{22} = 1;
+ }
+}
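+
+// Illustrative use (assumed): the two bitfield multiclasses cover the three
+// bitfield-move instructions,
+//   defm SBFM : BitfieldImm<0b00, "sbfm">;
+//   defm UBFM : BitfieldImm<0b10, "ubfm">;
+//   defm BFM  : BitfieldImmWith2RegArgs<0b01, "bfm">;
+// BFM uses the two-register-argument form because it only inserts into $Rd,
+// so the old value is tied in via "$src = $Rd". Aliases such as UBFX, SBFIZ
+// and LSL-by-immediate are normally layered on top with computed immr/imms.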
+
+//---
+// Logical
+//---
+
+// Logical (immediate)
+class BaseLogicalImm<bits<2> opc, RegisterClass dregtype,
+ RegisterClass sregtype, Operand imm_type, string asm,
+ list<dag> pattern>
+ : I<(outs dregtype:$Rd), (ins sregtype:$Rn, imm_type:$imm),
+ asm, "\t$Rd, $Rn, $imm", "", pattern>,
+ Sched<[WriteI, ReadI]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<13> imm;
+ let Inst{30-29} = opc;
+ let Inst{28-23} = 0b100100;
+ let Inst{22} = imm{12};
+ let Inst{21-16} = imm{11-6};
+ let Inst{15-10} = imm{5-0};
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+
+ let DecoderMethod = "DecodeLogicalImmInstruction";
+}
+
+// Logical (shifted register)
+class BaseLogicalSReg<bits<2> opc, bit N, RegisterClass regtype,
+ logical_shifted_reg shifted_regtype, string asm,
+ list<dag> pattern>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, shifted_regtype:$Rm),
+ asm, "\t$Rd, $Rn, $Rm", "", pattern>,
+ Sched<[WriteISReg, ReadI, ReadISReg]> {
+ // The operands are in order to match the 'addr' MI operands, so we
+ // don't need an encoder method and by-name matching. Just use the default
+ // in-order handling. Since we're using by-order, make sure the names
+ // do not match.
+ bits<5> dst;
+ bits<5> src1;
+ bits<5> src2;
+ bits<8> shift;
+ let Inst{30-29} = opc;
+ let Inst{28-24} = 0b01010;
+ let Inst{23-22} = shift{7-6};
+ let Inst{21} = N;
+ let Inst{20-16} = src2;
+ let Inst{15-10} = shift{5-0};
+ let Inst{9-5} = src1;
+ let Inst{4-0} = dst;
+
+ let DecoderMethod = "DecodeThreeAddrSRegInstruction";
+}
+
+// Aliases for register+register logical instructions.
+class LogicalRegAlias<string asm, Instruction inst, RegisterClass regtype>
+ : InstAlias<asm#"\t$dst, $src1, $src2",
+ (inst regtype:$dst, regtype:$src1, regtype:$src2, 0)>;
+
+multiclass LogicalImm<bits<2> opc, string mnemonic, SDNode OpNode,
+ string Alias> {
+ let AddedComplexity = 6, isReMaterializable = 1, isAsCheapAsAMove = 1 in
+ def Wri : BaseLogicalImm<opc, GPR32sp, GPR32, logical_imm32, mnemonic,
+ [(set GPR32sp:$Rd, (OpNode GPR32:$Rn,
+ logical_imm32:$imm))]> {
+ let Inst{31} = 0;
+ let Inst{22} = 0; // 64-bit version has an additional bit of immediate.
+ }
+ let AddedComplexity = 6, isReMaterializable = 1, isAsCheapAsAMove = 1 in
+ def Xri : BaseLogicalImm<opc, GPR64sp, GPR64, logical_imm64, mnemonic,
+ [(set GPR64sp:$Rd, (OpNode GPR64:$Rn,
+ logical_imm64:$imm))]> {
+ let Inst{31} = 1;
+ }
+
+ def : InstSubst<Alias # "\t$Rd, $Rn, $imm",
+ (!cast<Instruction>(NAME # "Wri") GPR32sp:$Rd, GPR32:$Rn,
+ logical_imm32_not:$imm), 0>;
+ def : InstSubst<Alias # "\t$Rd, $Rn, $imm",
+ (!cast<Instruction>(NAME # "Xri") GPR64sp:$Rd, GPR64:$Rn,
+ logical_imm64_not:$imm), 0>;
+}
+
+multiclass LogicalImmS<bits<2> opc, string mnemonic, SDNode OpNode,
+ string Alias> {
+ let isCompare = 1, Defs = [NZCV] in {
+ def Wri : BaseLogicalImm<opc, GPR32, GPR32, logical_imm32, mnemonic,
+ [(set GPR32:$Rd, (OpNode GPR32:$Rn, logical_imm32:$imm))]> {
+ let Inst{31} = 0;
+ let Inst{22} = 0; // 64-bit version has an additional bit of immediate.
+ }
+ def Xri : BaseLogicalImm<opc, GPR64, GPR64, logical_imm64, mnemonic,
+ [(set GPR64:$Rd, (OpNode GPR64:$Rn, logical_imm64:$imm))]> {
+ let Inst{31} = 1;
+ }
+ } // end Defs = [NZCV]
+
+ def : InstSubst<Alias # "\t$Rd, $Rn, $imm",
+ (!cast<Instruction>(NAME # "Wri") GPR32:$Rd, GPR32:$Rn,
+ logical_imm32_not:$imm), 0>;
+ def : InstSubst<Alias # "\t$Rd, $Rn, $imm",
+ (!cast<Instruction>(NAME # "Xri") GPR64:$Rd, GPR64:$Rn,
+ logical_imm64_not:$imm), 0>;
+}
+
+class BaseLogicalRegPseudo<RegisterClass regtype, SDPatternOperator OpNode>
+ : Pseudo<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm),
+ [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]>,
+ Sched<[WriteI, ReadI, ReadI]>;
+
+// Split from LogicalImm as not all instructions have both.
+multiclass LogicalReg<bits<2> opc, bit N, string mnemonic,
+ SDPatternOperator OpNode> {
+ let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+ def Wrr : BaseLogicalRegPseudo<GPR32, OpNode>;
+ def Xrr : BaseLogicalRegPseudo<GPR64, OpNode>;
+ }
+
+ def Wrs : BaseLogicalSReg<opc, N, GPR32, logical_shifted_reg32, mnemonic,
+ [(set GPR32:$Rd, (OpNode GPR32:$Rn,
+ logical_shifted_reg32:$Rm))]> {
+ let Inst{31} = 0;
+ }
+ def Xrs : BaseLogicalSReg<opc, N, GPR64, logical_shifted_reg64, mnemonic,
+ [(set GPR64:$Rd, (OpNode GPR64:$Rn,
+ logical_shifted_reg64:$Rm))]> {
+ let Inst{31} = 1;
+ }
+
+ def : LogicalRegAlias<mnemonic,
+ !cast<Instruction>(NAME#"Wrs"), GPR32>;
+ def : LogicalRegAlias<mnemonic,
+ !cast<Instruction>(NAME#"Xrs"), GPR64>;
+}
+
+// Split from LogicalReg to allow setting NZCV Defs
+multiclass LogicalRegS<bits<2> opc, bit N, string mnemonic,
+ SDPatternOperator OpNode = null_frag> {
+ let Defs = [NZCV], mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+ def Wrr : BaseLogicalRegPseudo<GPR32, OpNode>;
+ def Xrr : BaseLogicalRegPseudo<GPR64, OpNode>;
+
+ def Wrs : BaseLogicalSReg<opc, N, GPR32, logical_shifted_reg32, mnemonic,
+ [(set GPR32:$Rd, (OpNode GPR32:$Rn, logical_shifted_reg32:$Rm))]> {
+ let Inst{31} = 0;
+ }
+ def Xrs : BaseLogicalSReg<opc, N, GPR64, logical_shifted_reg64, mnemonic,
+ [(set GPR64:$Rd, (OpNode GPR64:$Rn, logical_shifted_reg64:$Rm))]> {
+ let Inst{31} = 1;
+ }
+ } // Defs = [NZCV]
+
+ def : LogicalRegAlias<mnemonic,
+ !cast<Instruction>(NAME#"Wrs"), GPR32>;
+ def : LogicalRegAlias<mnemonic,
+ !cast<Instruction>(NAME#"Xrs"), GPR64>;
+}
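+
+// Illustrative use (assumed): the logical families are typically instantiated
+// in matched immediate/register pairs, e.g.
+//   defm AND  : LogicalImm<0b00, "and", and, "bic">;
+//   defm ORR  : LogicalImm<0b01, "orr", or,  "orn">;
+//   defm EOR  : LogicalImm<0b10, "eor", xor, "eon">;
+//   defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>;
+//   defm BICS : LogicalRegS<0b11, 1, "bics">;
+// The N bit of the shifted-register form selects the inverted variants
+// (BIC/ORN/EON/BICS), and the *_not InstSubsts let "bic Rd, Rn, #imm" map
+// onto AND with the complemented immediate.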
+
+//---
+// Conditionally set flags
+//---
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseCondComparisonImm<bit op, RegisterClass regtype, ImmLeaf immtype,
+ string mnemonic, SDNode OpNode>
+ : I<(outs), (ins regtype:$Rn, immtype:$imm, imm32_0_15:$nzcv, ccode:$cond),
+ mnemonic, "\t$Rn, $imm, $nzcv, $cond", "",
+ [(set NZCV, (OpNode regtype:$Rn, immtype:$imm, (i32 imm:$nzcv),
+ (i32 imm:$cond), NZCV))]>,
+ Sched<[WriteI, ReadI]> {
+ let Uses = [NZCV];
+ let Defs = [NZCV];
+
+ bits<5> Rn;
+ bits<5> imm;
+ bits<4> nzcv;
+ bits<4> cond;
+
+ let Inst{30} = op;
+ let Inst{29-21} = 0b111010010;
+ let Inst{20-16} = imm;
+ let Inst{15-12} = cond;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4} = 0b0;
+ let Inst{3-0} = nzcv;
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseCondComparisonReg<bit op, RegisterClass regtype, string mnemonic,
+ SDNode OpNode>
+ : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond),
+ mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "",
+ [(set NZCV, (OpNode regtype:$Rn, regtype:$Rm, (i32 imm:$nzcv),
+ (i32 imm:$cond), NZCV))]>,
+ Sched<[WriteI, ReadI, ReadI]> {
+ let Uses = [NZCV];
+ let Defs = [NZCV];
+
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<4> nzcv;
+ bits<4> cond;
+
+ let Inst{30} = op;
+ let Inst{29-21} = 0b111010010;
+ let Inst{20-16} = Rm;
+ let Inst{15-12} = cond;
+ let Inst{11-10} = 0b00;
+ let Inst{9-5} = Rn;
+ let Inst{4} = 0b0;
+ let Inst{3-0} = nzcv;
+}
+
+multiclass CondComparison<bit op, string mnemonic, SDNode OpNode> {
+ // immediate operand variants
+ def Wi : BaseCondComparisonImm<op, GPR32, imm32_0_31, mnemonic, OpNode> {
+ let Inst{31} = 0;
+ }
+ def Xi : BaseCondComparisonImm<op, GPR64, imm0_31, mnemonic, OpNode> {
+ let Inst{31} = 1;
+ }
+ // register operand variants
+ def Wr : BaseCondComparisonReg<op, GPR32, mnemonic, OpNode> {
+ let Inst{31} = 0;
+ }
+ def Xr : BaseCondComparisonReg<op, GPR64, mnemonic, OpNode> {
+ let Inst{31} = 1;
+ }
+}
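+
+// Illustrative use (assumed):
+//   defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>;
+//   defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>;
+// e.g. "ccmp x0, #4, #0b0010, ne" compares x0 with 4 only if NE holds;
+// otherwise it writes the literal nzcv field (here 0b0010) into the flags.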
+
+//---
+// Conditional select
+//---
+
+class BaseCondSelect<bit op, bits<2> op2, RegisterClass regtype, string asm>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond),
+ asm, "\t$Rd, $Rn, $Rm, $cond", "",
+ [(set regtype:$Rd,
+ (AArch64csel regtype:$Rn, regtype:$Rm, (i32 imm:$cond), NZCV))]>,
+ Sched<[WriteI, ReadI, ReadI]> {
+ let Uses = [NZCV];
+
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<4> cond;
+
+ let Inst{30} = op;
+ let Inst{29-21} = 0b011010100;
+ let Inst{20-16} = Rm;
+ let Inst{15-12} = cond;
+ let Inst{11-10} = op2;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass CondSelect<bit op, bits<2> op2, string asm> {
+ def Wr : BaseCondSelect<op, op2, GPR32, asm> {
+ let Inst{31} = 0;
+ }
+ def Xr : BaseCondSelect<op, op2, GPR64, asm> {
+ let Inst{31} = 1;
+ }
+}
+
+class BaseCondSelectOp<bit op, bits<2> op2, RegisterClass regtype, string asm,
+ PatFrag frag>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond),
+ asm, "\t$Rd, $Rn, $Rm, $cond", "",
+ [(set regtype:$Rd,
+ (AArch64csel regtype:$Rn, (frag regtype:$Rm),
+ (i32 imm:$cond), NZCV))]>,
+ Sched<[WriteI, ReadI, ReadI]> {
+ let Uses = [NZCV];
+
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<4> cond;
+
+ let Inst{30} = op;
+ let Inst{29-21} = 0b011010100;
+ let Inst{20-16} = Rm;
+ let Inst{15-12} = cond;
+ let Inst{11-10} = op2;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+def inv_cond_XFORM : SDNodeXForm<imm, [{
+ AArch64CC::CondCode CC = static_cast<AArch64CC::CondCode>(N->getZExtValue());
+ return CurDAG->getTargetConstant(AArch64CC::getInvertedCondCode(CC), SDLoc(N),
+ MVT::i32);
+}]>;
+
+multiclass CondSelectOp<bit op, bits<2> op2, string asm, PatFrag frag> {
+ def Wr : BaseCondSelectOp<op, op2, GPR32, asm, frag> {
+ let Inst{31} = 0;
+ }
+ def Xr : BaseCondSelectOp<op, op2, GPR64, asm, frag> {
+ let Inst{31} = 1;
+ }
+
+ def : Pat<(AArch64csel (frag GPR32:$Rm), GPR32:$Rn, (i32 imm:$cond), NZCV),
+ (!cast<Instruction>(NAME # Wr) GPR32:$Rn, GPR32:$Rm,
+ (inv_cond_XFORM imm:$cond))>;
+
+ def : Pat<(AArch64csel (frag GPR64:$Rm), GPR64:$Rn, (i32 imm:$cond), NZCV),
+ (!cast<Instruction>(NAME # Xr) GPR64:$Rn, GPR64:$Rm,
+ (inv_cond_XFORM imm:$cond))>;
+}
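+
+// Illustrative use (assumed): CSEL uses the plain form, and the "op" variants
+// apply a unary PatFrag to the second operand, roughly
+//   defm CSEL  : CondSelect<0, 0b00, "csel">;
+//   defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>;
+//   defm CSINV : CondSelectOp<1, 0b00, "csinv", not>;
+//   defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>;
+// The inv_cond_XFORM Pats above handle the commuted select by swapping the
+// operands and inverting the condition, so (csel (frag Rm), Rn, cc) still
+// matches a single instruction.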
+
+//---
+// Special Mask Value
+//---
+def maski8_or_more : Operand<i32>,
+ ImmLeaf<i32, [{ return (Imm & 0xff) == 0xff; }]> {
+}
+def maski16_or_more : Operand<i32>,
+ ImmLeaf<i32, [{ return (Imm & 0xffff) == 0xffff; }]> {
+}
+
+
+//---
+// Load/store
+//---
+
+// (unsigned immediate)
+// Indexed for 8-bit registers. offset is in range [0,4095].
+def am_indexed8 : ComplexPattern<i64, 2, "SelectAddrModeIndexed8", []>;
+def am_indexed16 : ComplexPattern<i64, 2, "SelectAddrModeIndexed16", []>;
+def am_indexed32 : ComplexPattern<i64, 2, "SelectAddrModeIndexed32", []>;
+def am_indexed64 : ComplexPattern<i64, 2, "SelectAddrModeIndexed64", []>;
+def am_indexed128 : ComplexPattern<i64, 2, "SelectAddrModeIndexed128", []>;
+
+def gi_am_indexed8 :
+ GIComplexOperandMatcher<s64, "selectAddrModeIndexed<8>">,
+ GIComplexPatternEquiv<am_indexed8>;
+def gi_am_indexed16 :
+ GIComplexOperandMatcher<s64, "selectAddrModeIndexed<16>">,
+ GIComplexPatternEquiv<am_indexed16>;
+def gi_am_indexed32 :
+ GIComplexOperandMatcher<s64, "selectAddrModeIndexed<32>">,
+ GIComplexPatternEquiv<am_indexed32>;
+def gi_am_indexed64 :
+ GIComplexOperandMatcher<s64, "selectAddrModeIndexed<64>">,
+ GIComplexPatternEquiv<am_indexed64>;
+def gi_am_indexed128 :
+ GIComplexOperandMatcher<s64, "selectAddrModeIndexed<128>">,
+ GIComplexPatternEquiv<am_indexed128>;
+
+class UImm12OffsetOperand<int Scale> : AsmOperandClass {
+ let Name = "UImm12Offset" # Scale;
+ let RenderMethod = "addUImm12OffsetOperands<" # Scale # ">";
+ let PredicateMethod = "isUImm12Offset<" # Scale # ">";
+ let DiagnosticType = "InvalidMemoryIndexed" # Scale;
+}
+
+def UImm12OffsetScale1Operand : UImm12OffsetOperand<1>;
+def UImm12OffsetScale2Operand : UImm12OffsetOperand<2>;
+def UImm12OffsetScale4Operand : UImm12OffsetOperand<4>;
+def UImm12OffsetScale8Operand : UImm12OffsetOperand<8>;
+def UImm12OffsetScale16Operand : UImm12OffsetOperand<16>;
+
+class uimm12_scaled<int Scale> : Operand<i64> {
+ let ParserMatchClass
+ = !cast<AsmOperandClass>("UImm12OffsetScale" # Scale # "Operand");
+ let EncoderMethod
+ = "getLdStUImm12OpValue<AArch64::fixup_aarch64_ldst_imm12_scale" # Scale # ">";
+ let PrintMethod = "printUImm12Offset<" # Scale # ">";
+}
+
+def uimm12s1 : uimm12_scaled<1>;
+def uimm12s2 : uimm12_scaled<2>;
+def uimm12s4 : uimm12_scaled<4>;
+def uimm12s8 : uimm12_scaled<8>;
+def uimm12s16 : uimm12_scaled<16>;
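+
+// Note (informal): the uimm12sN operands hold the *scaled* unsigned offset,
+// so the byte offset must be a multiple of the access size and at most
+// 4095 * N. For example, assuming the usual LDR/STR definitions:
+//   ldr  x0, [x1, #32]     // 64-bit access: encoded offset field = 32 / 8 = 4
+//   ldrb w0, [x1, #4095]   // 8-bit access:  encoded offset field = 4095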
+
+class BaseLoadStoreUI<bits<2> sz, bit V, bits<2> opc, dag oops, dag iops,
+ string asm, list<dag> pattern>
+ : I<oops, iops, asm, "\t$Rt, [$Rn, $offset]", "", pattern> {
+ bits<5> Rt;
+
+ bits<5> Rn;
+ bits<12> offset;
+
+ let Inst{31-30} = sz;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = V;
+ let Inst{25-24} = 0b01;
+ let Inst{23-22} = opc;
+ let Inst{21-10} = offset;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+
+ let DecoderMethod = "DecodeUnsignedLdStInstruction";
+}
+
+multiclass LoadUI<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
+ Operand indextype, string asm, list<dag> pattern> {
+ let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
+ def ui : BaseLoadStoreUI<sz, V, opc, (outs regtype:$Rt),
+ (ins GPR64sp:$Rn, indextype:$offset),
+ asm, pattern>,
+ Sched<[WriteLD]>;
+
+ def : InstAlias<asm # "\t$Rt, [$Rn]",
+ (!cast<Instruction>(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>;
+}
+
+multiclass StoreUI<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
+ Operand indextype, string asm, list<dag> pattern> {
+ let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
+ def ui : BaseLoadStoreUI<sz, V, opc, (outs),
+ (ins regtype:$Rt, GPR64sp:$Rn, indextype:$offset),
+ asm, pattern>,
+ Sched<[WriteST]>;
+
+ def : InstAlias<asm # "\t$Rt, [$Rn]",
+ (!cast<Instruction>(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>;
+}
+
+// Same as StoreUI, but takes a RegisterOperand. This is used by GlobalISel to

+// substitute zero-registers automatically.
+//
+// TODO: Roll out zero-register substitution to GPR32/GPR64 and fold this back
+// into StoreUI.
+multiclass StoreUIz<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
+ Operand indextype, string asm, list<dag> pattern> {
+ let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
+ def ui : BaseLoadStoreUI<sz, V, opc, (outs),
+ (ins regtype:$Rt, GPR64sp:$Rn, indextype:$offset),
+ asm, pattern>,
+ Sched<[WriteST]>;
+
+ def : InstAlias<asm # "\t$Rt, [$Rn]",
+ (!cast<Instruction>(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>;
+}
+
+def PrefetchOperand : AsmOperandClass {
+ let Name = "Prefetch";
+ let ParserMethod = "tryParsePrefetch";
+}
+def prfop : Operand<i32> {
+ let PrintMethod = "printPrefetchOp";
+ let ParserMatchClass = PrefetchOperand;
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
+class PrefetchUI<bits<2> sz, bit V, bits<2> opc, string asm, list<dag> pat>
+ : BaseLoadStoreUI<sz, V, opc,
+ (outs), (ins prfop:$Rt, GPR64sp:$Rn, uimm12s8:$offset),
+ asm, pat>,
+ Sched<[WriteLD]>;
+
+//---
+// Load literal
+//---
+
+// Load literal address: 19-bit immediate. The low two bits of the target
+// offset are implied zero and so are not part of the immediate.
+def am_ldrlit : Operand<iPTR> {
+ let EncoderMethod = "getLoadLiteralOpValue";
+ let DecoderMethod = "DecodePCRelLabel19";
+ let PrintMethod = "printAlignedLabel";
+ let ParserMatchClass = PCRelLabel19Operand;
+ let OperandType = "OPERAND_PCREL";
+}
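+
+// Informal example: with the low two bits implied zero, the 19-bit label
+// gives a +/-1MiB PC-relative range for load-literal, e.g.
+//   ldr x0, .Lconst   // literal must be 4-byte aligned, within +/-1MiB of PC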
+
+let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
+class LoadLiteral<bits<2> opc, bit V, RegisterOperand regtype, string asm>
+ : I<(outs regtype:$Rt), (ins am_ldrlit:$label),
+ asm, "\t$Rt, $label", "", []>,
+ Sched<[WriteLD]> {
+ bits<5> Rt;
+ bits<19> label;
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b011;
+ let Inst{26} = V;
+ let Inst{25-24} = 0b00;
+ let Inst{23-5} = label;
+ let Inst{4-0} = Rt;
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
+class PrefetchLiteral<bits<2> opc, bit V, string asm, list<dag> pat>
+ : I<(outs), (ins prfop:$Rt, am_ldrlit:$label),
+ asm, "\t$Rt, $label", "", pat>,
+ Sched<[WriteLD]> {
+ bits<5> Rt;
+ bits<19> label;
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b011;
+ let Inst{26} = V;
+ let Inst{25-24} = 0b00;
+ let Inst{23-5} = label;
+ let Inst{4-0} = Rt;
+}
+
+//---
+// Load/store register offset
+//---
+
+def ro_Xindexed8 : ComplexPattern<i64, 4, "SelectAddrModeXRO<8>", []>;
+def ro_Xindexed16 : ComplexPattern<i64, 4, "SelectAddrModeXRO<16>", []>;
+def ro_Xindexed32 : ComplexPattern<i64, 4, "SelectAddrModeXRO<32>", []>;
+def ro_Xindexed64 : ComplexPattern<i64, 4, "SelectAddrModeXRO<64>", []>;
+def ro_Xindexed128 : ComplexPattern<i64, 4, "SelectAddrModeXRO<128>", []>;
+
+def ro_Windexed8 : ComplexPattern<i64, 4, "SelectAddrModeWRO<8>", []>;
+def ro_Windexed16 : ComplexPattern<i64, 4, "SelectAddrModeWRO<16>", []>;
+def ro_Windexed32 : ComplexPattern<i64, 4, "SelectAddrModeWRO<32>", []>;
+def ro_Windexed64 : ComplexPattern<i64, 4, "SelectAddrModeWRO<64>", []>;
+def ro_Windexed128 : ComplexPattern<i64, 4, "SelectAddrModeWRO<128>", []>;
+
+class MemExtendOperand<string Reg, int Width> : AsmOperandClass {
+ let Name = "Mem" # Reg # "Extend" # Width;
+ let PredicateMethod = "isMem" # Reg # "Extend<" # Width # ">";
+ let RenderMethod = "addMemExtendOperands";
+ let DiagnosticType = "InvalidMemory" # Reg # "Extend" # Width;
+}
+
+def MemWExtend8Operand : MemExtendOperand<"W", 8> {
+ // The address "[x0, x1, lsl #0]" actually maps to the variant which performs
+ // the trivial shift.
+ let RenderMethod = "addMemExtend8Operands";
+}
+def MemWExtend16Operand : MemExtendOperand<"W", 16>;
+def MemWExtend32Operand : MemExtendOperand<"W", 32>;
+def MemWExtend64Operand : MemExtendOperand<"W", 64>;
+def MemWExtend128Operand : MemExtendOperand<"W", 128>;
+
+def MemXExtend8Operand : MemExtendOperand<"X", 8> {
+ // The address "[x0, x1, lsl #0]" actually maps to the variant which performs
+ // the trivial shift.
+ let RenderMethod = "addMemExtend8Operands";
+}
+def MemXExtend16Operand : MemExtendOperand<"X", 16>;
+def MemXExtend32Operand : MemExtendOperand<"X", 32>;
+def MemXExtend64Operand : MemExtendOperand<"X", 64>;
+def MemXExtend128Operand : MemExtendOperand<"X", 128>;
+
+class ro_extend<AsmOperandClass ParserClass, string Reg, int Width>
+ : Operand<i32> {
+ let ParserMatchClass = ParserClass;
+ let PrintMethod = "printMemExtend<'" # Reg # "', " # Width # ">";
+ let DecoderMethod = "DecodeMemExtend";
+ let EncoderMethod = "getMemExtendOpValue";
+ let MIOperandInfo = (ops i32imm:$signed, i32imm:$doshift);
+}
+
+def ro_Wextend8 : ro_extend<MemWExtend8Operand, "w", 8>;
+def ro_Wextend16 : ro_extend<MemWExtend16Operand, "w", 16>;
+def ro_Wextend32 : ro_extend<MemWExtend32Operand, "w", 32>;
+def ro_Wextend64 : ro_extend<MemWExtend64Operand, "w", 64>;
+def ro_Wextend128 : ro_extend<MemWExtend128Operand, "w", 128>;
+
+def ro_Xextend8 : ro_extend<MemXExtend8Operand, "x", 8>;
+def ro_Xextend16 : ro_extend<MemXExtend16Operand, "x", 16>;
+def ro_Xextend32 : ro_extend<MemXExtend32Operand, "x", 32>;
+def ro_Xextend64 : ro_extend<MemXExtend64Operand, "x", 64>;
+def ro_Xextend128 : ro_extend<MemXExtend128Operand, "x", 128>;
+
+class ROAddrMode<ComplexPattern windex, ComplexPattern xindex,
+ Operand wextend, Operand xextend> {
+ // CodeGen-level pattern covering the entire addressing mode.
+ ComplexPattern Wpat = windex;
+ ComplexPattern Xpat = xindex;
+
+ // Asm-level Operand covering the valid "uxtw #3" style syntax.
+ Operand Wext = wextend;
+ Operand Xext = xextend;
+}
+
+def ro8 : ROAddrMode<ro_Windexed8, ro_Xindexed8, ro_Wextend8, ro_Xextend8>;
+def ro16 : ROAddrMode<ro_Windexed16, ro_Xindexed16, ro_Wextend16, ro_Xextend16>;
+def ro32 : ROAddrMode<ro_Windexed32, ro_Xindexed32, ro_Wextend32, ro_Xextend32>;
+def ro64 : ROAddrMode<ro_Windexed64, ro_Xindexed64, ro_Wextend64, ro_Xextend64>;
+def ro128 : ROAddrMode<ro_Windexed128, ro_Xindexed128, ro_Wextend128,
+ ro_Xextend128>;
+
+class LoadStore8RO<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
+ string asm, dag ins, dag outs, list<dag> pat>
+ : I<ins, outs, asm, "\t$Rt, [$Rn, $Rm, $extend]", "", pat> {
+ bits<5> Rt;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<2> extend;
+ let Inst{31-30} = sz;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = V;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 1;
+ let Inst{20-16} = Rm;
+ let Inst{15} = extend{1}; // sign extend Rm?
+ let Inst{14} = 1;
+ let Inst{12} = extend{0}; // do shift?
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+}
+
+class ROInstAlias<string asm, RegisterOperand regtype, Instruction INST>
+ : InstAlias<asm # "\t$Rt, [$Rn, $Rm]",
+ (INST regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, 0, 0)>;
+
+multiclass Load8RO<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
+ string asm, ValueType Ty, SDPatternOperator loadop> {
+ let AddedComplexity = 10 in
+ def roW : LoadStore8RO<sz, V, opc, regtype, asm,
+ (outs regtype:$Rt),
+ (ins GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$extend),
+ [(set (Ty regtype:$Rt),
+ (loadop (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend8:$extend)))]>,
+ Sched<[WriteLDIdx, ReadAdrBase]> {
+ let Inst{13} = 0b0;
+ }
+
+ let AddedComplexity = 10 in
+ def roX : LoadStore8RO<sz, V, opc, regtype, asm,
+ (outs regtype:$Rt),
+ (ins GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$extend),
+ [(set (Ty regtype:$Rt),
+ (loadop (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend8:$extend)))]>,
+ Sched<[WriteLDIdx, ReadAdrBase]> {
+ let Inst{13} = 0b1;
+ }
+
+ def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>;
+}
+
+multiclass Store8RO<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
+ string asm, ValueType Ty, SDPatternOperator storeop> {
+ let AddedComplexity = 10 in
+ def roW : LoadStore8RO<sz, V, opc, regtype, asm, (outs),
+ (ins regtype:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$extend),
+ [(storeop (Ty regtype:$Rt),
+ (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend8:$extend))]>,
+ Sched<[WriteSTIdx, ReadAdrBase]> {
+ let Inst{13} = 0b0;
+ }
+
+ let AddedComplexity = 10 in
+ def roX : LoadStore8RO<sz, V, opc, regtype, asm, (outs),
+ (ins regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$extend),
+ [(storeop (Ty regtype:$Rt),
+ (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend8:$extend))]>,
+ Sched<[WriteSTIdx, ReadAdrBase]> {
+ let Inst{13} = 0b1;
+ }
+
+ def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>;
+}
+
+class LoadStore16RO<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
+ string asm, dag ins, dag outs, list<dag> pat>
+ : I<ins, outs, asm, "\t$Rt, [$Rn, $Rm, $extend]", "", pat> {
+ bits<5> Rt;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<2> extend;
+ let Inst{31-30} = sz;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = V;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 1;
+ let Inst{20-16} = Rm;
+ let Inst{15} = extend{1}; // sign extend Rm?
+ let Inst{14} = 1;
+ let Inst{12} = extend{0}; // do shift?
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+}
+
+multiclass Load16RO<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
+ string asm, ValueType Ty, SDPatternOperator loadop> {
+ let AddedComplexity = 10 in
+ def roW : LoadStore16RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ (ins GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend),
+ [(set (Ty regtype:$Rt),
+ (loadop (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend)))]>,
+ Sched<[WriteLDIdx, ReadAdrBase]> {
+ let Inst{13} = 0b0;
+ }
+
+ let AddedComplexity = 10 in
+ def roX : LoadStore16RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ (ins GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend),
+ [(set (Ty regtype:$Rt),
+ (loadop (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend)))]>,
+ Sched<[WriteLDIdx, ReadAdrBase]> {
+ let Inst{13} = 0b1;
+ }
+
+ def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>;
+}
+
+multiclass Store16RO<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
+ string asm, ValueType Ty, SDPatternOperator storeop> {
+ let AddedComplexity = 10 in
+ def roW : LoadStore16RO<sz, V, opc, regtype, asm, (outs),
+ (ins regtype:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend),
+ [(storeop (Ty regtype:$Rt),
+ (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend))]>,
+ Sched<[WriteSTIdx, ReadAdrBase]> {
+ let Inst{13} = 0b0;
+ }
+
+ let AddedComplexity = 10 in
+ def roX : LoadStore16RO<sz, V, opc, regtype, asm, (outs),
+ (ins regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend),
+ [(storeop (Ty regtype:$Rt),
+ (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend))]>,
+ Sched<[WriteSTIdx, ReadAdrBase]> {
+ let Inst{13} = 0b1;
+ }
+
+ def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>;
+}
+
+class LoadStore32RO<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
+ string asm, dag ins, dag outs, list<dag> pat>
+ : I<ins, outs, asm, "\t$Rt, [$Rn, $Rm, $extend]", "", pat> {
+ bits<5> Rt;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<2> extend;
+ let Inst{31-30} = sz;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = V;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 1;
+ let Inst{20-16} = Rm;
+ let Inst{15} = extend{1}; // sign extend Rm?
+ let Inst{14} = 1;
+ let Inst{12} = extend{0}; // do shift?
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+}
+
+multiclass Load32RO<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
+ string asm, ValueType Ty, SDPatternOperator loadop> {
+ let AddedComplexity = 10 in
+ def roW : LoadStore32RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ (ins GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend),
+ [(set (Ty regtype:$Rt),
+ (loadop (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend32:$extend)))]>,
+ Sched<[WriteLDIdx, ReadAdrBase]> {
+ let Inst{13} = 0b0;
+ }
+
+ let AddedComplexity = 10 in
+ def roX : LoadStore32RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ (ins GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend),
+ [(set (Ty regtype:$Rt),
+ (loadop (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend32:$extend)))]>,
+ Sched<[WriteLDIdx, ReadAdrBase]> {
+ let Inst{13} = 0b1;
+ }
+
+ def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>;
+}
+
+multiclass Store32RO<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
+ string asm, ValueType Ty, SDPatternOperator storeop> {
+ let AddedComplexity = 10 in
+ def roW : LoadStore32RO<sz, V, opc, regtype, asm, (outs),
+ (ins regtype:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend),
+ [(storeop (Ty regtype:$Rt),
+ (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend32:$extend))]>,
+ Sched<[WriteSTIdx, ReadAdrBase]> {
+ let Inst{13} = 0b0;
+ }
+
+ let AddedComplexity = 10 in
+ def roX : LoadStore32RO<sz, V, opc, regtype, asm, (outs),
+ (ins regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend),
+ [(storeop (Ty regtype:$Rt),
+ (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend32:$extend))]>,
+ Sched<[WriteSTIdx, ReadAdrBase]> {
+ let Inst{13} = 0b1;
+ }
+
+ def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>;
+}
+
+class LoadStore64RO<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
+ string asm, dag ins, dag outs, list<dag> pat>
+ : I<ins, outs, asm, "\t$Rt, [$Rn, $Rm, $extend]", "", pat> {
+ bits<5> Rt;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<2> extend;
+ let Inst{31-30} = sz;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = V;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 1;
+ let Inst{20-16} = Rm;
+ let Inst{15} = extend{1}; // sign extend Rm?
+ let Inst{14} = 1;
+ let Inst{12} = extend{0}; // do shift?
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+}
+
+multiclass Load64RO<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
+ string asm, ValueType Ty, SDPatternOperator loadop> {
+ let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
+ def roW : LoadStore64RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ (ins GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend),
+ [(set (Ty regtype:$Rt),
+ (loadop (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend64:$extend)))]>,
+ Sched<[WriteLDIdx, ReadAdrBase]> {
+ let Inst{13} = 0b0;
+ }
+
+ let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
+ def roX : LoadStore64RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ (ins GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend),
+ [(set (Ty regtype:$Rt),
+ (loadop (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend64:$extend)))]>,
+ Sched<[WriteLDIdx, ReadAdrBase]> {
+ let Inst{13} = 0b1;
+ }
+
+ def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>;
+}
+
+multiclass Store64RO<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
+ string asm, ValueType Ty, SDPatternOperator storeop> {
+ let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
+ def roW : LoadStore64RO<sz, V, opc, regtype, asm, (outs),
+ (ins regtype:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend),
+ [(storeop (Ty regtype:$Rt),
+ (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend64:$extend))]>,
+ Sched<[WriteSTIdx, ReadAdrBase]> {
+ let Inst{13} = 0b0;
+ }
+
+ let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
+ def roX : LoadStore64RO<sz, V, opc, regtype, asm, (outs),
+ (ins regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend),
+ [(storeop (Ty regtype:$Rt),
+ (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend64:$extend))]>,
+ Sched<[WriteSTIdx, ReadAdrBase]> {
+ let Inst{13} = 0b1;
+ }
+
+ def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>;
+}
+
+class LoadStore128RO<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
+ string asm, dag ins, dag outs, list<dag> pat>
+ : I<ins, outs, asm, "\t$Rt, [$Rn, $Rm, $extend]", "", pat> {
+ bits<5> Rt;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<2> extend;
+ let Inst{31-30} = sz;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = V;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 1;
+ let Inst{20-16} = Rm;
+ let Inst{15} = extend{1}; // sign extend Rm?
+ let Inst{14} = 1;
+ let Inst{12} = extend{0}; // do shift?
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+}
+
+multiclass Load128RO<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
+ string asm, ValueType Ty, SDPatternOperator loadop> {
+ let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
+ def roW : LoadStore128RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ (ins GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend),
+ [(set (Ty regtype:$Rt),
+ (loadop (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend128:$extend)))]>,
+ Sched<[WriteLDIdx, ReadAdrBase]> {
+ let Inst{13} = 0b0;
+ }
+
+ let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
+ def roX : LoadStore128RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ (ins GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend),
+ [(set (Ty regtype:$Rt),
+ (loadop (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend128:$extend)))]>,
+ Sched<[WriteLDIdx, ReadAdrBase]> {
+ let Inst{13} = 0b1;
+ }
+
+ def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>;
+}
+
+multiclass Store128RO<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
+ string asm, ValueType Ty, SDPatternOperator storeop> {
+ let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
+ def roW : LoadStore128RO<sz, V, opc, regtype, asm, (outs),
+ (ins regtype:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend),
+ []>,
+ Sched<[WriteSTIdx, ReadAdrBase]> {
+ let Inst{13} = 0b0;
+ }
+
+ let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
+ def roX : LoadStore128RO<sz, V, opc, regtype, asm, (outs),
+ (ins regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend),
+ []>,
+ Sched<[WriteSTIdx, ReadAdrBase]> {
+ let Inst{13} = 0b1;
+ }
+
+ def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>;
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
+class BasePrefetchRO<bits<2> sz, bit V, bits<2> opc, dag outs, dag ins,
+ string asm, list<dag> pat>
+ : I<outs, ins, asm, "\t$Rt, [$Rn, $Rm, $extend]", "", pat>,
+ Sched<[WriteLD]> {
+ bits<5> Rt;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<2> extend;
+ let Inst{31-30} = sz;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = V;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 1;
+ let Inst{20-16} = Rm;
+ let Inst{15} = extend{1}; // sign extend Rm?
+ let Inst{14} = 1;
+ let Inst{12} = extend{0}; // do shift?
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+}
+
+multiclass PrefetchRO<bits<2> sz, bit V, bits<2> opc, string asm> {
+ def roW : BasePrefetchRO<sz, V, opc, (outs),
+ (ins prfop:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend),
+ asm, [(AArch64Prefetch imm:$Rt,
+ (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend64:$extend))]> {
+ let Inst{13} = 0b0;
+ }
+
+ def roX : BasePrefetchRO<sz, V, opc, (outs),
+ (ins prfop:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend),
+ asm, [(AArch64Prefetch imm:$Rt,
+ (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend64:$extend))]> {
+ let Inst{13} = 0b1;
+ }
+
+ def : InstAlias<"prfm $Rt, [$Rn, $Rm]",
+ (!cast<Instruction>(NAME # "roX") prfop:$Rt,
+ GPR64sp:$Rn, GPR64:$Rm, 0, 0)>;
+}
+
+//---
+// Load/store unscaled immediate
+//---
+
+def am_unscaled8 : ComplexPattern<i64, 2, "SelectAddrModeUnscaled8", []>;
+def am_unscaled16 : ComplexPattern<i64, 2, "SelectAddrModeUnscaled16", []>;
+def am_unscaled32 : ComplexPattern<i64, 2, "SelectAddrModeUnscaled32", []>;
+def am_unscaled64 : ComplexPattern<i64, 2, "SelectAddrModeUnscaled64", []>;
+def am_unscaled128 : ComplexPattern<i64, 2, "SelectAddrModeUnscaled128", []>;
+
+def gi_am_unscaled8 :
+ GIComplexOperandMatcher<s64, "selectAddrModeUnscaled8">,
+ GIComplexPatternEquiv<am_unscaled8>;
+def gi_am_unscaled16 :
+ GIComplexOperandMatcher<s64, "selectAddrModeUnscaled16">,
+ GIComplexPatternEquiv<am_unscaled16>;
+def gi_am_unscaled32 :
+ GIComplexOperandMatcher<s64, "selectAddrModeUnscaled32">,
+ GIComplexPatternEquiv<am_unscaled32>;
+def gi_am_unscaled64 :
+ GIComplexOperandMatcher<s64, "selectAddrModeUnscaled64">,
+ GIComplexPatternEquiv<am_unscaled64>;
+def gi_am_unscaled128 :
+ GIComplexOperandMatcher<s64, "selectAddrModeUnscaled128">,
+ GIComplexPatternEquiv<am_unscaled128>;
+
+
+class BaseLoadStoreUnscale<bits<2> sz, bit V, bits<2> opc, dag oops, dag iops,
+ string asm, list<dag> pattern>
+ : I<oops, iops, asm, "\t$Rt, [$Rn, $offset]", "", pattern> {
+ bits<5> Rt;
+ bits<5> Rn;
+ bits<9> offset;
+ let Inst{31-30} = sz;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = V;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0;
+ let Inst{20-12} = offset;
+ let Inst{11-10} = 0b00;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+
+ let DecoderMethod = "DecodeSignedLdStInstruction";
+}
+
+// Armv8.4 LDAPR & STLR with Immediate Offset instruction
+multiclass BaseLoadUnscaleV84<string asm, bits<2> sz, bits<2> opc,
+ RegisterOperand regtype > {
+ def i : BaseLoadStoreUnscale<sz, 0, opc, (outs regtype:$Rt),
+ (ins GPR64sp:$Rn, simm9:$offset), asm, []>,
+ Sched<[WriteST]> {
+ let Inst{29} = 0;
+ let Inst{24} = 1;
+ }
+ def : InstAlias<asm # "\t$Rt, [$Rn]",
+ (!cast<Instruction>(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>;
+}
+
+multiclass BaseStoreUnscaleV84<string asm, bits<2> sz, bits<2> opc,
+ RegisterOperand regtype > {
+ def i : BaseLoadStoreUnscale<sz, 0, opc, (outs),
+ (ins regtype:$Rt, GPR64sp:$Rn, simm9:$offset),
+ asm, []>,
+ Sched<[WriteST]> {
+ let Inst{29} = 0;
+ let Inst{24} = 1;
+ }
+ def : InstAlias<asm # "\t$Rt, [$Rn]",
+ (!cast<Instruction>(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>;
+}
+
+multiclass LoadUnscaled<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
+ string asm, list<dag> pattern> {
+ let AddedComplexity = 1 in // try this before LoadUI
+ def i : BaseLoadStoreUnscale<sz, V, opc, (outs regtype:$Rt),
+ (ins GPR64sp:$Rn, simm9:$offset), asm, pattern>,
+ Sched<[WriteLD]>;
+
+ def : InstAlias<asm # "\t$Rt, [$Rn]",
+ (!cast<Instruction>(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>;
+}
+
+multiclass StoreUnscaled<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
+ string asm, list<dag> pattern> {
+ let AddedComplexity = 1 in // try this before StoreUI
+ def i : BaseLoadStoreUnscale<sz, V, opc, (outs),
+ (ins regtype:$Rt, GPR64sp:$Rn, simm9:$offset),
+ asm, pattern>,
+ Sched<[WriteST]>;
+
+ def : InstAlias<asm # "\t$Rt, [$Rn]",
+ (!cast<Instruction>(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>;
+}
+
+multiclass PrefetchUnscaled<bits<2> sz, bit V, bits<2> opc, string asm,
+ list<dag> pat> {
+ let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
+ def i : BaseLoadStoreUnscale<sz, V, opc, (outs),
+ (ins prfop:$Rt, GPR64sp:$Rn, simm9:$offset),
+ asm, pat>,
+ Sched<[WriteLD]>;
+
+ def : InstAlias<asm # "\t$Rt, [$Rn]",
+ (!cast<Instruction>(NAME # "i") prfop:$Rt, GPR64sp:$Rn, 0)>;
+}
+
+//---
+// Load/store unscaled immediate, unprivileged
+//---
+
+class BaseLoadStoreUnprivileged<bits<2> sz, bit V, bits<2> opc,
+ dag oops, dag iops, string asm>
+ : I<oops, iops, asm, "\t$Rt, [$Rn, $offset]", "", []> {
+ bits<5> Rt;
+ bits<5> Rn;
+ bits<9> offset;
+ let Inst{31-30} = sz;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = V;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0;
+ let Inst{20-12} = offset;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+
+ let DecoderMethod = "DecodeSignedLdStInstruction";
+}
+
+multiclass LoadUnprivileged<bits<2> sz, bit V, bits<2> opc,
+ RegisterClass regtype, string asm> {
+ let mayStore = 0, mayLoad = 1, hasSideEffects = 0 in
+ def i : BaseLoadStoreUnprivileged<sz, V, opc, (outs regtype:$Rt),
+ (ins GPR64sp:$Rn, simm9:$offset), asm>,
+ Sched<[WriteLD]>;
+
+ def : InstAlias<asm # "\t$Rt, [$Rn]",
+ (!cast<Instruction>(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>;
+}
+
+multiclass StoreUnprivileged<bits<2> sz, bit V, bits<2> opc,
+ RegisterClass regtype, string asm> {
+ let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
+ def i : BaseLoadStoreUnprivileged<sz, V, opc, (outs),
+ (ins regtype:$Rt, GPR64sp:$Rn, simm9:$offset),
+ asm>,
+ Sched<[WriteST]>;
+
+ def : InstAlias<asm # "\t$Rt, [$Rn]",
+ (!cast<Instruction>(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>;
+}
+
+//---
+// Load/store pre-indexed
+//---
+
+class BaseLoadStorePreIdx<bits<2> sz, bit V, bits<2> opc, dag oops, dag iops,
+ string asm, string cstr, list<dag> pat>
+ : I<oops, iops, asm, "\t$Rt, [$Rn, $offset]!", cstr, pat> {
+ bits<5> Rt;
+ bits<5> Rn;
+ bits<9> offset;
+ let Inst{31-30} = sz;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = V;
+ let Inst{25-24} = 0;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0;
+ let Inst{20-12} = offset;
+ let Inst{11-10} = 0b11;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+
+ let DecoderMethod = "DecodeSignedLdStInstruction";
+}
+
+let hasSideEffects = 0 in {
+let mayStore = 0, mayLoad = 1 in
+class LoadPreIdx<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
+ string asm>
+ : BaseLoadStorePreIdx<sz, V, opc,
+ (outs GPR64sp:$wback, regtype:$Rt),
+ (ins GPR64sp:$Rn, simm9:$offset), asm,
+ "$Rn = $wback,@earlyclobber $wback", []>,
+ Sched<[WriteLD, WriteAdr]>;
+
+let mayStore = 1, mayLoad = 0 in
+class StorePreIdx<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
+ string asm, SDPatternOperator storeop, ValueType Ty>
+ : BaseLoadStorePreIdx<sz, V, opc,
+ (outs GPR64sp:$wback),
+ (ins regtype:$Rt, GPR64sp:$Rn, simm9:$offset),
+ asm, "$Rn = $wback,@earlyclobber $wback",
+ [(set GPR64sp:$wback,
+ (storeop (Ty regtype:$Rt), GPR64sp:$Rn, simm9:$offset))]>,
+ Sched<[WriteAdr, WriteST]>;
+} // hasSideEffects = 0
+
+//---
+// Load/store post-indexed
+//---
+
+class BaseLoadStorePostIdx<bits<2> sz, bit V, bits<2> opc, dag oops, dag iops,
+ string asm, string cstr, list<dag> pat>
+ : I<oops, iops, asm, "\t$Rt, [$Rn], $offset", cstr, pat> {
+ bits<5> Rt;
+ bits<5> Rn;
+ bits<9> offset;
+ let Inst{31-30} = sz;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = V;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0b0;
+ let Inst{20-12} = offset;
+ let Inst{11-10} = 0b01;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+
+ let DecoderMethod = "DecodeSignedLdStInstruction";
+}
+
+let hasSideEffects = 0 in {
+let mayStore = 0, mayLoad = 1 in
+class LoadPostIdx<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
+ string asm>
+ : BaseLoadStorePostIdx<sz, V, opc,
+ (outs GPR64sp:$wback, regtype:$Rt),
+ (ins GPR64sp:$Rn, simm9:$offset),
+ asm, "$Rn = $wback,@earlyclobber $wback", []>,
+ Sched<[WriteLD, WriteAdr]>;
+
+let mayStore = 1, mayLoad = 0 in
+class StorePostIdx<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
+ string asm, SDPatternOperator storeop, ValueType Ty>
+ : BaseLoadStorePostIdx<sz, V, opc,
+ (outs GPR64sp:$wback),
+ (ins regtype:$Rt, GPR64sp:$Rn, simm9:$offset),
+ asm, "$Rn = $wback,@earlyclobber $wback",
+ [(set GPR64sp:$wback,
+ (storeop (Ty regtype:$Rt), GPR64sp:$Rn, simm9:$offset))]>,
+ Sched<[WriteAdr, WriteST]>;
+} // hasSideEffects = 0
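+
+// For illustration only: hypothetical instantiations of the writeback classes
+// above. The register operand and the post_store selection fragment are
+// placeholders; the real definitions are in AArch64InstrInfo.td.
+//
+//   def LDRXpre  : LoadPreIdx  <0b11, 0, 0b01, GPR64, "ldr">;
+//   def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64, "str", post_store, i64>;
+//
+// Both tie the updated base register to the $wback output, which models the
+// pre/post-indexed address writeback.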
+
+
+//---
+// Load/store pair
+//---
+
+// (indexed, offset)
+
+class BaseLoadStorePairOffset<bits<2> opc, bit V, bit L, dag oops, dag iops,
+ string asm>
+ : I<oops, iops, asm, "\t$Rt, $Rt2, [$Rn, $offset]", "", []> {
+ bits<5> Rt;
+ bits<5> Rt2;
+ bits<5> Rn;
+ bits<7> offset;
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b101;
+ let Inst{26} = V;
+ let Inst{25-23} = 0b010;
+ let Inst{22} = L;
+ let Inst{21-15} = offset;
+ let Inst{14-10} = Rt2;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+
+ let DecoderMethod = "DecodePairLdStInstruction";
+}
+
+multiclass LoadPairOffset<bits<2> opc, bit V, RegisterOperand regtype,
+ Operand indextype, string asm> {
+ let hasSideEffects = 0, mayStore = 0, mayLoad = 1 in
+ def i : BaseLoadStorePairOffset<opc, V, 1,
+ (outs regtype:$Rt, regtype:$Rt2),
+ (ins GPR64sp:$Rn, indextype:$offset), asm>,
+ Sched<[WriteLD, WriteLDHi]>;
+
+ def : InstAlias<asm # "\t$Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(NAME # "i") regtype:$Rt, regtype:$Rt2,
+ GPR64sp:$Rn, 0)>;
+}
+
+
+multiclass StorePairOffset<bits<2> opc, bit V, RegisterOperand regtype,
+ Operand indextype, string asm> {
+ let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
+ def i : BaseLoadStorePairOffset<opc, V, 0, (outs),
+ (ins regtype:$Rt, regtype:$Rt2,
+ GPR64sp:$Rn, indextype:$offset),
+ asm>,
+ Sched<[WriteSTP]>;
+
+ def : InstAlias<asm # "\t$Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(NAME # "i") regtype:$Rt, regtype:$Rt2,
+ GPR64sp:$Rn, 0)>;
+}
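+
+// For illustration only: hypothetical pair instantiations. The offset operand
+// (here simm7s8, a 7-bit immediate scaled by 8) is assumed to be defined
+// earlier in this file; real definitions are in AArch64InstrInfo.td.
+//
+//   defm LDPX : LoadPairOffset <0b10, 0, GPR64, simm7s8, "ldp">;
+//   defm STPD : StorePairOffset<0b01, 1, FPR64, simm7s8, "stp">;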
+
+// (pre-indexed)
+class BaseLoadStorePairPreIdx<bits<2> opc, bit V, bit L, dag oops, dag iops,
+ string asm>
+ : I<oops, iops, asm, "\t$Rt, $Rt2, [$Rn, $offset]!", "$Rn = $wback,@earlyclobber $wback", []> {
+ bits<5> Rt;
+ bits<5> Rt2;
+ bits<5> Rn;
+ bits<7> offset;
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b101;
+ let Inst{26} = V;
+ let Inst{25-23} = 0b011;
+ let Inst{22} = L;
+ let Inst{21-15} = offset;
+ let Inst{14-10} = Rt2;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+
+ let DecoderMethod = "DecodePairLdStInstruction";
+}
+
+let hasSideEffects = 0 in {
+let mayStore = 0, mayLoad = 1 in
+class LoadPairPreIdx<bits<2> opc, bit V, RegisterOperand regtype,
+ Operand indextype, string asm>
+ : BaseLoadStorePairPreIdx<opc, V, 1,
+ (outs GPR64sp:$wback, regtype:$Rt, regtype:$Rt2),
+ (ins GPR64sp:$Rn, indextype:$offset), asm>,
+ Sched<[WriteLD, WriteLDHi, WriteAdr]>;
+
+let mayStore = 1, mayLoad = 0 in
+class StorePairPreIdx<bits<2> opc, bit V, RegisterOperand regtype,
+ Operand indextype, string asm>
+ : BaseLoadStorePairPreIdx<opc, V, 0, (outs GPR64sp:$wback),
+ (ins regtype:$Rt, regtype:$Rt2,
+ GPR64sp:$Rn, indextype:$offset),
+ asm>,
+ Sched<[WriteAdr, WriteSTP]>;
+} // hasSideEffects = 0
+
+// (post-indexed)
+
+class BaseLoadStorePairPostIdx<bits<2> opc, bit V, bit L, dag oops, dag iops,
+ string asm>
+ : I<oops, iops, asm, "\t$Rt, $Rt2, [$Rn], $offset", "$Rn = $wback,@earlyclobber $wback", []> {
+ bits<5> Rt;
+ bits<5> Rt2;
+ bits<5> Rn;
+ bits<7> offset;
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b101;
+ let Inst{26} = V;
+ let Inst{25-23} = 0b001;
+ let Inst{22} = L;
+ let Inst{21-15} = offset;
+ let Inst{14-10} = Rt2;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+
+ let DecoderMethod = "DecodePairLdStInstruction";
+}
+
+let hasSideEffects = 0 in {
+let mayStore = 0, mayLoad = 1 in
+class LoadPairPostIdx<bits<2> opc, bit V, RegisterOperand regtype,
+ Operand idxtype, string asm>
+ : BaseLoadStorePairPostIdx<opc, V, 1,
+ (outs GPR64sp:$wback, regtype:$Rt, regtype:$Rt2),
+ (ins GPR64sp:$Rn, idxtype:$offset), asm>,
+ Sched<[WriteLD, WriteLDHi, WriteAdr]>;
+
+let mayStore = 1, mayLoad = 0 in
+class StorePairPostIdx<bits<2> opc, bit V, RegisterOperand regtype,
+ Operand idxtype, string asm>
+ : BaseLoadStorePairPostIdx<opc, V, 0, (outs GPR64sp:$wback),
+ (ins regtype:$Rt, regtype:$Rt2,
+ GPR64sp:$Rn, idxtype:$offset),
+ asm>,
+ Sched<[WriteAdr, WriteSTP]>;
+} // hasSideEffects = 0
+
+// (no-allocate)
+
+class BaseLoadStorePairNoAlloc<bits<2> opc, bit V, bit L, dag oops, dag iops,
+ string asm>
+ : I<oops, iops, asm, "\t$Rt, $Rt2, [$Rn, $offset]", "", []> {
+ bits<5> Rt;
+ bits<5> Rt2;
+ bits<5> Rn;
+ bits<7> offset;
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b101;
+ let Inst{26} = V;
+ let Inst{25-23} = 0b000;
+ let Inst{22} = L;
+ let Inst{21-15} = offset;
+ let Inst{14-10} = Rt2;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+
+ let DecoderMethod = "DecodePairLdStInstruction";
+}
+
+multiclass LoadPairNoAlloc<bits<2> opc, bit V, RegisterClass regtype,
+ Operand indextype, string asm> {
+ let hasSideEffects = 0, mayStore = 0, mayLoad = 1 in
+ def i : BaseLoadStorePairNoAlloc<opc, V, 1,
+ (outs regtype:$Rt, regtype:$Rt2),
+ (ins GPR64sp:$Rn, indextype:$offset), asm>,
+ Sched<[WriteLD, WriteLDHi]>;
+
+
+ def : InstAlias<asm # "\t$Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(NAME # "i") regtype:$Rt, regtype:$Rt2,
+ GPR64sp:$Rn, 0)>;
+}
+
+multiclass StorePairNoAlloc<bits<2> opc, bit V, RegisterClass regtype,
+ Operand indextype, string asm> {
+ let hasSideEffects = 0, mayStore = 1, mayLoad = 0 in
+ def i : BaseLoadStorePairNoAlloc<opc, V, 0, (outs),
+ (ins regtype:$Rt, regtype:$Rt2,
+ GPR64sp:$Rn, indextype:$offset),
+ asm>,
+ Sched<[WriteSTP]>;
+
+ def : InstAlias<asm # "\t$Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(NAME # "i") regtype:$Rt, regtype:$Rt2,
+ GPR64sp:$Rn, 0)>;
+}
+
+//---
+// Load/store exclusive
+//---
+
+// True exclusive operations write to and/or read from the system's exclusive
+// monitors, which as far as a compiler is concerned can be modelled as a
+// random shared memory address. Hence LoadExclusive mayStore.
+//
+// Since these instructions have the undefined register bits set to 1 in
+// their canonical form, we need a post encoder method to set those bits
+// to 1 when encoding these instructions. We do this using the
+// fixLoadStoreExclusive function. This function has template parameters:
+//
+// fixLoadStoreExclusive<int hasRs, int hasRt2>
+//
+// hasRs indicates that the instruction uses the Rs field, so we won't set
+// it to 1 (and the same for Rt2). We don't need template parameters for
+// the other register fields since Rt and Rn are always used.
+//
+let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in
+class BaseLoadStoreExclusive<bits<2> sz, bit o2, bit L, bit o1, bit o0,
+ dag oops, dag iops, string asm, string operands>
+ : I<oops, iops, asm, operands, "", []> {
+ let Inst{31-30} = sz;
+ let Inst{29-24} = 0b001000;
+ let Inst{23} = o2;
+ let Inst{22} = L;
+ let Inst{21} = o1;
+ let Inst{15} = o0;
+
+ let DecoderMethod = "DecodeExclusiveLdStInstruction";
+}
+
+// Neither Rs nor Rt2 operands.
+class LoadStoreExclusiveSimple<bits<2> sz, bit o2, bit L, bit o1, bit o0,
+ dag oops, dag iops, string asm, string operands>
+ : BaseLoadStoreExclusive<sz, o2, L, o1, o0, oops, iops, asm, operands> {
+ bits<5> Rt;
+ bits<5> Rn;
+ let Inst{20-16} = 0b11111;
+ let Unpredictable{20-16} = 0b11111;
+ let Inst{14-10} = 0b11111;
+ let Unpredictable{14-10} = 0b11111;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+
+ let PostEncoderMethod = "fixLoadStoreExclusive<0,0>";
+}
+
+// Simple load acquires don't set the exclusive monitor
+let mayLoad = 1, mayStore = 0 in
+class LoadAcquire<bits<2> sz, bit o2, bit L, bit o1, bit o0,
+ RegisterClass regtype, string asm>
+ : LoadStoreExclusiveSimple<sz, o2, L, o1, o0, (outs regtype:$Rt),
+ (ins GPR64sp0:$Rn), asm, "\t$Rt, [$Rn]">,
+ Sched<[WriteLD]>;
+
+class LoadExclusive<bits<2> sz, bit o2, bit L, bit o1, bit o0,
+ RegisterClass regtype, string asm>
+ : LoadStoreExclusiveSimple<sz, o2, L, o1, o0, (outs regtype:$Rt),
+ (ins GPR64sp0:$Rn), asm, "\t$Rt, [$Rn]">,
+ Sched<[WriteLD]>;
+
+class LoadExclusivePair<bits<2> sz, bit o2, bit L, bit o1, bit o0,
+ RegisterClass regtype, string asm>
+ : BaseLoadStoreExclusive<sz, o2, L, o1, o0,
+ (outs regtype:$Rt, regtype:$Rt2),
+ (ins GPR64sp0:$Rn), asm,
+ "\t$Rt, $Rt2, [$Rn]">,
+ Sched<[WriteLD, WriteLDHi]> {
+ bits<5> Rt;
+ bits<5> Rt2;
+ bits<5> Rn;
+ let Inst{14-10} = Rt2;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+
+ let PostEncoderMethod = "fixLoadStoreExclusive<0,1>";
+}
+
+// Simple store release operations do not check the exclusive monitor.
+let mayLoad = 0, mayStore = 1 in
+class StoreRelease<bits<2> sz, bit o2, bit L, bit o1, bit o0,
+ RegisterClass regtype, string asm>
+ : LoadStoreExclusiveSimple<sz, o2, L, o1, o0, (outs),
+ (ins regtype:$Rt, GPR64sp0:$Rn),
+ asm, "\t$Rt, [$Rn]">,
+ Sched<[WriteST]>;
+
+let mayLoad = 1, mayStore = 1 in
+class StoreExclusive<bits<2> sz, bit o2, bit L, bit o1, bit o0,
+ RegisterClass regtype, string asm>
+ : BaseLoadStoreExclusive<sz, o2, L, o1, o0, (outs GPR32:$Ws),
+ (ins regtype:$Rt, GPR64sp0:$Rn),
+ asm, "\t$Ws, $Rt, [$Rn]">,
+ Sched<[WriteSTX]> {
+ bits<5> Ws;
+ bits<5> Rt;
+ bits<5> Rn;
+ let Inst{20-16} = Ws;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+
+ let Constraints = "@earlyclobber $Ws";
+ let PostEncoderMethod = "fixLoadStoreExclusive<1,0>";
+}
+
+class StoreExclusivePair<bits<2> sz, bit o2, bit L, bit o1, bit o0,
+ RegisterClass regtype, string asm>
+ : BaseLoadStoreExclusive<sz, o2, L, o1, o0,
+ (outs GPR32:$Ws),
+ (ins regtype:$Rt, regtype:$Rt2, GPR64sp0:$Rn),
+ asm, "\t$Ws, $Rt, $Rt2, [$Rn]">,
+ Sched<[WriteSTX]> {
+ bits<5> Ws;
+ bits<5> Rt;
+ bits<5> Rt2;
+ bits<5> Rn;
+ let Inst{20-16} = Ws;
+ let Inst{14-10} = Rt2;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+
+ let Constraints = "@earlyclobber $Ws";
+}
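+
+// For illustration only: a hypothetical instantiation of the exclusive
+// classes above. The opcode bits shown are placeholders rather than the
+// architectural encodings; real definitions are in AArch64InstrInfo.td.
+//
+//   def LDXRX : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">;
+//   def STXRW : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">;
+//
+// LoadExclusive inherits fixLoadStoreExclusive<0,0> because neither Rs nor
+// Rt2 is used, while StoreExclusive selects <1,0> since the status register
+// $Ws occupies the Rs field.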
+
+//---
+// Exception generation
+//---
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
+class ExceptionGeneration<bits<3> op1, bits<2> ll, string asm>
+ : I<(outs), (ins imm0_65535:$imm), asm, "\t$imm", "", []>,
+ Sched<[WriteSys]> {
+ bits<16> imm;
+ let Inst{31-24} = 0b11010100;
+ let Inst{23-21} = op1;
+ let Inst{20-5} = imm;
+ let Inst{4-2} = 0b000;
+ let Inst{1-0} = ll;
+}
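+
+// For illustration only: hypothetical exception-generation instructions built
+// from the class above (the op1/ll values are placeholders; the real
+// definitions are in AArch64InstrInfo.td).
+//
+//   def BRK : ExceptionGeneration<0b001, 0b00, "brk">;
+//   def SVC : ExceptionGeneration<0b000, 0b01, "svc">;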
+
+let Predicates = [HasFPARMv8] in {
+
+//---
+// Floating point to integer conversion
+//---
+
+class BaseFPToIntegerUnscaled<bits<2> type, bits<2> rmode, bits<3> opcode,
+ RegisterClass srcType, RegisterClass dstType,
+ string asm, list<dag> pattern>
+ : I<(outs dstType:$Rd), (ins srcType:$Rn),
+ asm, "\t$Rd, $Rn", "", pattern>,
+ Sched<[WriteFCvt]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{30-29} = 0b00;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 1;
+ let Inst{20-19} = rmode;
+ let Inst{18-16} = opcode;
+ let Inst{15-10} = 0;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseFPToInteger<bits<2> type, bits<2> rmode, bits<3> opcode,
+ RegisterClass srcType, RegisterClass dstType,
+ Operand immType, string asm, list<dag> pattern>
+ : I<(outs dstType:$Rd), (ins srcType:$Rn, immType:$scale),
+ asm, "\t$Rd, $Rn, $scale", "", pattern>,
+ Sched<[WriteFCvt]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<6> scale;
+ let Inst{30-29} = 0b00;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0;
+ let Inst{20-19} = rmode;
+ let Inst{18-16} = opcode;
+ let Inst{15-10} = scale;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass FPToIntegerUnscaled<bits<2> rmode, bits<3> opcode, string asm,
+ SDPatternOperator OpN> {
+ // Unscaled half-precision to 32-bit
+ def UWHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, GPR32, asm,
+ [(set GPR32:$Rd, (OpN FPR16:$Rn))]> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let Predicates = [HasFullFP16];
+ }
+
+ // Unscaled half-precision to 64-bit
+ def UXHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, GPR64, asm,
+ [(set GPR64:$Rd, (OpN FPR16:$Rn))]> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ let Predicates = [HasFullFP16];
+ }
+
+ // Unscaled single-precision to 32-bit
+ def UWSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, GPR32, asm,
+ [(set GPR32:$Rd, (OpN FPR32:$Rn))]> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ }
+
+ // Unscaled single-precision to 64-bit
+ def UXSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, GPR64, asm,
+ [(set GPR64:$Rd, (OpN FPR32:$Rn))]> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ }
+
+ // Unscaled double-precision to 32-bit
+ def UWDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, GPR32, asm,
+ [(set GPR32:$Rd, (OpN (f64 FPR64:$Rn)))]> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ }
+
+ // Unscaled double-precision to 64-bit
+ def UXDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, GPR64, asm,
+ [(set GPR64:$Rd, (OpN (f64 FPR64:$Rn)))]> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ }
+}
+
+multiclass FPToIntegerScaled<bits<2> rmode, bits<3> opcode, string asm,
+ SDPatternOperator OpN> {
+ // Scaled half-precision to 32-bit
+ def SWHri : BaseFPToInteger<0b11, rmode, opcode, FPR16, GPR32,
+ fixedpoint_f16_i32, asm,
+ [(set GPR32:$Rd, (OpN (fmul FPR16:$Rn,
+ fixedpoint_f16_i32:$scale)))]> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let scale{5} = 1;
+ let Predicates = [HasFullFP16];
+ }
+
+ // Scaled half-precision to 64-bit
+ def SXHri : BaseFPToInteger<0b11, rmode, opcode, FPR16, GPR64,
+ fixedpoint_f16_i64, asm,
+ [(set GPR64:$Rd, (OpN (fmul FPR16:$Rn,
+ fixedpoint_f16_i64:$scale)))]> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ let Predicates = [HasFullFP16];
+ }
+
+ // Scaled single-precision to 32-bit
+ def SWSri : BaseFPToInteger<0b00, rmode, opcode, FPR32, GPR32,
+ fixedpoint_f32_i32, asm,
+ [(set GPR32:$Rd, (OpN (fmul FPR32:$Rn,
+ fixedpoint_f32_i32:$scale)))]> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let scale{5} = 1;
+ }
+
+ // Scaled single-precision to 64-bit
+ def SXSri : BaseFPToInteger<0b00, rmode, opcode, FPR32, GPR64,
+ fixedpoint_f32_i64, asm,
+ [(set GPR64:$Rd, (OpN (fmul FPR32:$Rn,
+ fixedpoint_f32_i64:$scale)))]> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ }
+
+ // Scaled double-precision to 32-bit
+ def SWDri : BaseFPToInteger<0b01, rmode, opcode, FPR64, GPR32,
+ fixedpoint_f64_i32, asm,
+ [(set GPR32:$Rd, (OpN (fmul FPR64:$Rn,
+ fixedpoint_f64_i32:$scale)))]> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let scale{5} = 1;
+ }
+
+ // Scaled double-precision to 64-bit
+ def SXDri : BaseFPToInteger<0b01, rmode, opcode, FPR64, GPR64,
+ fixedpoint_f64_i64, asm,
+ [(set GPR64:$Rd, (OpN (fmul FPR64:$Rn,
+ fixedpoint_f64_i64:$scale)))]> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ }
+}
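+
+// For illustration only: a hypothetical pairing of the unscaled and scaled
+// FP-to-integer multiclasses. The rmode/opcode bits are placeholders;
+// fp_to_sint is the generic SelectionDAG node. Real definitions are in
+// AArch64InstrInfo.td.
+//
+//   defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", fp_to_sint>;
+//   defm FCVTZS : FPToIntegerScaled  <0b11, 0b000, "fcvtzs", fp_to_sint>;
+//
+// The scaled variants fold a multiply by a power of two (the fixed-point
+// scale) into the conversion, which is why their patterns match
+// (OpN (fmul $Rn, $scale)).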
+
+//---
+// Integer to floating point conversion
+//---
+
+let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
+class BaseIntegerToFP<bit isUnsigned,
+ RegisterClass srcType, RegisterClass dstType,
+ Operand immType, string asm, list<dag> pattern>
+ : I<(outs dstType:$Rd), (ins srcType:$Rn, immType:$scale),
+ asm, "\t$Rd, $Rn, $scale", "", pattern>,
+ Sched<[WriteFCvt]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<6> scale;
+ let Inst{30-24} = 0b0011110;
+ let Inst{21-17} = 0b00001;
+ let Inst{16} = isUnsigned;
+ let Inst{15-10} = scale;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+class BaseIntegerToFPUnscaled<bit isUnsigned,
+ RegisterClass srcType, RegisterClass dstType,
+ ValueType dvt, string asm, SDNode node>
+ : I<(outs dstType:$Rd), (ins srcType:$Rn),
+ asm, "\t$Rd, $Rn", "", [(set (dvt dstType:$Rd), (node srcType:$Rn))]>,
+ Sched<[WriteFCvt]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<6> scale;
+ let Inst{30-24} = 0b0011110;
+ let Inst{21-17} = 0b10001;
+ let Inst{16} = isUnsigned;
+ let Inst{15-10} = 0b000000;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass IntegerToFP<bit isUnsigned, string asm, SDNode node> {
+ // Unscaled
+ def UWHri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR16, f16, asm, node> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let Inst{23-22} = 0b11; // 16-bit FPR flag
+ let Predicates = [HasFullFP16];
+ }
+
+ def UWSri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR32, f32, asm, node> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let Inst{23-22} = 0b00; // 32-bit FPR flag
+ }
+
+ def UWDri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR64, f64, asm, node> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let Inst{23-22} = 0b01; // 64-bit FPR flag
+ }
+
+ def UXHri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR16, f16, asm, node> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ let Inst{23-22} = 0b11; // 16-bit FPR flag
+ let Predicates = [HasFullFP16];
+ }
+
+ def UXSri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR32, f32, asm, node> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ let Inst{23-22} = 0b00; // 32-bit FPR flag
+ }
+
+ def UXDri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR64, f64, asm, node> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ let Inst{23-22} = 0b01; // 64-bit FPR flag
+ }
+
+ // Scaled
+ def SWHri: BaseIntegerToFP<isUnsigned, GPR32, FPR16, fixedpoint_f16_i32, asm,
+ [(set FPR16:$Rd,
+ (fdiv (node GPR32:$Rn),
+ fixedpoint_f16_i32:$scale))]> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let Inst{23-22} = 0b11; // 16-bit FPR flag
+ let scale{5} = 1;
+ let Predicates = [HasFullFP16];
+ }
+
+ def SWSri: BaseIntegerToFP<isUnsigned, GPR32, FPR32, fixedpoint_f32_i32, asm,
+ [(set FPR32:$Rd,
+ (fdiv (node GPR32:$Rn),
+ fixedpoint_f32_i32:$scale))]> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let Inst{23-22} = 0b00; // 32-bit FPR flag
+ let scale{5} = 1;
+ }
+
+ def SWDri: BaseIntegerToFP<isUnsigned, GPR32, FPR64, fixedpoint_f64_i32, asm,
+ [(set FPR64:$Rd,
+ (fdiv (node GPR32:$Rn),
+ fixedpoint_f64_i32:$scale))]> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let Inst{23-22} = 0b01; // 64-bit FPR flag
+ let scale{5} = 1;
+ }
+
+ def SXHri: BaseIntegerToFP<isUnsigned, GPR64, FPR16, fixedpoint_f16_i64, asm,
+ [(set FPR16:$Rd,
+ (fdiv (node GPR64:$Rn),
+ fixedpoint_f16_i64:$scale))]> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ let Inst{23-22} = 0b11; // 16-bit FPR flag
+ let Predicates = [HasFullFP16];
+ }
+
+ def SXSri: BaseIntegerToFP<isUnsigned, GPR64, FPR32, fixedpoint_f32_i64, asm,
+ [(set FPR32:$Rd,
+ (fdiv (node GPR64:$Rn),
+ fixedpoint_f32_i64:$scale))]> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ let Inst{23-22} = 0b00; // 32-bit FPR flag
+ }
+
+ def SXDri: BaseIntegerToFP<isUnsigned, GPR64, FPR64, fixedpoint_f64_i64, asm,
+ [(set FPR64:$Rd,
+ (fdiv (node GPR64:$Rn),
+ fixedpoint_f64_i64:$scale))]> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ let Inst{23-22} = 0b01; // 64-bit FPR flag
+ }
+}
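+
+// For illustration only (this mirrors the usual upstream usage, but the real
+// definitions are in AArch64InstrInfo.td):
+//
+//   defm SCVTF : IntegerToFP<0, "scvtf", sint_to_fp>;
+//   defm UCVTF : IntegerToFP<1, "ucvtf", uint_to_fp>;
+//
+// The scaled variants model a fixed-point conversion by dividing the
+// converted value by the power-of-two scale, hence the
+// (fdiv (node $Rn), $scale) patterns above.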
+
+//---
+// Unscaled integer <-> floating point conversion (i.e. FMOV)
+//---
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseUnscaledConversion<bits<2> rmode, bits<3> opcode,
+ RegisterClass srcType, RegisterClass dstType,
+ string asm>
+ : I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd, $Rn", "",
+ // We use COPY_TO_REGCLASS for these bitconvert operations.
+ // copyPhysReg() expands the resultant COPY instructions after
+ // regalloc is done. This gives greater freedom for the allocator
+      // and related passes (coalescing, copy propagation, et al.) to
+ // be more effective.
+ [/*(set (dvt dstType:$Rd), (bitconvert (svt srcType:$Rn)))*/]>,
+ Sched<[WriteFCopy]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{30-24} = 0b0011110;
+ let Inst{21} = 1;
+ let Inst{20-19} = rmode;
+ let Inst{18-16} = opcode;
+ let Inst{15-10} = 0b000000;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseUnscaledConversionToHigh<bits<2> rmode, bits<3> opcode,
+ RegisterClass srcType, RegisterOperand dstType, string asm,
+ string kind>
+ : I<(outs dstType:$Rd), (ins srcType:$Rn, VectorIndex1:$idx), asm,
+ "{\t$Rd"#kind#"$idx, $Rn|"#kind#"\t$Rd$idx, $Rn}", "", []>,
+ Sched<[WriteFCopy]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{30-23} = 0b00111101;
+ let Inst{21} = 1;
+ let Inst{20-19} = rmode;
+ let Inst{18-16} = opcode;
+ let Inst{15-10} = 0b000000;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+
+ let DecoderMethod = "DecodeFMOVLaneInstruction";
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseUnscaledConversionFromHigh<bits<2> rmode, bits<3> opcode,
+ RegisterOperand srcType, RegisterClass dstType, string asm,
+ string kind>
+ : I<(outs dstType:$Rd), (ins srcType:$Rn, VectorIndex1:$idx), asm,
+ "{\t$Rd, $Rn"#kind#"$idx|"#kind#"\t$Rd, $Rn$idx}", "", []>,
+ Sched<[WriteFCopy]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{30-23} = 0b00111101;
+ let Inst{21} = 1;
+ let Inst{20-19} = rmode;
+ let Inst{18-16} = opcode;
+ let Inst{15-10} = 0b000000;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+
+ let DecoderMethod = "DecodeFMOVLaneInstruction";
+}
+
+
+multiclass UnscaledConversion<string asm> {
+ def WHr : BaseUnscaledConversion<0b00, 0b111, GPR32, FPR16, asm> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let Inst{23-22} = 0b11; // 16-bit FPR flag
+ let Predicates = [HasFullFP16];
+ }
+
+ def XHr : BaseUnscaledConversion<0b00, 0b111, GPR64, FPR16, asm> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ let Inst{23-22} = 0b11; // 16-bit FPR flag
+ let Predicates = [HasFullFP16];
+ }
+
+ def WSr : BaseUnscaledConversion<0b00, 0b111, GPR32, FPR32, asm> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let Inst{23-22} = 0b00; // 32-bit FPR flag
+ }
+
+ def XDr : BaseUnscaledConversion<0b00, 0b111, GPR64, FPR64, asm> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ let Inst{23-22} = 0b01; // 64-bit FPR flag
+ }
+
+ def HWr : BaseUnscaledConversion<0b00, 0b110, FPR16, GPR32, asm> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let Inst{23-22} = 0b11; // 16-bit FPR flag
+ let Predicates = [HasFullFP16];
+ }
+
+ def HXr : BaseUnscaledConversion<0b00, 0b110, FPR16, GPR64, asm> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ let Inst{23-22} = 0b11; // 16-bit FPR flag
+ let Predicates = [HasFullFP16];
+ }
+
+ def SWr : BaseUnscaledConversion<0b00, 0b110, FPR32, GPR32, asm> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let Inst{23-22} = 0b00; // 32-bit FPR flag
+ }
+
+ def DXr : BaseUnscaledConversion<0b00, 0b110, FPR64, GPR64, asm> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ let Inst{23-22} = 0b01; // 64-bit FPR flag
+ }
+
+ def XDHighr : BaseUnscaledConversionToHigh<0b01, 0b111, GPR64, V128,
+ asm, ".d"> {
+ let Inst{31} = 1;
+ let Inst{22} = 0;
+ }
+
+ def DXHighr : BaseUnscaledConversionFromHigh<0b01, 0b110, V128, GPR64,
+ asm, ".d"> {
+ let Inst{31} = 1;
+ let Inst{22} = 0;
+ }
+}
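+
+// For illustration only: the conventional instantiation of the multiclass
+// above (the real definition is in AArch64InstrInfo.td):
+//
+//   defm FMOV : UnscaledConversion<"fmov">;
+//
+// which yields FMOVWHr, FMOVXDr, FMOVDXHighr, and so on, one per GPR/FPR
+// width combination defined above.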
+
+//---
+// Floating point conversion
+//---
+
+class BaseFPConversion<bits<2> type, bits<2> opcode, RegisterClass dstType,
+ RegisterClass srcType, string asm, list<dag> pattern>
+ : I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd, $Rn", "", pattern>,
+ Sched<[WriteFCvt]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31-24} = 0b00011110;
+ let Inst{23-22} = type;
+ let Inst{21-17} = 0b10001;
+ let Inst{16-15} = opcode;
+ let Inst{14-10} = 0b10000;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass FPConversion<string asm> {
+ // Double-precision to Half-precision
+ def HDr : BaseFPConversion<0b01, 0b11, FPR16, FPR64, asm,
+ [(set FPR16:$Rd, (fpround FPR64:$Rn))]>;
+
+ // Double-precision to Single-precision
+ def SDr : BaseFPConversion<0b01, 0b00, FPR32, FPR64, asm,
+ [(set FPR32:$Rd, (fpround FPR64:$Rn))]>;
+
+ // Half-precision to Double-precision
+ def DHr : BaseFPConversion<0b11, 0b01, FPR64, FPR16, asm,
+ [(set FPR64:$Rd, (fpextend FPR16:$Rn))]>;
+
+ // Half-precision to Single-precision
+ def SHr : BaseFPConversion<0b11, 0b00, FPR32, FPR16, asm,
+ [(set FPR32:$Rd, (fpextend FPR16:$Rn))]>;
+
+ // Single-precision to Double-precision
+ def DSr : BaseFPConversion<0b00, 0b01, FPR64, FPR32, asm,
+ [(set FPR64:$Rd, (fpextend FPR32:$Rn))]>;
+
+ // Single-precision to Half-precision
+ def HSr : BaseFPConversion<0b00, 0b11, FPR16, FPR32, asm,
+ [(set FPR16:$Rd, (fpround FPR32:$Rn))]>;
+}
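+
+// For illustration only: the conventional instantiation (real definition in
+// AArch64InstrInfo.td):
+//
+//   defm FCVT : FPConversion<"fcvt">;
+//
+// giving FCVTHDr, FCVTSDr, FCVTDHr, and so on for each precision pair.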
+
+//---
+// Single operand floating point data processing
+//---
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSingleOperandFPData<bits<4> opcode, RegisterClass regtype,
+ ValueType vt, string asm, SDPatternOperator node>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "\t$Rd, $Rn", "",
+ [(set (vt regtype:$Rd), (node (vt regtype:$Rn)))]>,
+ Sched<[WriteF]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31-24} = 0b00011110;
+ let Inst{21-19} = 0b100;
+ let Inst{18-15} = opcode;
+ let Inst{14-10} = 0b10000;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass SingleOperandFPData<bits<4> opcode, string asm,
+ SDPatternOperator node = null_frag> {
+ def Hr : BaseSingleOperandFPData<opcode, FPR16, f16, asm, node> {
+ let Inst{23-22} = 0b11; // 16-bit size flag
+ let Predicates = [HasFullFP16];
+ }
+
+ def Sr : BaseSingleOperandFPData<opcode, FPR32, f32, asm, node> {
+ let Inst{23-22} = 0b00; // 32-bit size flag
+ }
+
+ def Dr : BaseSingleOperandFPData<opcode, FPR64, f64, asm, node> {
+ let Inst{23-22} = 0b01; // 64-bit size flag
+ }
+}
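+
+// For illustration only: hypothetical single-operand instantiations (the
+// opcode values are placeholders; real definitions are in
+// AArch64InstrInfo.td).
+//
+//   defm FABS  : SingleOperandFPData<0b0001, "fabs", fabs>;
+//   defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", fsqrt>;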
+
+//---
+// Two operand floating point data processing
+//---
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseTwoOperandFPData<bits<4> opcode, RegisterClass regtype,
+ string asm, list<dag> pat>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm),
+ asm, "\t$Rd, $Rn, $Rm", "", pat>,
+ Sched<[WriteF]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31-24} = 0b00011110;
+ let Inst{21} = 1;
+ let Inst{20-16} = Rm;
+ let Inst{15-12} = opcode;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass TwoOperandFPData<bits<4> opcode, string asm,
+ SDPatternOperator node = null_frag> {
+ def Hrr : BaseTwoOperandFPData<opcode, FPR16, asm,
+ [(set (f16 FPR16:$Rd),
+ (node (f16 FPR16:$Rn), (f16 FPR16:$Rm)))]> {
+ let Inst{23-22} = 0b11; // 16-bit size flag
+ let Predicates = [HasFullFP16];
+ }
+
+ def Srr : BaseTwoOperandFPData<opcode, FPR32, asm,
+ [(set (f32 FPR32:$Rd),
+ (node (f32 FPR32:$Rn), (f32 FPR32:$Rm)))]> {
+ let Inst{23-22} = 0b00; // 32-bit size flag
+ }
+
+ def Drr : BaseTwoOperandFPData<opcode, FPR64, asm,
+ [(set (f64 FPR64:$Rd),
+ (node (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]> {
+ let Inst{23-22} = 0b01; // 64-bit size flag
+ }
+}
+
+multiclass TwoOperandFPDataNeg<bits<4> opcode, string asm, SDNode node> {
+ def Hrr : BaseTwoOperandFPData<opcode, FPR16, asm,
+ [(set FPR16:$Rd, (fneg (node FPR16:$Rn, (f16 FPR16:$Rm))))]> {
+ let Inst{23-22} = 0b11; // 16-bit size flag
+ let Predicates = [HasFullFP16];
+ }
+
+ def Srr : BaseTwoOperandFPData<opcode, FPR32, asm,
+ [(set FPR32:$Rd, (fneg (node FPR32:$Rn, (f32 FPR32:$Rm))))]> {
+ let Inst{23-22} = 0b00; // 32-bit size flag
+ }
+
+ def Drr : BaseTwoOperandFPData<opcode, FPR64, asm,
+ [(set FPR64:$Rd, (fneg (node FPR64:$Rn, (f64 FPR64:$Rm))))]> {
+ let Inst{23-22} = 0b01; // 64-bit size flag
+ }
+}
+
+
+//---
+// Three operand floating point data processing
+//---
+
+class BaseThreeOperandFPData<bit isNegated, bit isSub,
+ RegisterClass regtype, string asm, list<dag> pat>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, regtype: $Ra),
+ asm, "\t$Rd, $Rn, $Rm, $Ra", "", pat>,
+ Sched<[WriteFMul]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<5> Ra;
+ let Inst{31-24} = 0b00011111;
+ let Inst{21} = isNegated;
+ let Inst{20-16} = Rm;
+ let Inst{15} = isSub;
+ let Inst{14-10} = Ra;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass ThreeOperandFPData<bit isNegated, bit isSub, string asm,
+ SDPatternOperator node> {
+ def Hrrr : BaseThreeOperandFPData<isNegated, isSub, FPR16, asm,
+ [(set FPR16:$Rd,
+ (node (f16 FPR16:$Rn), (f16 FPR16:$Rm), (f16 FPR16:$Ra)))]> {
+ let Inst{23-22} = 0b11; // 16-bit size flag
+ let Predicates = [HasFullFP16];
+ }
+
+ def Srrr : BaseThreeOperandFPData<isNegated, isSub, FPR32, asm,
+ [(set FPR32:$Rd,
+ (node (f32 FPR32:$Rn), (f32 FPR32:$Rm), (f32 FPR32:$Ra)))]> {
+ let Inst{23-22} = 0b00; // 32-bit size flag
+ }
+
+ def Drrr : BaseThreeOperandFPData<isNegated, isSub, FPR64, asm,
+ [(set FPR64:$Rd,
+ (node (f64 FPR64:$Rn), (f64 FPR64:$Rm), (f64 FPR64:$Ra)))]> {
+ let Inst{23-22} = 0b01; // 64-bit size flag
+ }
+}
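+
+// For illustration only: a hypothetical fused multiply-add instantiation
+// (real definitions in AArch64InstrInfo.td). The isNegated/isSub bits select
+// among FMADD, FMSUB, FNMADD and FNMSUB.
+//
+//   defm FMADD : ThreeOperandFPData<0, 0, "fmadd", fma>;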
+
+//---
+// Floating point data comparisons
+//---
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseOneOperandFPComparison<bit signalAllNans,
+ RegisterClass regtype, string asm,
+ list<dag> pat>
+ : I<(outs), (ins regtype:$Rn), asm, "\t$Rn, #0.0", "", pat>,
+ Sched<[WriteFCmp]> {
+ bits<5> Rn;
+ let Inst{31-24} = 0b00011110;
+ let Inst{21} = 1;
+
+ let Inst{15-10} = 0b001000;
+ let Inst{9-5} = Rn;
+ let Inst{4} = signalAllNans;
+ let Inst{3-0} = 0b1000;
+
+ // Rm should be 0b00000 canonically, but we need to accept any value.
+ let PostEncoderMethod = "fixOneOperandFPComparison";
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseTwoOperandFPComparison<bit signalAllNans, RegisterClass regtype,
+ string asm, list<dag> pat>
+ : I<(outs), (ins regtype:$Rn, regtype:$Rm), asm, "\t$Rn, $Rm", "", pat>,
+ Sched<[WriteFCmp]> {
+ bits<5> Rm;
+ bits<5> Rn;
+ let Inst{31-24} = 0b00011110;
+ let Inst{21} = 1;
+ let Inst{20-16} = Rm;
+ let Inst{15-10} = 0b001000;
+ let Inst{9-5} = Rn;
+ let Inst{4} = signalAllNans;
+ let Inst{3-0} = 0b0000;
+}
+
+multiclass FPComparison<bit signalAllNans, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ let Defs = [NZCV] in {
+ def Hrr : BaseTwoOperandFPComparison<signalAllNans, FPR16, asm,
+ [(OpNode FPR16:$Rn, (f16 FPR16:$Rm)), (implicit NZCV)]> {
+ let Inst{23-22} = 0b11;
+ let Predicates = [HasFullFP16];
+ }
+
+ def Hri : BaseOneOperandFPComparison<signalAllNans, FPR16, asm,
+ [(OpNode (f16 FPR16:$Rn), fpimm0), (implicit NZCV)]> {
+ let Inst{23-22} = 0b11;
+ let Predicates = [HasFullFP16];
+ }
+
+ def Srr : BaseTwoOperandFPComparison<signalAllNans, FPR32, asm,
+ [(OpNode FPR32:$Rn, (f32 FPR32:$Rm)), (implicit NZCV)]> {
+ let Inst{23-22} = 0b00;
+ }
+
+ def Sri : BaseOneOperandFPComparison<signalAllNans, FPR32, asm,
+ [(OpNode (f32 FPR32:$Rn), fpimm0), (implicit NZCV)]> {
+ let Inst{23-22} = 0b00;
+ }
+
+ def Drr : BaseTwoOperandFPComparison<signalAllNans, FPR64, asm,
+ [(OpNode FPR64:$Rn, (f64 FPR64:$Rm)), (implicit NZCV)]> {
+ let Inst{23-22} = 0b01;
+ }
+
+ def Dri : BaseOneOperandFPComparison<signalAllNans, FPR64, asm,
+ [(OpNode (f64 FPR64:$Rn), fpimm0), (implicit NZCV)]> {
+ let Inst{23-22} = 0b01;
+ }
+ } // Defs = [NZCV]
+}
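+
+// For illustration only: hypothetical comparison instantiations. AArch64fcmp
+// stands for the target comparison node defined in AArch64InstrInfo.td,
+// where the real definitions also live.
+//
+//   defm FCMP  : FPComparison<0, "fcmp", AArch64fcmp>;
+//   defm FCMPE : FPComparison<1, "fcmpe">;
+//
+// FCMPE sets signalAllNans, so quiet NaN operands also raise the Invalid
+// Operation exception.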
+
+//---
+// Floating point conditional comparisons
+//---
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseFPCondComparison<bit signalAllNans, RegisterClass regtype,
+ string mnemonic, list<dag> pat>
+ : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond),
+ mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "", pat>,
+ Sched<[WriteFCmp]> {
+ let Uses = [NZCV];
+ let Defs = [NZCV];
+
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<4> nzcv;
+ bits<4> cond;
+
+ let Inst{31-24} = 0b00011110;
+ let Inst{21} = 1;
+ let Inst{20-16} = Rm;
+ let Inst{15-12} = cond;
+ let Inst{11-10} = 0b01;
+ let Inst{9-5} = Rn;
+ let Inst{4} = signalAllNans;
+ let Inst{3-0} = nzcv;
+}
+
+multiclass FPCondComparison<bit signalAllNans, string mnemonic,
+ SDPatternOperator OpNode = null_frag> {
+ def Hrr : BaseFPCondComparison<signalAllNans, FPR16, mnemonic,
+ [(set NZCV, (OpNode (f16 FPR16:$Rn), (f16 FPR16:$Rm), (i32 imm:$nzcv),
+ (i32 imm:$cond), NZCV))]> {
+ let Inst{23-22} = 0b11;
+ let Predicates = [HasFullFP16];
+ }
+
+ def Srr : BaseFPCondComparison<signalAllNans, FPR32, mnemonic,
+ [(set NZCV, (OpNode (f32 FPR32:$Rn), (f32 FPR32:$Rm), (i32 imm:$nzcv),
+ (i32 imm:$cond), NZCV))]> {
+ let Inst{23-22} = 0b00;
+ }
+
+ def Drr : BaseFPCondComparison<signalAllNans, FPR64, mnemonic,
+ [(set NZCV, (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm), (i32 imm:$nzcv),
+ (i32 imm:$cond), NZCV))]> {
+ let Inst{23-22} = 0b01;
+ }
+}
+
+//---
+// Floating point conditional select
+//---
+
+class BaseFPCondSelect<RegisterClass regtype, ValueType vt, string asm>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond),
+ asm, "\t$Rd, $Rn, $Rm, $cond", "",
+ [(set regtype:$Rd,
+ (AArch64csel (vt regtype:$Rn), regtype:$Rm,
+ (i32 imm:$cond), NZCV))]>,
+ Sched<[WriteF]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<4> cond;
+
+ let Inst{31-24} = 0b00011110;
+ let Inst{21} = 1;
+ let Inst{20-16} = Rm;
+ let Inst{15-12} = cond;
+ let Inst{11-10} = 0b11;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass FPCondSelect<string asm> {
+ let Uses = [NZCV] in {
+ def Hrrr : BaseFPCondSelect<FPR16, f16, asm> {
+ let Inst{23-22} = 0b11;
+ let Predicates = [HasFullFP16];
+ }
+
+ def Srrr : BaseFPCondSelect<FPR32, f32, asm> {
+ let Inst{23-22} = 0b00;
+ }
+
+ def Drrr : BaseFPCondSelect<FPR64, f64, asm> {
+ let Inst{23-22} = 0b01;
+ }
+ } // Uses = [NZCV]
+}
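+
+// For illustration only: the conventional instantiation (real definition in
+// AArch64InstrInfo.td):
+//
+//   defm FCSEL : FPCondSelect<"fcsel">;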
+
+//---
+// Floating move immediate
+//---
+
+class BaseFPMoveImmediate<RegisterClass regtype, Operand fpimmtype, string asm>
+ : I<(outs regtype:$Rd), (ins fpimmtype:$imm), asm, "\t$Rd, $imm", "",
+ [(set regtype:$Rd, fpimmtype:$imm)]>,
+ Sched<[WriteFImm]> {
+ bits<5> Rd;
+ bits<8> imm;
+ let Inst{31-24} = 0b00011110;
+ let Inst{21} = 1;
+ let Inst{20-13} = imm;
+ let Inst{12-5} = 0b10000000;
+ let Inst{4-0} = Rd;
+}
+
+multiclass FPMoveImmediate<string asm> {
+ def Hi : BaseFPMoveImmediate<FPR16, fpimm16, asm> {
+ let Inst{23-22} = 0b11;
+ let Predicates = [HasFullFP16];
+ }
+
+ def Si : BaseFPMoveImmediate<FPR32, fpimm32, asm> {
+ let Inst{23-22} = 0b00;
+ }
+
+ def Di : BaseFPMoveImmediate<FPR64, fpimm64, asm> {
+ let Inst{23-22} = 0b01;
+ }
+}
+} // end of 'let Predicates = [HasFPARMv8]'
+
+//----------------------------------------------------------------------------
+// AdvSIMD
+//----------------------------------------------------------------------------
+
+let Predicates = [HasNEON] in {
+
+//----------------------------------------------------------------------------
+// AdvSIMD three register vector instructions
+//----------------------------------------------------------------------------
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDThreeSameVector<bit Q, bit U, bits<3> size, bits<5> opcode,
+ RegisterOperand regtype, string asm, string kind,
+ list<dag> pattern>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm,
+ "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind #
+ "|" # kind # "\t$Rd, $Rn, $Rm|}", "", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-21} = size;
+ let Inst{20-16} = Rm;
+ let Inst{15-11} = opcode;
+ let Inst{10} = 1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<3> size, bits<5> opcode,
+ RegisterOperand regtype, string asm, string kind,
+ list<dag> pattern>
+ : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), asm,
+ "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind #
+ "|" # kind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-21} = size;
+ let Inst{20-16} = Rm;
+ let Inst{15-11} = opcode;
+ let Inst{10} = 1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+class BaseSIMDThreeSameVectorDot<bit Q, bit U, string asm, string kind1,
+ string kind2, RegisterOperand RegType,
+ ValueType AccumType, ValueType InputType,
+ SDPatternOperator OpNode> :
+ BaseSIMDThreeSameVectorTied<Q, U, 0b100, 0b10010, RegType, asm, kind1,
+ [(set (AccumType RegType:$dst),
+ (OpNode (AccumType RegType:$Rd),
+ (InputType RegType:$Rn),
+ (InputType RegType:$Rm)))]> {
+ let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 # "}");
+}
+
+multiclass SIMDThreeSameVectorDot<bit U, string asm, SDPatternOperator OpNode> {
+ def v8i8 : BaseSIMDThreeSameVectorDot<0, U, asm, ".2s", ".8b", V64,
+ v2i32, v8i8, OpNode>;
+ def v16i8 : BaseSIMDThreeSameVectorDot<1, U, asm, ".4s", ".16b", V128,
+ v4i32, v16i8, OpNode>;
+}
+
+// All operand sizes distinguished in the encoding.
+multiclass SIMDThreeSameVector<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8 : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64,
+ asm, ".8b",
+ [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
+ def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128,
+ asm, ".16b",
+ [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>;
+ def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64,
+ asm, ".4h",
+ [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
+ def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128,
+ asm, ".8h",
+ [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>;
+ def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64,
+ asm, ".2s",
+ [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
+ def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128,
+ asm, ".4s",
+ [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>;
+ def v2i64 : BaseSIMDThreeSameVector<1, U, 0b111, opc, V128,
+ asm, ".2d",
+ [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (v2i64 V128:$Rm)))]>;
+}
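+
+// For illustration only: hypothetical three-same-vector instantiations (the
+// opcode value is a placeholder; real definitions are in AArch64InstrInfo.td).
+//
+//   defm ADD : SIMDThreeSameVector<0, 0b10000, "add", add>;
+//   defm SUB : SIMDThreeSameVector<1, 0b10000, "sub", sub>;
+//
+// Each defm produces one instruction per arrangement (.8b through .2d), with
+// the U bit distinguishing the two operations.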
+
+// As above, but D sized elements unsupported.
+multiclass SIMDThreeSameVectorBHS<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8 : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64,
+ asm, ".8b",
+ [(set V64:$Rd, (v8i8 (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm))))]>;
+ def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128,
+ asm, ".16b",
+ [(set V128:$Rd, (v16i8 (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm))))]>;
+ def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64,
+ asm, ".4h",
+ [(set V64:$Rd, (v4i16 (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm))))]>;
+ def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128,
+ asm, ".8h",
+ [(set V128:$Rd, (v8i16 (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm))))]>;
+ def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64,
+ asm, ".2s",
+ [(set V64:$Rd, (v2i32 (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm))))]>;
+ def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128,
+ asm, ".4s",
+ [(set V128:$Rd, (v4i32 (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm))))]>;
+}
+
+multiclass SIMDThreeSameVectorBHSTied<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8 : BaseSIMDThreeSameVectorTied<0, U, 0b001, opc, V64,
+ asm, ".8b",
+ [(set (v8i8 V64:$dst),
+ (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
+ def v16i8 : BaseSIMDThreeSameVectorTied<1, U, 0b001, opc, V128,
+ asm, ".16b",
+ [(set (v16i8 V128:$dst),
+ (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>;
+ def v4i16 : BaseSIMDThreeSameVectorTied<0, U, 0b011, opc, V64,
+ asm, ".4h",
+ [(set (v4i16 V64:$dst),
+ (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
+ def v8i16 : BaseSIMDThreeSameVectorTied<1, U, 0b011, opc, V128,
+ asm, ".8h",
+ [(set (v8i16 V128:$dst),
+ (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>;
+ def v2i32 : BaseSIMDThreeSameVectorTied<0, U, 0b101, opc, V64,
+ asm, ".2s",
+ [(set (v2i32 V64:$dst),
+ (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
+ def v4i32 : BaseSIMDThreeSameVectorTied<1, U, 0b101, opc, V128,
+ asm, ".4s",
+ [(set (v4i32 V128:$dst),
+ (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>;
+}
+
+// As above, but only B sized elements supported.
+multiclass SIMDThreeSameVectorB<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8 : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64,
+ asm, ".8b",
+ [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
+ def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128,
+ asm, ".16b",
+ [(set (v16i8 V128:$Rd),
+ (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>;
+}
+
+// As above, but only floating point elements supported.
+multiclass SIMDThreeSameVectorFP<bit U, bit S, bits<3> opc,
+ string asm, SDPatternOperator OpNode> {
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4f16 : BaseSIMDThreeSameVector<0, U, {S,0b10}, {0b00,opc}, V64,
+ asm, ".4h",
+ [(set (v4f16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>;
+ def v8f16 : BaseSIMDThreeSameVector<1, U, {S,0b10}, {0b00,opc}, V128,
+ asm, ".8h",
+ [(set (v8f16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>;
+ } // Predicates = [HasNEON, HasFullFP16]
+ def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0b01}, {0b11,opc}, V64,
+ asm, ".2s",
+ [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>;
+ def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0b01}, {0b11,opc}, V128,
+ asm, ".4s",
+ [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>;
+ def v2f64 : BaseSIMDThreeSameVector<1, U, {S,0b11}, {0b11,opc}, V128,
+ asm, ".2d",
+ [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>;
+}
+
+multiclass SIMDThreeSameVectorFPCmp<bit U, bit S, bits<3> opc,
+ string asm,
+ SDPatternOperator OpNode> {
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4f16 : BaseSIMDThreeSameVector<0, U, {S,0b10}, {0b00,opc}, V64,
+ asm, ".4h",
+ [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>;
+ def v8f16 : BaseSIMDThreeSameVector<1, U, {S,0b10}, {0b00,opc}, V128,
+ asm, ".8h",
+ [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>;
+ } // Predicates = [HasNEON, HasFullFP16]
+ def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0b01}, {0b11,opc}, V64,
+ asm, ".2s",
+ [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>;
+ def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0b01}, {0b11,opc}, V128,
+ asm, ".4s",
+ [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>;
+ def v2f64 : BaseSIMDThreeSameVector<1, U, {S,0b11}, {0b11,opc}, V128,
+ asm, ".2d",
+ [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>;
+}
+
+multiclass SIMDThreeSameVectorFPTied<bit U, bit S, bits<3> opc,
+ string asm, SDPatternOperator OpNode> {
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4f16 : BaseSIMDThreeSameVectorTied<0, U, {S,0b10}, {0b00,opc}, V64,
+ asm, ".4h",
+ [(set (v4f16 V64:$dst),
+ (OpNode (v4f16 V64:$Rd), (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>;
+ def v8f16 : BaseSIMDThreeSameVectorTied<1, U, {S,0b10}, {0b00,opc}, V128,
+ asm, ".8h",
+ [(set (v8f16 V128:$dst),
+ (OpNode (v8f16 V128:$Rd), (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>;
+ } // Predicates = [HasNEON, HasFullFP16]
+ def v2f32 : BaseSIMDThreeSameVectorTied<0, U, {S,0b01}, {0b11,opc}, V64,
+ asm, ".2s",
+ [(set (v2f32 V64:$dst),
+ (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>;
+ def v4f32 : BaseSIMDThreeSameVectorTied<1, U, {S,0b01}, {0b11,opc}, V128,
+ asm, ".4s",
+ [(set (v4f32 V128:$dst),
+ (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>;
+ def v2f64 : BaseSIMDThreeSameVectorTied<1, U, {S,0b11}, {0b11,opc}, V128,
+ asm, ".2d",
+ [(set (v2f64 V128:$dst),
+ (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>;
+}
+
+// As above, but D and B sized elements unsupported.
+multiclass SIMDThreeSameVectorHS<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64,
+ asm, ".4h",
+ [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
+ def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128,
+ asm, ".8h",
+ [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>;
+ def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64,
+ asm, ".2s",
+ [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
+ def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128,
+ asm, ".4s",
+ [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>;
+}
+
+// Logical three vector ops share opcode bits, and only use B sized elements.
+multiclass SIMDLogicalThreeVector<bit U, bits<2> size, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ def v8i8 : BaseSIMDThreeSameVector<0, U, {size,1}, 0b00011, V64,
+ asm, ".8b",
+ [(set (v8i8 V64:$Rd), (OpNode V64:$Rn, V64:$Rm))]>;
+ def v16i8 : BaseSIMDThreeSameVector<1, U, {size,1}, 0b00011, V128,
+ asm, ".16b",
+ [(set (v16i8 V128:$Rd), (OpNode V128:$Rn, V128:$Rm))]>;
+
+ def : Pat<(v4i16 (OpNode V64:$LHS, V64:$RHS)),
+ (!cast<Instruction>(NAME#"v8i8") V64:$LHS, V64:$RHS)>;
+ def : Pat<(v2i32 (OpNode V64:$LHS, V64:$RHS)),
+ (!cast<Instruction>(NAME#"v8i8") V64:$LHS, V64:$RHS)>;
+ def : Pat<(v1i64 (OpNode V64:$LHS, V64:$RHS)),
+ (!cast<Instruction>(NAME#"v8i8") V64:$LHS, V64:$RHS)>;
+
+ def : Pat<(v8i16 (OpNode V128:$LHS, V128:$RHS)),
+ (!cast<Instruction>(NAME#"v16i8") V128:$LHS, V128:$RHS)>;
+ def : Pat<(v4i32 (OpNode V128:$LHS, V128:$RHS)),
+ (!cast<Instruction>(NAME#"v16i8") V128:$LHS, V128:$RHS)>;
+ def : Pat<(v2i64 (OpNode V128:$LHS, V128:$RHS)),
+ (!cast<Instruction>(NAME#"v16i8") V128:$LHS, V128:$RHS)>;
+}
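+
+// For illustration only: hypothetical logical-vector instantiations (the size
+// field values are placeholders; real definitions are in AArch64InstrInfo.td).
+//
+//   defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
+//   defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;
+//   defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>;
+//
+// Only .8b and .16b forms are defined; the extra Pat<> entries reuse them for
+// the wider element types, since bitwise logic is element-size agnostic.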
+
+multiclass SIMDLogicalThreeVectorTied<bit U, bits<2> size,
+ string asm, SDPatternOperator OpNode> {
+ def v8i8 : BaseSIMDThreeSameVectorTied<0, U, {size,1}, 0b00011, V64,
+ asm, ".8b",
+ [(set (v8i8 V64:$dst),
+ (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
+ def v16i8 : BaseSIMDThreeSameVectorTied<1, U, {size,1}, 0b00011, V128,
+ asm, ".16b",
+ [(set (v16i8 V128:$dst),
+ (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn),
+ (v16i8 V128:$Rm)))]>;
+
+ def : Pat<(v4i16 (OpNode (v4i16 V64:$LHS), (v4i16 V64:$MHS),
+ (v4i16 V64:$RHS))),
+ (!cast<Instruction>(NAME#"v8i8")
+ V64:$LHS, V64:$MHS, V64:$RHS)>;
+ def : Pat<(v2i32 (OpNode (v2i32 V64:$LHS), (v2i32 V64:$MHS),
+ (v2i32 V64:$RHS))),
+ (!cast<Instruction>(NAME#"v8i8")
+ V64:$LHS, V64:$MHS, V64:$RHS)>;
+ def : Pat<(v1i64 (OpNode (v1i64 V64:$LHS), (v1i64 V64:$MHS),
+ (v1i64 V64:$RHS))),
+ (!cast<Instruction>(NAME#"v8i8")
+ V64:$LHS, V64:$MHS, V64:$RHS)>;
+
+ def : Pat<(v8i16 (OpNode (v8i16 V128:$LHS), (v8i16 V128:$MHS),
+ (v8i16 V128:$RHS))),
+ (!cast<Instruction>(NAME#"v16i8")
+ V128:$LHS, V128:$MHS, V128:$RHS)>;
+ def : Pat<(v4i32 (OpNode (v4i32 V128:$LHS), (v4i32 V128:$MHS),
+ (v4i32 V128:$RHS))),
+ (!cast<Instruction>(NAME#"v16i8")
+ V128:$LHS, V128:$MHS, V128:$RHS)>;
+ def : Pat<(v2i64 (OpNode (v2i64 V128:$LHS), (v2i64 V128:$MHS),
+ (v2i64 V128:$RHS))),
+ (!cast<Instruction>(NAME#"v16i8")
+ V128:$LHS, V128:$MHS, V128:$RHS)>;
+}
+
+
+//----------------------------------------------------------------------------
+// AdvSIMD two register vector instructions.
+//----------------------------------------------------------------------------
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDTwoSameVector<bit Q, bit U, bits<2> size, bits<5> opcode,
+ bits<2> size2, RegisterOperand regtype, string asm,
+ string dstkind, string srckind, list<dag> pattern>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
+ "{\t$Rd" # dstkind # ", $Rn" # srckind #
+ "|" # dstkind # "\t$Rd, $Rn}", "", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size;
+ let Inst{21} = 0b1;
+ let Inst{20-19} = size2;
+ let Inst{18-17} = 0b00;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDTwoSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
+ bits<2> size2, RegisterOperand regtype,
+ string asm, string dstkind, string srckind,
+ list<dag> pattern>
+ : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn), asm,
+ "{\t$Rd" # dstkind # ", $Rn" # srckind #
+ "|" # dstkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size;
+ let Inst{21} = 0b1;
+ let Inst{20-19} = size2;
+ let Inst{18-17} = 0b00;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+// Supports B, H, and S element sizes.
+multiclass SIMDTwoVectorBHS<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64,
+ asm, ".8b", ".8b",
+ [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
+ def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128,
+ asm, ".16b", ".16b",
+ [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
+ def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64,
+ asm, ".4h", ".4h",
+ [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>;
+ def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128,
+ asm, ".8h", ".8h",
+ [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
+ def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64,
+ asm, ".2s", ".2s",
+ [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
+ def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128,
+ asm, ".4s", ".4s",
+ [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
+}
+
+class BaseSIMDVectorLShiftLongBySize<bit Q, bits<2> size,
+ RegisterOperand regtype, string asm, string dstkind,
+ string srckind, string amount>
+ : I<(outs V128:$Rd), (ins regtype:$Rn), asm,
+ "{\t$Rd" # dstkind # ", $Rn" # srckind # ", #" # amount #
+ "|" # dstkind # "\t$Rd, $Rn, #" # amount # "}", "", []>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29-24} = 0b101110;
+ let Inst{23-22} = size;
+ let Inst{21-10} = 0b100001001110;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass SIMDVectorLShiftLongBySizeBHS {
+ let hasSideEffects = 0 in {
+ def v8i8 : BaseSIMDVectorLShiftLongBySize<0, 0b00, V64,
+ "shll", ".8h", ".8b", "8">;
+ def v16i8 : BaseSIMDVectorLShiftLongBySize<1, 0b00, V128,
+ "shll2", ".8h", ".16b", "8">;
+ def v4i16 : BaseSIMDVectorLShiftLongBySize<0, 0b01, V64,
+ "shll", ".4s", ".4h", "16">;
+ def v8i16 : BaseSIMDVectorLShiftLongBySize<1, 0b01, V128,
+ "shll2", ".4s", ".8h", "16">;
+ def v2i32 : BaseSIMDVectorLShiftLongBySize<0, 0b10, V64,
+ "shll", ".2d", ".2s", "32">;
+ def v4i32 : BaseSIMDVectorLShiftLongBySize<1, 0b10, V128,
+ "shll2", ".2d", ".4s", "32">;
+ }
+}
+
+// Supports all element sizes.
+multiclass SIMDLongTwoVector<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8_v4i16 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64,
+ asm, ".4h", ".8b",
+ [(set (v4i16 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
+ def v16i8_v8i16 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128,
+ asm, ".8h", ".16b",
+ [(set (v8i16 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
+ def v4i16_v2i32 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64,
+ asm, ".2s", ".4h",
+ [(set (v2i32 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>;
+ def v8i16_v4i32 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128,
+ asm, ".4s", ".8h",
+ [(set (v4i32 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
+ def v2i32_v1i64 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64,
+ asm, ".1d", ".2s",
+ [(set (v1i64 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
+ def v4i32_v2i64 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128,
+ asm, ".2d", ".4s",
+ [(set (v2i64 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
+}
+
+multiclass SIMDLongTwoVectorTied<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8_v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, 0b00, V64,
+ asm, ".4h", ".8b",
+ [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd),
+ (v8i8 V64:$Rn)))]>;
+ def v16i8_v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, 0b00, V128,
+ asm, ".8h", ".16b",
+ [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd),
+ (v16i8 V128:$Rn)))]>;
+ def v4i16_v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, 0b00, V64,
+ asm, ".2s", ".4h",
+ [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd),
+ (v4i16 V64:$Rn)))]>;
+ def v8i16_v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, 0b00, V128,
+ asm, ".4s", ".8h",
+ [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd),
+ (v8i16 V128:$Rn)))]>;
+ def v2i32_v1i64 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, 0b00, V64,
+ asm, ".1d", ".2s",
+ [(set (v1i64 V64:$dst), (OpNode (v1i64 V64:$Rd),
+ (v2i32 V64:$Rn)))]>;
+ def v4i32_v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, 0b00, V128,
+ asm, ".2d", ".4s",
+ [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd),
+ (v4i32 V128:$Rn)))]>;
+}
+
+// Supports all element sizes, except 1xD.
+multiclass SIMDTwoVectorBHSDTied<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, 0b00, V64,
+ asm, ".8b", ".8b",
+ [(set (v8i8 V64:$dst), (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn)))]>;
+ def v16i8 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, 0b00, V128,
+ asm, ".16b", ".16b",
+ [(set (v16i8 V128:$dst), (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn)))]>;
+ def v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, 0b00, V64,
+ asm, ".4h", ".4h",
+ [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn)))]>;
+ def v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, 0b00, V128,
+ asm, ".8h", ".8h",
+ [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn)))]>;
+ def v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, 0b00, V64,
+ asm, ".2s", ".2s",
+ [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn)))]>;
+ def v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, 0b00, V128,
+ asm, ".4s", ".4s",
+ [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn)))]>;
+ def v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b11, opc, 0b00, V128,
+ asm, ".2d", ".2d",
+ [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn)))]>;
+}
+
+multiclass SIMDTwoVectorBHSD<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64,
+ asm, ".8b", ".8b",
+ [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
+ def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128,
+ asm, ".16b", ".16b",
+ [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
+ def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64,
+ asm, ".4h", ".4h",
+ [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>;
+ def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128,
+ asm, ".8h", ".8h",
+ [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
+ def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64,
+ asm, ".2s", ".2s",
+ [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
+ def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128,
+ asm, ".4s", ".4s",
+ [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
+ def v2i64 : BaseSIMDTwoSameVector<1, U, 0b11, opc, 0b00, V128,
+ asm, ".2d", ".2d",
+ [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>;
+}
+
+
+// Supports only B element sizes.
+multiclass SIMDTwoVectorB<bit U, bits<2> size, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8 : BaseSIMDTwoSameVector<0, U, size, opc, 0b00, V64,
+ asm, ".8b", ".8b",
+ [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
+ def v16i8 : BaseSIMDTwoSameVector<1, U, size, opc, 0b00, V128,
+ asm, ".16b", ".16b",
+ [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
+
+}
+
+// Supports only B and H element sizes.
+multiclass SIMDTwoVectorBH<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64,
+ asm, ".8b", ".8b",
+ [(set (v8i8 V64:$Rd), (OpNode V64:$Rn))]>;
+ def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128,
+ asm, ".16b", ".16b",
+ [(set (v16i8 V128:$Rd), (OpNode V128:$Rn))]>;
+ def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64,
+ asm, ".4h", ".4h",
+ [(set (v4i16 V64:$Rd), (OpNode V64:$Rn))]>;
+ def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128,
+ asm, ".8h", ".8h",
+ [(set (v8i16 V128:$Rd), (OpNode V128:$Rn))]>;
+}
+
+// Supports only S and D element sizes, uses high bit of the size field
+// as an extra opcode bit.
+multiclass SIMDTwoVectorFP<bit U, bit S, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64,
+ asm, ".4h", ".4h",
+ [(set (v4f16 V64:$Rd), (OpNode (v4f16 V64:$Rn)))]>;
+ def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128,
+ asm, ".8h", ".8h",
+ [(set (v8f16 V128:$Rd), (OpNode (v8f16 V128:$Rn)))]>;
+ } // Predicates = [HasNEON, HasFullFP16]
+ def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64,
+ asm, ".2s", ".2s",
+ [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>;
+ def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128,
+ asm, ".4s", ".4s",
+ [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>;
+ def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128,
+ asm, ".2d", ".2d",
+ [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>;
+}
+
+// Supports only the S element size.
+multiclass SIMDTwoVectorS<bit U, bit S, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v2i32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64,
+ asm, ".2s", ".2s",
+ [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
+ def v4i32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128,
+ asm, ".4s", ".4s",
+ [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
+}
+
+
+multiclass SIMDTwoVectorFPToInt<bit U, bit S, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64,
+ asm, ".4h", ".4h",
+ [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn)))]>;
+ def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128,
+ asm, ".8h", ".8h",
+ [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn)))]>;
+ } // Predicates = [HasNEON, HasFullFP16]
+ def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64,
+ asm, ".2s", ".2s",
+ [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>;
+ def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128,
+ asm, ".4s", ".4s",
+ [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>;
+ def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128,
+ asm, ".2d", ".2d",
+ [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>;
+}
+
+multiclass SIMDTwoVectorIntToFP<bit U, bit S, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64,
+ asm, ".4h", ".4h",
+ [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>;
+ def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128,
+ asm, ".8h", ".8h",
+ [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
+ } // Predicates = [HasNEON, HasFullFP16]
+ def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64,
+ asm, ".2s", ".2s",
+ [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
+ def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128,
+ asm, ".4s", ".4s",
+ [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
+ def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128,
+ asm, ".2d", ".2d",
+ [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>;
+}
+
+
+class BaseSIMDMixedTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
+ RegisterOperand inreg, RegisterOperand outreg,
+ string asm, string outkind, string inkind,
+ list<dag> pattern>
+ : I<(outs outreg:$Rd), (ins inreg:$Rn), asm,
+ "{\t$Rd" # outkind # ", $Rn" # inkind #
+ "|" # outkind # "\t$Rd, $Rn}", "", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size;
+ let Inst{21-17} = 0b10000;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+class BaseSIMDMixedTwoVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
+ RegisterOperand inreg, RegisterOperand outreg,
+ string asm, string outkind, string inkind,
+ list<dag> pattern>
+ : I<(outs outreg:$dst), (ins outreg:$Rd, inreg:$Rn), asm,
+ "{\t$Rd" # outkind # ", $Rn" # inkind #
+ "|" # outkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size;
+ let Inst{21-17} = 0b10000;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass SIMDMixedTwoVector<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8 : BaseSIMDMixedTwoVector<0, U, 0b00, opc, V128, V64,
+ asm, ".8b", ".8h",
+ [(set (v8i8 V64:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
+ def v16i8 : BaseSIMDMixedTwoVectorTied<1, U, 0b00, opc, V128, V128,
+ asm#"2", ".16b", ".8h", []>;
+ def v4i16 : BaseSIMDMixedTwoVector<0, U, 0b01, opc, V128, V64,
+ asm, ".4h", ".4s",
+ [(set (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
+ def v8i16 : BaseSIMDMixedTwoVectorTied<1, U, 0b01, opc, V128, V128,
+ asm#"2", ".8h", ".4s", []>;
+ def v2i32 : BaseSIMDMixedTwoVector<0, U, 0b10, opc, V128, V64,
+ asm, ".2s", ".2d",
+ [(set (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn)))]>;
+ def v4i32 : BaseSIMDMixedTwoVectorTied<1, U, 0b10, opc, V128, V128,
+ asm#"2", ".4s", ".2d", []>;
+
+ def : Pat<(concat_vectors (v8i8 V64:$Rd), (OpNode (v8i16 V128:$Rn))),
+ (!cast<Instruction>(NAME # "v16i8")
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
+ def : Pat<(concat_vectors (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn))),
+ (!cast<Instruction>(NAME # "v8i16")
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
+ def : Pat<(concat_vectors (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn))),
+ (!cast<Instruction>(NAME # "v4i32")
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
+}
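+
+// Note on the "2" (second-half) variants above: their instruction records
+// carry empty pattern lists, and the concat_vectors Pat<> records supply the
+// selection instead. The existing low half is placed into the dsub subregister
+// of an IMPLICIT_DEF 128-bit register via INSERT_SUBREG, and the *2
+// instruction then writes the high half of that register.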
+
+class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<2> size2,
+ bits<5> opcode, RegisterOperand regtype, string asm,
+ string kind, string zero, ValueType dty,
+ ValueType sty, SDNode OpNode>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
+ "{\t$Rd" # kind # ", $Rn" # kind # ", #" # zero #
+ "|" # kind # "\t$Rd, $Rn, #" # zero # "}", "",
+ [(set (dty regtype:$Rd), (OpNode (sty regtype:$Rn)))]>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size;
+ let Inst{21} = 0b1;
+ let Inst{20-19} = size2;
+ let Inst{18-17} = 0b00;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+// Comparisons support all element sizes, except 1xD.
+multiclass SIMDCmpTwoVector<bit U, bits<5> opc, string asm,
+ SDNode OpNode> {
+ def v8i8rz : BaseSIMDCmpTwoVector<0, U, 0b00, 0b00, opc, V64,
+ asm, ".8b", "0",
+ v8i8, v8i8, OpNode>;
+ def v16i8rz : BaseSIMDCmpTwoVector<1, U, 0b00, 0b00, opc, V128,
+ asm, ".16b", "0",
+ v16i8, v16i8, OpNode>;
+ def v4i16rz : BaseSIMDCmpTwoVector<0, U, 0b01, 0b00, opc, V64,
+ asm, ".4h", "0",
+ v4i16, v4i16, OpNode>;
+ def v8i16rz : BaseSIMDCmpTwoVector<1, U, 0b01, 0b00, opc, V128,
+ asm, ".8h", "0",
+ v8i16, v8i16, OpNode>;
+ def v2i32rz : BaseSIMDCmpTwoVector<0, U, 0b10, 0b00, opc, V64,
+ asm, ".2s", "0",
+ v2i32, v2i32, OpNode>;
+ def v4i32rz : BaseSIMDCmpTwoVector<1, U, 0b10, 0b00, opc, V128,
+ asm, ".4s", "0",
+ v4i32, v4i32, OpNode>;
+ def v2i64rz : BaseSIMDCmpTwoVector<1, U, 0b11, 0b00, opc, V128,
+ asm, ".2d", "0",
+ v2i64, v2i64, OpNode>;
+}
+
+// FP Comparisons support only S and D element sizes (and H for v8.2a).
+multiclass SIMDFPCmpTwoVector<bit U, bit S, bits<5> opc,
+ string asm, SDNode OpNode> {
+
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4i16rz : BaseSIMDCmpTwoVector<0, U, {S,1}, 0b11, opc, V64,
+ asm, ".4h", "0.0",
+ v4i16, v4f16, OpNode>;
+ def v8i16rz : BaseSIMDCmpTwoVector<1, U, {S,1}, 0b11, opc, V128,
+ asm, ".8h", "0.0",
+ v8i16, v8f16, OpNode>;
+ } // Predicates = [HasNEON, HasFullFP16]
+ def v2i32rz : BaseSIMDCmpTwoVector<0, U, {S,0}, 0b00, opc, V64,
+ asm, ".2s", "0.0",
+ v2i32, v2f32, OpNode>;
+ def v4i32rz : BaseSIMDCmpTwoVector<1, U, {S,0}, 0b00, opc, V128,
+ asm, ".4s", "0.0",
+ v4i32, v4f32, OpNode>;
+ def v2i64rz : BaseSIMDCmpTwoVector<1, U, {S,1}, 0b00, opc, V128,
+ asm, ".2d", "0.0",
+ v2i64, v2f64, OpNode>;
+
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def : InstAlias<asm # "\t$Vd.4h, $Vn.4h, #0",
+ (!cast<Instruction>(NAME # v4i16rz) V64:$Vd, V64:$Vn), 0>;
+ def : InstAlias<asm # "\t$Vd.8h, $Vn.8h, #0",
+ (!cast<Instruction>(NAME # v8i16rz) V128:$Vd, V128:$Vn), 0>;
+ }
+ def : InstAlias<asm # "\t$Vd.2s, $Vn.2s, #0",
+ (!cast<Instruction>(NAME # v2i32rz) V64:$Vd, V64:$Vn), 0>;
+ def : InstAlias<asm # "\t$Vd.4s, $Vn.4s, #0",
+ (!cast<Instruction>(NAME # v4i32rz) V128:$Vd, V128:$Vn), 0>;
+ def : InstAlias<asm # "\t$Vd.2d, $Vn.2d, #0",
+ (!cast<Instruction>(NAME # v2i64rz) V128:$Vd, V128:$Vn), 0>;
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def : InstAlias<asm # ".4h\t$Vd, $Vn, #0",
+ (!cast<Instruction>(NAME # v4i16rz) V64:$Vd, V64:$Vn), 0>;
+ def : InstAlias<asm # ".8h\t$Vd, $Vn, #0",
+ (!cast<Instruction>(NAME # v8i16rz) V128:$Vd, V128:$Vn), 0>;
+ }
+ def : InstAlias<asm # ".2s\t$Vd, $Vn, #0",
+ (!cast<Instruction>(NAME # v2i32rz) V64:$Vd, V64:$Vn), 0>;
+ def : InstAlias<asm # ".4s\t$Vd, $Vn, #0",
+ (!cast<Instruction>(NAME # v4i32rz) V128:$Vd, V128:$Vn), 0>;
+ def : InstAlias<asm # ".2d\t$Vd, $Vn, #0",
+ (!cast<Instruction>(NAME # v2i64rz) V128:$Vd, V128:$Vn), 0>;
+}
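+
+// For illustration (assumed instantiations, defined elsewhere rather than in
+// this file): the FP compare-against-zero instructions, e.g. FCMEQ/FCMGT/FCMLT
+// with a #0.0 operand, are built from this multiclass; the InstAlias records
+// above additionally accept the integer-style "#0" spelling of the zero
+// immediate.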
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDFPCvtTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
+ RegisterOperand outtype, RegisterOperand intype,
+ string asm, string VdTy, string VnTy,
+ list<dag> pattern>
+ : I<(outs outtype:$Rd), (ins intype:$Rn), asm,
+ !strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size;
+ let Inst{21-17} = 0b10000;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+class BaseSIMDFPCvtTwoVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
+ RegisterOperand outtype, RegisterOperand intype,
+ string asm, string VdTy, string VnTy,
+ list<dag> pattern>
+ : I<(outs outtype:$dst), (ins outtype:$Rd, intype:$Rn), asm,
+ !strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "$Rd = $dst", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size;
+ let Inst{21-17} = 0b10000;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass SIMDFPWidenTwoVector<bit U, bit S, bits<5> opc, string asm> {
+ def v4i16 : BaseSIMDFPCvtTwoVector<0, U, {S,0}, opc, V128, V64,
+ asm, ".4s", ".4h", []>;
+ def v8i16 : BaseSIMDFPCvtTwoVector<1, U, {S,0}, opc, V128, V128,
+ asm#"2", ".4s", ".8h", []>;
+ def v2i32 : BaseSIMDFPCvtTwoVector<0, U, {S,1}, opc, V128, V64,
+ asm, ".2d", ".2s", []>;
+ def v4i32 : BaseSIMDFPCvtTwoVector<1, U, {S,1}, opc, V128, V128,
+ asm#"2", ".2d", ".4s", []>;
+}
+
+multiclass SIMDFPNarrowTwoVector<bit U, bit S, bits<5> opc, string asm> {
+ def v4i16 : BaseSIMDFPCvtTwoVector<0, U, {S,0}, opc, V64, V128,
+ asm, ".4h", ".4s", []>;
+ def v8i16 : BaseSIMDFPCvtTwoVectorTied<1, U, {S,0}, opc, V128, V128,
+ asm#"2", ".8h", ".4s", []>;
+ def v2i32 : BaseSIMDFPCvtTwoVector<0, U, {S,1}, opc, V64, V128,
+ asm, ".2s", ".2d", []>;
+ def v4i32 : BaseSIMDFPCvtTwoVectorTied<1, U, {S,1}, opc, V128, V128,
+ asm#"2", ".4s", ".2d", []>;
+}
+
+multiclass SIMDFPInexactCvtTwoVector<bit U, bit S, bits<5> opc, string asm,
+ Intrinsic OpNode> {
+ def v2f32 : BaseSIMDFPCvtTwoVector<0, U, {S,1}, opc, V64, V128,
+ asm, ".2s", ".2d",
+ [(set (v2f32 V64:$Rd), (OpNode (v2f64 V128:$Rn)))]>;
+ def v4f32 : BaseSIMDFPCvtTwoVectorTied<1, U, {S,1}, opc, V128, V128,
+ asm#"2", ".4s", ".2d", []>;
+
+ def : Pat<(concat_vectors (v2f32 V64:$Rd), (OpNode (v2f64 V128:$Rn))),
+ (!cast<Instruction>(NAME # "v4f32")
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
+}
+
+//----------------------------------------------------------------------------
+// AdvSIMD three register different-size vector instructions.
+//----------------------------------------------------------------------------
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDDifferentThreeVector<bit U, bits<3> size, bits<4> opcode,
+ RegisterOperand outtype, RegisterOperand intype1,
+ RegisterOperand intype2, string asm,
+ string outkind, string inkind1, string inkind2,
+ list<dag> pattern>
+ : I<(outs outtype:$Rd), (ins intype1:$Rn, intype2:$Rm), asm,
+ "{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 #
+ "|" # outkind # "\t$Rd, $Rn, $Rm}", "", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31} = 0;
+ let Inst{30} = size{0};
+ let Inst{29} = U;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size{2-1};
+ let Inst{21} = 1;
+ let Inst{20-16} = Rm;
+ let Inst{15-12} = opcode;
+ let Inst{11-10} = 0b00;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDDifferentThreeVectorTied<bit U, bits<3> size, bits<4> opcode,
+ RegisterOperand outtype, RegisterOperand intype1,
+ RegisterOperand intype2, string asm,
+ string outkind, string inkind1, string inkind2,
+ list<dag> pattern>
+ : I<(outs outtype:$dst), (ins outtype:$Rd, intype1:$Rn, intype2:$Rm), asm,
+ "{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 #
+ "|" # outkind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31} = 0;
+ let Inst{30} = size{0};
+ let Inst{29} = U;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size{2-1};
+ let Inst{21} = 1;
+ let Inst{20-16} = Rm;
+ let Inst{15-12} = opcode;
+ let Inst{11-10} = 0b00;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+// FIXME: TableGen doesn't know how to deal with expanded types that also
+// change the element count (in this case, placing the results in
+// the high elements of the result register rather than the low
+// elements). Until that's fixed, we can't code-gen those.
+multiclass SIMDNarrowThreeVectorBHS<bit U, bits<4> opc, string asm,
+ Intrinsic IntOp> {
+ def v8i16_v8i8 : BaseSIMDDifferentThreeVector<U, 0b000, opc,
+ V64, V128, V128,
+ asm, ".8b", ".8h", ".8h",
+ [(set (v8i8 V64:$Rd), (IntOp (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>;
+ def v8i16_v16i8 : BaseSIMDDifferentThreeVectorTied<U, 0b001, opc,
+ V128, V128, V128,
+ asm#"2", ".16b", ".8h", ".8h",
+ []>;
+ def v4i32_v4i16 : BaseSIMDDifferentThreeVector<U, 0b010, opc,
+ V64, V128, V128,
+ asm, ".4h", ".4s", ".4s",
+ [(set (v4i16 V64:$Rd), (IntOp (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>;
+ def v4i32_v8i16 : BaseSIMDDifferentThreeVectorTied<U, 0b011, opc,
+ V128, V128, V128,
+ asm#"2", ".8h", ".4s", ".4s",
+ []>;
+ def v2i64_v2i32 : BaseSIMDDifferentThreeVector<U, 0b100, opc,
+ V64, V128, V128,
+ asm, ".2s", ".2d", ".2d",
+ [(set (v2i32 V64:$Rd), (IntOp (v2i64 V128:$Rn), (v2i64 V128:$Rm)))]>;
+ def v2i64_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b101, opc,
+ V128, V128, V128,
+ asm#"2", ".4s", ".2d", ".2d",
+ []>;
+
+
+  // Patterns for the '2' variants involve INSERT_SUBREG, which you can't put
+  // in a pattern attached directly to an instruction definition, so they are
+  // written as separate Pat<> records here.
+ def : Pat<(concat_vectors (v8i8 V64:$Rd), (IntOp (v8i16 V128:$Rn),
+ (v8i16 V128:$Rm))),
+ (!cast<Instruction>(NAME # "v8i16_v16i8")
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
+ V128:$Rn, V128:$Rm)>;
+ def : Pat<(concat_vectors (v4i16 V64:$Rd), (IntOp (v4i32 V128:$Rn),
+ (v4i32 V128:$Rm))),
+ (!cast<Instruction>(NAME # "v4i32_v8i16")
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
+ V128:$Rn, V128:$Rm)>;
+ def : Pat<(concat_vectors (v2i32 V64:$Rd), (IntOp (v2i64 V128:$Rn),
+ (v2i64 V128:$Rm))),
+ (!cast<Instruction>(NAME # "v2i64_v4i32")
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
+ V128:$Rn, V128:$Rm)>;
+}
+
+multiclass SIMDDifferentThreeVectorBD<bit U, bits<4> opc, string asm,
+ Intrinsic IntOp> {
+ def v8i8 : BaseSIMDDifferentThreeVector<U, 0b000, opc,
+ V128, V64, V64,
+ asm, ".8h", ".8b", ".8b",
+ [(set (v8i16 V128:$Rd), (IntOp (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
+ def v16i8 : BaseSIMDDifferentThreeVector<U, 0b001, opc,
+ V128, V128, V128,
+ asm#"2", ".8h", ".16b", ".16b", []>;
+ let Predicates = [HasAES] in {
+ def v1i64 : BaseSIMDDifferentThreeVector<U, 0b110, opc,
+ V128, V64, V64,
+ asm, ".1q", ".1d", ".1d", []>;
+ def v2i64 : BaseSIMDDifferentThreeVector<U, 0b111, opc,
+ V128, V128, V128,
+ asm#"2", ".1q", ".2d", ".2d", []>;
+ }
+
+ def : Pat<(v8i16 (IntOp (v8i8 (extract_high_v16i8 V128:$Rn)),
+ (v8i8 (extract_high_v16i8 V128:$Rm)))),
+ (!cast<Instruction>(NAME#"v16i8") V128:$Rn, V128:$Rm)>;
+}
+
+multiclass SIMDLongThreeVectorHS<bit U, bits<4> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc,
+ V128, V64, V64,
+ asm, ".4s", ".4h", ".4h",
+ [(set (v4i32 V128:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
+ def v8i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b011, opc,
+ V128, V128, V128,
+ asm#"2", ".4s", ".8h", ".8h",
+ [(set (v4i32 V128:$Rd), (OpNode (extract_high_v8i16 V128:$Rn),
+ (extract_high_v8i16 V128:$Rm)))]>;
+ def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc,
+ V128, V64, V64,
+ asm, ".2d", ".2s", ".2s",
+ [(set (v2i64 V128:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
+ def v4i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b101, opc,
+ V128, V128, V128,
+ asm#"2", ".2d", ".4s", ".4s",
+ [(set (v2i64 V128:$Rd), (OpNode (extract_high_v4i32 V128:$Rn),
+ (extract_high_v4i32 V128:$Rm)))]>;
+}
+
+multiclass SIMDLongThreeVectorBHSabdl<bit U, bits<4> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ def v8i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b000, opc,
+ V128, V64, V64,
+ asm, ".8h", ".8b", ".8b",
+ [(set (v8i16 V128:$Rd),
+ (zext (v8i8 (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))))]>;
+ def v16i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b001, opc,
+ V128, V128, V128,
+ asm#"2", ".8h", ".16b", ".16b",
+ [(set (v8i16 V128:$Rd),
+ (zext (v8i8 (OpNode (extract_high_v16i8 V128:$Rn),
+ (extract_high_v16i8 V128:$Rm)))))]>;
+ def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc,
+ V128, V64, V64,
+ asm, ".4s", ".4h", ".4h",
+ [(set (v4i32 V128:$Rd),
+ (zext (v4i16 (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))))]>;
+ def v8i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b011, opc,
+ V128, V128, V128,
+ asm#"2", ".4s", ".8h", ".8h",
+ [(set (v4i32 V128:$Rd),
+ (zext (v4i16 (OpNode (extract_high_v8i16 V128:$Rn),
+ (extract_high_v8i16 V128:$Rm)))))]>;
+ def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc,
+ V128, V64, V64,
+ asm, ".2d", ".2s", ".2s",
+ [(set (v2i64 V128:$Rd),
+ (zext (v2i32 (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))))]>;
+ def v4i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b101, opc,
+ V128, V128, V128,
+ asm#"2", ".2d", ".4s", ".4s",
+ [(set (v2i64 V128:$Rd),
+ (zext (v2i32 (OpNode (extract_high_v4i32 V128:$Rn),
+ (extract_high_v4i32 V128:$Rm)))))]>;
+}
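+
+// Usage sketch (illustrative, assumed parameters): the absolute-difference
+// long forms are expected to come through this multiclass, roughly as
+//   defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
+//                                           int_aarch64_neon_uabd>;
+// The zext of the narrow absolute difference is what widens the result, which
+// is why the patterns above wrap OpNode in zext rather than taking a wide node.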
+
+multiclass SIMDLongThreeVectorTiedBHSabal<bit U, bits<4> opc,
+ string asm,
+ SDPatternOperator OpNode> {
+ def v8i8_v8i16 : BaseSIMDDifferentThreeVectorTied<U, 0b000, opc,
+ V128, V64, V64,
+ asm, ".8h", ".8b", ".8b",
+ [(set (v8i16 V128:$dst),
+ (add (v8i16 V128:$Rd),
+ (zext (v8i8 (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm))))))]>;
+ def v16i8_v8i16 : BaseSIMDDifferentThreeVectorTied<U, 0b001, opc,
+ V128, V128, V128,
+ asm#"2", ".8h", ".16b", ".16b",
+ [(set (v8i16 V128:$dst),
+ (add (v8i16 V128:$Rd),
+ (zext (v8i8 (OpNode (extract_high_v16i8 V128:$Rn),
+ (extract_high_v16i8 V128:$Rm))))))]>;
+ def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b010, opc,
+ V128, V64, V64,
+ asm, ".4s", ".4h", ".4h",
+ [(set (v4i32 V128:$dst),
+ (add (v4i32 V128:$Rd),
+ (zext (v4i16 (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm))))))]>;
+ def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b011, opc,
+ V128, V128, V128,
+ asm#"2", ".4s", ".8h", ".8h",
+ [(set (v4i32 V128:$dst),
+ (add (v4i32 V128:$Rd),
+ (zext (v4i16 (OpNode (extract_high_v8i16 V128:$Rn),
+ (extract_high_v8i16 V128:$Rm))))))]>;
+ def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b100, opc,
+ V128, V64, V64,
+ asm, ".2d", ".2s", ".2s",
+ [(set (v2i64 V128:$dst),
+ (add (v2i64 V128:$Rd),
+ (zext (v2i32 (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm))))))]>;
+ def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b101, opc,
+ V128, V128, V128,
+ asm#"2", ".2d", ".4s", ".4s",
+ [(set (v2i64 V128:$dst),
+ (add (v2i64 V128:$Rd),
+ (zext (v2i32 (OpNode (extract_high_v4i32 V128:$Rn),
+ (extract_high_v4i32 V128:$Rm))))))]>;
+}
+
+multiclass SIMDLongThreeVectorBHS<bit U, bits<4> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ def v8i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b000, opc,
+ V128, V64, V64,
+ asm, ".8h", ".8b", ".8b",
+ [(set (v8i16 V128:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
+ def v16i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b001, opc,
+ V128, V128, V128,
+ asm#"2", ".8h", ".16b", ".16b",
+ [(set (v8i16 V128:$Rd), (OpNode (extract_high_v16i8 V128:$Rn),
+ (extract_high_v16i8 V128:$Rm)))]>;
+ def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc,
+ V128, V64, V64,
+ asm, ".4s", ".4h", ".4h",
+ [(set (v4i32 V128:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
+ def v8i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b011, opc,
+ V128, V128, V128,
+ asm#"2", ".4s", ".8h", ".8h",
+ [(set (v4i32 V128:$Rd), (OpNode (extract_high_v8i16 V128:$Rn),
+ (extract_high_v8i16 V128:$Rm)))]>;
+ def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc,
+ V128, V64, V64,
+ asm, ".2d", ".2s", ".2s",
+ [(set (v2i64 V128:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
+ def v4i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b101, opc,
+ V128, V128, V128,
+ asm#"2", ".2d", ".4s", ".4s",
+ [(set (v2i64 V128:$Rd), (OpNode (extract_high_v4i32 V128:$Rn),
+ (extract_high_v4i32 V128:$Rm)))]>;
+}
+
+multiclass SIMDLongThreeVectorTiedBHS<bit U, bits<4> opc,
+ string asm,
+ SDPatternOperator OpNode> {
+ def v8i8_v8i16 : BaseSIMDDifferentThreeVectorTied<U, 0b000, opc,
+ V128, V64, V64,
+ asm, ".8h", ".8b", ".8b",
+ [(set (v8i16 V128:$dst),
+ (OpNode (v8i16 V128:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
+ def v16i8_v8i16 : BaseSIMDDifferentThreeVectorTied<U, 0b001, opc,
+ V128, V128, V128,
+ asm#"2", ".8h", ".16b", ".16b",
+ [(set (v8i16 V128:$dst),
+ (OpNode (v8i16 V128:$Rd),
+ (extract_high_v16i8 V128:$Rn),
+ (extract_high_v16i8 V128:$Rm)))]>;
+ def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b010, opc,
+ V128, V64, V64,
+ asm, ".4s", ".4h", ".4h",
+ [(set (v4i32 V128:$dst),
+ (OpNode (v4i32 V128:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
+ def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b011, opc,
+ V128, V128, V128,
+ asm#"2", ".4s", ".8h", ".8h",
+ [(set (v4i32 V128:$dst),
+ (OpNode (v4i32 V128:$Rd),
+ (extract_high_v8i16 V128:$Rn),
+ (extract_high_v8i16 V128:$Rm)))]>;
+ def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b100, opc,
+ V128, V64, V64,
+ asm, ".2d", ".2s", ".2s",
+ [(set (v2i64 V128:$dst),
+ (OpNode (v2i64 V128:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
+ def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b101, opc,
+ V128, V128, V128,
+ asm#"2", ".2d", ".4s", ".4s",
+ [(set (v2i64 V128:$dst),
+ (OpNode (v2i64 V128:$Rd),
+ (extract_high_v4i32 V128:$Rn),
+ (extract_high_v4i32 V128:$Rm)))]>;
+}
+
+multiclass SIMDLongThreeVectorSQDMLXTiedHS<bit U, bits<4> opc, string asm,
+ SDPatternOperator Accum> {
+ def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b010, opc,
+ V128, V64, V64,
+ asm, ".4s", ".4h", ".4h",
+ [(set (v4i32 V128:$dst),
+ (Accum (v4i32 V128:$Rd),
+ (v4i32 (int_aarch64_neon_sqdmull (v4i16 V64:$Rn),
+ (v4i16 V64:$Rm)))))]>;
+ def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b011, opc,
+ V128, V128, V128,
+ asm#"2", ".4s", ".8h", ".8h",
+ [(set (v4i32 V128:$dst),
+ (Accum (v4i32 V128:$Rd),
+ (v4i32 (int_aarch64_neon_sqdmull (extract_high_v8i16 V128:$Rn),
+ (extract_high_v8i16 V128:$Rm)))))]>;
+ def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b100, opc,
+ V128, V64, V64,
+ asm, ".2d", ".2s", ".2s",
+ [(set (v2i64 V128:$dst),
+ (Accum (v2i64 V128:$Rd),
+ (v2i64 (int_aarch64_neon_sqdmull (v2i32 V64:$Rn),
+ (v2i32 V64:$Rm)))))]>;
+ def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b101, opc,
+ V128, V128, V128,
+ asm#"2", ".2d", ".4s", ".4s",
+ [(set (v2i64 V128:$dst),
+ (Accum (v2i64 V128:$Rd),
+ (v2i64 (int_aarch64_neon_sqdmull (extract_high_v4i32 V128:$Rn),
+ (extract_high_v4i32 V128:$Rm)))))]>;
+}
+
+multiclass SIMDWideThreeVectorBHS<bit U, bits<4> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b000, opc,
+ V128, V128, V64,
+ asm, ".8h", ".8h", ".8b",
+ [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i8 V64:$Rm)))]>;
+ def v16i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b001, opc,
+ V128, V128, V128,
+ asm#"2", ".8h", ".8h", ".16b",
+ [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn),
+ (extract_high_v16i8 V128:$Rm)))]>;
+ def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc,
+ V128, V128, V64,
+ asm, ".4s", ".4s", ".4h",
+ [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i16 V64:$Rm)))]>;
+ def v8i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b011, opc,
+ V128, V128, V128,
+ asm#"2", ".4s", ".4s", ".8h",
+ [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn),
+ (extract_high_v8i16 V128:$Rm)))]>;
+ def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc,
+ V128, V128, V64,
+ asm, ".2d", ".2d", ".2s",
+ [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (v2i32 V64:$Rm)))]>;
+ def v4i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b101, opc,
+ V128, V128, V128,
+ asm#"2", ".2d", ".2d", ".4s",
+ [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn),
+ (extract_high_v4i32 V128:$Rm)))]>;
+}
+
+//----------------------------------------------------------------------------
+// AdvSIMD bitwise extract from vector
+//----------------------------------------------------------------------------
+
+class BaseSIMDBitwiseExtract<bit size, RegisterOperand regtype, ValueType vty,
+ string asm, string kind>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, i32imm:$imm), asm,
+ "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $imm" #
+ "|" # kind # "\t$Rd, $Rn, $Rm, $imm}", "",
+ [(set (vty regtype:$Rd),
+ (AArch64ext regtype:$Rn, regtype:$Rm, (i32 imm:$imm)))]>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<4> imm;
+ let Inst{31} = 0;
+ let Inst{30} = size;
+ let Inst{29-21} = 0b101110000;
+ let Inst{20-16} = Rm;
+ let Inst{15} = 0;
+ let Inst{14-11} = imm;
+ let Inst{10} = 0;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+
+multiclass SIMDBitwiseExtract<string asm> {
+ def v8i8 : BaseSIMDBitwiseExtract<0, V64, v8i8, asm, ".8b"> {
+ let imm{3} = 0;
+ }
+ def v16i8 : BaseSIMDBitwiseExtract<1, V128, v16i8, asm, ".16b">;
+}
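+
+// The .8b form of EXT indexes only eight bytes, so bit 3 of the 4-bit
+// immediate is forced to zero above, while the .16b form accepts the full
+// 0-15 range. For illustration, the instantiation is expected to be simply
+//   defm EXT : SIMDBitwiseExtract<"ext">;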
+
+//----------------------------------------------------------------------------
+// AdvSIMD zip vector
+//----------------------------------------------------------------------------
+
+class BaseSIMDZipVector<bits<3> size, bits<3> opc, RegisterOperand regtype,
+ string asm, string kind, SDNode OpNode, ValueType valty>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm,
+ "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind #
+ "|" # kind # "\t$Rd, $Rn, $Rm}", "",
+ [(set (valty regtype:$Rd), (OpNode regtype:$Rn, regtype:$Rm))]>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31} = 0;
+ let Inst{30} = size{0};
+ let Inst{29-24} = 0b001110;
+ let Inst{23-22} = size{2-1};
+ let Inst{21} = 0;
+ let Inst{20-16} = Rm;
+ let Inst{15} = 0;
+ let Inst{14-12} = opc;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass SIMDZipVector<bits<3>opc, string asm,
+ SDNode OpNode> {
+ def v8i8 : BaseSIMDZipVector<0b000, opc, V64,
+ asm, ".8b", OpNode, v8i8>;
+ def v16i8 : BaseSIMDZipVector<0b001, opc, V128,
+ asm, ".16b", OpNode, v16i8>;
+ def v4i16 : BaseSIMDZipVector<0b010, opc, V64,
+ asm, ".4h", OpNode, v4i16>;
+ def v8i16 : BaseSIMDZipVector<0b011, opc, V128,
+ asm, ".8h", OpNode, v8i16>;
+ def v2i32 : BaseSIMDZipVector<0b100, opc, V64,
+ asm, ".2s", OpNode, v2i32>;
+ def v4i32 : BaseSIMDZipVector<0b101, opc, V128,
+ asm, ".4s", OpNode, v4i32>;
+ def v2i64 : BaseSIMDZipVector<0b111, opc, V128,
+ asm, ".2d", OpNode, v2i64>;
+
+ def : Pat<(v4f16 (OpNode V64:$Rn, V64:$Rm)),
+ (!cast<Instruction>(NAME#"v4i16") V64:$Rn, V64:$Rm)>;
+ def : Pat<(v8f16 (OpNode V128:$Rn, V128:$Rm)),
+ (!cast<Instruction>(NAME#"v8i16") V128:$Rn, V128:$Rm)>;
+ def : Pat<(v2f32 (OpNode V64:$Rn, V64:$Rm)),
+ (!cast<Instruction>(NAME#"v2i32") V64:$Rn, V64:$Rm)>;
+ def : Pat<(v4f32 (OpNode V128:$Rn, V128:$Rm)),
+ (!cast<Instruction>(NAME#"v4i32") V128:$Rn, V128:$Rm)>;
+ def : Pat<(v2f64 (OpNode V128:$Rn, V128:$Rm)),
+ (!cast<Instruction>(NAME#"v2i64") V128:$Rn, V128:$Rm)>;
+}
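+
+// Usage sketch (illustrative, assumed opcodes): the permute instructions
+// ZIP1/ZIP2, UZP1/UZP2 and TRN1/TRN2 are built from this multiclass, roughly
+//   defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
+// The trailing Pat<> records reuse the integer-typed records for the
+// floating-point vector types, since the permutes are type-agnostic lane moves.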
+
+//----------------------------------------------------------------------------
+// AdvSIMD three register scalar instructions
+//----------------------------------------------------------------------------
+
+let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
+class BaseSIMDThreeScalar<bit U, bits<3> size, bits<5> opcode,
+ RegisterClass regtype, string asm,
+ list<dag> pattern>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm,
+ "\t$Rd, $Rn, $Rm", "", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31-30} = 0b01;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-21} = size;
+ let Inst{20-16} = Rm;
+ let Inst{15-11} = opcode;
+ let Inst{10} = 1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
+class BaseSIMDThreeScalarTied<bit U, bits<2> size, bit R, bits<5> opcode,
+ dag oops, dag iops, string asm,
+ list<dag> pattern>
+ : I<oops, iops, asm, "\t$Rd, $Rn, $Rm", "$Rd = $dst", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31-30} = 0b01;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = size;
+ let Inst{21} = R;
+ let Inst{20-16} = Rm;
+ let Inst{15-11} = opcode;
+ let Inst{10} = 1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass SIMDThreeScalarD<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v1i64 : BaseSIMDThreeScalar<U, 0b111, opc, FPR64, asm,
+ [(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm)))]>;
+}
+
+multiclass SIMDThreeScalarBHSD<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v1i64 : BaseSIMDThreeScalar<U, 0b111, opc, FPR64, asm,
+ [(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm)))]>;
+ def v1i32 : BaseSIMDThreeScalar<U, 0b101, opc, FPR32, asm, []>;
+ def v1i16 : BaseSIMDThreeScalar<U, 0b011, opc, FPR16, asm, []>;
+ def v1i8 : BaseSIMDThreeScalar<U, 0b001, opc, FPR8 , asm, []>;
+
+ def : Pat<(i64 (OpNode (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
+ (!cast<Instruction>(NAME#"v1i64") FPR64:$Rn, FPR64:$Rm)>;
+ def : Pat<(i32 (OpNode (i32 FPR32:$Rn), (i32 FPR32:$Rm))),
+ (!cast<Instruction>(NAME#"v1i32") FPR32:$Rn, FPR32:$Rm)>;
+}
+
+multiclass SIMDThreeScalarHS<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v1i32 : BaseSIMDThreeScalar<U, 0b101, opc, FPR32, asm,
+ [(set FPR32:$Rd, (OpNode FPR32:$Rn, FPR32:$Rm))]>;
+ def v1i16 : BaseSIMDThreeScalar<U, 0b011, opc, FPR16, asm, []>;
+}
+
+multiclass SIMDThreeScalarHSTied<bit U, bit R, bits<5> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ def v1i32: BaseSIMDThreeScalarTied<U, 0b10, R, opc, (outs FPR32:$dst),
+ (ins FPR32:$Rd, FPR32:$Rn, FPR32:$Rm),
+ asm, []>;
+ def v1i16: BaseSIMDThreeScalarTied<U, 0b01, R, opc, (outs FPR16:$dst),
+ (ins FPR16:$Rd, FPR16:$Rn, FPR16:$Rm),
+ asm, []>;
+}
+
+multiclass SIMDFPThreeScalar<bit U, bit S, bits<3> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+ def #NAME#64 : BaseSIMDThreeScalar<U, {S,0b11}, {0b11,opc}, FPR64, asm,
+ [(set (f64 FPR64:$Rd), (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]>;
+ def #NAME#32 : BaseSIMDThreeScalar<U, {S,0b01}, {0b11,opc}, FPR32, asm,
+ [(set FPR32:$Rd, (OpNode FPR32:$Rn, FPR32:$Rm))]>;
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def #NAME#16 : BaseSIMDThreeScalar<U, {S,0b10}, {0b00,opc}, FPR16, asm,
+ [(set FPR16:$Rd, (OpNode FPR16:$Rn, FPR16:$Rm))]>;
+ } // Predicates = [HasNEON, HasFullFP16]
+ }
+
+ def : Pat<(v1f64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+ (!cast<Instruction>(NAME # "64") FPR64:$Rn, FPR64:$Rm)>;
+}
+
+multiclass SIMDThreeScalarFPCmp<bit U, bit S, bits<3> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+ def #NAME#64 : BaseSIMDThreeScalar<U, {S,0b11}, {0b11,opc}, FPR64, asm,
+ [(set (i64 FPR64:$Rd), (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]>;
+ def #NAME#32 : BaseSIMDThreeScalar<U, {S,0b01}, {0b11,opc}, FPR32, asm,
+ [(set (i32 FPR32:$Rd), (OpNode (f32 FPR32:$Rn), (f32 FPR32:$Rm)))]>;
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def #NAME#16 : BaseSIMDThreeScalar<U, {S,0b10}, {0b00,opc}, FPR16, asm,
+ []>;
+ } // Predicates = [HasNEON, HasFullFP16]
+ }
+
+ def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+ (!cast<Instruction>(NAME # "64") FPR64:$Rn, FPR64:$Rm)>;
+}
+
+class BaseSIMDThreeScalarMixed<bit U, bits<2> size, bits<5> opcode,
+ dag oops, dag iops, string asm, string cstr, list<dag> pat>
+ : I<oops, iops, asm,
+ "\t$Rd, $Rn, $Rm", cstr, pat>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31-30} = 0b01;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = size;
+ let Inst{21} = 1;
+ let Inst{20-16} = Rm;
+ let Inst{15-11} = opcode;
+ let Inst{10} = 0;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+multiclass SIMDThreeScalarMixedHS<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ def i16 : BaseSIMDThreeScalarMixed<U, 0b01, opc,
+ (outs FPR32:$Rd),
+ (ins FPR16:$Rn, FPR16:$Rm), asm, "", []>;
+ def i32 : BaseSIMDThreeScalarMixed<U, 0b10, opc,
+ (outs FPR64:$Rd),
+ (ins FPR32:$Rn, FPR32:$Rm), asm, "",
+ [(set (i64 FPR64:$Rd), (OpNode (i32 FPR32:$Rn), (i32 FPR32:$Rm)))]>;
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+multiclass SIMDThreeScalarMixedTiedHS<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ def i16 : BaseSIMDThreeScalarMixed<U, 0b01, opc,
+ (outs FPR32:$dst),
+ (ins FPR32:$Rd, FPR16:$Rn, FPR16:$Rm),
+ asm, "$Rd = $dst", []>;
+ def i32 : BaseSIMDThreeScalarMixed<U, 0b10, opc,
+ (outs FPR64:$dst),
+ (ins FPR64:$Rd, FPR32:$Rn, FPR32:$Rm),
+ asm, "$Rd = $dst",
+ [(set (i64 FPR64:$dst),
+ (OpNode (i64 FPR64:$Rd), (i32 FPR32:$Rn), (i32 FPR32:$Rm)))]>;
+}
+
+//----------------------------------------------------------------------------
+// AdvSIMD two register scalar instructions
+//----------------------------------------------------------------------------
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDTwoScalar<bit U, bits<2> size, bits<2> size2, bits<5> opcode,
+ RegisterClass regtype, RegisterClass regtype2,
+ string asm, list<dag> pat>
+ : I<(outs regtype:$Rd), (ins regtype2:$Rn), asm,
+ "\t$Rd, $Rn", "", pat>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31-30} = 0b01;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = size;
+ let Inst{21} = 0b1;
+ let Inst{20-19} = size2;
+ let Inst{18-17} = 0b00;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDTwoScalarTied<bit U, bits<2> size, bits<5> opcode,
+ RegisterClass regtype, RegisterClass regtype2,
+ string asm, list<dag> pat>
+ : I<(outs regtype:$dst), (ins regtype:$Rd, regtype2:$Rn), asm,
+ "\t$Rd, $Rn", "$Rd = $dst", pat>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31-30} = 0b01;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = size;
+ let Inst{21-17} = 0b10000;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDCmpTwoScalar<bit U, bits<2> size, bits<2> size2, bits<5> opcode,
+ RegisterClass regtype, string asm, string zero>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
+ "\t$Rd, $Rn, #" # zero, "", []>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31-30} = 0b01;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = size;
+ let Inst{21} = 0b1;
+ let Inst{20-19} = size2;
+ let Inst{18-17} = 0b00;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+class SIMDInexactCvtTwoScalar<bits<5> opcode, string asm>
+ : I<(outs FPR32:$Rd), (ins FPR64:$Rn), asm, "\t$Rd, $Rn", "",
+ [(set (f32 FPR32:$Rd), (int_aarch64_sisd_fcvtxn (f64 FPR64:$Rn)))]>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31-17} = 0b011111100110000;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass SIMDCmpTwoScalarD<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v1i64rz : BaseSIMDCmpTwoScalar<U, 0b11, 0b00, opc, FPR64, asm, "0">;
+
+ def : Pat<(v1i64 (OpNode FPR64:$Rn)),
+ (!cast<Instruction>(NAME # v1i64rz) FPR64:$Rn)>;
+}
+
+multiclass SIMDFPCmpTwoScalar<bit U, bit S, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v1i64rz : BaseSIMDCmpTwoScalar<U, {S,1}, 0b00, opc, FPR64, asm, "0.0">;
+ def v1i32rz : BaseSIMDCmpTwoScalar<U, {S,0}, 0b00, opc, FPR32, asm, "0.0">;
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v1i16rz : BaseSIMDCmpTwoScalar<U, {S,1}, 0b11, opc, FPR16, asm, "0.0">;
+ }
+
+ def : InstAlias<asm # "\t$Rd, $Rn, #0",
+ (!cast<Instruction>(NAME # v1i64rz) FPR64:$Rd, FPR64:$Rn), 0>;
+ def : InstAlias<asm # "\t$Rd, $Rn, #0",
+ (!cast<Instruction>(NAME # v1i32rz) FPR32:$Rd, FPR32:$Rn), 0>;
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def : InstAlias<asm # "\t$Rd, $Rn, #0",
+ (!cast<Instruction>(NAME # v1i16rz) FPR16:$Rd, FPR16:$Rn), 0>;
+ }
+
+ def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn))),
+ (!cast<Instruction>(NAME # v1i64rz) FPR64:$Rn)>;
+}
+
+multiclass SIMDTwoScalarD<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ def v1i64 : BaseSIMDTwoScalar<U, 0b11, 0b00, opc, FPR64, FPR64, asm,
+ [(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn)))]>;
+
+ def : Pat<(i64 (OpNode (i64 FPR64:$Rn))),
+ (!cast<Instruction>(NAME # "v1i64") FPR64:$Rn)>;
+}
+
+multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm> {
+ def v1i64 : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,[]>;
+ def v1i32 : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm,[]>;
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v1f16 : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm,[]>;
+ }
+}
+
+multiclass SIMDFPTwoScalarCVT<bit U, bit S, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v1i64 : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,
+ [(set FPR64:$Rd, (OpNode (f64 FPR64:$Rn)))]>;
+ def v1i32 : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm,
+ [(set FPR32:$Rd, (OpNode (f32 FPR32:$Rn)))]>;
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v1i16 : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm,
+ [(set FPR16:$Rd, (OpNode (f16 FPR16:$Rn)))]>;
+ }
+}
+
+multiclass SIMDTwoScalarBHSD<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+ def v1i64 : BaseSIMDTwoScalar<U, 0b11, 0b00, opc, FPR64, FPR64, asm,
+ [(set (i64 FPR64:$Rd), (OpNode (i64 FPR64:$Rn)))]>;
+ def v1i32 : BaseSIMDTwoScalar<U, 0b10, 0b00, opc, FPR32, FPR32, asm,
+ [(set (i32 FPR32:$Rd), (OpNode (i32 FPR32:$Rn)))]>;
+ def v1i16 : BaseSIMDTwoScalar<U, 0b01, 0b00, opc, FPR16, FPR16, asm, []>;
+ def v1i8 : BaseSIMDTwoScalar<U, 0b00, 0b00, opc, FPR8 , FPR8 , asm, []>;
+ }
+
+ def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn))),
+ (!cast<Instruction>(NAME # v1i64) FPR64:$Rn)>;
+}
+
+multiclass SIMDTwoScalarBHSDTied<bit U, bits<5> opc, string asm,
+ Intrinsic OpNode> {
+ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+ def v1i64 : BaseSIMDTwoScalarTied<U, 0b11, opc, FPR64, FPR64, asm,
+ [(set (i64 FPR64:$dst), (OpNode (i64 FPR64:$Rd), (i64 FPR64:$Rn)))]>;
+ def v1i32 : BaseSIMDTwoScalarTied<U, 0b10, opc, FPR32, FPR32, asm,
+ [(set (i32 FPR32:$dst), (OpNode (i32 FPR32:$Rd), (i32 FPR32:$Rn)))]>;
+ def v1i16 : BaseSIMDTwoScalarTied<U, 0b01, opc, FPR16, FPR16, asm, []>;
+ def v1i8 : BaseSIMDTwoScalarTied<U, 0b00, opc, FPR8 , FPR8 , asm, []>;
+ }
+
+ def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn))),
+ (!cast<Instruction>(NAME # v1i64) FPR64:$Rd, FPR64:$Rn)>;
+}
+
+
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+multiclass SIMDTwoScalarMixedBHS<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ def v1i32 : BaseSIMDTwoScalar<U, 0b10, 0b00, opc, FPR32, FPR64, asm,
+ [(set (i32 FPR32:$Rd), (OpNode (i64 FPR64:$Rn)))]>;
+ def v1i16 : BaseSIMDTwoScalar<U, 0b01, 0b00, opc, FPR16, FPR32, asm, []>;
+ def v1i8 : BaseSIMDTwoScalar<U, 0b00, 0b00, opc, FPR8 , FPR16, asm, []>;
+}
+
+//----------------------------------------------------------------------------
+// AdvSIMD scalar pairwise instructions
+//----------------------------------------------------------------------------
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDPairwiseScalar<bit U, bits<2> size, bits<5> opcode,
+ RegisterOperand regtype, RegisterOperand vectype,
+ string asm, string kind>
+ : I<(outs regtype:$Rd), (ins vectype:$Rn), asm,
+ "{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", []>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31-30} = 0b01;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = size;
+ let Inst{21-17} = 0b11000;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass SIMDPairwiseScalarD<bit U, bits<5> opc, string asm> {
+ def v2i64p : BaseSIMDPairwiseScalar<U, 0b11, opc, FPR64Op, V128,
+ asm, ".2d">;
+}
+
+multiclass SIMDFPPairwiseScalar<bit S, bits<5> opc, string asm> {
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v2i16p : BaseSIMDPairwiseScalar<0, {S,0}, opc, FPR16Op, V64,
+ asm, ".2h">;
+ }
+ def v2i32p : BaseSIMDPairwiseScalar<1, {S,0}, opc, FPR32Op, V64,
+ asm, ".2s">;
+ def v2i64p : BaseSIMDPairwiseScalar<1, {S,1}, opc, FPR64Op, V128,
+ asm, ".2d">;
+}
+
+//----------------------------------------------------------------------------
+// AdvSIMD across lanes instructions
+//----------------------------------------------------------------------------
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDAcrossLanes<bit Q, bit U, bits<2> size, bits<5> opcode,
+ RegisterClass regtype, RegisterOperand vectype,
+ string asm, string kind, list<dag> pattern>
+ : I<(outs regtype:$Rd), (ins vectype:$Rn), asm,
+ "{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size;
+ let Inst{21-17} = 0b11000;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass SIMDAcrossLanesBHS<bit U, bits<5> opcode,
+ string asm> {
+ def v8i8v : BaseSIMDAcrossLanes<0, U, 0b00, opcode, FPR8, V64,
+ asm, ".8b", []>;
+ def v16i8v : BaseSIMDAcrossLanes<1, U, 0b00, opcode, FPR8, V128,
+ asm, ".16b", []>;
+ def v4i16v : BaseSIMDAcrossLanes<0, U, 0b01, opcode, FPR16, V64,
+ asm, ".4h", []>;
+ def v8i16v : BaseSIMDAcrossLanes<1, U, 0b01, opcode, FPR16, V128,
+ asm, ".8h", []>;
+ def v4i32v : BaseSIMDAcrossLanes<1, U, 0b10, opcode, FPR32, V128,
+ asm, ".4s", []>;
+}
+
+multiclass SIMDAcrossLanesHSD<bit U, bits<5> opcode, string asm> {
+ def v8i8v : BaseSIMDAcrossLanes<0, U, 0b00, opcode, FPR16, V64,
+ asm, ".8b", []>;
+ def v16i8v : BaseSIMDAcrossLanes<1, U, 0b00, opcode, FPR16, V128,
+ asm, ".16b", []>;
+ def v4i16v : BaseSIMDAcrossLanes<0, U, 0b01, opcode, FPR32, V64,
+ asm, ".4h", []>;
+ def v8i16v : BaseSIMDAcrossLanes<1, U, 0b01, opcode, FPR32, V128,
+ asm, ".8h", []>;
+ def v4i32v : BaseSIMDAcrossLanes<1, U, 0b10, opcode, FPR64, V128,
+ asm, ".4s", []>;
+}
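+
+// The two integer across-lanes multiclasses differ only in the scalar result
+// class: plain reductions (e.g. ADDV, SMAXV) produce an element-sized scalar,
+// while the widening ones (e.g. SADDLV/UADDLV) produce a scalar one step
+// wider, hence FPR16 for an .8b source and FPR64 for a .4s source above.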
+
+multiclass SIMDFPAcrossLanes<bits<5> opcode, bit sz1, string asm,
+ Intrinsic intOp> {
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4i16v : BaseSIMDAcrossLanes<0, 0, {sz1, 0}, opcode, FPR16, V64,
+ asm, ".4h",
+ [(set FPR16:$Rd, (intOp (v4f16 V64:$Rn)))]>;
+ def v8i16v : BaseSIMDAcrossLanes<1, 0, {sz1, 0}, opcode, FPR16, V128,
+ asm, ".8h",
+ [(set FPR16:$Rd, (intOp (v8f16 V128:$Rn)))]>;
+ } // Predicates = [HasNEON, HasFullFP16]
+ def v4i32v : BaseSIMDAcrossLanes<1, 1, {sz1, 0}, opcode, FPR32, V128,
+ asm, ".4s",
+ [(set FPR32:$Rd, (intOp (v4f32 V128:$Rn)))]>;
+}
+
+//----------------------------------------------------------------------------
+// AdvSIMD INS/DUP instructions
+//----------------------------------------------------------------------------
+
+// FIXME: There has got to be a better way to factor these. ugh.
+
+class BaseSIMDInsDup<bit Q, bit op, dag outs, dag ins, string asm,
+ string operands, string constraints, list<dag> pattern>
+ : I<outs, ins, asm, operands, constraints, pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = op;
+ let Inst{28-21} = 0b01110000;
+ let Inst{15} = 0;
+ let Inst{10} = 1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+class SIMDDupFromMain<bit Q, bits<5> imm5, string size, ValueType vectype,
+ RegisterOperand vecreg, RegisterClass regtype>
+ : BaseSIMDInsDup<Q, 0, (outs vecreg:$Rd), (ins regtype:$Rn), "dup",
+ "{\t$Rd" # size # ", $Rn" #
+ "|" # size # "\t$Rd, $Rn}", "",
+ [(set (vectype vecreg:$Rd), (AArch64dup regtype:$Rn))]> {
+ let Inst{20-16} = imm5;
+ let Inst{14-11} = 0b0001;
+}
+
+class SIMDDupFromElement<bit Q, string dstkind, string srckind,
+ ValueType vectype, ValueType insreg,
+ RegisterOperand vecreg, Operand idxtype,
+ ValueType elttype, SDNode OpNode>
+ : BaseSIMDInsDup<Q, 0, (outs vecreg:$Rd), (ins V128:$Rn, idxtype:$idx), "dup",
+ "{\t$Rd" # dstkind # ", $Rn" # srckind # "$idx" #
+ "|" # dstkind # "\t$Rd, $Rn$idx}", "",
+ [(set (vectype vecreg:$Rd),
+ (OpNode (insreg V128:$Rn), idxtype:$idx))]> {
+ let Inst{14-11} = 0b0000;
+}
+
+class SIMDDup64FromElement
+ : SIMDDupFromElement<1, ".2d", ".d", v2i64, v2i64, V128,
+ VectorIndexD, i64, AArch64duplane64> {
+ bits<1> idx;
+ let Inst{20} = idx;
+ let Inst{19-16} = 0b1000;
+}
+
+class SIMDDup32FromElement<bit Q, string size, ValueType vectype,
+ RegisterOperand vecreg>
+ : SIMDDupFromElement<Q, size, ".s", vectype, v4i32, vecreg,
+ VectorIndexS, i64, AArch64duplane32> {
+ bits<2> idx;
+ let Inst{20-19} = idx;
+ let Inst{18-16} = 0b100;
+}
+
+class SIMDDup16FromElement<bit Q, string size, ValueType vectype,
+ RegisterOperand vecreg>
+ : SIMDDupFromElement<Q, size, ".h", vectype, v8i16, vecreg,
+ VectorIndexH, i64, AArch64duplane16> {
+ bits<3> idx;
+ let Inst{20-18} = idx;
+ let Inst{17-16} = 0b10;
+}
+
+class SIMDDup8FromElement<bit Q, string size, ValueType vectype,
+ RegisterOperand vecreg>
+ : SIMDDupFromElement<Q, size, ".b", vectype, v16i8, vecreg,
+ VectorIndexB, i64, AArch64duplane8> {
+ bits<4> idx;
+ let Inst{20-17} = idx;
+ let Inst{16} = 1;
+}
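+
+// Encoding note for the INS/DUP/SMOV/UMOV family: bits 20-16 form an imm5
+// field in which the lowest set bit selects the element size (xxxx1 = B,
+// xxx10 = H, xx100 = S, x1000 = D) and the bits above it hold the lane index.
+// That is why each def below pins a one-hot marker and maps $idx onto the
+// remaining bits.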
+
+class BaseSIMDMov<bit Q, string size, bits<4> imm4, RegisterClass regtype,
+ Operand idxtype, string asm, list<dag> pattern>
+ : BaseSIMDInsDup<Q, 0, (outs regtype:$Rd), (ins V128:$Rn, idxtype:$idx), asm,
+ "{\t$Rd, $Rn" # size # "$idx" #
+ "|" # size # "\t$Rd, $Rn$idx}", "", pattern> {
+ let Inst{14-11} = imm4;
+}
+
+class SIMDSMov<bit Q, string size, RegisterClass regtype,
+ Operand idxtype>
+ : BaseSIMDMov<Q, size, 0b0101, regtype, idxtype, "smov", []>;
+class SIMDUMov<bit Q, string size, ValueType vectype, RegisterClass regtype,
+ Operand idxtype>
+ : BaseSIMDMov<Q, size, 0b0111, regtype, idxtype, "umov",
+ [(set regtype:$Rd, (vector_extract (vectype V128:$Rn), idxtype:$idx))]>;
+
+class SIMDMovAlias<string asm, string size, Instruction inst,
+ RegisterClass regtype, Operand idxtype>
+ : InstAlias<asm#"{\t$dst, $src"#size#"$idx" #
+ "|" # size # "\t$dst, $src$idx}",
+ (inst regtype:$dst, V128:$src, idxtype:$idx)>;
+
+multiclass SMov {
+ def vi8to32 : SIMDSMov<0, ".b", GPR32, VectorIndexB> {
+ bits<4> idx;
+ let Inst{20-17} = idx;
+ let Inst{16} = 1;
+ }
+ def vi8to64 : SIMDSMov<1, ".b", GPR64, VectorIndexB> {
+ bits<4> idx;
+ let Inst{20-17} = idx;
+ let Inst{16} = 1;
+ }
+ def vi16to32 : SIMDSMov<0, ".h", GPR32, VectorIndexH> {
+ bits<3> idx;
+ let Inst{20-18} = idx;
+ let Inst{17-16} = 0b10;
+ }
+ def vi16to64 : SIMDSMov<1, ".h", GPR64, VectorIndexH> {
+ bits<3> idx;
+ let Inst{20-18} = idx;
+ let Inst{17-16} = 0b10;
+ }
+ def vi32to64 : SIMDSMov<1, ".s", GPR64, VectorIndexS> {
+ bits<2> idx;
+ let Inst{20-19} = idx;
+ let Inst{18-16} = 0b100;
+ }
+}
+
+multiclass UMov {
+ def vi8 : SIMDUMov<0, ".b", v16i8, GPR32, VectorIndexB> {
+ bits<4> idx;
+ let Inst{20-17} = idx;
+ let Inst{16} = 1;
+ }
+ def vi16 : SIMDUMov<0, ".h", v8i16, GPR32, VectorIndexH> {
+ bits<3> idx;
+ let Inst{20-18} = idx;
+ let Inst{17-16} = 0b10;
+ }
+ def vi32 : SIMDUMov<0, ".s", v4i32, GPR32, VectorIndexS> {
+ bits<2> idx;
+ let Inst{20-19} = idx;
+ let Inst{18-16} = 0b100;
+ }
+ def vi64 : SIMDUMov<1, ".d", v2i64, GPR64, VectorIndexD> {
+ bits<1> idx;
+ let Inst{20} = idx;
+ let Inst{19-16} = 0b1000;
+ }
+ def : SIMDMovAlias<"mov", ".s",
+ !cast<Instruction>(NAME#"vi32"),
+ GPR32, VectorIndexS>;
+ def : SIMDMovAlias<"mov", ".d",
+ !cast<Instruction>(NAME#"vi64"),
+ GPR64, VectorIndexD>;
+}
+
+class SIMDInsFromMain<string size, ValueType vectype,
+ RegisterClass regtype, Operand idxtype>
+ : BaseSIMDInsDup<1, 0, (outs V128:$dst),
+ (ins V128:$Rd, idxtype:$idx, regtype:$Rn), "ins",
+ "{\t$Rd" # size # "$idx, $Rn" #
+ "|" # size # "\t$Rd$idx, $Rn}",
+ "$Rd = $dst",
+ [(set V128:$dst,
+ (vector_insert (vectype V128:$Rd), regtype:$Rn, idxtype:$idx))]> {
+ let Inst{14-11} = 0b0011;
+}
+
+class SIMDInsFromElement<string size, ValueType vectype,
+ ValueType elttype, Operand idxtype>
+ : BaseSIMDInsDup<1, 1, (outs V128:$dst),
+ (ins V128:$Rd, idxtype:$idx, V128:$Rn, idxtype:$idx2), "ins",
+ "{\t$Rd" # size # "$idx, $Rn" # size # "$idx2" #
+ "|" # size # "\t$Rd$idx, $Rn$idx2}",
+ "$Rd = $dst",
+ [(set V128:$dst,
+ (vector_insert
+ (vectype V128:$Rd),
+ (elttype (vector_extract (vectype V128:$Rn), idxtype:$idx2)),
+ idxtype:$idx))]>;
+
+class SIMDInsMainMovAlias<string size, Instruction inst,
+ RegisterClass regtype, Operand idxtype>
+ : InstAlias<"mov" # "{\t$dst" # size # "$idx, $src" #
+ "|" # size #"\t$dst$idx, $src}",
+ (inst V128:$dst, idxtype:$idx, regtype:$src)>;
+class SIMDInsElementMovAlias<string size, Instruction inst,
+ Operand idxtype>
+ : InstAlias<"mov" # "{\t$dst" # size # "$idx, $src" # size # "$idx2" #
+ # "|" # size #"\t$dst$idx, $src$idx2}",
+ (inst V128:$dst, idxtype:$idx, V128:$src, idxtype:$idx2)>;
+
+
+multiclass SIMDIns {
+ def vi8gpr : SIMDInsFromMain<".b", v16i8, GPR32, VectorIndexB> {
+ bits<4> idx;
+ let Inst{20-17} = idx;
+ let Inst{16} = 1;
+ }
+ def vi16gpr : SIMDInsFromMain<".h", v8i16, GPR32, VectorIndexH> {
+ bits<3> idx;
+ let Inst{20-18} = idx;
+ let Inst{17-16} = 0b10;
+ }
+ def vi32gpr : SIMDInsFromMain<".s", v4i32, GPR32, VectorIndexS> {
+ bits<2> idx;
+ let Inst{20-19} = idx;
+ let Inst{18-16} = 0b100;
+ }
+ def vi64gpr : SIMDInsFromMain<".d", v2i64, GPR64, VectorIndexD> {
+ bits<1> idx;
+ let Inst{20} = idx;
+ let Inst{19-16} = 0b1000;
+ }
+
+ def vi8lane : SIMDInsFromElement<".b", v16i8, i32, VectorIndexB> {
+ bits<4> idx;
+ bits<4> idx2;
+ let Inst{20-17} = idx;
+ let Inst{16} = 1;
+ let Inst{14-11} = idx2;
+ }
+ def vi16lane : SIMDInsFromElement<".h", v8i16, i32, VectorIndexH> {
+ bits<3> idx;
+ bits<3> idx2;
+ let Inst{20-18} = idx;
+ let Inst{17-16} = 0b10;
+ let Inst{14-12} = idx2;
+ let Inst{11} = {?};
+ }
+ def vi32lane : SIMDInsFromElement<".s", v4i32, i32, VectorIndexS> {
+ bits<2> idx;
+ bits<2> idx2;
+ let Inst{20-19} = idx;
+ let Inst{18-16} = 0b100;
+ let Inst{14-13} = idx2;
+ let Inst{12-11} = {?,?};
+ }
+ def vi64lane : SIMDInsFromElement<".d", v2i64, i64, VectorIndexD> {
+ bits<1> idx;
+ bits<1> idx2;
+ let Inst{20} = idx;
+ let Inst{19-16} = 0b1000;
+ let Inst{14} = idx2;
+ let Inst{13-11} = {?,?,?};
+ }
+
+ // For all forms of the INS instruction, the "mov" mnemonic is the
+ // preferred alias. Why they didn't just call the instruction "mov" in
+ // the first place is a very good question indeed...
+ def : SIMDInsMainMovAlias<".b", !cast<Instruction>(NAME#"vi8gpr"),
+ GPR32, VectorIndexB>;
+ def : SIMDInsMainMovAlias<".h", !cast<Instruction>(NAME#"vi16gpr"),
+ GPR32, VectorIndexH>;
+ def : SIMDInsMainMovAlias<".s", !cast<Instruction>(NAME#"vi32gpr"),
+ GPR32, VectorIndexS>;
+ def : SIMDInsMainMovAlias<".d", !cast<Instruction>(NAME#"vi64gpr"),
+ GPR64, VectorIndexD>;
+
+ def : SIMDInsElementMovAlias<".b", !cast<Instruction>(NAME#"vi8lane"),
+ VectorIndexB>;
+ def : SIMDInsElementMovAlias<".h", !cast<Instruction>(NAME#"vi16lane"),
+ VectorIndexH>;
+ def : SIMDInsElementMovAlias<".s", !cast<Instruction>(NAME#"vi32lane"),
+ VectorIndexS>;
+ def : SIMDInsElementMovAlias<".d", !cast<Instruction>(NAME#"vi64lane"),
+ VectorIndexD>;
+}
+
+//----------------------------------------------------------------------------
+// AdvSIMD TBL/TBX
+//----------------------------------------------------------------------------
+
+let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
+class BaseSIMDTableLookup<bit Q, bits<2> len, bit op, RegisterOperand vectype,
+ RegisterOperand listtype, string asm, string kind>
+ : I<(outs vectype:$Vd), (ins listtype:$Vn, vectype:$Vm), asm,
+ "\t$Vd" # kind # ", $Vn, $Vm" # kind, "", []>,
+ Sched<[WriteV]> {
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29-21} = 0b001110000;
+ let Inst{20-16} = Vm;
+ let Inst{15} = 0;
+ let Inst{14-13} = len;
+ let Inst{12} = op;
+ let Inst{11-10} = 0b00;
+ let Inst{9-5} = Vn;
+ let Inst{4-0} = Vd;
+}
+
+let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
+class BaseSIMDTableLookupTied<bit Q, bits<2> len, bit op, RegisterOperand vectype,
+ RegisterOperand listtype, string asm, string kind>
+ : I<(outs vectype:$dst), (ins vectype:$Vd, listtype:$Vn, vectype:$Vm), asm,
+ "\t$Vd" # kind # ", $Vn, $Vm" # kind, "$Vd = $dst", []>,
+ Sched<[WriteV]> {
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29-21} = 0b001110000;
+ let Inst{20-16} = Vm;
+ let Inst{15} = 0;
+ let Inst{14-13} = len;
+ let Inst{12} = op;
+ let Inst{11-10} = 0b00;
+ let Inst{9-5} = Vn;
+ let Inst{4-0} = Vd;
+}
+
+class SIMDTableLookupAlias<string asm, Instruction inst,
+ RegisterOperand vectype, RegisterOperand listtype>
+ : InstAlias<!strconcat(asm, "\t$dst, $lst, $index"),
+ (inst vectype:$dst, listtype:$lst, vectype:$index), 0>;
+
+multiclass SIMDTableLookup<bit op, string asm> {
+ def v8i8One : BaseSIMDTableLookup<0, 0b00, op, V64, VecListOne16b,
+ asm, ".8b">;
+ def v8i8Two : BaseSIMDTableLookup<0, 0b01, op, V64, VecListTwo16b,
+ asm, ".8b">;
+ def v8i8Three : BaseSIMDTableLookup<0, 0b10, op, V64, VecListThree16b,
+ asm, ".8b">;
+ def v8i8Four : BaseSIMDTableLookup<0, 0b11, op, V64, VecListFour16b,
+ asm, ".8b">;
+ def v16i8One : BaseSIMDTableLookup<1, 0b00, op, V128, VecListOne16b,
+ asm, ".16b">;
+ def v16i8Two : BaseSIMDTableLookup<1, 0b01, op, V128, VecListTwo16b,
+ asm, ".16b">;
+ def v16i8Three: BaseSIMDTableLookup<1, 0b10, op, V128, VecListThree16b,
+ asm, ".16b">;
+ def v16i8Four : BaseSIMDTableLookup<1, 0b11, op, V128, VecListFour16b,
+ asm, ".16b">;
+
+ def : SIMDTableLookupAlias<asm # ".8b",
+ !cast<Instruction>(NAME#"v8i8One"),
+ V64, VecListOne128>;
+ def : SIMDTableLookupAlias<asm # ".8b",
+ !cast<Instruction>(NAME#"v8i8Two"),
+ V64, VecListTwo128>;
+ def : SIMDTableLookupAlias<asm # ".8b",
+ !cast<Instruction>(NAME#"v8i8Three"),
+ V64, VecListThree128>;
+ def : SIMDTableLookupAlias<asm # ".8b",
+ !cast<Instruction>(NAME#"v8i8Four"),
+ V64, VecListFour128>;
+ def : SIMDTableLookupAlias<asm # ".16b",
+ !cast<Instruction>(NAME#"v16i8One"),
+ V128, VecListOne128>;
+ def : SIMDTableLookupAlias<asm # ".16b",
+ !cast<Instruction>(NAME#"v16i8Two"),
+ V128, VecListTwo128>;
+ def : SIMDTableLookupAlias<asm # ".16b",
+ !cast<Instruction>(NAME#"v16i8Three"),
+ V128, VecListThree128>;
+ def : SIMDTableLookupAlias<asm # ".16b",
+ !cast<Instruction>(NAME#"v16i8Four"),
+ V128, VecListFour128>;
+}
+
+multiclass SIMDTableLookupTied<bit op, string asm> {
+ def v8i8One : BaseSIMDTableLookupTied<0, 0b00, op, V64, VecListOne16b,
+ asm, ".8b">;
+ def v8i8Two : BaseSIMDTableLookupTied<0, 0b01, op, V64, VecListTwo16b,
+ asm, ".8b">;
+ def v8i8Three : BaseSIMDTableLookupTied<0, 0b10, op, V64, VecListThree16b,
+ asm, ".8b">;
+ def v8i8Four : BaseSIMDTableLookupTied<0, 0b11, op, V64, VecListFour16b,
+ asm, ".8b">;
+ def v16i8One : BaseSIMDTableLookupTied<1, 0b00, op, V128, VecListOne16b,
+ asm, ".16b">;
+ def v16i8Two : BaseSIMDTableLookupTied<1, 0b01, op, V128, VecListTwo16b,
+ asm, ".16b">;
+ def v16i8Three: BaseSIMDTableLookupTied<1, 0b10, op, V128, VecListThree16b,
+ asm, ".16b">;
+ def v16i8Four : BaseSIMDTableLookupTied<1, 0b11, op, V128, VecListFour16b,
+ asm, ".16b">;
+
+ def : SIMDTableLookupAlias<asm # ".8b",
+ !cast<Instruction>(NAME#"v8i8One"),
+ V64, VecListOne128>;
+ def : SIMDTableLookupAlias<asm # ".8b",
+ !cast<Instruction>(NAME#"v8i8Two"),
+ V64, VecListTwo128>;
+ def : SIMDTableLookupAlias<asm # ".8b",
+ !cast<Instruction>(NAME#"v8i8Three"),
+ V64, VecListThree128>;
+ def : SIMDTableLookupAlias<asm # ".8b",
+ !cast<Instruction>(NAME#"v8i8Four"),
+ V64, VecListFour128>;
+ def : SIMDTableLookupAlias<asm # ".16b",
+ !cast<Instruction>(NAME#"v16i8One"),
+ V128, VecListOne128>;
+ def : SIMDTableLookupAlias<asm # ".16b",
+ !cast<Instruction>(NAME#"v16i8Two"),
+ V128, VecListTwo128>;
+ def : SIMDTableLookupAlias<asm # ".16b",
+ !cast<Instruction>(NAME#"v16i8Three"),
+ V128, VecListThree128>;
+ def : SIMDTableLookupAlias<asm # ".16b",
+ !cast<Instruction>(NAME#"v16i8Four"),
+ V128, VecListFour128>;
+}
+
+
+//----------------------------------------------------------------------------
+// AdvSIMD scalar CPY
+//----------------------------------------------------------------------------
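+// E.g. "mov s0, v1.s[2]"; the "dup s0, v1.s[2]" spelling is also accepted via
+// the mnemonic aliases defined at the end of the multiclass below.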
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDScalarCPY<RegisterClass regtype, RegisterOperand vectype,
+ string kind, Operand idxtype>
+ : I<(outs regtype:$dst), (ins vectype:$src, idxtype:$idx), "mov",
+ "{\t$dst, $src" # kind # "$idx" #
+ "|\t$dst, $src$idx}", "", []>,
+ Sched<[WriteV]> {
+ bits<5> dst;
+ bits<5> src;
+ let Inst{31-21} = 0b01011110000;
+ let Inst{15-10} = 0b000001;
+ let Inst{9-5} = src;
+ let Inst{4-0} = dst;
+}
+
+class SIMDScalarCPYAlias<string asm, string size, Instruction inst,
+ RegisterClass regtype, RegisterOperand vectype, Operand idxtype>
+ : InstAlias<asm # "{\t$dst, $src" # size # "$index" #
+ # "|\t$dst, $src$index}",
+ (inst regtype:$dst, vectype:$src, idxtype:$index), 0>;
+
+
+multiclass SIMDScalarCPY<string asm> {
+ def i8 : BaseSIMDScalarCPY<FPR8, V128, ".b", VectorIndexB> {
+ bits<4> idx;
+ let Inst{20-17} = idx;
+ let Inst{16} = 1;
+ }
+ def i16 : BaseSIMDScalarCPY<FPR16, V128, ".h", VectorIndexH> {
+ bits<3> idx;
+ let Inst{20-18} = idx;
+ let Inst{17-16} = 0b10;
+ }
+ def i32 : BaseSIMDScalarCPY<FPR32, V128, ".s", VectorIndexS> {
+ bits<2> idx;
+ let Inst{20-19} = idx;
+ let Inst{18-16} = 0b100;
+ }
+ def i64 : BaseSIMDScalarCPY<FPR64, V128, ".d", VectorIndexD> {
+ bits<1> idx;
+ let Inst{20} = idx;
+ let Inst{19-16} = 0b1000;
+ }
+
+ def : Pat<(v1i64 (scalar_to_vector (i64 (vector_extract (v2i64 V128:$src),
+ VectorIndexD:$idx)))),
+ (!cast<Instruction>(NAME # i64) V128:$src, VectorIndexD:$idx)>;
+
+ // 'DUP' mnemonic aliases.
+ def : SIMDScalarCPYAlias<"dup", ".b",
+ !cast<Instruction>(NAME#"i8"),
+ FPR8, V128, VectorIndexB>;
+ def : SIMDScalarCPYAlias<"dup", ".h",
+ !cast<Instruction>(NAME#"i16"),
+ FPR16, V128, VectorIndexH>;
+ def : SIMDScalarCPYAlias<"dup", ".s",
+ !cast<Instruction>(NAME#"i32"),
+ FPR32, V128, VectorIndexS>;
+ def : SIMDScalarCPYAlias<"dup", ".d",
+ !cast<Instruction>(NAME#"i64"),
+ FPR64, V128, VectorIndexD>;
+}
+
+//----------------------------------------------------------------------------
+// AdvSIMD modified immediate instructions
+//----------------------------------------------------------------------------
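+// E.g. "movi v0.4s, #255, lsl #8" (shifted), "movi v0.2s, #1, msl #8" (MSL),
+// or the 64-bit byte-mask form "movi d0, #0xff00ff00ff00ff00".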
+
+class BaseSIMDModifiedImm<bit Q, bit op, bit op2, dag oops, dag iops,
+ string asm, string op_string,
+ string cstr, list<dag> pattern>
+ : I<oops, iops, asm, op_string, cstr, pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<8> imm8;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = op;
+ let Inst{28-19} = 0b0111100000;
+ let Inst{18-16} = imm8{7-5};
+ let Inst{11} = op2;
+ let Inst{10} = 1;
+ let Inst{9-5} = imm8{4-0};
+ let Inst{4-0} = Rd;
+}
+
+class BaseSIMDModifiedImmVector<bit Q, bit op, bit op2, RegisterOperand vectype,
+ Operand immtype, dag opt_shift_iop,
+ string opt_shift, string asm, string kind,
+ list<dag> pattern>
+ : BaseSIMDModifiedImm<Q, op, op2, (outs vectype:$Rd),
+ !con((ins immtype:$imm8), opt_shift_iop), asm,
+ "{\t$Rd" # kind # ", $imm8" # opt_shift #
+ "|" # kind # "\t$Rd, $imm8" # opt_shift # "}",
+ "", pattern> {
+ let DecoderMethod = "DecodeModImmInstruction";
+}
+
+class BaseSIMDModifiedImmVectorTied<bit Q, bit op, RegisterOperand vectype,
+ Operand immtype, dag opt_shift_iop,
+ string opt_shift, string asm, string kind,
+ list<dag> pattern>
+ : BaseSIMDModifiedImm<Q, op, 0, (outs vectype:$dst),
+ !con((ins vectype:$Rd, immtype:$imm8), opt_shift_iop),
+ asm, "{\t$Rd" # kind # ", $imm8" # opt_shift #
+ "|" # kind # "\t$Rd, $imm8" # opt_shift # "}",
+ "$Rd = $dst", pattern> {
+ let DecoderMethod = "DecodeModImmTiedInstruction";
+}
+
+class BaseSIMDModifiedImmVectorShift<bit Q, bit op, bits<2> b15_b12,
+ RegisterOperand vectype, string asm,
+ string kind, list<dag> pattern>
+ : BaseSIMDModifiedImmVector<Q, op, 0, vectype, imm0_255,
+ (ins logical_vec_shift:$shift),
+ "$shift", asm, kind, pattern> {
+ bits<2> shift;
+ let Inst{15} = b15_b12{1};
+ let Inst{14-13} = shift;
+ let Inst{12} = b15_b12{0};
+}
+
+class BaseSIMDModifiedImmVectorShiftTied<bit Q, bit op, bits<2> b15_b12,
+ RegisterOperand vectype, string asm,
+ string kind, list<dag> pattern>
+ : BaseSIMDModifiedImmVectorTied<Q, op, vectype, imm0_255,
+ (ins logical_vec_shift:$shift),
+ "$shift", asm, kind, pattern> {
+ bits<2> shift;
+ let Inst{15} = b15_b12{1};
+ let Inst{14-13} = shift;
+ let Inst{12} = b15_b12{0};
+}
+
+
+class BaseSIMDModifiedImmVectorShiftHalf<bit Q, bit op, bits<2> b15_b12,
+ RegisterOperand vectype, string asm,
+ string kind, list<dag> pattern>
+ : BaseSIMDModifiedImmVector<Q, op, 0, vectype, imm0_255,
+ (ins logical_vec_hw_shift:$shift),
+ "$shift", asm, kind, pattern> {
+ bits<2> shift;
+ let Inst{15} = b15_b12{1};
+ let Inst{14} = 0;
+ let Inst{13} = shift{0};
+ let Inst{12} = b15_b12{0};
+}
+
+class BaseSIMDModifiedImmVectorShiftHalfTied<bit Q, bit op, bits<2> b15_b12,
+ RegisterOperand vectype, string asm,
+ string kind, list<dag> pattern>
+ : BaseSIMDModifiedImmVectorTied<Q, op, vectype, imm0_255,
+ (ins logical_vec_hw_shift:$shift),
+ "$shift", asm, kind, pattern> {
+ bits<2> shift;
+ let Inst{15} = b15_b12{1};
+ let Inst{14} = 0;
+ let Inst{13} = shift{0};
+ let Inst{12} = b15_b12{0};
+}
+
+multiclass SIMDModifiedImmVectorShift<bit op, bits<2> hw_cmode, bits<2> w_cmode,
+ string asm> {
+ def v4i16 : BaseSIMDModifiedImmVectorShiftHalf<0, op, hw_cmode, V64,
+ asm, ".4h", []>;
+ def v8i16 : BaseSIMDModifiedImmVectorShiftHalf<1, op, hw_cmode, V128,
+ asm, ".8h", []>;
+
+ def v2i32 : BaseSIMDModifiedImmVectorShift<0, op, w_cmode, V64,
+ asm, ".2s", []>;
+ def v4i32 : BaseSIMDModifiedImmVectorShift<1, op, w_cmode, V128,
+ asm, ".4s", []>;
+}
+
+multiclass SIMDModifiedImmVectorShiftTied<bit op, bits<2> hw_cmode,
+ bits<2> w_cmode, string asm,
+ SDNode OpNode> {
+ def v4i16 : BaseSIMDModifiedImmVectorShiftHalfTied<0, op, hw_cmode, V64,
+ asm, ".4h",
+ [(set (v4i16 V64:$dst), (OpNode V64:$Rd,
+ imm0_255:$imm8,
+ (i32 imm:$shift)))]>;
+ def v8i16 : BaseSIMDModifiedImmVectorShiftHalfTied<1, op, hw_cmode, V128,
+ asm, ".8h",
+ [(set (v8i16 V128:$dst), (OpNode V128:$Rd,
+ imm0_255:$imm8,
+ (i32 imm:$shift)))]>;
+
+ def v2i32 : BaseSIMDModifiedImmVectorShiftTied<0, op, w_cmode, V64,
+ asm, ".2s",
+ [(set (v2i32 V64:$dst), (OpNode V64:$Rd,
+ imm0_255:$imm8,
+ (i32 imm:$shift)))]>;
+ def v4i32 : BaseSIMDModifiedImmVectorShiftTied<1, op, w_cmode, V128,
+ asm, ".4s",
+ [(set (v4i32 V128:$dst), (OpNode V128:$Rd,
+ imm0_255:$imm8,
+ (i32 imm:$shift)))]>;
+}
+
+class SIMDModifiedImmMoveMSL<bit Q, bit op, bits<4> cmode,
+ RegisterOperand vectype, string asm,
+ string kind, list<dag> pattern>
+ : BaseSIMDModifiedImmVector<Q, op, 0, vectype, imm0_255,
+ (ins move_vec_shift:$shift),
+ "$shift", asm, kind, pattern> {
+ bits<1> shift;
+ let Inst{15-13} = cmode{3-1};
+ let Inst{12} = shift;
+}
+
+class SIMDModifiedImmVectorNoShift<bit Q, bit op, bit op2, bits<4> cmode,
+ RegisterOperand vectype,
+ Operand imm_type, string asm,
+ string kind, list<dag> pattern>
+ : BaseSIMDModifiedImmVector<Q, op, op2, vectype, imm_type, (ins), "",
+ asm, kind, pattern> {
+ let Inst{15-12} = cmode;
+}
+
+class SIMDModifiedImmScalarNoShift<bit Q, bit op, bits<4> cmode, string asm,
+ list<dag> pattern>
+ : BaseSIMDModifiedImm<Q, op, 0, (outs FPR64:$Rd), (ins simdimmtype10:$imm8), asm,
+ "\t$Rd, $imm8", "", pattern> {
+ let Inst{15-12} = cmode;
+ let DecoderMethod = "DecodeModImmInstruction";
+}
+
+//----------------------------------------------------------------------------
+// AdvSIMD indexed element
+//----------------------------------------------------------------------------
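+// E.g. "mla v0.4s, v1.4s, v2.s[3]" or the scalar "fmla d0, d1, v2.d[1]"; the
+// element index is split across bits 11, 21 and 20 (H:L:M), which the concrete
+// subclasses below set according to the element size.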
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDIndexed<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc,
+ RegisterOperand dst_reg, RegisterOperand lhs_reg,
+ RegisterOperand rhs_reg, Operand vec_idx, string asm,
+ string apple_kind, string dst_kind, string lhs_kind,
+ string rhs_kind, list<dag> pattern>
+ : I<(outs dst_reg:$Rd), (ins lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx),
+ asm,
+ "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" #
+ "|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28} = Scalar;
+ let Inst{27-24} = 0b1111;
+ let Inst{23-22} = size;
+ // Bit 21 must be set by the derived class.
+ let Inst{20-16} = Rm;
+ let Inst{15-12} = opc;
+ // Bit 11 must be set by the derived class.
+ let Inst{10} = 0;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDIndexedTied<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc,
+ RegisterOperand dst_reg, RegisterOperand lhs_reg,
+ RegisterOperand rhs_reg, Operand vec_idx, string asm,
+ string apple_kind, string dst_kind, string lhs_kind,
+ string rhs_kind, list<dag> pattern>
+ : I<(outs dst_reg:$dst),
+ (ins dst_reg:$Rd, lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx), asm,
+ "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" #
+ "|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "$Rd = $dst", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28} = Scalar;
+ let Inst{27-24} = 0b1111;
+ let Inst{23-22} = size;
+ // Bit 21 must be set by the derived class.
+ let Inst{20-16} = Rm;
+ let Inst{15-12} = opc;
+ // Bit 11 must be set by the derived class.
+ let Inst{10} = 0;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+// ARMv8.2 indexed dot product instructions
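+// E.g. "sdot v0.2s, v1.8b, v2.4b[3]" and "udot v0.4s, v1.16b, v2.4b[3]".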
+class BaseSIMDThreeSameVectorDotIndex<bit Q, bit U, string asm, string dst_kind,
+ string lhs_kind, string rhs_kind,
+ RegisterOperand RegType,
+ ValueType AccumType, ValueType InputType,
+ SDPatternOperator OpNode> :
+ BaseSIMDIndexedTied<Q, U, 0b0, 0b10, 0b1110, RegType, RegType, V128,
+ VectorIndexS, asm, "", dst_kind, lhs_kind, rhs_kind,
+ [(set (AccumType RegType:$dst),
+ (AccumType (OpNode (AccumType RegType:$Rd),
+ (InputType RegType:$Rn),
+ (InputType (bitconvert (AccumType
+ (AArch64duplane32 (v4i32 V128:$Rm),
+ VectorIndexS:$idx)))))))]> {
+ bits<2> idx;
+ let Inst{21} = idx{0}; // L
+ let Inst{11} = idx{1}; // H
+}
+
+multiclass SIMDThreeSameVectorDotIndex<bit U, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8 : BaseSIMDThreeSameVectorDotIndex<0, U, asm, ".2s", ".8b", ".4b", V64,
+ v2i32, v8i8, OpNode>;
+ def v16i8 : BaseSIMDThreeSameVectorDotIndex<1, U, asm, ".4s", ".16b", ".4b", V128,
+ v4i32, v16i8, OpNode>;
+}
+
+multiclass SIMDFPIndexed<bit U, bits<4> opc, string asm,
+ SDPatternOperator OpNode> {
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b00, opc,
+ V64, V64,
+ V128_lo, VectorIndexH,
+ asm, ".4h", ".4h", ".4h", ".h",
+ [(set (v4f16 V64:$Rd),
+ (OpNode (v4f16 V64:$Rn),
+ (v4f16 (AArch64duplane16 (v8f16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b00, opc,
+ V128, V128,
+ V128_lo, VectorIndexH,
+ asm, ".8h", ".8h", ".8h", ".h",
+ [(set (v8f16 V128:$Rd),
+ (OpNode (v8f16 V128:$Rn),
+ (v8f16 (AArch64duplane16 (v8f16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+ } // Predicates = [HasNEON, HasFullFP16]
+
+ def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc,
+ V64, V64,
+ V128, VectorIndexS,
+ asm, ".2s", ".2s", ".2s", ".s",
+ [(set (v2f32 V64:$Rd),
+ (OpNode (v2f32 V64:$Rn),
+ (v2f32 (AArch64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc,
+ V128, V128,
+ V128, VectorIndexS,
+ asm, ".4s", ".4s", ".4s", ".s",
+ [(set (v4f32 V128:$Rd),
+ (OpNode (v4f32 V128:$Rn),
+ (v4f32 (AArch64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v2i64_indexed : BaseSIMDIndexed<1, U, 0, 0b11, opc,
+ V128, V128,
+ V128, VectorIndexD,
+ asm, ".2d", ".2d", ".2d", ".d",
+ [(set (v2f64 V128:$Rd),
+ (OpNode (v2f64 V128:$Rn),
+ (v2f64 (AArch64duplane64 (v2f64 V128:$Rm), VectorIndexD:$idx))))]> {
+ bits<1> idx;
+ let Inst{11} = idx{0};
+ let Inst{21} = 0;
+ }
+
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v1i16_indexed : BaseSIMDIndexed<1, U, 1, 0b00, opc,
+ FPR16Op, FPR16Op, V128_lo, VectorIndexH,
+ asm, ".h", "", "", ".h",
+ [(set (f16 FPR16Op:$Rd),
+ (OpNode (f16 FPR16Op:$Rn),
+ (f16 (vector_extract (v8f16 V128_lo:$Rm),
+ VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+ } // Predicates = [HasNEON, HasFullFP16]
+
+ def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc,
+ FPR32Op, FPR32Op, V128, VectorIndexS,
+ asm, ".s", "", "", ".s",
+ [(set (f32 FPR32Op:$Rd),
+ (OpNode (f32 FPR32Op:$Rn),
+ (f32 (vector_extract (v4f32 V128:$Rm),
+ VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v1i64_indexed : BaseSIMDIndexed<1, U, 1, 0b11, opc,
+ FPR64Op, FPR64Op, V128, VectorIndexD,
+ asm, ".d", "", "", ".d",
+ [(set (f64 FPR64Op:$Rd),
+ (OpNode (f64 FPR64Op:$Rn),
+ (f64 (vector_extract (v2f64 V128:$Rm),
+ VectorIndexD:$idx))))]> {
+ bits<1> idx;
+ let Inst{11} = idx{0};
+ let Inst{21} = 0;
+ }
+}
+
+multiclass SIMDFPIndexedTiedPatterns<string INST, SDPatternOperator OpNode> {
+ // 2 variants for the .2s version: DUPLANE from 128-bit and DUP scalar.
+ def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
+ (AArch64duplane32 (v4f32 V128:$Rm),
+ VectorIndexS:$idx))),
+ (!cast<Instruction>(INST # v2i32_indexed)
+ V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>;
+ def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
+ (AArch64dup (f32 FPR32Op:$Rm)))),
+ (!cast<Instruction>(INST # "v2i32_indexed") V64:$Rd, V64:$Rn,
+ (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;
+
+
+ // 2 variants for the .4s version: DUPLANE from 128-bit and DUP scalar.
+ def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
+ (AArch64duplane32 (v4f32 V128:$Rm),
+ VectorIndexS:$idx))),
+ (!cast<Instruction>(INST # "v4i32_indexed")
+ V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>;
+ def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
+ (AArch64dup (f32 FPR32Op:$Rm)))),
+ (!cast<Instruction>(INST # "v4i32_indexed") V128:$Rd, V128:$Rn,
+ (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;
+
+ // 2 variants for the .2d version: DUPLANE from 128-bit and DUP scalar.
+ def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
+ (AArch64duplane64 (v2f64 V128:$Rm),
+ VectorIndexD:$idx))),
+ (!cast<Instruction>(INST # "v2i64_indexed")
+ V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>;
+ def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
+ (AArch64dup (f64 FPR64Op:$Rm)))),
+ (!cast<Instruction>(INST # "v2i64_indexed") V128:$Rd, V128:$Rn,
+ (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>;
+
+ // 2 variants for 32-bit scalar version: extract from .2s or from .4s
+ def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
+ (vector_extract (v4f32 V128:$Rm), VectorIndexS:$idx))),
+ (!cast<Instruction>(INST # "v1i32_indexed") FPR32:$Rd, FPR32:$Rn,
+ V128:$Rm, VectorIndexS:$idx)>;
+ def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
+ (vector_extract (v2f32 V64:$Rm), VectorIndexS:$idx))),
+ (!cast<Instruction>(INST # "v1i32_indexed") FPR32:$Rd, FPR32:$Rn,
+ (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>;
+
+ // 1 variant for 64-bit scalar version: extract from .1d or from .2d
+ def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn),
+ (vector_extract (v2f64 V128:$Rm), VectorIndexD:$idx))),
+ (!cast<Instruction>(INST # "v1i64_indexed") FPR64:$Rd, FPR64:$Rn,
+ V128:$Rm, VectorIndexD:$idx)>;
+}
+
+multiclass SIMDFPIndexedTied<bit U, bits<4> opc, string asm> {
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b00, opc, V64, V64,
+ V128_lo, VectorIndexH,
+ asm, ".4h", ".4h", ".4h", ".h", []> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b00, opc,
+ V128, V128,
+ V128_lo, VectorIndexH,
+ asm, ".8h", ".8h", ".8h", ".h", []> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+ } // Predicates = [HasNEON, HasFullFP16]
+
+ def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, V64, V64,
+ V128, VectorIndexS,
+ asm, ".2s", ".2s", ".2s", ".s", []> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc,
+ V128, V128,
+ V128, VectorIndexS,
+ asm, ".4s", ".4s", ".4s", ".s", []> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v2i64_indexed : BaseSIMDIndexedTied<1, U, 0, 0b11, opc,
+ V128, V128,
+ V128, VectorIndexD,
+ asm, ".2d", ".2d", ".2d", ".d", []> {
+ bits<1> idx;
+ let Inst{11} = idx{0};
+ let Inst{21} = 0;
+ }
+
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v1i16_indexed : BaseSIMDIndexedTied<1, U, 1, 0b00, opc,
+ FPR16Op, FPR16Op, V128_lo, VectorIndexH,
+ asm, ".h", "", "", ".h", []> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+ } // Predicates = [HasNEON, HasFullFP16]
+
+ def v1i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc,
+ FPR32Op, FPR32Op, V128, VectorIndexS,
+ asm, ".s", "", "", ".s", []> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v1i64_indexed : BaseSIMDIndexedTied<1, U, 1, 0b11, opc,
+ FPR64Op, FPR64Op, V128, VectorIndexD,
+ asm, ".d", "", "", ".d", []> {
+ bits<1> idx;
+ let Inst{11} = idx{0};
+ let Inst{21} = 0;
+ }
+}
+
+multiclass SIMDIndexedHS<bit U, bits<4> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, V64, V64,
+ V128_lo, VectorIndexH,
+ asm, ".4h", ".4h", ".4h", ".h",
+ [(set (v4i16 V64:$Rd),
+ (OpNode (v4i16 V64:$Rn),
+ (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc,
+ V128, V128,
+ V128_lo, VectorIndexH,
+ asm, ".8h", ".8h", ".8h", ".h",
+ [(set (v8i16 V128:$Rd),
+ (OpNode (v8i16 V128:$Rn),
+ (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc,
+ V64, V64,
+ V128, VectorIndexS,
+ asm, ".2s", ".2s", ".2s", ".s",
+ [(set (v2i32 V64:$Rd),
+ (OpNode (v2i32 V64:$Rn),
+ (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc,
+ V128, V128,
+ V128, VectorIndexS,
+ asm, ".4s", ".4s", ".4s", ".s",
+ [(set (v4i32 V128:$Rd),
+ (OpNode (v4i32 V128:$Rn),
+ (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v1i16_indexed : BaseSIMDIndexed<1, U, 1, 0b01, opc,
+ FPR16Op, FPR16Op, V128_lo, VectorIndexH,
+ asm, ".h", "", "", ".h", []> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc,
+ FPR32Op, FPR32Op, V128, VectorIndexS,
+ asm, ".s", "", "", ".s",
+ [(set (i32 FPR32Op:$Rd),
+ (OpNode FPR32Op:$Rn,
+ (i32 (vector_extract (v4i32 V128:$Rm),
+ VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+}
+
+multiclass SIMDVectorIndexedHS<bit U, bits<4> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc,
+ V64, V64,
+ V128_lo, VectorIndexH,
+ asm, ".4h", ".4h", ".4h", ".h",
+ [(set (v4i16 V64:$Rd),
+ (OpNode (v4i16 V64:$Rn),
+ (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc,
+ V128, V128,
+ V128_lo, VectorIndexH,
+ asm, ".8h", ".8h", ".8h", ".h",
+ [(set (v8i16 V128:$Rd),
+ (OpNode (v8i16 V128:$Rn),
+ (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc,
+ V64, V64,
+ V128, VectorIndexS,
+ asm, ".2s", ".2s", ".2s", ".s",
+ [(set (v2i32 V64:$Rd),
+ (OpNode (v2i32 V64:$Rn),
+ (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc,
+ V128, V128,
+ V128, VectorIndexS,
+ asm, ".4s", ".4s", ".4s", ".s",
+ [(set (v4i32 V128:$Rd),
+ (OpNode (v4i32 V128:$Rn),
+ (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+}
+
+multiclass SIMDVectorIndexedHSTied<bit U, bits<4> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, V64, V64,
+ V128_lo, VectorIndexH,
+ asm, ".4h", ".4h", ".4h", ".h",
+ [(set (v4i16 V64:$dst),
+ (OpNode (v4i16 V64:$Rd),(v4i16 V64:$Rn),
+ (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc,
+ V128, V128,
+ V128_lo, VectorIndexH,
+ asm, ".8h", ".8h", ".8h", ".h",
+ [(set (v8i16 V128:$dst),
+ (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn),
+ (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc,
+ V64, V64,
+ V128, VectorIndexS,
+ asm, ".2s", ".2s", ".2s", ".s",
+ [(set (v2i32 V64:$dst),
+ (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn),
+ (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc,
+ V128, V128,
+ V128, VectorIndexS,
+ asm, ".4s", ".4s", ".4s", ".s",
+ [(set (v4i32 V128:$dst),
+ (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn),
+ (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+}
+
+multiclass SIMDIndexedLongSD<bit U, bits<4> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc,
+ V128, V64,
+ V128_lo, VectorIndexH,
+ asm, ".4s", ".4s", ".4h", ".h",
+ [(set (v4i32 V128:$Rd),
+ (OpNode (v4i16 V64:$Rn),
+ (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc,
+ V128, V128,
+ V128_lo, VectorIndexH,
+ asm#"2", ".4s", ".4s", ".8h", ".h",
+ [(set (v4i32 V128:$Rd),
+ (OpNode (extract_high_v8i16 V128:$Rn),
+ (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
+ VectorIndexH:$idx))))]> {
+
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc,
+ V128, V64,
+ V128, VectorIndexS,
+ asm, ".2d", ".2d", ".2s", ".s",
+ [(set (v2i64 V128:$Rd),
+ (OpNode (v2i32 V64:$Rn),
+ (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc,
+ V128, V128,
+ V128, VectorIndexS,
+ asm#"2", ".2d", ".2d", ".4s", ".s",
+ [(set (v2i64 V128:$Rd),
+ (OpNode (extract_high_v4i32 V128:$Rn),
+ (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm),
+ VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b01, opc,
+ FPR32Op, FPR16Op, V128_lo, VectorIndexH,
+ asm, ".h", "", "", ".h", []> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v1i64_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc,
+ FPR64Op, FPR32Op, V128, VectorIndexS,
+ asm, ".s", "", "", ".s", []> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+}
+
+multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
+ SDPatternOperator Accum> {
+ def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc,
+ V128, V64,
+ V128_lo, VectorIndexH,
+ asm, ".4s", ".4s", ".4h", ".h",
+ [(set (v4i32 V128:$dst),
+ (Accum (v4i32 V128:$Rd),
+ (v4i32 (int_aarch64_neon_sqdmull
+ (v4i16 V64:$Rn),
+ (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
+ VectorIndexH:$idx))))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ // FIXME: it would be nice to use the scalar (v1i32) instruction here, but an
+ // intermediate EXTRACT_SUBREG would be untyped.
+ def : Pat<(i32 (Accum (i32 FPR32Op:$Rd),
+ (i32 (vector_extract (v4i32
+ (int_aarch64_neon_sqdmull (v4i16 V64:$Rn),
+ (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
+ VectorIndexH:$idx)))),
+ (i64 0))))),
+ (EXTRACT_SUBREG
+ (!cast<Instruction>(NAME # v4i16_indexed)
+ (SUBREG_TO_REG (i32 0), FPR32Op:$Rd, ssub), V64:$Rn,
+ V128_lo:$Rm, VectorIndexH:$idx),
+ ssub)>;
+
+ def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc,
+ V128, V128,
+ V128_lo, VectorIndexH,
+ asm#"2", ".4s", ".4s", ".8h", ".h",
+ [(set (v4i32 V128:$dst),
+ (Accum (v4i32 V128:$Rd),
+ (v4i32 (int_aarch64_neon_sqdmull
+ (extract_high_v8i16 V128:$Rn),
+ (extract_high_v8i16
+ (AArch64duplane16 (v8i16 V128_lo:$Rm),
+ VectorIndexH:$idx))))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc,
+ V128, V64,
+ V128, VectorIndexS,
+ asm, ".2d", ".2d", ".2s", ".s",
+ [(set (v2i64 V128:$dst),
+ (Accum (v2i64 V128:$Rd),
+ (v2i64 (int_aarch64_neon_sqdmull
+ (v2i32 V64:$Rn),
+ (v2i32 (AArch64duplane32 (v4i32 V128:$Rm),
+ VectorIndexS:$idx))))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc,
+ V128, V128,
+ V128, VectorIndexS,
+ asm#"2", ".2d", ".2d", ".4s", ".s",
+ [(set (v2i64 V128:$dst),
+ (Accum (v2i64 V128:$Rd),
+ (v2i64 (int_aarch64_neon_sqdmull
+ (extract_high_v4i32 V128:$Rn),
+ (extract_high_v4i32
+ (AArch64duplane32 (v4i32 V128:$Rm),
+ VectorIndexS:$idx))))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v1i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b01, opc,
+ FPR32Op, FPR16Op, V128_lo, VectorIndexH,
+ asm, ".h", "", "", ".h", []> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+
+ def v1i64_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc,
+ FPR64Op, FPR32Op, V128, VectorIndexS,
+ asm, ".s", "", "", ".s",
+ [(set (i64 FPR64Op:$dst),
+ (Accum (i64 FPR64Op:$Rd),
+ (i64 (int_aarch64_neon_sqdmulls_scalar
+ (i32 FPR32Op:$Rn),
+ (i32 (vector_extract (v4i32 V128:$Rm),
+ VectorIndexS:$idx))))))]> {
+
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+}
+
+multiclass SIMDVectorIndexedLongSD<bit U, bits<4> opc, string asm,
+ SDPatternOperator OpNode> {
+ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+ def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc,
+ V128, V64,
+ V128_lo, VectorIndexH,
+ asm, ".4s", ".4s", ".4h", ".h",
+ [(set (v4i32 V128:$Rd),
+ (OpNode (v4i16 V64:$Rn),
+ (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc,
+ V128, V128,
+ V128_lo, VectorIndexH,
+ asm#"2", ".4s", ".4s", ".8h", ".h",
+ [(set (v4i32 V128:$Rd),
+ (OpNode (extract_high_v8i16 V128:$Rn),
+ (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
+ VectorIndexH:$idx))))]> {
+
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc,
+ V128, V64,
+ V128, VectorIndexS,
+ asm, ".2d", ".2d", ".2s", ".s",
+ [(set (v2i64 V128:$Rd),
+ (OpNode (v2i32 V64:$Rn),
+ (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc,
+ V128, V128,
+ V128, VectorIndexS,
+ asm#"2", ".2d", ".2d", ".4s", ".s",
+ [(set (v2i64 V128:$Rd),
+ (OpNode (extract_high_v4i32 V128:$Rn),
+ (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm),
+ VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+ }
+}
+
+multiclass SIMDVectorIndexedLongSDTied<bit U, bits<4> opc, string asm,
+ SDPatternOperator OpNode> {
+ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+ def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc,
+ V128, V64,
+ V128_lo, VectorIndexH,
+ asm, ".4s", ".4s", ".4h", ".h",
+ [(set (v4i32 V128:$dst),
+ (OpNode (v4i32 V128:$Rd), (v4i16 V64:$Rn),
+ (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc,
+ V128, V128,
+ V128_lo, VectorIndexH,
+ asm#"2", ".4s", ".4s", ".8h", ".h",
+ [(set (v4i32 V128:$dst),
+ (OpNode (v4i32 V128:$Rd),
+ (extract_high_v8i16 V128:$Rn),
+ (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
+ VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc,
+ V128, V64,
+ V128, VectorIndexS,
+ asm, ".2d", ".2d", ".2s", ".s",
+ [(set (v2i64 V128:$dst),
+ (OpNode (v2i64 V128:$Rd), (v2i32 V64:$Rn),
+ (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc,
+ V128, V128,
+ V128, VectorIndexS,
+ asm#"2", ".2d", ".2d", ".4s", ".s",
+ [(set (v2i64 V128:$dst),
+ (OpNode (v2i64 V128:$Rd),
+ (extract_high_v4i32 V128:$Rn),
+ (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm),
+ VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+ }
+}
+
+//----------------------------------------------------------------------------
+// AdvSIMD scalar shift by immediate
+//----------------------------------------------------------------------------
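+// E.g. "sshr d0, d1, #63" or the narrowing "sqshrn s0, d1, #16"; the shift
+// amount is encoded in the immh:immb field (Inst{22-16}) by the subclasses below.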
+
+let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
+class BaseSIMDScalarShift<bit U, bits<5> opc, bits<7> fixed_imm,
+ RegisterClass regtype1, RegisterClass regtype2,
+ Operand immtype, string asm, list<dag> pattern>
+ : I<(outs regtype1:$Rd), (ins regtype2:$Rn, immtype:$imm),
+ asm, "\t$Rd, $Rn, $imm", "", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<7> imm;
+ let Inst{31-30} = 0b01;
+ let Inst{29} = U;
+ let Inst{28-23} = 0b111110;
+ let Inst{22-16} = fixed_imm;
+ let Inst{15-11} = opc;
+ let Inst{10} = 1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
+class BaseSIMDScalarShiftTied<bit U, bits<5> opc, bits<7> fixed_imm,
+ RegisterClass regtype1, RegisterClass regtype2,
+ Operand immtype, string asm, list<dag> pattern>
+ : I<(outs regtype1:$dst), (ins regtype1:$Rd, regtype2:$Rn, immtype:$imm),
+ asm, "\t$Rd, $Rn, $imm", "$Rd = $dst", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<7> imm;
+ let Inst{31-30} = 0b01;
+ let Inst{29} = U;
+ let Inst{28-23} = 0b111110;
+ let Inst{22-16} = fixed_imm;
+ let Inst{15-11} = opc;
+ let Inst{10} = 1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+
+multiclass SIMDFPScalarRShift<bit U, bits<5> opc, string asm> {
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?},
+ FPR16, FPR16, vecshiftR16, asm, []> {
+ let Inst{19-16} = imm{3-0};
+ }
+ } // Predicates = [HasNEON, HasFullFP16]
+ def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?},
+ FPR32, FPR32, vecshiftR32, asm, []> {
+ let Inst{20-16} = imm{4-0};
+ }
+ def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?},
+ FPR64, FPR64, vecshiftR64, asm, []> {
+ let Inst{21-16} = imm{5-0};
+ }
+}
+
+multiclass SIMDScalarRShiftD<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?},
+ FPR64, FPR64, vecshiftR64, asm,
+ [(set (i64 FPR64:$Rd),
+ (OpNode (i64 FPR64:$Rn), (i32 vecshiftR64:$imm)))]> {
+ let Inst{21-16} = imm{5-0};
+ }
+
+ def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn), (i32 vecshiftR64:$imm))),
+ (!cast<Instruction>(NAME # "d") FPR64:$Rn, vecshiftR64:$imm)>;
+}
+
+multiclass SIMDScalarRShiftDTied<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ def d : BaseSIMDScalarShiftTied<U, opc, {1,?,?,?,?,?,?},
+ FPR64, FPR64, vecshiftR64, asm,
+ [(set (i64 FPR64:$dst), (OpNode (i64 FPR64:$Rd), (i64 FPR64:$Rn),
+ (i32 vecshiftR64:$imm)))]> {
+ let Inst{21-16} = imm{5-0};
+ }
+
+ def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
+ (i32 vecshiftR64:$imm))),
+ (!cast<Instruction>(NAME # "d") FPR64:$Rd, FPR64:$Rn,
+ vecshiftR64:$imm)>;
+}
+
+multiclass SIMDScalarLShiftD<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?},
+ FPR64, FPR64, vecshiftL64, asm,
+ [(set (v1i64 FPR64:$Rd),
+ (OpNode (v1i64 FPR64:$Rn), (i32 vecshiftL64:$imm)))]> {
+ let Inst{21-16} = imm{5-0};
+ }
+}
+
+let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
+multiclass SIMDScalarLShiftDTied<bit U, bits<5> opc, string asm> {
+ def d : BaseSIMDScalarShiftTied<U, opc, {1,?,?,?,?,?,?},
+ FPR64, FPR64, vecshiftL64, asm, []> {
+ let Inst{21-16} = imm{5-0};
+ }
+}
+
+let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
+multiclass SIMDScalarRShiftBHS<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ def b : BaseSIMDScalarShift<U, opc, {0,0,0,1,?,?,?},
+ FPR8, FPR16, vecshiftR8, asm, []> {
+ let Inst{18-16} = imm{2-0};
+ }
+
+ def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?},
+ FPR16, FPR32, vecshiftR16, asm, []> {
+ let Inst{19-16} = imm{3-0};
+ }
+
+ def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?},
+ FPR32, FPR64, vecshiftR32, asm,
+ [(set (i32 FPR32:$Rd), (OpNode (i64 FPR64:$Rn), vecshiftR32:$imm))]> {
+ let Inst{20-16} = imm{4-0};
+ }
+}
+
+multiclass SIMDScalarLShiftBHSD<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def b : BaseSIMDScalarShift<U, opc, {0,0,0,1,?,?,?},
+ FPR8, FPR8, vecshiftL8, asm, []> {
+ let Inst{18-16} = imm{2-0};
+ }
+
+ def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?},
+ FPR16, FPR16, vecshiftL16, asm, []> {
+ let Inst{19-16} = imm{3-0};
+ }
+
+ def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?},
+ FPR32, FPR32, vecshiftL32, asm,
+ [(set (i32 FPR32:$Rd), (OpNode (i32 FPR32:$Rn), (i32 vecshiftL32:$imm)))]> {
+ let Inst{20-16} = imm{4-0};
+ }
+
+ def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?},
+ FPR64, FPR64, vecshiftL64, asm,
+ [(set (i64 FPR64:$Rd), (OpNode (i64 FPR64:$Rn), (i32 vecshiftL64:$imm)))]> {
+ let Inst{21-16} = imm{5-0};
+ }
+
+ def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn), (i32 vecshiftL64:$imm))),
+ (!cast<Instruction>(NAME # "d") FPR64:$Rn, vecshiftL64:$imm)>;
+}
+
+multiclass SIMDScalarRShiftBHSD<bit U, bits<5> opc, string asm> {
+ def b : BaseSIMDScalarShift<U, opc, {0,0,0,1,?,?,?},
+ FPR8, FPR8, vecshiftR8, asm, []> {
+ let Inst{18-16} = imm{2-0};
+ }
+
+ def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?},
+ FPR16, FPR16, vecshiftR16, asm, []> {
+ let Inst{19-16} = imm{3-0};
+ }
+
+ def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?},
+ FPR32, FPR32, vecshiftR32, asm, []> {
+ let Inst{20-16} = imm{4-0};
+ }
+
+ def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?},
+ FPR64, FPR64, vecshiftR64, asm, []> {
+ let Inst{21-16} = imm{5-0};
+ }
+}
+
+//----------------------------------------------------------------------------
+// AdvSIMD vector shift by immediate
+//----------------------------------------------------------------------------
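+// E.g. "shl v0.4s, v1.4s, #3", "sshr v0.16b, v1.16b, #2", or the widening
+// "sshll v0.8h, v1.8b, #2"; the shift amount lives in immh:immb (Inst{22-16}).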
+
+let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
+class BaseSIMDVectorShift<bit Q, bit U, bits<5> opc, bits<7> fixed_imm,
+ RegisterOperand dst_reg, RegisterOperand src_reg,
+ Operand immtype,
+ string asm, string dst_kind, string src_kind,
+ list<dag> pattern>
+ : I<(outs dst_reg:$Rd), (ins src_reg:$Rn, immtype:$imm),
+ asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" #
+ "|" # dst_kind # "\t$Rd, $Rn, $imm}", "", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28-23} = 0b011110;
+ let Inst{22-16} = fixed_imm;
+ let Inst{15-11} = opc;
+ let Inst{10} = 1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
+class BaseSIMDVectorShiftTied<bit Q, bit U, bits<5> opc, bits<7> fixed_imm,
+ RegisterOperand vectype1, RegisterOperand vectype2,
+ Operand immtype,
+ string asm, string dst_kind, string src_kind,
+ list<dag> pattern>
+ : I<(outs vectype1:$dst), (ins vectype1:$Rd, vectype2:$Rn, immtype:$imm),
+ asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" #
+ "|" # dst_kind # "\t$Rd, $Rn, $imm}", "$Rd = $dst", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28-23} = 0b011110;
+ let Inst{22-16} = fixed_imm;
+ let Inst{15-11} = opc;
+ let Inst{10} = 1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm,
+ Intrinsic OpNode> {
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
+ V64, V64, vecshiftR16,
+ asm, ".4h", ".4h",
+ [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (i32 imm:$imm)))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+
+ def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?},
+ V128, V128, vecshiftR16,
+ asm, ".8h", ".8h",
+ [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (i32 imm:$imm)))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+ } // Predicates = [HasNEON, HasFullFP16]
+ def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
+ V64, V64, vecshiftR32,
+ asm, ".2s", ".2s",
+ [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (i32 imm:$imm)))]> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ }
+
+ def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?},
+ V128, V128, vecshiftR32,
+ asm, ".4s", ".4s",
+ [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (i32 imm:$imm)))]> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ }
+
+ def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?},
+ V128, V128, vecshiftR64,
+ asm, ".2d", ".2d",
+ [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (i32 imm:$imm)))]> {
+ bits<6> imm;
+ let Inst{21-16} = imm;
+ }
+}
+
+multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm,
+ Intrinsic OpNode> {
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
+ V64, V64, vecshiftR16,
+ asm, ".4h", ".4h",
+ [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (i32 imm:$imm)))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+
+ def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?},
+ V128, V128, vecshiftR16,
+ asm, ".8h", ".8h",
+ [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (i32 imm:$imm)))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+ } // Predicates = [HasNEON, HasFullFP16]
+
+ def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
+ V64, V64, vecshiftR32,
+ asm, ".2s", ".2s",
+ [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (i32 imm:$imm)))]> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ }
+
+ def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?},
+ V128, V128, vecshiftR32,
+ asm, ".4s", ".4s",
+ [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (i32 imm:$imm)))]> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ }
+
+ def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?},
+ V128, V128, vecshiftR64,
+ asm, ".2d", ".2d",
+ [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (i32 imm:$imm)))]> {
+ bits<6> imm;
+ let Inst{21-16} = imm;
+ }
+}
+
+multiclass SIMDVectorRShiftNarrowBHS<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?},
+ V64, V128, vecshiftR16Narrow,
+ asm, ".8b", ".8h",
+ [(set (v8i8 V64:$Rd), (OpNode (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))]> {
+ bits<3> imm;
+ let Inst{18-16} = imm;
+ }
+
+ def v16i8_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,0,1,?,?,?},
+ V128, V128, vecshiftR16Narrow,
+ asm#"2", ".16b", ".8h", []> {
+ bits<3> imm;
+ let Inst{18-16} = imm;
+ let hasSideEffects = 0;
+ }
+
+ def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
+ V64, V128, vecshiftR32Narrow,
+ asm, ".4h", ".4s",
+ [(set (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+
+ def v8i16_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,1,?,?,?,?},
+ V128, V128, vecshiftR32Narrow,
+ asm#"2", ".8h", ".4s", []> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ let hasSideEffects = 0;
+ }
+
+ def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
+ V64, V128, vecshiftR64Narrow,
+ asm, ".2s", ".2d",
+ [(set (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))]> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ }
+
+ def v4i32_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,1,?,?,?,?,?},
+ V128, V128, vecshiftR64Narrow,
+ asm#"2", ".4s", ".2d", []> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ let hasSideEffects = 0;
+ }
+
+  // TableGen doesn't like patterns w/ INSERT_SUBREG on the instructions
+ // themselves, so put them here instead.
+
+ // Patterns involving what's effectively an insert high and a normal
+ // intrinsic, represented by CONCAT_VECTORS.
+ def : Pat<(concat_vectors (v8i8 V64:$Rd),(OpNode (v8i16 V128:$Rn),
+ vecshiftR16Narrow:$imm)),
+ (!cast<Instruction>(NAME # "v16i8_shift")
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
+ V128:$Rn, vecshiftR16Narrow:$imm)>;
+ def : Pat<(concat_vectors (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn),
+ vecshiftR32Narrow:$imm)),
+ (!cast<Instruction>(NAME # "v8i16_shift")
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
+ V128:$Rn, vecshiftR32Narrow:$imm)>;
+ def : Pat<(concat_vectors (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn),
+ vecshiftR64Narrow:$imm)),
+ (!cast<Instruction>(NAME # "v4i32_shift")
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
+ V128:$Rn, vecshiftR64Narrow:$imm)>;
+}
+
+multiclass SIMDVectorLShiftBHSD<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?},
+ V64, V64, vecshiftL8,
+ asm, ".8b", ".8b",
+ [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn),
+ (i32 vecshiftL8:$imm)))]> {
+ bits<3> imm;
+ let Inst{18-16} = imm;
+ }
+
+ def v16i8_shift : BaseSIMDVectorShift<1, U, opc, {0,0,0,1,?,?,?},
+ V128, V128, vecshiftL8,
+ asm, ".16b", ".16b",
+ [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn),
+ (i32 vecshiftL8:$imm)))]> {
+ bits<3> imm;
+ let Inst{18-16} = imm;
+ }
+
+ def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
+ V64, V64, vecshiftL16,
+ asm, ".4h", ".4h",
+ [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn),
+ (i32 vecshiftL16:$imm)))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+
+ def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?},
+ V128, V128, vecshiftL16,
+ asm, ".8h", ".8h",
+ [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn),
+ (i32 vecshiftL16:$imm)))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+
+ def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
+ V64, V64, vecshiftL32,
+ asm, ".2s", ".2s",
+ [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn),
+ (i32 vecshiftL32:$imm)))]> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ }
+
+ def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?},
+ V128, V128, vecshiftL32,
+ asm, ".4s", ".4s",
+ [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn),
+ (i32 vecshiftL32:$imm)))]> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ }
+
+ def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?},
+ V128, V128, vecshiftL64,
+ asm, ".2d", ".2d",
+ [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn),
+ (i32 vecshiftL64:$imm)))]> {
+ bits<6> imm;
+ let Inst{21-16} = imm;
+ }
+}
+
+multiclass SIMDVectorRShiftBHSD<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?},
+ V64, V64, vecshiftR8,
+ asm, ".8b", ".8b",
+ [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn),
+ (i32 vecshiftR8:$imm)))]> {
+ bits<3> imm;
+ let Inst{18-16} = imm;
+ }
+
+ def v16i8_shift : BaseSIMDVectorShift<1, U, opc, {0,0,0,1,?,?,?},
+ V128, V128, vecshiftR8,
+ asm, ".16b", ".16b",
+ [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn),
+ (i32 vecshiftR8:$imm)))]> {
+ bits<3> imm;
+ let Inst{18-16} = imm;
+ }
+
+ def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
+ V64, V64, vecshiftR16,
+ asm, ".4h", ".4h",
+ [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn),
+ (i32 vecshiftR16:$imm)))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+
+ def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?},
+ V128, V128, vecshiftR16,
+ asm, ".8h", ".8h",
+ [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn),
+ (i32 vecshiftR16:$imm)))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+
+ def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
+ V64, V64, vecshiftR32,
+ asm, ".2s", ".2s",
+ [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn),
+ (i32 vecshiftR32:$imm)))]> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ }
+
+ def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?},
+ V128, V128, vecshiftR32,
+ asm, ".4s", ".4s",
+ [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn),
+ (i32 vecshiftR32:$imm)))]> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ }
+
+ def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?},
+ V128, V128, vecshiftR64,
+ asm, ".2d", ".2d",
+ [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn),
+ (i32 vecshiftR64:$imm)))]> {
+ bits<6> imm;
+ let Inst{21-16} = imm;
+ }
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+multiclass SIMDVectorRShiftBHSDTied<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ def v8i8_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,0,1,?,?,?},
+ V64, V64, vecshiftR8, asm, ".8b", ".8b",
+ [(set (v8i8 V64:$dst),
+ (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn),
+ (i32 vecshiftR8:$imm)))]> {
+ bits<3> imm;
+ let Inst{18-16} = imm;
+ }
+
+ def v16i8_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,0,1,?,?,?},
+ V128, V128, vecshiftR8, asm, ".16b", ".16b",
+ [(set (v16i8 V128:$dst),
+ (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn),
+ (i32 vecshiftR8:$imm)))]> {
+ bits<3> imm;
+ let Inst{18-16} = imm;
+ }
+
+ def v4i16_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,1,?,?,?,?},
+ V64, V64, vecshiftR16, asm, ".4h", ".4h",
+ [(set (v4i16 V64:$dst),
+ (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn),
+ (i32 vecshiftR16:$imm)))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+
+ def v8i16_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,1,?,?,?,?},
+ V128, V128, vecshiftR16, asm, ".8h", ".8h",
+ [(set (v8i16 V128:$dst),
+ (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn),
+ (i32 vecshiftR16:$imm)))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+
+ def v2i32_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,1,?,?,?,?,?},
+ V64, V64, vecshiftR32, asm, ".2s", ".2s",
+ [(set (v2i32 V64:$dst),
+ (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn),
+ (i32 vecshiftR32:$imm)))]> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ }
+
+ def v4i32_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,1,?,?,?,?,?},
+ V128, V128, vecshiftR32, asm, ".4s", ".4s",
+ [(set (v4i32 V128:$dst),
+ (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn),
+ (i32 vecshiftR32:$imm)))]> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ }
+
+ def v2i64_shift : BaseSIMDVectorShiftTied<1, U, opc, {1,?,?,?,?,?,?},
+ V128, V128, vecshiftR64,
+ asm, ".2d", ".2d", [(set (v2i64 V128:$dst),
+ (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn),
+ (i32 vecshiftR64:$imm)))]> {
+ bits<6> imm;
+ let Inst{21-16} = imm;
+ }
+}
+
+multiclass SIMDVectorLShiftBHSDTied<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ def v8i8_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,0,1,?,?,?},
+ V64, V64, vecshiftL8,
+ asm, ".8b", ".8b",
+ [(set (v8i8 V64:$dst),
+ (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn),
+ (i32 vecshiftL8:$imm)))]> {
+ bits<3> imm;
+ let Inst{18-16} = imm;
+ }
+
+ def v16i8_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,0,1,?,?,?},
+ V128, V128, vecshiftL8,
+ asm, ".16b", ".16b",
+ [(set (v16i8 V128:$dst),
+ (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn),
+ (i32 vecshiftL8:$imm)))]> {
+ bits<3> imm;
+ let Inst{18-16} = imm;
+ }
+
+ def v4i16_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,1,?,?,?,?},
+ V64, V64, vecshiftL16,
+ asm, ".4h", ".4h",
+ [(set (v4i16 V64:$dst),
+ (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn),
+ (i32 vecshiftL16:$imm)))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+
+ def v8i16_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,1,?,?,?,?},
+ V128, V128, vecshiftL16,
+ asm, ".8h", ".8h",
+ [(set (v8i16 V128:$dst),
+ (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn),
+ (i32 vecshiftL16:$imm)))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+
+ def v2i32_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,1,?,?,?,?,?},
+ V64, V64, vecshiftL32,
+ asm, ".2s", ".2s",
+ [(set (v2i32 V64:$dst),
+ (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn),
+ (i32 vecshiftL32:$imm)))]> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ }
+
+ def v4i32_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,1,?,?,?,?,?},
+ V128, V128, vecshiftL32,
+ asm, ".4s", ".4s",
+ [(set (v4i32 V128:$dst),
+ (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn),
+ (i32 vecshiftL32:$imm)))]> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ }
+
+ def v2i64_shift : BaseSIMDVectorShiftTied<1, U, opc, {1,?,?,?,?,?,?},
+ V128, V128, vecshiftL64,
+ asm, ".2d", ".2d",
+ [(set (v2i64 V128:$dst),
+ (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn),
+ (i32 vecshiftL64:$imm)))]> {
+ bits<6> imm;
+ let Inst{21-16} = imm;
+ }
+}
+
+multiclass SIMDVectorLShiftLongBHSD<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?},
+ V128, V64, vecshiftL8, asm, ".8h", ".8b",
+ [(set (v8i16 V128:$Rd), (OpNode (v8i8 V64:$Rn), vecshiftL8:$imm))]> {
+ bits<3> imm;
+ let Inst{18-16} = imm;
+ }
+
+ def v16i8_shift : BaseSIMDVectorShift<1, U, opc, {0,0,0,1,?,?,?},
+ V128, V128, vecshiftL8,
+ asm#"2", ".8h", ".16b",
+ [(set (v8i16 V128:$Rd),
+ (OpNode (extract_high_v16i8 V128:$Rn), vecshiftL8:$imm))]> {
+ bits<3> imm;
+ let Inst{18-16} = imm;
+ }
+
+ def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
+ V128, V64, vecshiftL16, asm, ".4s", ".4h",
+ [(set (v4i32 V128:$Rd), (OpNode (v4i16 V64:$Rn), vecshiftL16:$imm))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+
+ def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?},
+ V128, V128, vecshiftL16,
+ asm#"2", ".4s", ".8h",
+ [(set (v4i32 V128:$Rd),
+ (OpNode (extract_high_v8i16 V128:$Rn), vecshiftL16:$imm))]> {
+
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+
+ def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
+ V128, V64, vecshiftL32, asm, ".2d", ".2s",
+ [(set (v2i64 V128:$Rd), (OpNode (v2i32 V64:$Rn), vecshiftL32:$imm))]> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ }
+
+ def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?},
+ V128, V128, vecshiftL32,
+ asm#"2", ".2d", ".4s",
+ [(set (v2i64 V128:$Rd),
+ (OpNode (extract_high_v4i32 V128:$Rn), vecshiftL32:$imm))]> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ }
+}
+
+
+//---
+// Vector load/store
+//---
+// SIMD ldX/stX no-index memory references don't allow the optional
+// ", #0" constant and handle post-indexing explicitly, so we use
+// a more specialized parse method for them. Otherwise, it's the same as
+// the general GPR64sp handling.
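+// E.g. "ld1 { v0.16b, v1.16b }, [x0]" is accepted, but
+// "ld1 { v0.16b, v1.16b }, [x0, #0]" is not; post-indexed addressing is
+// handled by the separate _POST instructions defined below.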
+
+class BaseSIMDLdSt<bit Q, bit L, bits<4> opcode, bits<2> size,
+ string asm, dag oops, dag iops, list<dag> pattern>
+ : I<oops, iops, asm, "\t$Vt, [$Rn]", "", pattern> {
+ bits<5> Vt;
+ bits<5> Rn;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29-23} = 0b0011000;
+ let Inst{22} = L;
+ let Inst{21-16} = 0b000000;
+ let Inst{15-12} = opcode;
+ let Inst{11-10} = size;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Vt;
+}
+
+class BaseSIMDLdStPost<bit Q, bit L, bits<4> opcode, bits<2> size,
+ string asm, dag oops, dag iops>
+ : I<oops, iops, asm, "\t$Vt, [$Rn], $Xm", "$Rn = $wback", []> {
+ bits<5> Vt;
+ bits<5> Rn;
+ bits<5> Xm;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29-23} = 0b0011001;
+ let Inst{22} = L;
+ let Inst{21} = 0;
+ let Inst{20-16} = Xm;
+ let Inst{15-12} = opcode;
+ let Inst{11-10} = size;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Vt;
+}
+
+// The immediate form of AdvSIMD post-indexed addressing is encoded with
+// register post-index addressing from the zero register.
+multiclass SIMDLdStAliases<string BaseName, string asm, string layout, string Count,
+ int Offset, int Size> {
+ // E.g. "ld1 { v0.8b, v1.8b }, [x1], #16"
+ // "ld1\t$Vt, [$Rn], #16"
+ // may get mapped to
+ // (LD1Twov8b_POST VecListTwo8b:$Vt, GPR64sp:$Rn, XZR)
+ def : InstAlias<asm # "\t$Vt, [$Rn], #" # Offset,
+ (!cast<Instruction>(BaseName # Count # "v" # layout # "_POST")
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("VecList" # Count # layout):$Vt,
+ XZR), 1>;
+
+ // E.g. "ld1.8b { v0, v1 }, [x1], #16"
+ // "ld1.8b\t$Vt, [$Rn], #16"
+ // may get mapped to
+ // (LD1Twov8b_POST VecListTwo64:$Vt, GPR64sp:$Rn, XZR)
+ def : InstAlias<asm # "." # layout # "\t$Vt, [$Rn], #" # Offset,
+ (!cast<Instruction>(BaseName # Count # "v" # layout # "_POST")
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("VecList" # Count # Size):$Vt,
+ XZR), 0>;
+
+ // E.g. "ld1.8b { v0, v1 }, [x1]"
+ // "ld1\t$Vt, [$Rn]"
+ // may get mapped to
+ // (LD1Twov8b VecListTwo64:$Vt, GPR64sp:$Rn)
+ def : InstAlias<asm # "." # layout # "\t$Vt, [$Rn]",
+ (!cast<Instruction>(BaseName # Count # "v" # layout)
+ !cast<RegisterOperand>("VecList" # Count # Size):$Vt,
+ GPR64sp:$Rn), 0>;
+
+ // E.g. "ld1.8b { v0, v1 }, [x1], x2"
+ // "ld1\t$Vt, [$Rn], $Xm"
+ // may get mapped to
+ // (LD1Twov8b_POST VecListTwo64:$Vt, GPR64sp:$Rn, GPR64pi8:$Xm)
+ def : InstAlias<asm # "." # layout # "\t$Vt, [$Rn], $Xm",
+ (!cast<Instruction>(BaseName # Count # "v" # layout # "_POST")
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("VecList" # Count # Size):$Vt,
+ !cast<RegisterOperand>("GPR64pi" # Offset):$Xm), 0>;
+}
+
+multiclass BaseSIMDLdN<string BaseName, string Count, string asm, string veclist,
+ int Offset128, int Offset64, bits<4> opcode> {
+ let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
+ def v16b: BaseSIMDLdSt<1, 1, opcode, 0b00, asm,
+ (outs !cast<RegisterOperand>(veclist # "16b"):$Vt),
+ (ins GPR64sp:$Rn), []>;
+ def v8h : BaseSIMDLdSt<1, 1, opcode, 0b01, asm,
+ (outs !cast<RegisterOperand>(veclist # "8h"):$Vt),
+ (ins GPR64sp:$Rn), []>;
+ def v4s : BaseSIMDLdSt<1, 1, opcode, 0b10, asm,
+ (outs !cast<RegisterOperand>(veclist # "4s"):$Vt),
+ (ins GPR64sp:$Rn), []>;
+ def v2d : BaseSIMDLdSt<1, 1, opcode, 0b11, asm,
+ (outs !cast<RegisterOperand>(veclist # "2d"):$Vt),
+ (ins GPR64sp:$Rn), []>;
+ def v8b : BaseSIMDLdSt<0, 1, opcode, 0b00, asm,
+ (outs !cast<RegisterOperand>(veclist # "8b"):$Vt),
+ (ins GPR64sp:$Rn), []>;
+ def v4h : BaseSIMDLdSt<0, 1, opcode, 0b01, asm,
+ (outs !cast<RegisterOperand>(veclist # "4h"):$Vt),
+ (ins GPR64sp:$Rn), []>;
+ def v2s : BaseSIMDLdSt<0, 1, opcode, 0b10, asm,
+ (outs !cast<RegisterOperand>(veclist # "2s"):$Vt),
+ (ins GPR64sp:$Rn), []>;
+
+
+ def v16b_POST: BaseSIMDLdStPost<1, 1, opcode, 0b00, asm,
+ (outs GPR64sp:$wback,
+ !cast<RegisterOperand>(veclist # "16b"):$Vt),
+ (ins GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>;
+ def v8h_POST : BaseSIMDLdStPost<1, 1, opcode, 0b01, asm,
+ (outs GPR64sp:$wback,
+ !cast<RegisterOperand>(veclist # "8h"):$Vt),
+ (ins GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>;
+ def v4s_POST : BaseSIMDLdStPost<1, 1, opcode, 0b10, asm,
+ (outs GPR64sp:$wback,
+ !cast<RegisterOperand>(veclist # "4s"):$Vt),
+ (ins GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>;
+ def v2d_POST : BaseSIMDLdStPost<1, 1, opcode, 0b11, asm,
+ (outs GPR64sp:$wback,
+ !cast<RegisterOperand>(veclist # "2d"):$Vt),
+ (ins GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>;
+ def v8b_POST : BaseSIMDLdStPost<0, 1, opcode, 0b00, asm,
+ (outs GPR64sp:$wback,
+ !cast<RegisterOperand>(veclist # "8b"):$Vt),
+ (ins GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>;
+ def v4h_POST : BaseSIMDLdStPost<0, 1, opcode, 0b01, asm,
+ (outs GPR64sp:$wback,
+ !cast<RegisterOperand>(veclist # "4h"):$Vt),
+ (ins GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>;
+ def v2s_POST : BaseSIMDLdStPost<0, 1, opcode, 0b10, asm,
+ (outs GPR64sp:$wback,
+ !cast<RegisterOperand>(veclist # "2s"):$Vt),
+ (ins GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>;
+ }
+
+ defm : SIMDLdStAliases<BaseName, asm, "16b", Count, Offset128, 128>;
+ defm : SIMDLdStAliases<BaseName, asm, "8h", Count, Offset128, 128>;
+ defm : SIMDLdStAliases<BaseName, asm, "4s", Count, Offset128, 128>;
+ defm : SIMDLdStAliases<BaseName, asm, "2d", Count, Offset128, 128>;
+ defm : SIMDLdStAliases<BaseName, asm, "8b", Count, Offset64, 64>;
+ defm : SIMDLdStAliases<BaseName, asm, "4h", Count, Offset64, 64>;
+ defm : SIMDLdStAliases<BaseName, asm, "2s", Count, Offset64, 64>;
+}
+
+// Only ld1/st1 has a v1d version.
+multiclass BaseSIMDStN<string BaseName, string Count, string asm, string veclist,
+ int Offset128, int Offset64, bits<4> opcode> {
+ let hasSideEffects = 0, mayStore = 1, mayLoad = 0 in {
+ def v16b : BaseSIMDLdSt<1, 0, opcode, 0b00, asm, (outs),
+ (ins !cast<RegisterOperand>(veclist # "16b"):$Vt,
+ GPR64sp:$Rn), []>;
+ def v8h : BaseSIMDLdSt<1, 0, opcode, 0b01, asm, (outs),
+ (ins !cast<RegisterOperand>(veclist # "8h"):$Vt,
+ GPR64sp:$Rn), []>;
+ def v4s : BaseSIMDLdSt<1, 0, opcode, 0b10, asm, (outs),
+ (ins !cast<RegisterOperand>(veclist # "4s"):$Vt,
+ GPR64sp:$Rn), []>;
+ def v2d : BaseSIMDLdSt<1, 0, opcode, 0b11, asm, (outs),
+ (ins !cast<RegisterOperand>(veclist # "2d"):$Vt,
+ GPR64sp:$Rn), []>;
+ def v8b : BaseSIMDLdSt<0, 0, opcode, 0b00, asm, (outs),
+ (ins !cast<RegisterOperand>(veclist # "8b"):$Vt,
+ GPR64sp:$Rn), []>;
+ def v4h : BaseSIMDLdSt<0, 0, opcode, 0b01, asm, (outs),
+ (ins !cast<RegisterOperand>(veclist # "4h"):$Vt,
+ GPR64sp:$Rn), []>;
+ def v2s : BaseSIMDLdSt<0, 0, opcode, 0b10, asm, (outs),
+ (ins !cast<RegisterOperand>(veclist # "2s"):$Vt,
+ GPR64sp:$Rn), []>;
+
+ def v16b_POST : BaseSIMDLdStPost<1, 0, opcode, 0b00, asm,
+ (outs GPR64sp:$wback),
+ (ins !cast<RegisterOperand>(veclist # "16b"):$Vt,
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>;
+ def v8h_POST : BaseSIMDLdStPost<1, 0, opcode, 0b01, asm,
+ (outs GPR64sp:$wback),
+ (ins !cast<RegisterOperand>(veclist # "8h"):$Vt,
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>;
+ def v4s_POST : BaseSIMDLdStPost<1, 0, opcode, 0b10, asm,
+ (outs GPR64sp:$wback),
+ (ins !cast<RegisterOperand>(veclist # "4s"):$Vt,
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>;
+ def v2d_POST : BaseSIMDLdStPost<1, 0, opcode, 0b11, asm,
+ (outs GPR64sp:$wback),
+ (ins !cast<RegisterOperand>(veclist # "2d"):$Vt,
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>;
+ def v8b_POST : BaseSIMDLdStPost<0, 0, opcode, 0b00, asm,
+ (outs GPR64sp:$wback),
+ (ins !cast<RegisterOperand>(veclist # "8b"):$Vt,
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>;
+ def v4h_POST : BaseSIMDLdStPost<0, 0, opcode, 0b01, asm,
+ (outs GPR64sp:$wback),
+ (ins !cast<RegisterOperand>(veclist # "4h"):$Vt,
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>;
+ def v2s_POST : BaseSIMDLdStPost<0, 0, opcode, 0b10, asm,
+ (outs GPR64sp:$wback),
+ (ins !cast<RegisterOperand>(veclist # "2s"):$Vt,
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>;
+ }
+
+ defm : SIMDLdStAliases<BaseName, asm, "16b", Count, Offset128, 128>;
+ defm : SIMDLdStAliases<BaseName, asm, "8h", Count, Offset128, 128>;
+ defm : SIMDLdStAliases<BaseName, asm, "4s", Count, Offset128, 128>;
+ defm : SIMDLdStAliases<BaseName, asm, "2d", Count, Offset128, 128>;
+ defm : SIMDLdStAliases<BaseName, asm, "8b", Count, Offset64, 64>;
+ defm : SIMDLdStAliases<BaseName, asm, "4h", Count, Offset64, 64>;
+ defm : SIMDLdStAliases<BaseName, asm, "2s", Count, Offset64, 64>;
+}
+
+multiclass BaseSIMDLd1<string BaseName, string Count, string asm, string veclist,
+ int Offset128, int Offset64, bits<4> opcode>
+ : BaseSIMDLdN<BaseName, Count, asm, veclist, Offset128, Offset64, opcode> {
+
+ // LD1 instructions have extra "1d" variants.
+ let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
+ def v1d : BaseSIMDLdSt<0, 1, opcode, 0b11, asm,
+ (outs !cast<RegisterOperand>(veclist # "1d"):$Vt),
+ (ins GPR64sp:$Rn), []>;
+
+ def v1d_POST : BaseSIMDLdStPost<0, 1, opcode, 0b11, asm,
+ (outs GPR64sp:$wback,
+ !cast<RegisterOperand>(veclist # "1d"):$Vt),
+ (ins GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>;
+ }
+
+ defm : SIMDLdStAliases<BaseName, asm, "1d", Count, Offset64, 64>;
+}
+
+multiclass BaseSIMDSt1<string BaseName, string Count, string asm, string veclist,
+ int Offset128, int Offset64, bits<4> opcode>
+ : BaseSIMDStN<BaseName, Count, asm, veclist, Offset128, Offset64, opcode> {
+
+ // ST1 instructions have extra "1d" variants.
+ let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
+ def v1d : BaseSIMDLdSt<0, 0, opcode, 0b11, asm, (outs),
+ (ins !cast<RegisterOperand>(veclist # "1d"):$Vt,
+ GPR64sp:$Rn), []>;
+
+ def v1d_POST : BaseSIMDLdStPost<0, 0, opcode, 0b11, asm,
+ (outs GPR64sp:$wback),
+ (ins !cast<RegisterOperand>(veclist # "1d"):$Vt,
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>;
+ }
+
+ defm : SIMDLdStAliases<BaseName, asm, "1d", Count, Offset64, 64>;
+}
+
+multiclass SIMDLd1Multiple<string asm> {
+ defm One : BaseSIMDLd1<NAME, "One", asm, "VecListOne", 16, 8, 0b0111>;
+ defm Two : BaseSIMDLd1<NAME, "Two", asm, "VecListTwo", 32, 16, 0b1010>;
+ defm Three : BaseSIMDLd1<NAME, "Three", asm, "VecListThree", 48, 24, 0b0110>;
+ defm Four : BaseSIMDLd1<NAME, "Four", asm, "VecListFour", 64, 32, 0b0010>;
+}
+
+multiclass SIMDSt1Multiple<string asm> {
+ defm One : BaseSIMDSt1<NAME, "One", asm, "VecListOne", 16, 8, 0b0111>;
+ defm Two : BaseSIMDSt1<NAME, "Two", asm, "VecListTwo", 32, 16, 0b1010>;
+ defm Three : BaseSIMDSt1<NAME, "Three", asm, "VecListThree", 48, 24, 0b0110>;
+ defm Four : BaseSIMDSt1<NAME, "Four", asm, "VecListFour", 64, 32, 0b0010>;
+}
+
+multiclass SIMDLd2Multiple<string asm> {
+ defm Two : BaseSIMDLdN<NAME, "Two", asm, "VecListTwo", 32, 16, 0b1000>;
+}
+
+multiclass SIMDSt2Multiple<string asm> {
+ defm Two : BaseSIMDStN<NAME, "Two", asm, "VecListTwo", 32, 16, 0b1000>;
+}
+
+multiclass SIMDLd3Multiple<string asm> {
+ defm Three : BaseSIMDLdN<NAME, "Three", asm, "VecListThree", 48, 24, 0b0100>;
+}
+
+multiclass SIMDSt3Multiple<string asm> {
+ defm Three : BaseSIMDStN<NAME, "Three", asm, "VecListThree", 48, 24, 0b0100>;
+}
+
+multiclass SIMDLd4Multiple<string asm> {
+ defm Four : BaseSIMDLdN<NAME, "Four", asm, "VecListFour", 64, 32, 0b0000>;
+}
+
+multiclass SIMDSt4Multiple<string asm> {
+ defm Four : BaseSIMDStN<NAME, "Four", asm, "VecListFour", 64, 32, 0b0000>;
+}
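+// Illustrative expansion (hedged; the real instantiations live in
+// AArch64InstrInfo.td, e.g. something like "defm LD1 : SIMDLd1Multiple<"ld1">;"):
+// each defm above produces one instruction per arrangement (LD1Onev16b,
+// LD1Onev8h, ...), a matching post-indexed _POST variant, and the
+// InstAliases from SIMDLdStAliases for the immediate and "." layout syntaxes.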
+
+//---
+// AdvSIMD Load/store single-element
+//---
+
+class BaseSIMDLdStSingle<bit L, bit R, bits<3> opcode,
+ string asm, string operands, string cst,
+ dag oops, dag iops, list<dag> pattern>
+ : I<oops, iops, asm, operands, cst, pattern> {
+ bits<5> Vt;
+ bits<5> Rn;
+ let Inst{31} = 0;
+ let Inst{29-24} = 0b001101;
+ let Inst{22} = L;
+ let Inst{21} = R;
+ let Inst{15-13} = opcode;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Vt;
+}
+
+class BaseSIMDLdStSingleTied<bit L, bit R, bits<3> opcode,
+ string asm, string operands, string cst,
+ dag oops, dag iops, list<dag> pattern>
+ : I<oops, iops, asm, operands, "$Vt = $dst," # cst, pattern> {
+ bits<5> Vt;
+ bits<5> Rn;
+ let Inst{31} = 0;
+ let Inst{29-24} = 0b001101;
+ let Inst{22} = L;
+ let Inst{21} = R;
+ let Inst{15-13} = opcode;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Vt;
+}
+
+
+let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDLdR<bit Q, bit R, bits<3> opcode, bit S, bits<2> size, string asm,
+ DAGOperand listtype>
+ : BaseSIMDLdStSingle<1, R, opcode, asm, "\t$Vt, [$Rn]", "",
+ (outs listtype:$Vt), (ins GPR64sp:$Rn),
+ []> {
+ let Inst{30} = Q;
+ let Inst{23} = 0;
+ let Inst{20-16} = 0b00000;
+ let Inst{12} = S;
+ let Inst{11-10} = size;
+}
+let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDLdRPost<bit Q, bit R, bits<3> opcode, bit S, bits<2> size,
+ string asm, DAGOperand listtype, DAGOperand GPR64pi>
+ : BaseSIMDLdStSingle<1, R, opcode, asm, "\t$Vt, [$Rn], $Xm",
+ "$Rn = $wback",
+ (outs GPR64sp:$wback, listtype:$Vt),
+ (ins GPR64sp:$Rn, GPR64pi:$Xm), []> {
+ bits<5> Xm;
+ let Inst{30} = Q;
+ let Inst{23} = 1;
+ let Inst{20-16} = Xm;
+ let Inst{12} = S;
+ let Inst{11-10} = size;
+}
+
+multiclass SIMDLdrAliases<string BaseName, string asm, string layout, string Count,
+ int Offset, int Size> {
+ // E.g. "ld1r { v0.8b }, [x1], #1"
+  //      "ld1r\t$Vt, [$Rn], #1"
+ // may get mapped to
+ // (LD1Rv8b_POST VecListOne8b:$Vt, GPR64sp:$Rn, XZR)
+ def : InstAlias<asm # "\t$Vt, [$Rn], #" # Offset,
+ (!cast<Instruction>(BaseName # "v" # layout # "_POST")
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("VecList" # Count # layout):$Vt,
+ XZR), 1>;
+
+ // E.g. "ld1r.8b { v0 }, [x1], #1"
+ // "ld1r.8b\t$Vt, [$Rn], #1"
+ // may get mapped to
+ // (LD1Rv8b_POST VecListOne64:$Vt, GPR64sp:$Rn, XZR)
+ def : InstAlias<asm # "." # layout # "\t$Vt, [$Rn], #" # Offset,
+ (!cast<Instruction>(BaseName # "v" # layout # "_POST")
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("VecList" # Count # Size):$Vt,
+ XZR), 0>;
+
+ // E.g. "ld1r.8b { v0 }, [x1]"
+ // "ld1r.8b\t$Vt, [$Rn]"
+ // may get mapped to
+ // (LD1Rv8b VecListOne64:$Vt, GPR64sp:$Rn)
+ def : InstAlias<asm # "." # layout # "\t$Vt, [$Rn]",
+ (!cast<Instruction>(BaseName # "v" # layout)
+ !cast<RegisterOperand>("VecList" # Count # Size):$Vt,
+ GPR64sp:$Rn), 0>;
+
+ // E.g. "ld1r.8b { v0 }, [x1], x2"
+ // "ld1r.8b\t$Vt, [$Rn], $Xm"
+ // may get mapped to
+ // (LD1Rv8b_POST VecListOne64:$Vt, GPR64sp:$Rn, GPR64pi1:$Xm)
+ def : InstAlias<asm # "." # layout # "\t$Vt, [$Rn], $Xm",
+ (!cast<Instruction>(BaseName # "v" # layout # "_POST")
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("VecList" # Count # Size):$Vt,
+ !cast<RegisterOperand>("GPR64pi" # Offset):$Xm), 0>;
+}
+
+multiclass SIMDLdR<bit R, bits<3> opcode, bit S, string asm, string Count,
+ int Offset1, int Offset2, int Offset4, int Offset8> {
+ def v8b : BaseSIMDLdR<0, R, opcode, S, 0b00, asm,
+ !cast<DAGOperand>("VecList" # Count # "8b")>;
+ def v16b: BaseSIMDLdR<1, R, opcode, S, 0b00, asm,
+ !cast<DAGOperand>("VecList" # Count #"16b")>;
+ def v4h : BaseSIMDLdR<0, R, opcode, S, 0b01, asm,
+ !cast<DAGOperand>("VecList" # Count #"4h")>;
+ def v8h : BaseSIMDLdR<1, R, opcode, S, 0b01, asm,
+ !cast<DAGOperand>("VecList" # Count #"8h")>;
+ def v2s : BaseSIMDLdR<0, R, opcode, S, 0b10, asm,
+ !cast<DAGOperand>("VecList" # Count #"2s")>;
+ def v4s : BaseSIMDLdR<1, R, opcode, S, 0b10, asm,
+ !cast<DAGOperand>("VecList" # Count #"4s")>;
+ def v1d : BaseSIMDLdR<0, R, opcode, S, 0b11, asm,
+ !cast<DAGOperand>("VecList" # Count #"1d")>;
+ def v2d : BaseSIMDLdR<1, R, opcode, S, 0b11, asm,
+ !cast<DAGOperand>("VecList" # Count #"2d")>;
+
+ def v8b_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b00, asm,
+ !cast<DAGOperand>("VecList" # Count # "8b"),
+ !cast<DAGOperand>("GPR64pi" # Offset1)>;
+ def v16b_POST: BaseSIMDLdRPost<1, R, opcode, S, 0b00, asm,
+ !cast<DAGOperand>("VecList" # Count # "16b"),
+ !cast<DAGOperand>("GPR64pi" # Offset1)>;
+ def v4h_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b01, asm,
+ !cast<DAGOperand>("VecList" # Count # "4h"),
+ !cast<DAGOperand>("GPR64pi" # Offset2)>;
+ def v8h_POST : BaseSIMDLdRPost<1, R, opcode, S, 0b01, asm,
+ !cast<DAGOperand>("VecList" # Count # "8h"),
+ !cast<DAGOperand>("GPR64pi" # Offset2)>;
+ def v2s_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b10, asm,
+ !cast<DAGOperand>("VecList" # Count # "2s"),
+ !cast<DAGOperand>("GPR64pi" # Offset4)>;
+ def v4s_POST : BaseSIMDLdRPost<1, R, opcode, S, 0b10, asm,
+ !cast<DAGOperand>("VecList" # Count # "4s"),
+ !cast<DAGOperand>("GPR64pi" # Offset4)>;
+ def v1d_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b11, asm,
+ !cast<DAGOperand>("VecList" # Count # "1d"),
+ !cast<DAGOperand>("GPR64pi" # Offset8)>;
+ def v2d_POST : BaseSIMDLdRPost<1, R, opcode, S, 0b11, asm,
+ !cast<DAGOperand>("VecList" # Count # "2d"),
+ !cast<DAGOperand>("GPR64pi" # Offset8)>;
+
+ defm : SIMDLdrAliases<NAME, asm, "8b", Count, Offset1, 64>;
+ defm : SIMDLdrAliases<NAME, asm, "16b", Count, Offset1, 128>;
+ defm : SIMDLdrAliases<NAME, asm, "4h", Count, Offset2, 64>;
+ defm : SIMDLdrAliases<NAME, asm, "8h", Count, Offset2, 128>;
+ defm : SIMDLdrAliases<NAME, asm, "2s", Count, Offset4, 64>;
+ defm : SIMDLdrAliases<NAME, asm, "4s", Count, Offset4, 128>;
+ defm : SIMDLdrAliases<NAME, asm, "1d", Count, Offset8, 64>;
+ defm : SIMDLdrAliases<NAME, asm, "2d", Count, Offset8, 128>;
+}
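+// Illustrative use (hedged; the actual defs are in AArch64InstrInfo.td): a
+// load-and-replicate family is typically created with something like
+//   defm LD1R : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>;
+// where the four trailing integers are the post-index immediates for the
+// 1-, 2-, 4- and 8-byte element sizes.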
+
+class SIMDLdStSingleB<bit L, bit R, bits<3> opcode, string asm,
+ dag oops, dag iops, list<dag> pattern>
+ : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, [$Rn]", "", oops, iops,
+ pattern> {
+ // idx encoded in Q:S:size fields.
+ bits<4> idx;
+ let Inst{30} = idx{3};
+ let Inst{23} = 0;
+ let Inst{20-16} = 0b00000;
+ let Inst{12} = idx{2};
+ let Inst{11-10} = idx{1-0};
+}
+class SIMDLdStSingleBTied<bit L, bit R, bits<3> opcode, string asm,
+ dag oops, dag iops, list<dag> pattern>
+ : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, [$Rn]", "",
+ oops, iops, pattern> {
+ // idx encoded in Q:S:size fields.
+ bits<4> idx;
+ let Inst{30} = idx{3};
+ let Inst{23} = 0;
+ let Inst{20-16} = 0b00000;
+ let Inst{12} = idx{2};
+ let Inst{11-10} = idx{1-0};
+}
+class SIMDLdStSingleBPost<bit L, bit R, bits<3> opcode, string asm,
+ dag oops, dag iops>
+ : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, [$Rn], $Xm",
+ "$Rn = $wback", oops, iops, []> {
+ // idx encoded in Q:S:size fields.
+ bits<4> idx;
+ bits<5> Xm;
+ let Inst{30} = idx{3};
+ let Inst{23} = 1;
+ let Inst{20-16} = Xm;
+ let Inst{12} = idx{2};
+ let Inst{11-10} = idx{1-0};
+}
+class SIMDLdStSingleBTiedPost<bit L, bit R, bits<3> opcode, string asm,
+ dag oops, dag iops>
+ : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, [$Rn], $Xm",
+ "$Rn = $wback", oops, iops, []> {
+ // idx encoded in Q:S:size fields.
+ bits<4> idx;
+ bits<5> Xm;
+ let Inst{30} = idx{3};
+ let Inst{23} = 1;
+ let Inst{20-16} = Xm;
+ let Inst{12} = idx{2};
+ let Inst{11-10} = idx{1-0};
+}
+
+class SIMDLdStSingleH<bit L, bit R, bits<3> opcode, bit size, string asm,
+ dag oops, dag iops, list<dag> pattern>
+ : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, [$Rn]", "", oops, iops,
+ pattern> {
+ // idx encoded in Q:S:size<1> fields.
+ bits<3> idx;
+ let Inst{30} = idx{2};
+ let Inst{23} = 0;
+ let Inst{20-16} = 0b00000;
+ let Inst{12} = idx{1};
+ let Inst{11} = idx{0};
+ let Inst{10} = size;
+}
+class SIMDLdStSingleHTied<bit L, bit R, bits<3> opcode, bit size, string asm,
+ dag oops, dag iops, list<dag> pattern>
+ : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, [$Rn]", "",
+ oops, iops, pattern> {
+ // idx encoded in Q:S:size<1> fields.
+ bits<3> idx;
+ let Inst{30} = idx{2};
+ let Inst{23} = 0;
+ let Inst{20-16} = 0b00000;
+ let Inst{12} = idx{1};
+ let Inst{11} = idx{0};
+ let Inst{10} = size;
+}
+
+class SIMDLdStSingleHPost<bit L, bit R, bits<3> opcode, bit size, string asm,
+ dag oops, dag iops>
+ : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, [$Rn], $Xm",
+ "$Rn = $wback", oops, iops, []> {
+ // idx encoded in Q:S:size<1> fields.
+ bits<3> idx;
+ bits<5> Xm;
+ let Inst{30} = idx{2};
+ let Inst{23} = 1;
+ let Inst{20-16} = Xm;
+ let Inst{12} = idx{1};
+ let Inst{11} = idx{0};
+ let Inst{10} = size;
+}
+class SIMDLdStSingleHTiedPost<bit L, bit R, bits<3> opcode, bit size, string asm,
+ dag oops, dag iops>
+ : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, [$Rn], $Xm",
+ "$Rn = $wback", oops, iops, []> {
+ // idx encoded in Q:S:size<1> fields.
+ bits<3> idx;
+ bits<5> Xm;
+ let Inst{30} = idx{2};
+ let Inst{23} = 1;
+ let Inst{20-16} = Xm;
+ let Inst{12} = idx{1};
+ let Inst{11} = idx{0};
+ let Inst{10} = size;
+}
+class SIMDLdStSingleS<bit L, bit R, bits<3> opcode, bits<2> size, string asm,
+ dag oops, dag iops, list<dag> pattern>
+ : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, [$Rn]", "", oops, iops,
+ pattern> {
+ // idx encoded in Q:S fields.
+ bits<2> idx;
+ let Inst{30} = idx{1};
+ let Inst{23} = 0;
+ let Inst{20-16} = 0b00000;
+ let Inst{12} = idx{0};
+ let Inst{11-10} = size;
+}
+class SIMDLdStSingleSTied<bit L, bit R, bits<3> opcode, bits<2> size, string asm,
+ dag oops, dag iops, list<dag> pattern>
+ : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, [$Rn]", "",
+ oops, iops, pattern> {
+ // idx encoded in Q:S fields.
+ bits<2> idx;
+ let Inst{30} = idx{1};
+ let Inst{23} = 0;
+ let Inst{20-16} = 0b00000;
+ let Inst{12} = idx{0};
+ let Inst{11-10} = size;
+}
+class SIMDLdStSingleSPost<bit L, bit R, bits<3> opcode, bits<2> size,
+ string asm, dag oops, dag iops>
+ : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, [$Rn], $Xm",
+ "$Rn = $wback", oops, iops, []> {
+ // idx encoded in Q:S fields.
+ bits<2> idx;
+ bits<5> Xm;
+ let Inst{30} = idx{1};
+ let Inst{23} = 1;
+ let Inst{20-16} = Xm;
+ let Inst{12} = idx{0};
+ let Inst{11-10} = size;
+}
+class SIMDLdStSingleSTiedPost<bit L, bit R, bits<3> opcode, bits<2> size,
+ string asm, dag oops, dag iops>
+ : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, [$Rn], $Xm",
+ "$Rn = $wback", oops, iops, []> {
+ // idx encoded in Q:S fields.
+ bits<2> idx;
+ bits<5> Xm;
+ let Inst{30} = idx{1};
+ let Inst{23} = 1;
+ let Inst{20-16} = Xm;
+ let Inst{12} = idx{0};
+ let Inst{11-10} = size;
+}
+class SIMDLdStSingleD<bit L, bit R, bits<3> opcode, bits<2> size, string asm,
+ dag oops, dag iops, list<dag> pattern>
+ : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, [$Rn]", "", oops, iops,
+ pattern> {
+ // idx encoded in Q field.
+ bits<1> idx;
+ let Inst{30} = idx;
+ let Inst{23} = 0;
+ let Inst{20-16} = 0b00000;
+ let Inst{12} = 0;
+ let Inst{11-10} = size;
+}
+class SIMDLdStSingleDTied<bit L, bit R, bits<3> opcode, bits<2> size, string asm,
+ dag oops, dag iops, list<dag> pattern>
+ : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, [$Rn]", "",
+ oops, iops, pattern> {
+ // idx encoded in Q field.
+ bits<1> idx;
+ let Inst{30} = idx;
+ let Inst{23} = 0;
+ let Inst{20-16} = 0b00000;
+ let Inst{12} = 0;
+ let Inst{11-10} = size;
+}
+class SIMDLdStSingleDPost<bit L, bit R, bits<3> opcode, bits<2> size,
+ string asm, dag oops, dag iops>
+ : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, [$Rn], $Xm",
+ "$Rn = $wback", oops, iops, []> {
+ // idx encoded in Q field.
+ bits<1> idx;
+ bits<5> Xm;
+ let Inst{30} = idx;
+ let Inst{23} = 1;
+ let Inst{20-16} = Xm;
+ let Inst{12} = 0;
+ let Inst{11-10} = size;
+}
+class SIMDLdStSingleDTiedPost<bit L, bit R, bits<3> opcode, bits<2> size,
+ string asm, dag oops, dag iops>
+ : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, [$Rn], $Xm",
+ "$Rn = $wback", oops, iops, []> {
+ // idx encoded in Q field.
+ bits<1> idx;
+ bits<5> Xm;
+ let Inst{30} = idx;
+ let Inst{23} = 1;
+ let Inst{20-16} = Xm;
+ let Inst{12} = 0;
+ let Inst{11-10} = size;
+}
+
+let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
+multiclass SIMDLdSingleBTied<bit R, bits<3> opcode, string asm,
+ RegisterOperand listtype,
+ RegisterOperand GPR64pi> {
+ def i8 : SIMDLdStSingleBTied<1, R, opcode, asm,
+ (outs listtype:$dst),
+ (ins listtype:$Vt, VectorIndexB:$idx,
+ GPR64sp:$Rn), []>;
+
+ def i8_POST : SIMDLdStSingleBTiedPost<1, R, opcode, asm,
+ (outs GPR64sp:$wback, listtype:$dst),
+ (ins listtype:$Vt, VectorIndexB:$idx,
+ GPR64sp:$Rn, GPR64pi:$Xm)>;
+}
+let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
+multiclass SIMDLdSingleHTied<bit R, bits<3> opcode, bit size, string asm,
+ RegisterOperand listtype,
+ RegisterOperand GPR64pi> {
+ def i16 : SIMDLdStSingleHTied<1, R, opcode, size, asm,
+ (outs listtype:$dst),
+ (ins listtype:$Vt, VectorIndexH:$idx,
+ GPR64sp:$Rn), []>;
+
+ def i16_POST : SIMDLdStSingleHTiedPost<1, R, opcode, size, asm,
+ (outs GPR64sp:$wback, listtype:$dst),
+ (ins listtype:$Vt, VectorIndexH:$idx,
+ GPR64sp:$Rn, GPR64pi:$Xm)>;
+}
+let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
+multiclass SIMDLdSingleSTied<bit R, bits<3> opcode, bits<2> size,string asm,
+ RegisterOperand listtype,
+ RegisterOperand GPR64pi> {
+ def i32 : SIMDLdStSingleSTied<1, R, opcode, size, asm,
+ (outs listtype:$dst),
+ (ins listtype:$Vt, VectorIndexS:$idx,
+ GPR64sp:$Rn), []>;
+
+ def i32_POST : SIMDLdStSingleSTiedPost<1, R, opcode, size, asm,
+ (outs GPR64sp:$wback, listtype:$dst),
+ (ins listtype:$Vt, VectorIndexS:$idx,
+ GPR64sp:$Rn, GPR64pi:$Xm)>;
+}
+let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
+multiclass SIMDLdSingleDTied<bit R, bits<3> opcode, bits<2> size, string asm,
+ RegisterOperand listtype, RegisterOperand GPR64pi> {
+ def i64 : SIMDLdStSingleDTied<1, R, opcode, size, asm,
+ (outs listtype:$dst),
+ (ins listtype:$Vt, VectorIndexD:$idx,
+ GPR64sp:$Rn), []>;
+
+ def i64_POST : SIMDLdStSingleDTiedPost<1, R, opcode, size, asm,
+ (outs GPR64sp:$wback, listtype:$dst),
+ (ins listtype:$Vt, VectorIndexD:$idx,
+ GPR64sp:$Rn, GPR64pi:$Xm)>;
+}
+let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
+multiclass SIMDStSingleB<bit R, bits<3> opcode, string asm,
+ RegisterOperand listtype, RegisterOperand GPR64pi> {
+ def i8 : SIMDLdStSingleB<0, R, opcode, asm,
+ (outs), (ins listtype:$Vt, VectorIndexB:$idx,
+ GPR64sp:$Rn), []>;
+
+ def i8_POST : SIMDLdStSingleBPost<0, R, opcode, asm,
+ (outs GPR64sp:$wback),
+ (ins listtype:$Vt, VectorIndexB:$idx,
+ GPR64sp:$Rn, GPR64pi:$Xm)>;
+}
+let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
+multiclass SIMDStSingleH<bit R, bits<3> opcode, bit size, string asm,
+ RegisterOperand listtype, RegisterOperand GPR64pi> {
+ def i16 : SIMDLdStSingleH<0, R, opcode, size, asm,
+ (outs), (ins listtype:$Vt, VectorIndexH:$idx,
+ GPR64sp:$Rn), []>;
+
+ def i16_POST : SIMDLdStSingleHPost<0, R, opcode, size, asm,
+ (outs GPR64sp:$wback),
+ (ins listtype:$Vt, VectorIndexH:$idx,
+ GPR64sp:$Rn, GPR64pi:$Xm)>;
+}
+let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
+multiclass SIMDStSingleS<bit R, bits<3> opcode, bits<2> size,string asm,
+ RegisterOperand listtype, RegisterOperand GPR64pi> {
+ def i32 : SIMDLdStSingleS<0, R, opcode, size, asm,
+ (outs), (ins listtype:$Vt, VectorIndexS:$idx,
+ GPR64sp:$Rn), []>;
+
+ def i32_POST : SIMDLdStSingleSPost<0, R, opcode, size, asm,
+ (outs GPR64sp:$wback),
+ (ins listtype:$Vt, VectorIndexS:$idx,
+ GPR64sp:$Rn, GPR64pi:$Xm)>;
+}
+let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
+multiclass SIMDStSingleD<bit R, bits<3> opcode, bits<2> size, string asm,
+ RegisterOperand listtype, RegisterOperand GPR64pi> {
+ def i64 : SIMDLdStSingleD<0, R, opcode, size, asm,
+ (outs), (ins listtype:$Vt, VectorIndexD:$idx,
+ GPR64sp:$Rn), []>;
+
+ def i64_POST : SIMDLdStSingleDPost<0, R, opcode, size, asm,
+ (outs GPR64sp:$wback),
+ (ins listtype:$Vt, VectorIndexD:$idx,
+ GPR64sp:$Rn, GPR64pi:$Xm)>;
+}
+
+multiclass SIMDLdStSingleAliases<string asm, string layout, string Type,
+ string Count, int Offset, Operand idxtype> {
+  // E.g. "ld1 { v0.b }[0], [x1], #1"
+  //      "ld1\t$Vt$idx, [$Rn], #1"
+  // may get mapped to
+  //      (LD1i8_POST VecListOneb:$Vt, VectorIndexB:$idx, GPR64sp:$Rn, XZR)
+ def : InstAlias<asm # "\t$Vt$idx, [$Rn], #" # Offset,
+ (!cast<Instruction>(NAME # Type # "_POST")
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("VecList" # Count # layout):$Vt,
+ idxtype:$idx, XZR), 1>;
+
+  // E.g. "ld1.b { v0 }[0], [x1], #1"
+  //      "ld1.b\t$Vt$idx, [$Rn], #1"
+  // may get mapped to
+  //      (LD1i8_POST VecListOne128:$Vt, VectorIndexB:$idx, GPR64sp:$Rn, XZR)
+ def : InstAlias<asm # "." # layout # "\t$Vt$idx, [$Rn], #" # Offset,
+ (!cast<Instruction>(NAME # Type # "_POST")
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("VecList" # Count # "128"):$Vt,
+ idxtype:$idx, XZR), 0>;
+
+  // E.g. "ld1.b { v0 }[0], [x1]"
+  //      "ld1.b\t$Vt$idx, [$Rn]"
+  // may get mapped to
+  //      (LD1i8 VecListOne128:$Vt, VectorIndexB:$idx, GPR64sp:$Rn)
+ def : InstAlias<asm # "." # layout # "\t$Vt$idx, [$Rn]",
+ (!cast<Instruction>(NAME # Type)
+ !cast<RegisterOperand>("VecList" # Count # "128"):$Vt,
+ idxtype:$idx, GPR64sp:$Rn), 0>;
+
+  // E.g. "ld1.b { v0 }[0], [x1], x2"
+  //      "ld1.b\t$Vt$idx, [$Rn], $Xm"
+  // may get mapped to
+  //      (LD1i8_POST VecListOne128:$Vt, VectorIndexB:$idx, GPR64sp:$Rn, GPR64pi1:$Xm)
+ def : InstAlias<asm # "." # layout # "\t$Vt$idx, [$Rn], $Xm",
+ (!cast<Instruction>(NAME # Type # "_POST")
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("VecList" # Count # "128"):$Vt,
+ idxtype:$idx,
+ !cast<RegisterOperand>("GPR64pi" # Offset):$Xm), 0>;
+}
+
+multiclass SIMDLdSt1SingleAliases<string asm> {
+ defm "" : SIMDLdStSingleAliases<asm, "b", "i8", "One", 1, VectorIndexB>;
+ defm "" : SIMDLdStSingleAliases<asm, "h", "i16", "One", 2, VectorIndexH>;
+ defm "" : SIMDLdStSingleAliases<asm, "s", "i32", "One", 4, VectorIndexS>;
+ defm "" : SIMDLdStSingleAliases<asm, "d", "i64", "One", 8, VectorIndexD>;
+}
+
+multiclass SIMDLdSt2SingleAliases<string asm> {
+ defm "" : SIMDLdStSingleAliases<asm, "b", "i8", "Two", 2, VectorIndexB>;
+ defm "" : SIMDLdStSingleAliases<asm, "h", "i16", "Two", 4, VectorIndexH>;
+ defm "" : SIMDLdStSingleAliases<asm, "s", "i32", "Two", 8, VectorIndexS>;
+ defm "" : SIMDLdStSingleAliases<asm, "d", "i64", "Two", 16, VectorIndexD>;
+}
+
+multiclass SIMDLdSt3SingleAliases<string asm> {
+ defm "" : SIMDLdStSingleAliases<asm, "b", "i8", "Three", 3, VectorIndexB>;
+ defm "" : SIMDLdStSingleAliases<asm, "h", "i16", "Three", 6, VectorIndexH>;
+ defm "" : SIMDLdStSingleAliases<asm, "s", "i32", "Three", 12, VectorIndexS>;
+ defm "" : SIMDLdStSingleAliases<asm, "d", "i64", "Three", 24, VectorIndexD>;
+}
+
+multiclass SIMDLdSt4SingleAliases<string asm> {
+ defm "" : SIMDLdStSingleAliases<asm, "b", "i8", "Four", 4, VectorIndexB>;
+ defm "" : SIMDLdStSingleAliases<asm, "h", "i16", "Four", 8, VectorIndexH>;
+ defm "" : SIMDLdStSingleAliases<asm, "s", "i32", "Four", 16, VectorIndexS>;
+ defm "" : SIMDLdStSingleAliases<asm, "d", "i64", "Four", 32, VectorIndexD>;
+}
+} // end of 'let Predicates = [HasNEON]'
+
+//----------------------------------------------------------------------------
+// AdvSIMD v8.1 Rounding Double Multiply Add/Subtract
+//----------------------------------------------------------------------------
+
+let Predicates = [HasNEON, HasRDM] in {
+
+class BaseSIMDThreeSameVectorTiedR0<bit Q, bit U, bits<2> size, bits<5> opcode,
+ RegisterOperand regtype, string asm,
+ string kind, list<dag> pattern>
+ : BaseSIMDThreeSameVectorTied<Q, U, {size,0}, opcode, regtype, asm, kind,
+ pattern> {
+}
+multiclass SIMDThreeSameVectorSQRDMLxHTiedHS<bit U, bits<5> opc, string asm,
+ SDPatternOperator Accum> {
+ def v4i16 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b01, opc, V64, asm, ".4h",
+ [(set (v4i16 V64:$dst),
+ (Accum (v4i16 V64:$Rd),
+ (v4i16 (int_aarch64_neon_sqrdmulh (v4i16 V64:$Rn),
+ (v4i16 V64:$Rm)))))]>;
+ def v8i16 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b01, opc, V128, asm, ".8h",
+ [(set (v8i16 V128:$dst),
+ (Accum (v8i16 V128:$Rd),
+ (v8i16 (int_aarch64_neon_sqrdmulh (v8i16 V128:$Rn),
+ (v8i16 V128:$Rm)))))]>;
+ def v2i32 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b10, opc, V64, asm, ".2s",
+ [(set (v2i32 V64:$dst),
+ (Accum (v2i32 V64:$Rd),
+ (v2i32 (int_aarch64_neon_sqrdmulh (v2i32 V64:$Rn),
+ (v2i32 V64:$Rm)))))]>;
+ def v4i32 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b10, opc, V128, asm, ".4s",
+ [(set (v4i32 V128:$dst),
+ (Accum (v4i32 V128:$Rd),
+ (v4i32 (int_aarch64_neon_sqrdmulh (v4i32 V128:$Rn),
+ (v4i32 V128:$Rm)))))]>;
+}
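+// Illustrative use (hedged; the real instantiations live in AArch64InstrInfo.td):
+// the v8.1-A rounding doubling multiply-accumulate instructions are expected
+// to be created along the lines of
+//   defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1, 0b10000, "sqrdmlah",
+//                                                     int_aarch64_neon_sqadd>;
+// i.e. Accum is the saturating add (or subtract, for sqrdmlsh) wrapped
+// around the sqrdmulh intrinsic in the patterns above.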
+
+multiclass SIMDIndexedSQRDMLxHSDTied<bit U, bits<4> opc, string asm,
+ SDPatternOperator Accum> {
+ def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc,
+ V64, V64, V128_lo, VectorIndexH,
+ asm, ".4h", ".4h", ".4h", ".h",
+ [(set (v4i16 V64:$dst),
+ (Accum (v4i16 V64:$Rd),
+ (v4i16 (int_aarch64_neon_sqrdmulh
+ (v4i16 V64:$Rn),
+ (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
+ VectorIndexH:$idx))))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc,
+ V128, V128, V128_lo, VectorIndexH,
+ asm, ".8h", ".8h", ".8h", ".h",
+ [(set (v8i16 V128:$dst),
+ (Accum (v8i16 V128:$Rd),
+ (v8i16 (int_aarch64_neon_sqrdmulh
+ (v8i16 V128:$Rn),
+ (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
+ VectorIndexH:$idx))))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc,
+ V64, V64, V128, VectorIndexS,
+ asm, ".2s", ".2s", ".2s", ".s",
+ [(set (v2i32 V64:$dst),
+ (Accum (v2i32 V64:$Rd),
+ (v2i32 (int_aarch64_neon_sqrdmulh
+ (v2i32 V64:$Rn),
+ (v2i32 (AArch64duplane32 (v4i32 V128:$Rm),
+ VectorIndexS:$idx))))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ // FIXME: it would be nice to use the scalar (v1i32) instruction here, but
+ // an intermediate EXTRACT_SUBREG would be untyped.
+ // FIXME: direct EXTRACT_SUBREG from v2i32 to i32 is illegal, that's why we
+ // got it lowered here as (i32 vector_extract (v4i32 insert_subvector(..)))
+ def : Pat<(i32 (Accum (i32 FPR32Op:$Rd),
+ (i32 (vector_extract
+ (v4i32 (insert_subvector
+ (undef),
+ (v2i32 (int_aarch64_neon_sqrdmulh
+ (v2i32 V64:$Rn),
+ (v2i32 (AArch64duplane32
+ (v4i32 V128:$Rm),
+ VectorIndexS:$idx)))),
+ (i32 0))),
+ (i64 0))))),
+ (EXTRACT_SUBREG
+ (v2i32 (!cast<Instruction>(NAME # v2i32_indexed)
+ (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
+ FPR32Op:$Rd,
+ ssub)),
+ V64:$Rn,
+ V128:$Rm,
+ VectorIndexS:$idx)),
+ ssub)>;
+
+ def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc,
+ V128, V128, V128, VectorIndexS,
+ asm, ".4s", ".4s", ".4s", ".s",
+ [(set (v4i32 V128:$dst),
+ (Accum (v4i32 V128:$Rd),
+ (v4i32 (int_aarch64_neon_sqrdmulh
+ (v4i32 V128:$Rn),
+ (v4i32 (AArch64duplane32 (v4i32 V128:$Rm),
+ VectorIndexS:$idx))))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ // FIXME: it would be nice to use the scalar (v1i32) instruction here, but
+ // an intermediate EXTRACT_SUBREG would be untyped.
+ def : Pat<(i32 (Accum (i32 FPR32Op:$Rd),
+ (i32 (vector_extract
+ (v4i32 (int_aarch64_neon_sqrdmulh
+ (v4i32 V128:$Rn),
+ (v4i32 (AArch64duplane32
+ (v4i32 V128:$Rm),
+ VectorIndexS:$idx)))),
+ (i64 0))))),
+ (EXTRACT_SUBREG
+ (v4i32 (!cast<Instruction>(NAME # v4i32_indexed)
+ (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+ FPR32Op:$Rd,
+ ssub)),
+ V128:$Rn,
+ V128:$Rm,
+ VectorIndexS:$idx)),
+ ssub)>;
+
+ def i16_indexed : BaseSIMDIndexedTied<1, U, 1, 0b01, opc,
+ FPR16Op, FPR16Op, V128_lo,
+ VectorIndexH, asm, ".h", "", "", ".h",
+ []> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc,
+ FPR32Op, FPR32Op, V128, VectorIndexS,
+ asm, ".s", "", "", ".s",
+ [(set (i32 FPR32Op:$dst),
+ (Accum (i32 FPR32Op:$Rd),
+ (i32 (int_aarch64_neon_sqrdmulh
+ (i32 FPR32Op:$Rn),
+ (i32 (vector_extract (v4i32 V128:$Rm),
+ VectorIndexS:$idx))))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+}
+} // let Predicates = [HasNEON, HasRDM]
+
+//----------------------------------------------------------------------------
+// ARMv8.3 Complex ADD/MLA instructions
+//----------------------------------------------------------------------------
+
+class ComplexRotationOperand<int Angle, int Remainder, string Type>
+ : AsmOperandClass {
+ let PredicateMethod = "isComplexRotation<" # Angle # ", " # Remainder # ">";
+ let DiagnosticType = "InvalidComplexRotation" # Type;
+ let Name = "ComplexRotation" # Type;
+}
+def complexrotateop : Operand<i32> {
+ let ParserMatchClass = ComplexRotationOperand<90, 0, "Even">;
+ let PrintMethod = "printComplexRotationOp<90, 0>";
+}
+def complexrotateopodd : Operand<i32> {
+ let ParserMatchClass = ComplexRotationOperand<180, 90, "Odd">;
+ let PrintMethod = "printComplexRotationOp<180, 90>";
+}
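+// Rotation operands in the assembly are literal angles; as a rough sketch of
+// the mapping (see printComplexRotationOp in the C++ printer):
+//   FCMLA (even): #0, #90, #180, #270  ->  rot = angle / 90         (2 bits)
+//   FCADD (odd):  #90, #270            ->  rot = (angle - 90) / 180 (1 bit)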
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDThreeSameVectorComplex<bit Q, bit U, bits<2> size, bits<3> opcode,
+ RegisterOperand regtype, Operand rottype,
+ string asm, string kind, list<dag> pattern>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, rottype:$rot), asm,
+ "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $rot"
+ "|" # kind # "\t$Rd, $Rn, $Rm, $rot}", "", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<1> rot;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size;
+ let Inst{21} = 0;
+ let Inst{20-16} = Rm;
+ let Inst{15-13} = opcode;
+ // Non-tied version (FCADD) only has one rotation bit
+ let Inst{12} = rot;
+ let Inst{11} = 0;
+ let Inst{10} = 1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass SIMDThreeSameVectorComplexHSD<bit U, bits<3> opcode, Operand rottype,
+ string asm, SDPatternOperator OpNode>{
+ let Predicates = [HasV8_3a, HasNEON, HasFullFP16] in {
+ def v4f16 : BaseSIMDThreeSameVectorComplex<0, U, 0b01, opcode, V64, rottype,
+ asm, ".4h",
+ [(set (v4f16 V64:$dst), (OpNode (v4f16 V64:$Rd),
+ (v4f16 V64:$Rn),
+ (v4f16 V64:$Rm),
+ (rottype i32:$rot)))]>;
+
+ def v8f16 : BaseSIMDThreeSameVectorComplex<1, U, 0b01, opcode, V128, rottype,
+ asm, ".8h",
+ [(set (v8f16 V128:$dst), (OpNode (v8f16 V128:$Rd),
+ (v8f16 V128:$Rn),
+ (v8f16 V128:$Rm),
+ (rottype i32:$rot)))]>;
+ }
+
+ let Predicates = [HasV8_3a, HasNEON] in {
+ def v2f32 : BaseSIMDThreeSameVectorComplex<0, U, 0b10, opcode, V64, rottype,
+ asm, ".2s",
+ [(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd),
+ (v2f32 V64:$Rn),
+ (v2f32 V64:$Rm),
+ (rottype i32:$rot)))]>;
+
+ def v4f32 : BaseSIMDThreeSameVectorComplex<1, U, 0b10, opcode, V128, rottype,
+ asm, ".4s",
+ [(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd),
+ (v4f32 V128:$Rn),
+ (v4f32 V128:$Rm),
+ (rottype i32:$rot)))]>;
+
+ def v2f64 : BaseSIMDThreeSameVectorComplex<1, U, 0b11, opcode, V128, rottype,
+ asm, ".2d",
+ [(set (v2f64 V128:$dst), (OpNode (v2f64 V128:$Rd),
+ (v2f64 V128:$Rn),
+ (v2f64 V128:$Rm),
+ (rottype i32:$rot)))]>;
+ }
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDThreeSameVectorTiedComplex<bit Q, bit U, bits<2> size,
+ bits<3> opcode,
+ RegisterOperand regtype,
+ Operand rottype, string asm,
+ string kind, list<dag> pattern>
+ : I<(outs regtype:$dst),
+ (ins regtype:$Rd, regtype:$Rn, regtype:$Rm, rottype:$rot), asm,
+ "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $rot"
+ "|" # kind # "\t$Rd, $Rn, $Rm, $rot}", "$Rd = $dst", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<2> rot;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size;
+ let Inst{21} = 0;
+ let Inst{20-16} = Rm;
+ let Inst{15-13} = opcode;
+ let Inst{12-11} = rot;
+ let Inst{10} = 1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass SIMDThreeSameVectorTiedComplexHSD<bit U, bits<3> opcode,
+ Operand rottype, string asm,
+ SDPatternOperator OpNode> {
+ let Predicates = [HasV8_3a, HasNEON, HasFullFP16] in {
+ def v4f16 : BaseSIMDThreeSameVectorTiedComplex<0, U, 0b01, opcode, V64,
+ rottype, asm, ".4h",
+ [(set (v4f16 V64:$dst), (OpNode (v4f16 V64:$Rd),
+ (v4f16 V64:$Rn),
+ (v4f16 V64:$Rm),
+ (rottype i32:$rot)))]>;
+
+ def v8f16 : BaseSIMDThreeSameVectorTiedComplex<1, U, 0b01, opcode, V128,
+ rottype, asm, ".8h",
+ [(set (v8f16 V128:$dst), (OpNode (v8f16 V128:$Rd),
+ (v8f16 V128:$Rn),
+ (v8f16 V128:$Rm),
+ (rottype i32:$rot)))]>;
+ }
+
+ let Predicates = [HasV8_3a, HasNEON] in {
+ def v2f32 : BaseSIMDThreeSameVectorTiedComplex<0, U, 0b10, opcode, V64,
+ rottype, asm, ".2s",
+ [(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd),
+ (v2f32 V64:$Rn),
+ (v2f32 V64:$Rm),
+ (rottype i32:$rot)))]>;
+
+ def v4f32 : BaseSIMDThreeSameVectorTiedComplex<1, U, 0b10, opcode, V128,
+ rottype, asm, ".4s",
+ [(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd),
+ (v4f32 V128:$Rn),
+ (v4f32 V128:$Rm),
+ (rottype i32:$rot)))]>;
+
+ def v2f64 : BaseSIMDThreeSameVectorTiedComplex<1, U, 0b11, opcode, V128,
+ rottype, asm, ".2d",
+ [(set (v2f64 V128:$dst), (OpNode (v2f64 V128:$Rd),
+ (v2f64 V128:$Rn),
+ (v2f64 V128:$Rm),
+ (rottype i32:$rot)))]>;
+ }
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDIndexedTiedComplex<bit Q, bit U, bit Scalar, bits<2> size,
+ bit opc1, bit opc2, RegisterOperand dst_reg,
+ RegisterOperand lhs_reg,
+ RegisterOperand rhs_reg, Operand vec_idx,
+ Operand rottype, string asm, string apple_kind,
+ string dst_kind, string lhs_kind,
+ string rhs_kind, list<dag> pattern>
+ : I<(outs dst_reg:$dst),
+ (ins dst_reg:$Rd, lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx, rottype:$rot),
+ asm,
+ "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind #
+ "$idx, $rot" # "|" # apple_kind #
+ "\t$Rd, $Rn, $Rm$idx, $rot}", "$Rd = $dst", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<2> rot;
+
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28} = Scalar;
+ let Inst{27-24} = 0b1111;
+ let Inst{23-22} = size;
+ // Bit 21 must be set by the derived class.
+ let Inst{20-16} = Rm;
+ let Inst{15} = opc1;
+ let Inst{14-13} = rot;
+ let Inst{12} = opc2;
+ // Bit 11 must be set by the derived class.
+ let Inst{10} = 0;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+// The complex instructions index by pairs of elements, so the VectorIndexes
+// don't match the lane types, and the index bits are different to the other
+// classes.
+multiclass SIMDIndexedTiedComplexHSD<bit U, bit opc1, bit opc2, Operand rottype,
+ string asm, SDPatternOperator OpNode> {
+ let Predicates = [HasV8_3a,HasNEON,HasFullFP16] in {
+ def v4f16_indexed : BaseSIMDIndexedTiedComplex<0, 1, 0, 0b01, opc1, opc2, V64,
+ V64, V128, VectorIndexD, rottype, asm, ".4h", ".4h",
+ ".4h", ".h", []> {
+ bits<1> idx;
+ let Inst{11} = 0;
+ let Inst{21} = idx{0};
+ }
+
+ def v8f16_indexed : BaseSIMDIndexedTiedComplex<1, 1, 0, 0b01, opc1, opc2,
+ V128, V128, V128, VectorIndexS, rottype, asm, ".8h",
+ ".8h", ".8h", ".h", []> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+ } // Predicates = [HasV8_3a,HasNEON,HasFullFP16]
+
+ let Predicates = [HasV8_3a,HasNEON] in {
+ def v4f32_indexed : BaseSIMDIndexedTiedComplex<1, 1, 0, 0b10, opc1, opc2,
+ V128, V128, V128, VectorIndexD, rottype, asm, ".4s",
+ ".4s", ".4s", ".s", []> {
+ bits<1> idx;
+ let Inst{11} = idx{0};
+ let Inst{21} = 0;
+ }
+ } // Predicates = [HasV8_3a,HasNEON]
+}
+
+//----------------------------------------------------------------------------
+// Crypto extensions
+//----------------------------------------------------------------------------
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class AESBase<bits<4> opc, string asm, dag outs, dag ins, string cstr,
+ list<dag> pat>
+ : I<outs, ins, asm, "{\t$Rd.16b, $Rn.16b|.16b\t$Rd, $Rn}", cstr, pat>,
+ Sched<[WriteV]>{
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31-16} = 0b0100111000101000;
+ let Inst{15-12} = opc;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+class AESInst<bits<4> opc, string asm, Intrinsic OpNode>
+ : AESBase<opc, asm, (outs V128:$Rd), (ins V128:$Rn), "",
+ [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
+
+class AESTiedInst<bits<4> opc, string asm, Intrinsic OpNode>
+ : AESBase<opc, asm, (outs V128:$dst), (ins V128:$Rd, V128:$Rn),
+ "$Rd = $dst",
+ [(set (v16i8 V128:$dst),
+ (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn)))]>;
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class SHA3OpTiedInst<bits<3> opc, string asm, string dst_lhs_kind,
+ dag oops, dag iops, list<dag> pat>
+ : I<oops, iops, asm,
+ "{\t$Rd" # dst_lhs_kind # ", $Rn" # dst_lhs_kind # ", $Rm.4s" #
+ "|.4s\t$Rd, $Rn, $Rm}", "$Rd = $dst", pat>,
+ Sched<[WriteV]>{
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31-21} = 0b01011110000;
+ let Inst{20-16} = Rm;
+ let Inst{15} = 0;
+ let Inst{14-12} = opc;
+ let Inst{11-10} = 0b00;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+class SHATiedInstQSV<bits<3> opc, string asm, Intrinsic OpNode>
+ : SHA3OpTiedInst<opc, asm, "", (outs FPR128:$dst),
+ (ins FPR128:$Rd, FPR32:$Rn, V128:$Rm),
+ [(set (v4i32 FPR128:$dst),
+ (OpNode (v4i32 FPR128:$Rd), (i32 FPR32:$Rn),
+ (v4i32 V128:$Rm)))]>;
+
+class SHATiedInstVVV<bits<3> opc, string asm, Intrinsic OpNode>
+ : SHA3OpTiedInst<opc, asm, ".4s", (outs V128:$dst),
+ (ins V128:$Rd, V128:$Rn, V128:$Rm),
+ [(set (v4i32 V128:$dst),
+ (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn),
+ (v4i32 V128:$Rm)))]>;
+
+class SHATiedInstQQV<bits<3> opc, string asm, Intrinsic OpNode>
+ : SHA3OpTiedInst<opc, asm, "", (outs FPR128:$dst),
+ (ins FPR128:$Rd, FPR128:$Rn, V128:$Rm),
+ [(set (v4i32 FPR128:$dst),
+ (OpNode (v4i32 FPR128:$Rd), (v4i32 FPR128:$Rn),
+ (v4i32 V128:$Rm)))]>;
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class SHA2OpInst<bits<4> opc, string asm, string kind,
+ string cstr, dag oops, dag iops,
+ list<dag> pat>
+ : I<oops, iops, asm, "{\t$Rd" # kind # ", $Rn" # kind #
+ "|" # kind # "\t$Rd, $Rn}", cstr, pat>,
+ Sched<[WriteV]>{
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31-16} = 0b0101111000101000;
+ let Inst{15-12} = opc;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+class SHATiedInstVV<bits<4> opc, string asm, Intrinsic OpNode>
+ : SHA2OpInst<opc, asm, ".4s", "$Rd = $dst", (outs V128:$dst),
+ (ins V128:$Rd, V128:$Rn),
+ [(set (v4i32 V128:$dst),
+ (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn)))]>;
+
+class SHAInstSS<bits<4> opc, string asm, Intrinsic OpNode>
+ : SHA2OpInst<opc, asm, "", "", (outs FPR32:$Rd), (ins FPR32:$Rn),
+ [(set (i32 FPR32:$Rd), (OpNode (i32 FPR32:$Rn)))]>;
+
+// Armv8.2-A Crypto extensions
+class BaseCryptoV82<dag oops, dag iops, string asm, string asmops, string cst,
+ list<dag> pattern>
+ : I <oops, iops, asm, asmops, cst, pattern>, Sched<[WriteV]> {
+ bits<5> Vd;
+ bits<5> Vn;
+ let Inst{31-25} = 0b1100111;
+ let Inst{9-5} = Vn;
+ let Inst{4-0} = Vd;
+}
+
+class CryptoRRTied<bits<1>op0, bits<2>op1, string asm, string asmops>
+ : BaseCryptoV82<(outs V128:$Vd), (ins V128:$Vn, V128:$Vm), asm, asmops,
+ "$Vm = $Vd", []> {
+ let Inst{31-25} = 0b1100111;
+ let Inst{24-21} = 0b0110;
+ let Inst{20-15} = 0b000001;
+ let Inst{14} = op0;
+ let Inst{13-12} = 0b00;
+ let Inst{11-10} = op1;
+}
+class CryptoRRTied_2D<bits<1>op0, bits<2>op1, string asm>
+ : CryptoRRTied<op0, op1, asm, "{\t$Vd.2d, $Vn.2d}">;
+class CryptoRRTied_4S<bits<1>op0, bits<2>op1, string asm>
+ : CryptoRRTied<op0, op1, asm, "{\t$Vd.4s, $Vn.4s}">;
+
+class CryptoRRR<bits<1> op0, bits<2>op1, dag oops, dag iops, string asm,
+ string asmops, string cst>
+ : BaseCryptoV82<oops, iops, asm , asmops, cst, []> {
+ bits<5> Vm;
+ let Inst{24-21} = 0b0011;
+ let Inst{20-16} = Vm;
+ let Inst{15} = 0b1;
+ let Inst{14} = op0;
+ let Inst{13-12} = 0b00;
+ let Inst{11-10} = op1;
+}
+class CryptoRRR_2D<bits<1> op0, bits<2>op1, string asm>
+ : CryptoRRR<op0, op1, (outs V128:$Vd), (ins V128:$Vn, V128:$Vm), asm,
+ "{\t$Vd.2d, $Vn.2d, $Vm.2d}", "">;
+class CryptoRRRTied_2D<bits<1> op0, bits<2>op1, string asm>
+ : CryptoRRR<op0, op1, (outs V128:$Vdst), (ins V128:$Vd, V128:$Vn, V128:$Vm), asm,
+ "{\t$Vd.2d, $Vn.2d, $Vm.2d}", "$Vd = $Vdst">;
+class CryptoRRR_4S<bits<1> op0, bits<2>op1, string asm>
+ : CryptoRRR<op0, op1, (outs V128:$Vd), (ins V128:$Vn, V128:$Vm), asm,
+ "{\t$Vd.4s, $Vn.4s, $Vm.4s}", "">;
+class CryptoRRRTied_4S<bits<1> op0, bits<2>op1, string asm>
+ : CryptoRRR<op0, op1, (outs V128:$Vdst), (ins V128:$Vd, V128:$Vn, V128:$Vm), asm,
+ "{\t$Vd.4s, $Vn.4s, $Vm.4s}", "$Vd = $Vdst">;
+class CryptoRRRTied<bits<1> op0, bits<2>op1, string asm>
+ : CryptoRRR<op0, op1, (outs FPR128:$Vdst), (ins FPR128:$Vd, FPR128:$Vn, V128:$Vm),
+ asm, "{\t$Vd, $Vn, $Vm.2d}", "$Vd = $Vdst">;
+
+class CryptoRRRR<bits<2>op0, string asm, string asmops>
+ : BaseCryptoV82<(outs V128:$Vd), (ins V128:$Vn, V128:$Vm, V128:$Va), asm,
+ asmops, "", []> {
+ bits<5> Vm;
+ bits<5> Va;
+ let Inst{24-23} = 0b00;
+ let Inst{22-21} = op0;
+ let Inst{20-16} = Vm;
+ let Inst{15} = 0b0;
+ let Inst{14-10} = Va;
+}
+class CryptoRRRR_16B<bits<2>op0, string asm>
+ : CryptoRRRR<op0, asm, "{\t$Vd.16b, $Vn.16b, $Vm.16b, $Va.16b}"> {
+}
+class CryptoRRRR_4S<bits<2>op0, string asm>
+ : CryptoRRRR<op0, asm, "{\t$Vd.4s, $Vn.4s, $Vm.4s, $Va.4s}"> {
+}
+
+class CryptoRRRi6<string asm>
+ : BaseCryptoV82<(outs V128:$Vd), (ins V128:$Vn, V128:$Vm, uimm6:$imm), asm,
+ "{\t$Vd.2d, $Vn.2d, $Vm.2d, $imm}", "", []> {
+ bits<6> imm;
+ bits<5> Vm;
+ let Inst{24-21} = 0b0100;
+ let Inst{20-16} = Vm;
+ let Inst{15-10} = imm;
+ let Inst{9-5} = Vn;
+ let Inst{4-0} = Vd;
+}
+
+class CryptoRRRi2Tied<bits<1>op0, bits<2>op1, string asm>
+ : BaseCryptoV82<(outs V128:$Vdst),
+ (ins V128:$Vd, V128:$Vn, V128:$Vm, VectorIndexS:$imm),
+ asm, "{\t$Vd.4s, $Vn.4s, $Vm.s$imm}", "$Vd = $Vdst", []> {
+ bits<2> imm;
+ bits<5> Vm;
+ let Inst{24-21} = 0b0010;
+ let Inst{20-16} = Vm;
+ let Inst{15} = 0b1;
+ let Inst{14} = op0;
+ let Inst{13-12} = imm;
+ let Inst{11-10} = op1;
+}
+
+//----------------------------------------------------------------------------
+// v8.1 atomic instructions extension:
+// * CAS
+// * CASP
+// * SWP
+// * LDOPregister<OP>, and aliases STOPregister<OP>
+
+// Instruction encodings:
+//
+//          31 30|29    24|23|22|21|20 16|15|14   10|9 5|4  0
+// CAS       SZ  |001000  |1 |A |1 | Rs  |R | 11111 |Rn |Rt
+// CASP     0|SZ |001000  |0 |A |1 | Rs  |R | 11111 |Rn |Rt
+// SWP       SZ  |111000  |A |R |1 | Rs  |1 |OPC|00 |Rn |Rt
+// LD        SZ  |111000  |A |R |1 | Rs  |0 |OPC|00 |Rn |Rt
+// ST        SZ  |111000  |A |R |1 | Rs  |0 |OPC|00 |Rn |11111
+
+// Instruction syntax:
+//
+// CAS{<order>}[<size>] <Ws>, <Wt>, [<Xn|SP>]
+// CAS{<order>} <Xs>, <Xt>, [<Xn|SP>]
+// CASP{<order>} <Ws>, <W(s+1)>, <Wt>, <W(t+1)>, [<Xn|SP>]
+// CASP{<order>} <Xs>, <X(s+1)>, <Xt>, <X(t+1)>, [<Xn|SP>]
+// SWP{<order>}[<size>] <Ws>, <Wt>, [<Xn|SP>]
+// SWP{<order>} <Xs>, <Xt>, [<Xn|SP>]
+// LD<OP>{<order>}[<size>] <Ws>, <Wt>, [<Xn|SP>]
+// LD<OP>{<order>} <Xs>, <Xt>, [<Xn|SP>]
+// ST<OP>{<order>}[<size>] <Ws>, [<Xn|SP>]
+// ST<OP>{<order>} <Xs>, [<Xn|SP>]
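+//
+// Worked example (illustrative): "casal w0, w1, [x2]" uses the CAS row with
+// SZ = 0b10 (32-bit), A = 1, R = 1, Rs = 0 (w0), Rt = 1 (w1), Rn = 2 (x2),
+// and bits 14-10 = 0b11111.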
+
+let Predicates = [HasLSE], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
+class BaseCASEncoding<dag oops, dag iops, string asm, string operands,
+ string cstr, list<dag> pattern>
+ : I<oops, iops, asm, operands, cstr, pattern> {
+ bits<2> Sz;
+ bit NP;
+ bit Acq;
+ bit Rel;
+ bits<5> Rs;
+ bits<5> Rn;
+ bits<5> Rt;
+ let Inst{31-30} = Sz;
+ let Inst{29-24} = 0b001000;
+ let Inst{23} = NP;
+ let Inst{22} = Acq;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rs;
+ let Inst{15} = Rel;
+ let Inst{14-10} = 0b11111;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+ let Predicates = [HasLSE];
+}
+
+class BaseCAS<string order, string size, RegisterClass RC>
+ : BaseCASEncoding<(outs RC:$out),(ins RC:$Rs, RC:$Rt, GPR64sp:$Rn),
+ "cas" # order # size, "\t$Rs, $Rt, [$Rn]",
+ "$out = $Rs",[]>,
+ Sched<[WriteAtomic]> {
+ let NP = 1;
+}
+
+multiclass CompareAndSwap<bits<1> Acq, bits<1> Rel, string order> {
+ let Sz = 0b00, Acq = Acq, Rel = Rel in def B : BaseCAS<order, "b", GPR32>;
+ let Sz = 0b01, Acq = Acq, Rel = Rel in def H : BaseCAS<order, "h", GPR32>;
+ let Sz = 0b10, Acq = Acq, Rel = Rel in def W : BaseCAS<order, "", GPR32>;
+ let Sz = 0b11, Acq = Acq, Rel = Rel in def X : BaseCAS<order, "", GPR64>;
+}
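+// Illustrative instantiations (hedged; see AArch64InstrInfo.td for the real
+// ones): the four orderings are expected to come from something like
+//   defm CAS   : CompareAndSwap<0, 0, "">;
+//   defm CASA  : CompareAndSwap<1, 0, "a">;
+//   defm CASL  : CompareAndSwap<0, 1, "l">;
+//   defm CASAL : CompareAndSwap<1, 1, "al">;
+// yielding CASB/CASH/CASW/CASX etc. via the size defs above.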
+
+class BaseCASP<string order, string size, RegisterOperand RC>
+ : BaseCASEncoding<(outs RC:$out),(ins RC:$Rs, RC:$Rt, GPR64sp:$Rn),
+ "casp" # order # size, "\t$Rs, $Rt, [$Rn]",
+ "$out = $Rs",[]>,
+ Sched<[WriteAtomic]> {
+ let NP = 0;
+}
+
+multiclass CompareAndSwapPair<bits<1> Acq, bits<1> Rel, string order> {
+ let Sz = 0b00, Acq = Acq, Rel = Rel in
+ def W : BaseCASP<order, "", WSeqPairClassOperand>;
+ let Sz = 0b01, Acq = Acq, Rel = Rel in
+ def X : BaseCASP<order, "", XSeqPairClassOperand>;
+}
+
+let Predicates = [HasLSE] in
+class BaseSWP<string order, string size, RegisterClass RC>
+ : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "swp" # order # size,
+ "\t$Rs, $Rt, [$Rn]","",[]>,
+ Sched<[WriteAtomic]> {
+ bits<2> Sz;
+ bit Acq;
+ bit Rel;
+ bits<5> Rs;
+ bits<3> opc = 0b000;
+ bits<5> Rn;
+ bits<5> Rt;
+ let Inst{31-30} = Sz;
+ let Inst{29-24} = 0b111000;
+ let Inst{23} = Acq;
+ let Inst{22} = Rel;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rs;
+ let Inst{15} = 0b1;
+ let Inst{14-12} = opc;
+ let Inst{11-10} = 0b00;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+ let Predicates = [HasLSE];
+}
+
+multiclass Swap<bits<1> Acq, bits<1> Rel, string order> {
+ let Sz = 0b00, Acq = Acq, Rel = Rel in def B : BaseSWP<order, "b", GPR32>;
+ let Sz = 0b01, Acq = Acq, Rel = Rel in def H : BaseSWP<order, "h", GPR32>;
+ let Sz = 0b10, Acq = Acq, Rel = Rel in def W : BaseSWP<order, "", GPR32>;
+ let Sz = 0b11, Acq = Acq, Rel = Rel in def X : BaseSWP<order, "", GPR64>;
+}
+
+let Predicates = [HasLSE], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
+class BaseLDOPregister<string op, string order, string size, RegisterClass RC>
+ : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "ld" # op # order # size,
+ "\t$Rs, $Rt, [$Rn]","",[]>,
+ Sched<[WriteAtomic]> {
+ bits<2> Sz;
+ bit Acq;
+ bit Rel;
+ bits<5> Rs;
+ bits<3> opc;
+ bits<5> Rn;
+ bits<5> Rt;
+ let Inst{31-30} = Sz;
+ let Inst{29-24} = 0b111000;
+ let Inst{23} = Acq;
+ let Inst{22} = Rel;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rs;
+ let Inst{15} = 0b0;
+ let Inst{14-12} = opc;
+ let Inst{11-10} = 0b00;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+ let Predicates = [HasLSE];
+}
+
+multiclass LDOPregister<bits<3> opc, string op, bits<1> Acq, bits<1> Rel,
+ string order> {
+ let Sz = 0b00, Acq = Acq, Rel = Rel, opc = opc in
+ def B : BaseLDOPregister<op, order, "b", GPR32>;
+ let Sz = 0b01, Acq = Acq, Rel = Rel, opc = opc in
+ def H : BaseLDOPregister<op, order, "h", GPR32>;
+ let Sz = 0b10, Acq = Acq, Rel = Rel, opc = opc in
+ def W : BaseLDOPregister<op, order, "", GPR32>;
+ let Sz = 0b11, Acq = Acq, Rel = Rel, opc = opc in
+ def X : BaseLDOPregister<op, order, "", GPR64>;
+}
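+// The 3-bit opc selects the ALU operation (per the Armv8.1-A LSE encoding):
+//   000 add, 001 clr, 010 eor, 011 set, 100 smax, 101 smin, 110 umax, 111 umin
+// Illustrative instantiation (hedged; actual defs are in AArch64InstrInfo.td):
+//   defm LDADD : LDOPregister<0b000, "add", 0, 0, "">;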
+
+// Differing SrcRHS and DstRHS allow you to cover CLR & SUB by giving a more
+// complex DAG for DstRHS.
+let Predicates = [HasLSE] in
+multiclass LDOPregister_patterns_ord_dag<string inst, string suffix, string op,
+ string size, dag SrcRHS, dag DstRHS> {
+ def : Pat<(!cast<PatFrag>(op#"_"#size#"_monotonic") GPR64sp:$Rn, SrcRHS),
+ (!cast<Instruction>(inst # suffix) DstRHS, GPR64sp:$Rn)>;
+ def : Pat<(!cast<PatFrag>(op#"_"#size#"_acquire") GPR64sp:$Rn, SrcRHS),
+ (!cast<Instruction>(inst # "A" # suffix) DstRHS, GPR64sp:$Rn)>;
+ def : Pat<(!cast<PatFrag>(op#"_"#size#"_release") GPR64sp:$Rn, SrcRHS),
+ (!cast<Instruction>(inst # "L" # suffix) DstRHS, GPR64sp:$Rn)>;
+ def : Pat<(!cast<PatFrag>(op#"_"#size#"_acq_rel") GPR64sp:$Rn, SrcRHS),
+ (!cast<Instruction>(inst # "AL" # suffix) DstRHS, GPR64sp:$Rn)>;
+ def : Pat<(!cast<PatFrag>(op#"_"#size#"_seq_cst") GPR64sp:$Rn, SrcRHS),
+ (!cast<Instruction>(inst # "AL" # suffix) DstRHS, GPR64sp:$Rn)>;
+}
+
+multiclass LDOPregister_patterns_ord<string inst, string suffix, string op,
+ string size, dag RHS> {
+ defm : LDOPregister_patterns_ord_dag<inst, suffix, op, size, RHS, RHS>;
+}
+
+multiclass LDOPregister_patterns_ord_mod<string inst, string suffix, string op,
+ string size, dag LHS, dag RHS> {
+ defm : LDOPregister_patterns_ord_dag<inst, suffix, op, size, LHS, RHS>;
+}
+
+multiclass LDOPregister_patterns<string inst, string op> {
+ defm : LDOPregister_patterns_ord<inst, "X", op, "64", (i64 GPR64:$Rm)>;
+ defm : LDOPregister_patterns_ord<inst, "W", op, "32", (i32 GPR32:$Rm)>;
+ defm : LDOPregister_patterns_ord<inst, "H", op, "16", (i32 GPR32:$Rm)>;
+ defm : LDOPregister_patterns_ord<inst, "B", op, "8", (i32 GPR32:$Rm)>;
+}
+
+multiclass LDOPregister_patterns_mod<string inst, string op, string mod> {
+ defm : LDOPregister_patterns_ord_mod<inst, "X", op, "64",
+ (i64 GPR64:$Rm),
+ (i64 (!cast<Instruction>(mod#Xrr) XZR, GPR64:$Rm))>;
+ defm : LDOPregister_patterns_ord_mod<inst, "W", op, "32",
+ (i32 GPR32:$Rm),
+ (i32 (!cast<Instruction>(mod#Wrr) WZR, GPR32:$Rm))>;
+ defm : LDOPregister_patterns_ord_mod<inst, "H", op, "16",
+ (i32 GPR32:$Rm),
+ (i32 (!cast<Instruction>(mod#Wrr) WZR, GPR32:$Rm))>;
+ defm : LDOPregister_patterns_ord_mod<inst, "B", op, "8",
+ (i32 GPR32:$Rm),
+ (i32 (!cast<Instruction>(mod#Wrr) WZR, GPR32:$Rm))>;
+}
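+// Sketch of how the "mod" hook is used (hedged; the exact defm lines live in
+// AArch64InstrInfo.td): there is no LDSUB/LDAND instruction, so the operand
+// is rewritten first, e.g.
+//   atomic_load_sub x, v  ->  LDADD with (SUB*rr zero, v)   (negate v)
+//   atomic_load_and x, v  ->  LDCLR with (ORN*rr zero, v)   (complement v)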
+
+let Predicates = [HasLSE] in
+multiclass CASregister_patterns_ord_dag<string inst, string suffix, string op,
+ string size, dag OLD, dag NEW> {
+ def : Pat<(!cast<PatFrag>(op#"_"#size#"_monotonic") GPR64sp:$Rn, OLD, NEW),
+ (!cast<Instruction>(inst # suffix) OLD, NEW, GPR64sp:$Rn)>;
+ def : Pat<(!cast<PatFrag>(op#"_"#size#"_acquire") GPR64sp:$Rn, OLD, NEW),
+ (!cast<Instruction>(inst # "A" # suffix) OLD, NEW, GPR64sp:$Rn)>;
+ def : Pat<(!cast<PatFrag>(op#"_"#size#"_release") GPR64sp:$Rn, OLD, NEW),
+ (!cast<Instruction>(inst # "L" # suffix) OLD, NEW, GPR64sp:$Rn)>;
+ def : Pat<(!cast<PatFrag>(op#"_"#size#"_acq_rel") GPR64sp:$Rn, OLD, NEW),
+ (!cast<Instruction>(inst # "AL" # suffix) OLD, NEW, GPR64sp:$Rn)>;
+ def : Pat<(!cast<PatFrag>(op#"_"#size#"_seq_cst") GPR64sp:$Rn, OLD, NEW),
+ (!cast<Instruction>(inst # "AL" # suffix) OLD, NEW, GPR64sp:$Rn)>;
+}
+
+multiclass CASregister_patterns_ord<string inst, string suffix, string op,
+ string size, dag OLD, dag NEW> {
+ defm : CASregister_patterns_ord_dag<inst, suffix, op, size, OLD, NEW>;
+}
+
+multiclass CASregister_patterns<string inst, string op> {
+ defm : CASregister_patterns_ord<inst, "X", op, "64",
+ (i64 GPR64:$Rold), (i64 GPR64:$Rnew)>;
+ defm : CASregister_patterns_ord<inst, "W", op, "32",
+ (i32 GPR32:$Rold), (i32 GPR32:$Rnew)>;
+ defm : CASregister_patterns_ord<inst, "H", op, "16",
+ (i32 GPR32:$Rold), (i32 GPR32:$Rnew)>;
+ defm : CASregister_patterns_ord<inst, "B", op, "8",
+ (i32 GPR32:$Rold), (i32 GPR32:$Rnew)>;
+}
+
+let Predicates = [HasLSE] in
+class BaseSTOPregister<string asm, RegisterClass OP, Register Reg,
+ Instruction inst> :
+ InstAlias<asm # "\t$Rs, [$Rn]", (inst Reg, OP:$Rs, GPR64sp:$Rn)>;
+
+multiclass STOPregister<string asm, string instr> {
+ def : BaseSTOPregister<asm # "lb", GPR32, WZR,
+ !cast<Instruction>(instr # "LB")>;
+ def : BaseSTOPregister<asm # "lh", GPR32, WZR,
+ !cast<Instruction>(instr # "LH")>;
+ def : BaseSTOPregister<asm # "l", GPR32, WZR,
+ !cast<Instruction>(instr # "LW")>;
+ def : BaseSTOPregister<asm # "l", GPR64, XZR,
+ !cast<Instruction>(instr # "LX")>;
+ def : BaseSTOPregister<asm # "b", GPR32, WZR,
+ !cast<Instruction>(instr # "B")>;
+ def : BaseSTOPregister<asm # "h", GPR32, WZR,
+ !cast<Instruction>(instr # "H")>;
+ def : BaseSTOPregister<asm, GPR32, WZR,
+ !cast<Instruction>(instr # "W")>;
+ def : BaseSTOPregister<asm, GPR64, XZR,
+ !cast<Instruction>(instr # "X")>;
+}
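+// The ST<OP> forms are pure aliases: the destination register of the
+// corresponding LD<OP> is tied to the zero register, e.g. (illustratively)
+// "stadd w0, [x1]" is another spelling of "ldadd w0, wzr, [x1]".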
+
+//----------------------------------------------------------------------------
+// Allow the size specifier tokens to be upper case, not just lower.
+def : TokenAlias<".4B", ".4b">; // Add dot product
+def : TokenAlias<".8B", ".8b">;
+def : TokenAlias<".4H", ".4h">;
+def : TokenAlias<".2S", ".2s">;
+def : TokenAlias<".1D", ".1d">;
+def : TokenAlias<".16B", ".16b">;
+def : TokenAlias<".8H", ".8h">;
+def : TokenAlias<".4S", ".4s">;
+def : TokenAlias<".2D", ".2d">;
+def : TokenAlias<".1Q", ".1q">;
+def : TokenAlias<".2H", ".2h">;
+def : TokenAlias<".B", ".b">;
+def : TokenAlias<".H", ".h">;
+def : TokenAlias<".S", ".s">;
+def : TokenAlias<".D", ".d">;
+def : TokenAlias<".Q", ".q">;
diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64InstrInfo.td b/capstone/suite/synctools/tablegen/AArch64/AArch64InstrInfo.td
new file mode 100644
index 000000000..d6b8bb5d8
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/AArch64/AArch64InstrInfo.td
@@ -0,0 +1,6494 @@
+//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// AArch64 Instruction definitions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ARM Instruction Predicate Definitions.
+//
+def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
+ AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
+def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">,
+ AssemblerPredicate<"HasV8_2aOps", "armv8.2a">;
+def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">,
+ AssemblerPredicate<"HasV8_3aOps", "armv8.3a">;
+def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">,
+ AssemblerPredicate<"HasV8_4aOps", "armv8.4a">;
+def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
+ AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">;
+def HasNEON : Predicate<"Subtarget->hasNEON()">,
+ AssemblerPredicate<"FeatureNEON", "neon">;
+def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
+ AssemblerPredicate<"FeatureCrypto", "crypto">;
+def HasSM4 : Predicate<"Subtarget->hasSM4()">,
+ AssemblerPredicate<"FeatureSM4", "sm4">;
+def HasSHA3 : Predicate<"Subtarget->hasSHA3()">,
+ AssemblerPredicate<"FeatureSHA3", "sha3">;
+def HasSHA2 : Predicate<"Subtarget->hasSHA2()">,
+ AssemblerPredicate<"FeatureSHA2", "sha2">;
+def HasAES : Predicate<"Subtarget->hasAES()">,
+ AssemblerPredicate<"FeatureAES", "aes">;
+def HasDotProd : Predicate<"Subtarget->hasDotProd()">,
+ AssemblerPredicate<"FeatureDotProd", "dotprod">;
+def HasCRC : Predicate<"Subtarget->hasCRC()">,
+ AssemblerPredicate<"FeatureCRC", "crc">;
+def HasLSE : Predicate<"Subtarget->hasLSE()">,
+ AssemblerPredicate<"FeatureLSE", "lse">;
+def HasRAS : Predicate<"Subtarget->hasRAS()">,
+ AssemblerPredicate<"FeatureRAS", "ras">;
+def HasRDM : Predicate<"Subtarget->hasRDM()">,
+ AssemblerPredicate<"FeatureRDM", "rdm">;
+def HasPerfMon : Predicate<"Subtarget->hasPerfMon()">;
+def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
+ AssemblerPredicate<"FeatureFullFP16", "fullfp16">;
+def HasSPE : Predicate<"Subtarget->hasSPE()">,
+ AssemblerPredicate<"FeatureSPE", "spe">;
+def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">,
+ AssemblerPredicate<"FeatureFuseAES",
+ "fuse-aes">;
+def HasSVE : Predicate<"Subtarget->hasSVE()">,
+ AssemblerPredicate<"FeatureSVE", "sve">;
+def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
+ AssemblerPredicate<"FeatureRCPC", "rcpc">;
+
+def IsLE : Predicate<"Subtarget->isLittleEndian()">;
+def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
+def UseAlternateSExtLoadCVTF32
+ : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;
+
+def UseNegativeImmediates
+ : Predicate<"false">, AssemblerPredicate<"!FeatureNoNegativeImmediates",
+ "NegativeImmediates">;
+
+
+//===----------------------------------------------------------------------===//
+// AArch64-specific DAG Nodes.
+//
+
+// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS
+def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2,
+ [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>, SDTCisVT<1, i32>]>;
+
+// SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS
+def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisInt<0>,
+ SDTCisVT<3, i32>]>;
+
+// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS
+def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
+ [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>,
+ SDTCisVT<1, i32>,
+ SDTCisVT<4, i32>]>;
+
+def SDT_AArch64Brcond : SDTypeProfile<0, 3,
+ [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>]>;
+def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>;
+def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisVT<2, OtherVT>]>;
+
+
+def SDT_AArch64CSel : SDTypeProfile<1, 4,
+ [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisInt<3>,
+ SDTCisVT<4, i32>]>;
+def SDT_AArch64CCMP : SDTypeProfile<1, 5,
+ [SDTCisVT<0, i32>,
+ SDTCisInt<1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisInt<3>,
+ SDTCisInt<4>,
+ SDTCisVT<5, i32>]>;
+def SDT_AArch64FCCMP : SDTypeProfile<1, 5,
+ [SDTCisVT<0, i32>,
+ SDTCisFP<1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisInt<3>,
+ SDTCisInt<4>,
+ SDTCisVT<5, i32>]>;
+def SDT_AArch64FCmp : SDTypeProfile<0, 2,
+ [SDTCisFP<0>,
+ SDTCisSameAs<0, 1>]>;
+def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
+def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>;
+def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>]>;
+def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>;
+def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
+def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisInt<2>, SDTCisInt<3>]>;
+def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
+def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>, SDTCisInt<3>]>;
+def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>;
+
+def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
+def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>;
+def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>;
+def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>;
+def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>,
+ SDTCisSameAs<0,3>]>;
+def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
+def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>;
+
+def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;
+
+def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
+ SDTCisPtrTy<1>]>;
+
+// Generates the general dynamic sequences, i.e.
+// adrp x0, :tlsdesc:var
+// ldr x1, [x0, #:tlsdesc_lo12:var]
+// add x0, x0, #:tlsdesc_lo12:var
+// .tlsdesccall var
+// blr x1
+
+// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here)
+// One operand: the variable being accessed.
+def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0,1,
+ [SDTCisPtrTy<0>]>;
+
+def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
+ [SDTCisVT<0, i64>, SDTCisVT<1, i32>,
+ SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
+ SDTCisSameAs<1, 4>]>;
+
+
+// Node definitions.
+def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
+def AArch64addlow : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>;
+def AArch64LOADgot : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>;
+def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START",
+ SDCallSeqStart<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>,
+ [SDNPHasChain, SDNPOutGlue]>;
+def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END",
+ SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64call : SDNode<"AArch64ISD::CALL",
+ SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def AArch64brcond : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond,
+ [SDNPHasChain]>;
+def AArch64cbz : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz,
+ [SDNPHasChain]>;
+def AArch64cbnz : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz,
+ [SDNPHasChain]>;
+def AArch64tbz : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz,
+ [SDNPHasChain]>;
+def AArch64tbnz : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz,
+ [SDNPHasChain]>;
+
+
+def AArch64csel : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>;
+def AArch64csinv : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>;
+def AArch64csneg : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>;
+def AArch64csinc : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>;
+def AArch64retflag : SDNode<"AArch64ISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def AArch64adc : SDNode<"AArch64ISD::ADC", SDTBinaryArithWithFlagsIn >;
+def AArch64sbc : SDNode<"AArch64ISD::SBC", SDTBinaryArithWithFlagsIn>;
+def AArch64add_flag : SDNode<"AArch64ISD::ADDS", SDTBinaryArithWithFlagsOut,
+ [SDNPCommutative]>;
+def AArch64sub_flag : SDNode<"AArch64ISD::SUBS", SDTBinaryArithWithFlagsOut>;
+def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut,
+ [SDNPCommutative]>;
+def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>;
+def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>;
+
+def AArch64ccmp : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>;
+def AArch64ccmn : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>;
+def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>;
+
+def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;
+
+def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>;
+
+def AArch64dup : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>;
+def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>;
+def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>;
+def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>;
+def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>;
+
+def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>;
+def AArch64zip2 : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>;
+def AArch64uzp1 : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>;
+def AArch64uzp2 : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>;
+def AArch64trn1 : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>;
+def AArch64trn2 : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>;
+
+def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>;
+def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>;
+def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>;
+def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>;
+def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>;
+def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>;
+def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>;
+
+def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>;
+def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>;
+def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>;
+def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>;
+
+def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>;
+def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>;
+def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>;
+def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>;
+def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>;
+def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>;
+def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>;
+def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>;
+
+def AArch64not: SDNode<"AArch64ISD::NOT", SDT_AArch64unvec>;
+def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>;
+def AArch64bsl: SDNode<"AArch64ISD::BSL", SDT_AArch64trivec>;
+
+def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;
+def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;
+def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>;
+def AArch64cmhi: SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>;
+def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>;
+
+def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>;
+def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>;
+def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>;
+
+def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>;
+def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>;
+def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>;
+def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>;
+def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>;
+def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
+ (AArch64not (AArch64cmeqz (and node:$LHS, node:$RHS)))>;
+
+def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
+def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
+def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>;
+def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>;
+def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>;
+
+def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>;
+def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>;
+
+def AArch64neg : SDNode<"AArch64ISD::NEG", SDT_AArch64unvec>;
+
+def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
+def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;
+
+def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ",
+ SDT_AArch64TLSDescCallSeq,
+ [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
+ SDNPVariadic]>;
+
+
+def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
+ SDT_AArch64WrapperLarge>;
+
+def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>;
+
+def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisSameAs<1, 2>]>;
+def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull>;
+def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull>;
+
+def AArch64frecpe : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>;
+def AArch64frecps : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>;
+def AArch64frsqrte : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>;
+def AArch64frsqrts : SDNode<"AArch64ISD::FRSQRTS", SDTFPBinOp>;
+
+def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
+def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>;
+def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
+def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
+def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
+def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
+
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+
+// AArch64 Instruction Predicate Definitions.
+// We could compute these on a per-module basis but doing so requires accessing
+// the Function object through the <Target>Subtarget and objections were raised
+// to that (see post-commit review comments for r301750).
+let RecomputePerFunction = 1 in {
+ def ForCodeSize : Predicate<"MF->getFunction().optForSize()">;
+ def NotForCodeSize : Predicate<"!MF->getFunction().optForSize()">;
+ // Avoid generating STRQro if it is slow, unless we're optimizing for code size.
+ def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || MF->getFunction().optForSize()">;
+}
+
+include "AArch64InstrFormats.td"
+include "SVEInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous instructions.
+//===----------------------------------------------------------------------===//
+
+let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in {
+// We set Sched to the empty list because we expect these instructions to simply get
+// removed in most cases.
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ [(AArch64callseq_start timm:$amt1, timm:$amt2)]>,
+ Sched<[]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ [(AArch64callseq_end timm:$amt1, timm:$amt2)]>,
+ Sched<[]>;
+} // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1
+
+let isReMaterializable = 1, isCodeGenOnly = 1 in {
+// FIXME: The following pseudo instructions are only needed because remat
+// cannot handle multiple instructions. When that changes, they can be
+// removed, along with the AArch64Wrapper node.
+
+let AddedComplexity = 10 in
+def LOADgot : Pseudo<(outs GPR64:$dst), (ins i64imm:$addr),
+ [(set GPR64:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
+ Sched<[WriteLDAdr]>;
+
+// The MOVaddr instruction should match only when the add is not folded
+// into a load or store address.
+def MOVaddr
+ : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
+ [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
+ tglobaladdr:$low))]>,
+ Sched<[WriteAdrAdr]>;
+def MOVaddrJT
+ : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
+ [(set GPR64:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
+ tjumptable:$low))]>,
+ Sched<[WriteAdrAdr]>;
+def MOVaddrCP
+ : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
+ [(set GPR64:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
+ tconstpool:$low))]>,
+ Sched<[WriteAdrAdr]>;
+def MOVaddrBA
+ : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
+ [(set GPR64:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
+ tblockaddress:$low))]>,
+ Sched<[WriteAdrAdr]>;
+def MOVaddrTLS
+ : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
+ [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
+ tglobaltlsaddr:$low))]>,
+ Sched<[WriteAdrAdr]>;
+def MOVaddrEXT
+ : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
+ [(set GPR64:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
+ texternalsym:$low))]>,
+ Sched<[WriteAdrAdr]>;
+// Normally AArch64addlow either gets folded into a following ldr/str,
+// or together with an adrp into MOVaddr above. For cases with TLS, it
+// might appear without either of them, so allow lowering it into a plain
+// add.
+def ADDlowTLS
+ : Pseudo<(outs GPR64:$dst), (ins GPR64:$src, i64imm:$low),
+ [(set GPR64:$dst, (AArch64addlow GPR64:$src,
+ tglobaltlsaddr:$low))]>,
+ Sched<[WriteAdr]>;
+
+} // isReMaterializable, isCodeGenOnly
+
+def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr),
+ (LOADgot tglobaltlsaddr:$addr)>;
+
+def : Pat<(AArch64LOADgot texternalsym:$addr),
+ (LOADgot texternalsym:$addr)>;
+
+def : Pat<(AArch64LOADgot tconstpool:$addr),
+ (LOADgot tconstpool:$addr)>;
+
+//===----------------------------------------------------------------------===//
+// System instructions.
+//===----------------------------------------------------------------------===//
+
+def HINT : HintI<"hint">;
+def : InstAlias<"nop", (HINT 0b000)>;
+def : InstAlias<"yield",(HINT 0b001)>;
+def : InstAlias<"wfe", (HINT 0b010)>;
+def : InstAlias<"wfi", (HINT 0b011)>;
+def : InstAlias<"sev", (HINT 0b100)>;
+def : InstAlias<"sevl", (HINT 0b101)>;
+def : InstAlias<"esb", (HINT 0b10000)>, Requires<[HasRAS]>;
+def : InstAlias<"csdb", (HINT 20)>;
+
+// v8.2a Statistical Profiling extension
+def : InstAlias<"psb $op", (HINT psbhint_op:$op)>, Requires<[HasSPE]>;
+
+// As far as LLVM is concerned this writes to the system's exclusive monitors.
+let mayLoad = 1, mayStore = 1 in
+def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">;
+
+// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot
+// model patterns with sufficiently fine granularity.
+let mayLoad = ?, mayStore = ? in {
+def DMB : CRmSystemI<barrier_op, 0b101, "dmb",
+ [(int_aarch64_dmb (i32 imm32_0_15:$CRm))]>;
+
+def DSB : CRmSystemI<barrier_op, 0b100, "dsb",
+ [(int_aarch64_dsb (i32 imm32_0_15:$CRm))]>;
+
+def ISB : CRmSystemI<barrier_op, 0b110, "isb",
+ [(int_aarch64_isb (i32 imm32_0_15:$CRm))]>;
+
+def TSB : CRmSystemI<barrier_op, 0b010, "tsb", []> {
+ let CRm = 0b0010;
+ let Inst{12} = 0;
+ let Predicates = [HasV8_4a];
+}
+}
+
+// ARMv8.2 Dot Product
+let Predicates = [HasDotProd] in {
+defm SDOT : SIMDThreeSameVectorDot<0, "sdot", int_aarch64_neon_sdot>;
+defm UDOT : SIMDThreeSameVectorDot<1, "udot", int_aarch64_neon_udot>;
+defm SDOTlane : SIMDThreeSameVectorDotIndex<0, "sdot", int_aarch64_neon_sdot>;
+defm UDOTlane : SIMDThreeSameVectorDotIndex<1, "udot", int_aarch64_neon_udot>;
+}
+
+// Armv8.2-A Crypto extensions
+let Predicates = [HasSHA3] in {
+def SHA512H : CryptoRRRTied<0b0, 0b00, "sha512h">;
+def SHA512H2 : CryptoRRRTied<0b0, 0b01, "sha512h2">;
+def SHA512SU0 : CryptoRRTied_2D<0b0, 0b00, "sha512su0">;
+def SHA512SU1 : CryptoRRRTied_2D<0b0, 0b10, "sha512su1">;
+def RAX1 : CryptoRRR_2D<0b0,0b11, "rax1">;
+def EOR3 : CryptoRRRR_16B<0b00, "eor3">;
+def BCAX : CryptoRRRR_16B<0b01, "bcax">;
+def XAR : CryptoRRRi6<"xar">;
+} // HasSHA3
+
+let Predicates = [HasSM4] in {
+def SM3TT1A : CryptoRRRi2Tied<0b0, 0b00, "sm3tt1a">;
+def SM3TT1B : CryptoRRRi2Tied<0b0, 0b01, "sm3tt1b">;
+def SM3TT2A : CryptoRRRi2Tied<0b0, 0b10, "sm3tt2a">;
+def SM3TT2B : CryptoRRRi2Tied<0b0, 0b11, "sm3tt2b">;
+def SM3SS1 : CryptoRRRR_4S<0b10, "sm3ss1">;
+def SM3PARTW1 : CryptoRRRTied_4S<0b1, 0b00, "sm3partw1">;
+def SM3PARTW2 : CryptoRRRTied_4S<0b1, 0b01, "sm3partw2">;
+def SM4ENCKEY : CryptoRRR_4S<0b1, 0b10, "sm4ekey">;
+def SM4E : CryptoRRTied_4S<0b0, 0b01, "sm4e">;
+} // HasSM4
+
+let Predicates = [HasRCPC] in {
+ // v8.3 Release Consistent Processor Consistent support, optional in v8.2.
+ def LDAPRB : RCPCLoad<0b00, "ldaprb", GPR32>;
+ def LDAPRH : RCPCLoad<0b01, "ldaprh", GPR32>;
+ def LDAPRW : RCPCLoad<0b10, "ldapr", GPR32>;
+ def LDAPRX : RCPCLoad<0b11, "ldapr", GPR64>;
+}
+
+// v8.3a complex add and multiply-accumulate. No predicate here; that is done
+// inside the multiclass, as the FP16 versions need different predicates.
+defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop,
+ "fcmla", null_frag>;
+defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd,
+ "fcadd", null_frag>;
+defm FCMLA : SIMDIndexedTiedComplexHSD<1, 0, 1, complexrotateop, "fcmla",
+ null_frag>;
+
+// v8.3a Pointer Authentication
+// These instructions inhabit part of the hint space and so can be used for
+// armv8 targets.
+let Uses = [LR], Defs = [LR] in {
+ def PACIAZ : SystemNoOperands<0b000, "paciaz">;
+ def PACIBZ : SystemNoOperands<0b010, "pacibz">;
+ def AUTIAZ : SystemNoOperands<0b100, "autiaz">;
+ def AUTIBZ : SystemNoOperands<0b110, "autibz">;
+}
+let Uses = [LR, SP], Defs = [LR] in {
+ def PACIASP : SystemNoOperands<0b001, "paciasp">;
+ def PACIBSP : SystemNoOperands<0b011, "pacibsp">;
+ def AUTIASP : SystemNoOperands<0b101, "autiasp">;
+ def AUTIBSP : SystemNoOperands<0b111, "autibsp">;
+}
+let Uses = [X16, X17], Defs = [X17], CRm = 0b0001 in {
+ def PACIA1716 : SystemNoOperands<0b000, "pacia1716">;
+ def PACIB1716 : SystemNoOperands<0b010, "pacib1716">;
+ def AUTIA1716 : SystemNoOperands<0b100, "autia1716">;
+ def AUTIB1716 : SystemNoOperands<0b110, "autib1716">;
+}
+
+let Uses = [LR], Defs = [LR], CRm = 0b0000 in {
+ def XPACLRI : SystemNoOperands<0b111, "xpaclri">;
+}
+
+// These pointer authentication instructions require armv8.3a.
+let Predicates = [HasV8_3a] in {
+ multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm> {
+ def IA : SignAuthOneData<prefix, 0b00, !strconcat(asm, "ia")>;
+ def IB : SignAuthOneData<prefix, 0b01, !strconcat(asm, "ib")>;
+ def DA : SignAuthOneData<prefix, 0b10, !strconcat(asm, "da")>;
+ def DB : SignAuthOneData<prefix, 0b11, !strconcat(asm, "db")>;
+ def IZA : SignAuthZero<prefix_z, 0b00, !strconcat(asm, "iza")>;
+ def DZA : SignAuthZero<prefix_z, 0b10, !strconcat(asm, "dza")>;
+ def IZB : SignAuthZero<prefix_z, 0b01, !strconcat(asm, "izb")>;
+ def DZB : SignAuthZero<prefix_z, 0b11, !strconcat(asm, "dzb")>;
+ }
+
+ defm PAC : SignAuth<0b000, 0b010, "pac">;
+ defm AUT : SignAuth<0b001, 0b011, "aut">;
+
+ def XPACI : SignAuthZero<0b100, 0b00, "xpaci">;
+ def XPACD : SignAuthZero<0b100, 0b01, "xpacd">;
+ def PACGA : SignAuthTwoOperand<0b1100, "pacga", null_frag>;
+
+ // Combined Instructions
+ def BRAA : AuthBranchTwoOperands<0, 0, "braa">;
+ def BRAB : AuthBranchTwoOperands<0, 1, "brab">;
+ def BLRAA : AuthBranchTwoOperands<1, 0, "blraa">;
+ def BLRAB : AuthBranchTwoOperands<1, 1, "blrab">;
+
+ def BRAAZ : AuthOneOperand<0b000, 0, "braaz">;
+ def BRABZ : AuthOneOperand<0b000, 1, "brabz">;
+ def BLRAAZ : AuthOneOperand<0b001, 0, "blraaz">;
+ def BLRABZ : AuthOneOperand<0b001, 1, "blrabz">;
+
+ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+ def RETAA : AuthReturn<0b010, 0, "retaa">;
+ def RETAB : AuthReturn<0b010, 1, "retab">;
+ def ERETAA : AuthReturn<0b100, 0, "eretaa">;
+ def ERETAB : AuthReturn<0b100, 1, "eretab">;
+ }
+
+ defm LDRAA : AuthLoad<0, "ldraa", simm10Scaled>;
+ defm LDRAB : AuthLoad<1, "ldrab", simm10Scaled>;
+
+  // v8.3a floating-point conversion for JavaScript
+ let Predicates = [HasV8_3a, HasFPARMv8] in
+ def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32,
+ "fjcvtzs", []> {
+ let Inst{31} = 0;
+ }
+
+} // HasV8_3a
+
+// v8.4 Flag manipulation instructions
+let Predicates = [HasV8_4a] in {
+def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> {
+ let Inst{20-5} = 0b0000001000000000;
+}
+def SETF8 : BaseFlagManipulation<0, 0, (ins GPR32:$Rn), "setf8", "{\t$Rn}">;
+def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">;
+def RMIF : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif",
+ "{\t$Rn, $imm, $mask}">;
+} // HasV8_4a
+
+def : InstAlias<"clrex", (CLREX 0xf)>;
+def : InstAlias<"isb", (ISB 0xf)>;
+
+def MRS : MRSI;
+def MSR : MSRI;
+def MSRpstateImm1 : MSRpstateImm0_1;
+def MSRpstateImm4 : MSRpstateImm0_15;
+
+// The thread pointer (on Linux, at least, where this has been implemented) is
+// TPIDR_EL0.
+def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
+ [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>;
+
+// The cycle counter PMC register is PMCCNTR_EL0.
+let Predicates = [HasPerfMon] in
+def : Pat<(readcyclecounter), (MRS 0xdce8)>;
+
+// FPCR register
+def : Pat<(i64 (int_aarch64_get_fpcr)), (MRS 0xda20)>;
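+// (In the two MRS patterns above, the immediates are the packed
+// op0:op1:CRn:CRm:op2 system-register encodings: 0xdce8 = PMCCNTR_EL0 and
+// 0xda20 = FPCR.)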
+
+// Generic system instructions
+def SYSxt : SystemXtI<0, "sys">;
+def SYSLxt : SystemLXtI<1, "sysl">;
+
+def : InstAlias<"sys $op1, $Cn, $Cm, $op2",
+ (SYSxt imm0_7:$op1, sys_cr_op:$Cn,
+ sys_cr_op:$Cm, imm0_7:$op2, XZR)>;
+
+//===----------------------------------------------------------------------===//
+// Move immediate instructions.
+//===----------------------------------------------------------------------===//
+
+defm MOVK : InsertImmediate<0b11, "movk">;
+defm MOVN : MoveImmediate<0b00, "movn">;
+
+let PostEncoderMethod = "fixMOVZ" in
+defm MOVZ : MoveImmediate<0b10, "movz">;
+
+// First group of aliases covers an implicit "lsl #0".
+def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, imm0_65535:$imm, 0), 0>;
+def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, imm0_65535:$imm, 0), 0>;
+def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, imm0_65535:$imm, 0)>;
+def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, imm0_65535:$imm, 0)>;
+def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, imm0_65535:$imm, 0)>;
+def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, imm0_65535:$imm, 0)>;
+
+// Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax.
+def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>;
+def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>;
+def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>;
+def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>;
+
+def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>;
+def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>;
+def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>;
+def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>;
+
+def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g3:$sym, 48), 0>;
+def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g2:$sym, 32), 0>;
+def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g1:$sym, 16), 0>;
+def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g0:$sym, 0), 0>;
+
+def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>;
+def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>;
+
+def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>;
+def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>;
+
+def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g1:$sym, 16), 0>;
+def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g0:$sym, 0), 0>;
+
+// Final group of aliases covers true "mov $Rd, $imm" cases.
+multiclass movw_mov_alias<string basename,Instruction INST, RegisterClass GPR,
+ int width, int shift> {
+ def _asmoperand : AsmOperandClass {
+ let Name = basename # width # "_lsl" # shift # "MovAlias";
+ let PredicateMethod = "is" # basename # "MovAlias<" # width # ", "
+ # shift # ">";
+ let RenderMethod = "add" # basename # "MovAliasOperands<" # shift # ">";
+ }
+
+ def _movimm : Operand<i32> {
+ let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_asmoperand");
+ }
+
+ def : InstAlias<"mov $Rd, $imm",
+ (INST GPR:$Rd, !cast<Operand>(NAME # "_movimm"):$imm, shift)>;
+}
+
+defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 0>;
+defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>;
+
+defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 0>;
+defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>;
+defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>;
+defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>;
+
+defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 0>;
+defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>;
+
+defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 0>;
+defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>;
+defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>;
+defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>;
+
+let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1,
+ isAsCheapAsAMove = 1 in {
+// FIXME: The following pseudo instructions are only needed because remat
+// cannot handle multiple instructions. When that changes, we can select
+// directly to the real instructions and get rid of these pseudos.
+
+def MOVi32imm
+ : Pseudo<(outs GPR32:$dst), (ins i32imm:$src),
+ [(set GPR32:$dst, imm:$src)]>,
+ Sched<[WriteImm]>;
+def MOVi64imm
+ : Pseudo<(outs GPR64:$dst), (ins i64imm:$src),
+ [(set GPR64:$dst, imm:$src)]>,
+ Sched<[WriteImm]>;
+} // isReMaterializable, isCodeGenOnly
+
+// If possible, we want to use MOVi32imm even for 64-bit moves. This gives the
+// eventual expansion code fewer bits to worry about getting right. Marshalling
+// the types is a little tricky though:
+def i64imm_32bit : ImmLeaf<i64, [{
+ return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
+}]>;
+
+def s64imm_32bit : ImmLeaf<i64, [{
+ int64_t Imm64 = static_cast<int64_t>(Imm);
+ return Imm64 >= std::numeric_limits<int32_t>::min() &&
+ Imm64 <= std::numeric_limits<int32_t>::max();
+}]>;
+
+def trunc_imm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32);
+}]>;
+
+def gi_trunc_imm : GICustomOperandRenderer<"renderTruncImm">,
+ GISDNodeXFormEquiv<trunc_imm>;
+
+def : Pat<(i64 i64imm_32bit:$src),
+ (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>;
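+// For illustration: an i64 constant whose upper 32 bits are zero (e.g.
+// 0x89abcdef) matches i64imm_32bit above and is materialized with a 32-bit
+// MOVi32imm, with SUBREG_TO_REG supplying the implicit zero-extension to
+// 64 bits.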
+
+// Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model).
+def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
+return CurDAG->getTargetConstant(
+ N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
+}]>;
+
+def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
+return CurDAG->getTargetConstant(
+ N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
+}]>;
+
+
+def : Pat<(f32 fpimm:$in),
+ (COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>;
+def : Pat<(f64 fpimm:$in),
+ (COPY_TO_REGCLASS (MOVi64imm (bitcast_fpimm_to_i64 f64:$in)), FPR64)>;
+
+
+// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK
+// sequences.
+def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2,
+ tglobaladdr:$g1, tglobaladdr:$g0),
+ (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g0, 0),
+ tglobaladdr:$g1, 16),
+ tglobaladdr:$g2, 32),
+ tglobaladdr:$g3, 48)>;
+
+def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2,
+ tblockaddress:$g1, tblockaddress:$g0),
+ (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g0, 0),
+ tblockaddress:$g1, 16),
+ tblockaddress:$g2, 32),
+ tblockaddress:$g3, 48)>;
+
+def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2,
+ tconstpool:$g1, tconstpool:$g0),
+ (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g0, 0),
+ tconstpool:$g1, 16),
+ tconstpool:$g2, 32),
+ tconstpool:$g3, 48)>;
+
+def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2,
+ tjumptable:$g1, tjumptable:$g0),
+ (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g0, 0),
+ tjumptable:$g1, 16),
+ tjumptable:$g2, 32),
+ tjumptable:$g3, 48)>;
+
+
+//===----------------------------------------------------------------------===//
+// Arithmetic instructions.
+//===----------------------------------------------------------------------===//
+
+// Add/subtract with carry.
+defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>;
+defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>;
+
+def : InstAlias<"ngc $dst, $src", (SBCWr GPR32:$dst, WZR, GPR32:$src)>;
+def : InstAlias<"ngc $dst, $src", (SBCXr GPR64:$dst, XZR, GPR64:$src)>;
+def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>;
+def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>;
+
+// Add/subtract
+defm ADD : AddSub<0, "add", "sub", add>;
+defm SUB : AddSub<1, "sub", "add">;
+
+def : InstAlias<"mov $dst, $src",
+ (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>;
+def : InstAlias<"mov $dst, $src",
+ (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>;
+def : InstAlias<"mov $dst, $src",
+ (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>;
+def : InstAlias<"mov $dst, $src",
+ (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>;
+
+defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">;
+defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">;
+
+// Use SUBS instead of SUB to enable CSE between SUBS and SUB.
+def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm),
+ (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>;
+def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm),
+ (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>;
+def : Pat<(sub GPR32:$Rn, GPR32:$Rm),
+ (SUBSWrr GPR32:$Rn, GPR32:$Rm)>;
+def : Pat<(sub GPR64:$Rn, GPR64:$Rm),
+ (SUBSXrr GPR64:$Rn, GPR64:$Rm)>;
+def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm),
+ (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>;
+def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm),
+ (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>;
+let AddedComplexity = 1 in {
+def : Pat<(sub GPR32sp:$R2, arith_extended_reg32<i32>:$R3),
+ (SUBSWrx GPR32sp:$R2, arith_extended_reg32<i32>:$R3)>;
+def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3),
+ (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3)>;
+}
+
+// Because of the immediate format for add/sub-imm instructions, the
+// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
+// These patterns capture that transformation.
+let AddedComplexity = 1 in {
+def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
+ (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
+def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
+ (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
+def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
+ (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
+def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
+ (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
+}
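+// Rough illustration: an IR-level (add w0, -5) cannot use ADDWri (the
+// immediate field is unsigned), so the first pattern above selects SUBSWri
+// with the negated value, i.e. a subtract of #5.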
+
+// Because of the immediate format for add/sub-imm instructions, the
+// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
+// These patterns capture that transformation.
+let AddedComplexity = 1 in {
+def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
+ (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
+def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
+ (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
+def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
+ (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
+def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
+ (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
+}
+
+def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
+def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
+def : InstAlias<"neg $dst, $src$shift",
+ (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
+def : InstAlias<"neg $dst, $src$shift",
+ (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;
+
+def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
+def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
+def : InstAlias<"negs $dst, $src$shift",
+ (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
+def : InstAlias<"negs $dst, $src$shift",
+ (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;
+
+
+// Unsigned/Signed divide
+defm UDIV : Div<0, "udiv", udiv>;
+defm SDIV : Div<1, "sdiv", sdiv>;
+
+def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr GPR32:$Rn, GPR32:$Rm)>;
+def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr GPR64:$Rn, GPR64:$Rm)>;
+def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr GPR32:$Rn, GPR32:$Rm)>;
+def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr GPR64:$Rn, GPR64:$Rm)>;
+
+// Variable shift
+defm ASRV : Shift<0b10, "asr", sra>;
+defm LSLV : Shift<0b00, "lsl", shl>;
+defm LSRV : Shift<0b01, "lsr", srl>;
+defm RORV : Shift<0b11, "ror", rotr>;
+
+def : ShiftAlias<"asrv", ASRVWr, GPR32>;
+def : ShiftAlias<"asrv", ASRVXr, GPR64>;
+def : ShiftAlias<"lslv", LSLVWr, GPR32>;
+def : ShiftAlias<"lslv", LSLVXr, GPR64>;
+def : ShiftAlias<"lsrv", LSRVWr, GPR32>;
+def : ShiftAlias<"lsrv", LSRVXr, GPR64>;
+def : ShiftAlias<"rorv", RORVWr, GPR32>;
+def : ShiftAlias<"rorv", RORVXr, GPR64>;
+
+// Multiply-add
+let AddedComplexity = 5 in {
+defm MADD : MulAccum<0, "madd", add>;
+defm MSUB : MulAccum<1, "msub", sub>;
+
+def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)),
+ (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
+def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)),
+ (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
+
+def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))),
+ (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
+def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))),
+ (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
+def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)),
+ (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
+def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)),
+ (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
+} // AddedComplexity = 5
+
+let AddedComplexity = 5 in {
+def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>;
+def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>;
+def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>;
+def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>;
+
+def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))),
+ (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
+def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))),
+ (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
+
+def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))),
+ (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
+def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))),
+ (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
+
+def : Pat<(i64 (mul (sext GPR32:$Rn), (s64imm_32bit:$C))),
+ (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
+def : Pat<(i64 (mul (zext GPR32:$Rn), (i64imm_32bit:$C))),
+ (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
+def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C))),
+ (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
+ (MOVi32imm (trunc_imm imm:$C)), XZR)>;
+
+def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))),
+ (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
+def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))),
+ (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
+def : Pat<(i64 (ineg (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)))),
+ (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
+ (MOVi32imm (trunc_imm imm:$C)), XZR)>;
+
+def : Pat<(i64 (add (mul (sext GPR32:$Rn), (s64imm_32bit:$C)), GPR64:$Ra)),
+ (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
+def : Pat<(i64 (add (mul (zext GPR32:$Rn), (i64imm_32bit:$C)), GPR64:$Ra)),
+ (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
+def : Pat<(i64 (add (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)),
+ GPR64:$Ra)),
+ (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
+ (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
+
+def : Pat<(i64 (sub GPR64:$Ra, (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))),
+ (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
+def : Pat<(i64 (sub GPR64:$Ra, (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))),
+ (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
+def : Pat<(i64 (sub GPR64:$Ra, (mul (sext_inreg GPR64:$Rn, i32),
+ (s64imm_32bit:$C)))),
+ (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
+ (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
+} // AddedComplexity = 5
+
+def : MulAccumWAlias<"mul", MADDWrrr>;
+def : MulAccumXAlias<"mul", MADDXrrr>;
+def : MulAccumWAlias<"mneg", MSUBWrrr>;
+def : MulAccumXAlias<"mneg", MSUBXrrr>;
+def : WideMulAccumAlias<"smull", SMADDLrrr>;
+def : WideMulAccumAlias<"smnegl", SMSUBLrrr>;
+def : WideMulAccumAlias<"umull", UMADDLrrr>;
+def : WideMulAccumAlias<"umnegl", UMSUBLrrr>;
+
+// Multiply-high
+def SMULHrr : MulHi<0b010, "smulh", mulhs>;
+def UMULHrr : MulHi<0b110, "umulh", mulhu>;
+
+// CRC32
+def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">;
+def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">;
+def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">;
+def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">;
+
+def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">;
+def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">;
+def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">;
+def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">;
+
+// v8.1 atomic CAS
+defm CAS : CompareAndSwap<0, 0, "">;
+defm CASA : CompareAndSwap<1, 0, "a">;
+defm CASL : CompareAndSwap<0, 1, "l">;
+defm CASAL : CompareAndSwap<1, 1, "al">;
+
+// v8.1 atomic CASP
+defm CASP : CompareAndSwapPair<0, 0, "">;
+defm CASPA : CompareAndSwapPair<1, 0, "a">;
+defm CASPL : CompareAndSwapPair<0, 1, "l">;
+defm CASPAL : CompareAndSwapPair<1, 1, "al">;
+
+// v8.1 atomic SWP
+defm SWP : Swap<0, 0, "">;
+defm SWPA : Swap<1, 0, "a">;
+defm SWPL : Swap<0, 1, "l">;
+defm SWPAL : Swap<1, 1, "al">;
+
+// v8.1 atomic LD<OP>(register). Performs load and then ST<OP>(register)
+defm LDADD : LDOPregister<0b000, "add", 0, 0, "">;
+defm LDADDA : LDOPregister<0b000, "add", 1, 0, "a">;
+defm LDADDL : LDOPregister<0b000, "add", 0, 1, "l">;
+defm LDADDAL : LDOPregister<0b000, "add", 1, 1, "al">;
+
+defm LDCLR : LDOPregister<0b001, "clr", 0, 0, "">;
+defm LDCLRA : LDOPregister<0b001, "clr", 1, 0, "a">;
+defm LDCLRL : LDOPregister<0b001, "clr", 0, 1, "l">;
+defm LDCLRAL : LDOPregister<0b001, "clr", 1, 1, "al">;
+
+defm LDEOR : LDOPregister<0b010, "eor", 0, 0, "">;
+defm LDEORA : LDOPregister<0b010, "eor", 1, 0, "a">;
+defm LDEORL : LDOPregister<0b010, "eor", 0, 1, "l">;
+defm LDEORAL : LDOPregister<0b010, "eor", 1, 1, "al">;
+
+defm LDSET : LDOPregister<0b011, "set", 0, 0, "">;
+defm LDSETA : LDOPregister<0b011, "set", 1, 0, "a">;
+defm LDSETL : LDOPregister<0b011, "set", 0, 1, "l">;
+defm LDSETAL : LDOPregister<0b011, "set", 1, 1, "al">;
+
+defm LDSMAX : LDOPregister<0b100, "smax", 0, 0, "">;
+defm LDSMAXA : LDOPregister<0b100, "smax", 1, 0, "a">;
+defm LDSMAXL : LDOPregister<0b100, "smax", 0, 1, "l">;
+defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">;
+
+defm LDSMIN : LDOPregister<0b101, "smin", 0, 0, "">;
+defm LDSMINA : LDOPregister<0b101, "smin", 1, 0, "a">;
+defm LDSMINL : LDOPregister<0b101, "smin", 0, 1, "l">;
+defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">;
+
+defm LDUMAX : LDOPregister<0b110, "umax", 0, 0, "">;
+defm LDUMAXA : LDOPregister<0b110, "umax", 1, 0, "a">;
+defm LDUMAXL : LDOPregister<0b110, "umax", 0, 1, "l">;
+defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">;
+
+defm LDUMIN : LDOPregister<0b111, "umin", 0, 0, "">;
+defm LDUMINA : LDOPregister<0b111, "umin", 1, 0, "a">;
+defm LDUMINL : LDOPregister<0b111, "umin", 0, 1, "l">;
+defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">;
+
+// v8.1 atomic ST<OP>(register), as aliases to "LD<OP>(register)" with Rt = WZR/XZR
+defm : STOPregister<"stadd","LDADD">; // STADDx
+defm : STOPregister<"stclr","LDCLR">; // STCLRx
+defm : STOPregister<"steor","LDEOR">; // STEORx
+defm : STOPregister<"stset","LDSET">; // STSETx
+defm : STOPregister<"stsmax","LDSMAX">;// STSMAXx
+defm : STOPregister<"stsmin","LDSMIN">;// STSMINx
+defm : STOPregister<"stumax","LDUMAX">;// STUMAXx
+defm : STOPregister<"stumin","LDUMIN">;// STUMINx
+
+//===----------------------------------------------------------------------===//
+// Logical instructions.
+//===----------------------------------------------------------------------===//
+
+// (immediate)
+defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">;
+defm AND : LogicalImm<0b00, "and", and, "bic">;
+defm EOR : LogicalImm<0b10, "eor", xor, "eon">;
+defm ORR : LogicalImm<0b01, "orr", or, "orn">;
+
+// FIXME: these aliases *are* canonical sometimes (when movz can't be
+// used). Actually, it seems to be working right now, but putting logical_immXX
+// here is a bit dodgy on the AsmParser side too.
+def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR,
+ logical_imm32:$imm), 0>;
+def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR,
+ logical_imm64:$imm), 0>;
+
+
+// (register)
+defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>;
+defm BICS : LogicalRegS<0b11, 1, "bics",
+ BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>;
+defm AND : LogicalReg<0b00, 0, "and", and>;
+defm BIC : LogicalReg<0b00, 1, "bic",
+ BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
+defm EON : LogicalReg<0b10, 1, "eon",
+ BinOpFrag<(not (xor node:$LHS, node:$RHS))>>;
+defm EOR : LogicalReg<0b10, 0, "eor", xor>;
+defm ORN : LogicalReg<0b01, 1, "orn",
+ BinOpFrag<(or node:$LHS, (not node:$RHS))>>;
+defm ORR : LogicalReg<0b01, 0, "orr", or>;
+
+def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>;
+def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>;
+
+def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>;
+def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>;
+
+def : InstAlias<"mvn $Wd, $Wm$sh",
+ (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>;
+def : InstAlias<"mvn $Xd, $Xm$sh",
+ (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>;
+
+def : InstAlias<"tst $src1, $src2",
+ (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>;
+def : InstAlias<"tst $src1, $src2",
+ (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>;
+
+def : InstAlias<"tst $src1, $src2",
+ (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>;
+def : InstAlias<"tst $src1, $src2",
+ (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>;
+
+def : InstAlias<"tst $src1, $src2$sh",
+ (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>;
+def : InstAlias<"tst $src1, $src2$sh",
+ (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>;
+
+
+def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>;
+def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>;
+
+
+//===----------------------------------------------------------------------===//
+// One operand data processing instructions.
+//===----------------------------------------------------------------------===//
+
+defm CLS : OneOperandData<0b101, "cls">;
+defm CLZ : OneOperandData<0b100, "clz", ctlz>;
+defm RBIT : OneOperandData<0b000, "rbit", bitreverse>;
+
+def REV16Wr : OneWRegData<0b001, "rev16",
+ UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
+def REV16Xr : OneXRegData<0b001, "rev16", null_frag>;
+
+def : Pat<(cttz GPR32:$Rn),
+ (CLZWr (RBITWr GPR32:$Rn))>;
+def : Pat<(cttz GPR64:$Rn),
+ (CLZXr (RBITXr GPR64:$Rn))>;
+def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)),
+ (i32 1))),
+ (CLSWr GPR32:$Rn)>;
+def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)),
+ (i64 1))),
+ (CLSXr GPR64:$Rn)>;
+
+// Unlike the other one-operand instructions, the instructions with the "rev"
+// mnemonic do *not* just differ in the size bit, but actually use different
+// opcode bits for the different sizes.
+def REVWr : OneWRegData<0b010, "rev", bswap>;
+def REVXr : OneXRegData<0b011, "rev", bswap>;
+def REV32Xr : OneXRegData<0b010, "rev32",
+ UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>;
+
+def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>;
+
+// The bswap commutes with the rotr so we want a pattern for both possible
+// orders.
+def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
+def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;
+
+//===----------------------------------------------------------------------===//
+// Bitfield immediate extraction instruction.
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0 in
+defm EXTR : ExtractImm<"extr">;
+def : InstAlias<"ror $dst, $src, $shift",
+ (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>;
+def : InstAlias<"ror $dst, $src, $shift",
+ (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>;
+
+def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)),
+ (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>;
+def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)),
+ (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>;
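+// E.g. a (rotr GPR32:$Rn, 8) node becomes EXTRWrri with both source operands
+// tied to $Rn and #8 as the extract position, matching the "ror" alias above.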
+
+//===----------------------------------------------------------------------===//
+// Other bitfield immediate instructions.
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0 in {
+defm BFM : BitfieldImmWith2RegArgs<0b01, "bfm">;
+defm SBFM : BitfieldImm<0b00, "sbfm">;
+defm UBFM : BitfieldImm<0b10, "ubfm">;
+}
+
+def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{
+ uint64_t enc = (32 - N->getZExtValue()) & 0x1f;
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
+}]>;
+
+def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{
+ uint64_t enc = 31 - N->getZExtValue();
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
+}]>;
+
+// min(7, 31 - shift_amt)
+def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
+ uint64_t enc = 31 - N->getZExtValue();
+ enc = enc > 7 ? 7 : enc;
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
+}]>;
+
+// min(15, 31 - shift_amt)
+def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
+ uint64_t enc = 31 - N->getZExtValue();
+ enc = enc > 15 ? 15 : enc;
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
+}]>;
+
+def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{
+ uint64_t enc = (64 - N->getZExtValue()) & 0x3f;
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
+}]>;
+
+def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{
+ uint64_t enc = 63 - N->getZExtValue();
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
+}]>;
+
+// min(7, 63 - shift_amt)
+def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
+ uint64_t enc = 63 - N->getZExtValue();
+ enc = enc > 7 ? 7 : enc;
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
+}]>;
+
+// min(15, 63 - shift_amt)
+def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
+ uint64_t enc = 63 - N->getZExtValue();
+ enc = enc > 15 ? 15 : enc;
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
+}]>;
+
+// min(31, 63 - shift_amt)
+def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{
+ uint64_t enc = 63 - N->getZExtValue();
+ enc = enc > 31 ? 31 : enc;
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
+}]>;
+
+def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)),
+ (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
+ (i64 (i32shift_b imm0_31:$imm)))>;
+def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)),
+ (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
+ (i64 (i64shift_b imm0_63:$imm)))>;
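+// Worked example of the transforms above: (shl w1, 3) is selected as
+// UBFMWri w0, w1, #29, #28 (i.e. "lsl w0, w1, #3"), since i32shift_a yields
+// (32 - 3) & 0x1f = 29 and i32shift_b yields 31 - 3 = 28.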
+
+let AddedComplexity = 10 in {
+def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)),
+ (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
+def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)),
+ (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
+}
+
+def : InstAlias<"asr $dst, $src, $shift",
+ (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
+def : InstAlias<"asr $dst, $src, $shift",
+ (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
+def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
+def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
+def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
+def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
+def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
+
+def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)),
+ (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
+def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)),
+ (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
+
+def : InstAlias<"lsr $dst, $src, $shift",
+ (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
+def : InstAlias<"lsr $dst, $src, $shift",
+ (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
+def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
+def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
+def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
+def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
+def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
+
+//===----------------------------------------------------------------------===//
+// Conditional comparison instructions.
+//===----------------------------------------------------------------------===//
+defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>;
+defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>;
+
+//===----------------------------------------------------------------------===//
+// Conditional select instructions.
+//===----------------------------------------------------------------------===//
+defm CSEL : CondSelect<0, 0b00, "csel">;
+
+def inc : PatFrag<(ops node:$in), (add node:$in, 1)>;
+defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>;
+defm CSINV : CondSelectOp<1, 0b00, "csinv", not>;
+defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>;
+
+def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
+ (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
+def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
+ (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
+def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
+ (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
+def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
+ (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
+def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
+ (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
+def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
+ (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
+
+def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV),
+ (CSINCWr WZR, WZR, (i32 imm:$cc))>;
+def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV),
+ (CSINCXr XZR, XZR, (i32 imm:$cc))>;
+def : Pat<(AArch64csel GPR32:$tval, (i32 1), (i32 imm:$cc), NZCV),
+ (CSINCWr GPR32:$tval, WZR, (i32 imm:$cc))>;
+def : Pat<(AArch64csel GPR64:$tval, (i64 1), (i32 imm:$cc), NZCV),
+ (CSINCXr GPR64:$tval, XZR, (i32 imm:$cc))>;
+def : Pat<(AArch64csel (i32 1), GPR32:$fval, (i32 imm:$cc), NZCV),
+ (CSINCWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
+def : Pat<(AArch64csel (i64 1), GPR64:$fval, (i32 imm:$cc), NZCV),
+ (CSINCXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;
+def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV),
+ (CSINVWr WZR, WZR, (i32 imm:$cc))>;
+def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV),
+ (CSINVXr XZR, XZR, (i32 imm:$cc))>;
+def : Pat<(AArch64csel GPR32:$tval, (i32 -1), (i32 imm:$cc), NZCV),
+ (CSINVWr GPR32:$tval, WZR, (i32 imm:$cc))>;
+def : Pat<(AArch64csel GPR64:$tval, (i64 -1), (i32 imm:$cc), NZCV),
+ (CSINVXr GPR64:$tval, XZR, (i32 imm:$cc))>;
+def : Pat<(AArch64csel (i32 -1), GPR32:$fval, (i32 imm:$cc), NZCV),
+ (CSINVWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
+def : Pat<(AArch64csel (i64 -1), GPR64:$fval, (i32 imm:$cc), NZCV),
+ (CSINVXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;
+
+// The aliased instruction uses the inverse of the condition code given in the
+// alias. The parser already inverts the condition code for these aliases.
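+// For example, "cset w0, eq" is assembled as "csinc w0, wzr, wzr, ne".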
+def : InstAlias<"cset $dst, $cc",
+ (CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
+def : InstAlias<"cset $dst, $cc",
+ (CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;
+
+def : InstAlias<"csetm $dst, $cc",
+ (CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
+def : InstAlias<"csetm $dst, $cc",
+ (CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;
+
+def : InstAlias<"cinc $dst, $src, $cc",
+ (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
+def : InstAlias<"cinc $dst, $src, $cc",
+ (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
+
+def : InstAlias<"cinv $dst, $src, $cc",
+ (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
+def : InstAlias<"cinv $dst, $src, $cc",
+ (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
+
+def : InstAlias<"cneg $dst, $src, $cc",
+ (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
+def : InstAlias<"cneg $dst, $src, $cc",
+ (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
+
+//===----------------------------------------------------------------------===//
+// PC-relative instructions.
+//===----------------------------------------------------------------------===//
+let isReMaterializable = 1 in {
+let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
+def ADR : ADRI<0, "adr", adrlabel, []>;
+} // hasSideEffects = 0
+
+def ADRP : ADRI<1, "adrp", adrplabel,
+ [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>;
+} // isReMaterializable = 1
+
+// page address of a constant pool entry, block address or external symbol
+def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>;
+def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>;
+def : Pat<(AArch64adrp texternalsym:$sym), (ADRP texternalsym:$sym)>;
+
+//===----------------------------------------------------------------------===//
+// Unconditional branch (register) instructions.
+//===----------------------------------------------------------------------===//
+
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+def RET : BranchReg<0b0010, "ret", []>;
+def DRPS : SpecialReturn<0b0101, "drps">;
+def ERET : SpecialReturn<0b0100, "eret">;
+} // isReturn = 1, isTerminator = 1, isBarrier = 1
+
+// Default to the LR register.
+def : InstAlias<"ret", (RET LR)>;
+
+let isCall = 1, Defs = [LR], Uses = [SP] in {
+def BLR : BranchReg<0b0001, "blr", [(AArch64call GPR64:$Rn)]>;
+} // isCall
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
+} // isBranch, isTerminator, isBarrier, isIndirectBranch
+
+// Create a separate pseudo-instruction for codegen to use so that we don't
+// flag lr as used in every function. It'll be restored before the RET by the
+// epilogue if it's legitimately used.
+def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retflag)]>,
+ Sched<[WriteBrReg]> {
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let isReturn = 1;
+}
+
+// This is a directive-like pseudo-instruction. The purpose is to insert an
+// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction
+// (which in the usual case is a BLR).
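+// A typical TLS-descriptor code sequence looks like this (illustrative):
+//   adrp  x0, :tlsdesc:var
+//   ldr   x1, [x0, :tlsdesc_lo12:var]
+//   add   x0, x0, :tlsdesc_lo12:var
+//   .tlsdesccall var
+//   blr   x1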
+let hasSideEffects = 1 in
+def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> {
+ let AsmString = ".tlsdesccall $sym";
+}
+
+// FIXME: maybe the scratch register used shouldn't be fixed to X1?
+// FIXME: can "hasSideEffects be dropped?
+let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1,
+ isCodeGenOnly = 1 in
+def TLSDESC_CALLSEQ
+ : Pseudo<(outs), (ins i64imm:$sym),
+ [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>,
+ Sched<[WriteI, WriteLD, WriteI, WriteBrReg]>;
+def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
+ (TLSDESC_CALLSEQ texternalsym:$sym)>;
+
+//===----------------------------------------------------------------------===//
+// Conditional branch (immediate) instruction.
+//===----------------------------------------------------------------------===//
+def Bcc : BranchCond;
+
+//===----------------------------------------------------------------------===//
+// Compare-and-branch instructions.
+//===----------------------------------------------------------------------===//
+defm CBZ : CmpBranch<0, "cbz", AArch64cbz>;
+defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>;
+
+//===----------------------------------------------------------------------===//
+// Test-bit-and-branch instructions.
+//===----------------------------------------------------------------------===//
+defm TBZ : TestBranch<0, "tbz", AArch64tbz>;
+defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>;
+
+//===----------------------------------------------------------------------===//
+// Unconditional branch (immediate) instructions.
+//===----------------------------------------------------------------------===//
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+def B : BranchImm<0, "b", [(br bb:$addr)]>;
+} // isBranch, isTerminator, isBarrier
+
+let isCall = 1, Defs = [LR], Uses = [SP] in {
+def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>;
+} // isCall
+def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>;
+
+//===----------------------------------------------------------------------===//
+// Exception generation instructions.
+//===----------------------------------------------------------------------===//
+let isTrap = 1 in {
+def BRK : ExceptionGeneration<0b001, 0b00, "brk">;
+}
+def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">;
+def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">;
+def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">;
+def HLT : ExceptionGeneration<0b010, 0b00, "hlt">;
+def HVC : ExceptionGeneration<0b000, 0b10, "hvc">;
+def SMC : ExceptionGeneration<0b000, 0b11, "smc">;
+def SVC : ExceptionGeneration<0b000, 0b01, "svc">;
+
+// DCPSn defaults to an immediate operand of zero if unspecified.
+def : InstAlias<"dcps1", (DCPS1 0)>;
+def : InstAlias<"dcps2", (DCPS2 0)>;
+def : InstAlias<"dcps3", (DCPS3 0)>;
+
+//===----------------------------------------------------------------------===//
+// Load instructions.
+//===----------------------------------------------------------------------===//
+
+// Pair (indexed, offset)
+defm LDPW : LoadPairOffset<0b00, 0, GPR32z, simm7s4, "ldp">;
+defm LDPX : LoadPairOffset<0b10, 0, GPR64z, simm7s8, "ldp">;
+defm LDPS : LoadPairOffset<0b00, 1, FPR32Op, simm7s4, "ldp">;
+defm LDPD : LoadPairOffset<0b01, 1, FPR64Op, simm7s8, "ldp">;
+defm LDPQ : LoadPairOffset<0b10, 1, FPR128Op, simm7s16, "ldp">;
+
+defm LDPSW : LoadPairOffset<0b01, 0, GPR64z, simm7s4, "ldpsw">;
+
+// Pair (pre-indexed)
+def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
+def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
+def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
+def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
+def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;
+
+def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;
+
+// Pair (post-indexed)
+def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
+def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
+def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
+def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
+def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;
+
+def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;
+
+
+// Pair (no allocate)
+defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32z, simm7s4, "ldnp">;
+defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64z, simm7s8, "ldnp">;
+defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32Op, simm7s4, "ldnp">;
+defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64Op, simm7s8, "ldnp">;
+defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128Op, simm7s16, "ldnp">;
+
+//---
+// (register offset)
+//---
+
+// Integer
+defm LDRBB : Load8RO<0b00, 0, 0b01, GPR32, "ldrb", i32, zextloadi8>;
+defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>;
+defm LDRW : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>;
+defm LDRX : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>;
+
+// Floating-point
+defm LDRB : Load8RO<0b00, 1, 0b01, FPR8Op, "ldr", untyped, load>;
+defm LDRH : Load16RO<0b01, 1, 0b01, FPR16Op, "ldr", f16, load>;
+defm LDRS : Load32RO<0b10, 1, 0b01, FPR32Op, "ldr", f32, load>;
+defm LDRD : Load64RO<0b11, 1, 0b01, FPR64Op, "ldr", f64, load>;
+defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128Op, "ldr", f128, load>;
+
+// Load sign-extended half-word
+defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>;
+defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>;
+
+// Load sign-extended byte
+defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>;
+defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>;
+
+// Load sign-extended word
+defm LDRSW : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>;
+
+// Pre-fetch.
+defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">;
+
+// Regular loads do not have any alignment requirement, so it is safe to map
+// vector loads directly onto these register-offset addressing modes.
+// FIXME: We could do the same for bitconvert to floating point vectors.
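+// For example, (v8i8 (scalar_to_vector (i32 (extloadi8 addr)))) can be
+// selected directly to an LDRBro* load into the bsub lane of an
+// IMPLICIT_DEF vector, as the ScalToVecROLoadPat instantiations below show.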
+multiclass ScalToVecROLoadPat<ROAddrMode ro, SDPatternOperator loadop,
+ ValueType ScalTy, ValueType VecTy,
+ Instruction LOADW, Instruction LOADX,
+ SubRegIndex sub> {
+ def : Pat<(VecTy (scalar_to_vector (ScalTy
+ (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))),
+ (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
+ (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset),
+ sub)>;
+
+ def : Pat<(VecTy (scalar_to_vector (ScalTy
+ (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))),
+ (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
+ (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset),
+ sub)>;
+}
+
+let AddedComplexity = 10 in {
+defm : ScalToVecROLoadPat<ro8, extloadi8, i32, v8i8, LDRBroW, LDRBroX, bsub>;
+defm : ScalToVecROLoadPat<ro8, extloadi8, i32, v16i8, LDRBroW, LDRBroX, bsub>;
+
+defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v4i16, LDRHroW, LDRHroX, hsub>;
+defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v8i16, LDRHroW, LDRHroX, hsub>;
+
+defm : ScalToVecROLoadPat<ro16, load, i32, v4f16, LDRHroW, LDRHroX, hsub>;
+defm : ScalToVecROLoadPat<ro16, load, i32, v8f16, LDRHroW, LDRHroX, hsub>;
+
+defm : ScalToVecROLoadPat<ro32, load, i32, v2i32, LDRSroW, LDRSroX, ssub>;
+defm : ScalToVecROLoadPat<ro32, load, i32, v4i32, LDRSroW, LDRSroX, ssub>;
+
+defm : ScalToVecROLoadPat<ro32, load, f32, v2f32, LDRSroW, LDRSroX, ssub>;
+defm : ScalToVecROLoadPat<ro32, load, f32, v4f32, LDRSroW, LDRSroX, ssub>;
+
+defm : ScalToVecROLoadPat<ro64, load, i64, v2i64, LDRDroW, LDRDroX, dsub>;
+
+defm : ScalToVecROLoadPat<ro64, load, f64, v2f64, LDRDroW, LDRDroX, dsub>;
+
+
+def : Pat <(v1i64 (scalar_to_vector (i64
+ (load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend64:$extend))))),
+ (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
+
+def : Pat <(v1i64 (scalar_to_vector (i64
+ (load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend64:$extend))))),
+ (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
+}
+
+// Match all 64-bit-wide loads whose type is compatible with FPR64
+multiclass VecROLoadPat<ROAddrMode ro, ValueType VecTy,
+ Instruction LOADW, Instruction LOADX> {
+
+ def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
+ (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
+
+ def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
+ (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
+}
+
+let AddedComplexity = 10 in {
+let Predicates = [IsLE] in {
+ // We must do vector loads with LD1 in big-endian.
+ defm : VecROLoadPat<ro64, v2i32, LDRDroW, LDRDroX>;
+ defm : VecROLoadPat<ro64, v2f32, LDRDroW, LDRDroX>;
+ defm : VecROLoadPat<ro64, v8i8, LDRDroW, LDRDroX>;
+ defm : VecROLoadPat<ro64, v4i16, LDRDroW, LDRDroX>;
+ defm : VecROLoadPat<ro64, v4f16, LDRDroW, LDRDroX>;
+}
+
+defm : VecROLoadPat<ro64, v1i64, LDRDroW, LDRDroX>;
+defm : VecROLoadPat<ro64, v1f64, LDRDroW, LDRDroX>;
+
+// Match all 128-bit-wide loads whose type is compatible with FPR128
+let Predicates = [IsLE] in {
+ // We must do vector loads with LD1 in big-endian.
+ defm : VecROLoadPat<ro128, v2i64, LDRQroW, LDRQroX>;
+ defm : VecROLoadPat<ro128, v2f64, LDRQroW, LDRQroX>;
+ defm : VecROLoadPat<ro128, v4i32, LDRQroW, LDRQroX>;
+ defm : VecROLoadPat<ro128, v4f32, LDRQroW, LDRQroX>;
+ defm : VecROLoadPat<ro128, v8i16, LDRQroW, LDRQroX>;
+ defm : VecROLoadPat<ro128, v8f16, LDRQroW, LDRQroX>;
+ defm : VecROLoadPat<ro128, v16i8, LDRQroW, LDRQroX>;
+}
+} // AddedComplexity = 10
+
+// zextload -> i64
+multiclass ExtLoadTo64ROPat<ROAddrMode ro, SDPatternOperator loadop,
+ Instruction INSTW, Instruction INSTX> {
+ def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
+ (SUBREG_TO_REG (i64 0),
+ (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
+ sub_32)>;
+
+ def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
+ (SUBREG_TO_REG (i64 0),
+ (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
+ sub_32)>;
+}
+
+let AddedComplexity = 10 in {
+ defm : ExtLoadTo64ROPat<ro8, zextloadi8, LDRBBroW, LDRBBroX>;
+ defm : ExtLoadTo64ROPat<ro16, zextloadi16, LDRHHroW, LDRHHroX>;
+ defm : ExtLoadTo64ROPat<ro32, zextloadi32, LDRWroW, LDRWroX>;
+
+ // zextloadi1 -> zextloadi8
+ defm : ExtLoadTo64ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>;
+
+ // extload -> zextload
+ defm : ExtLoadTo64ROPat<ro8, extloadi8, LDRBBroW, LDRBBroX>;
+ defm : ExtLoadTo64ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>;
+ defm : ExtLoadTo64ROPat<ro32, extloadi32, LDRWroW, LDRWroX>;
+
+ // extloadi1 -> zextloadi8
+ defm : ExtLoadTo64ROPat<ro8, extloadi1, LDRBBroW, LDRBBroX>;
+}
+
+
+// extload/zextload -> i32
+multiclass ExtLoadTo32ROPat<ROAddrMode ro, SDPatternOperator loadop,
+ Instruction INSTW, Instruction INSTX> {
+ def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
+ (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
+
+ def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
+ (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
+
+}
+
+let AddedComplexity = 10 in {
+ // extload -> zextload
+ defm : ExtLoadTo32ROPat<ro8, extloadi8, LDRBBroW, LDRBBroX>;
+ defm : ExtLoadTo32ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>;
+ defm : ExtLoadTo32ROPat<ro32, extloadi32, LDRWroW, LDRWroX>;
+
+ // zextloadi1 -> zextloadi8
+ defm : ExtLoadTo32ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>;
+}
+
+//---
+// (unsigned immediate)
+//---
+defm LDRX : LoadUI<0b11, 0, 0b01, GPR64z, uimm12s8, "ldr",
+ [(set GPR64z:$Rt,
+ (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
+defm LDRW : LoadUI<0b10, 0, 0b01, GPR32z, uimm12s4, "ldr",
+ [(set GPR32z:$Rt,
+ (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
+defm LDRB : LoadUI<0b00, 1, 0b01, FPR8Op, uimm12s1, "ldr",
+ [(set FPR8Op:$Rt,
+ (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>;
+defm LDRH : LoadUI<0b01, 1, 0b01, FPR16Op, uimm12s2, "ldr",
+ [(set (f16 FPR16Op:$Rt),
+ (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>;
+defm LDRS : LoadUI<0b10, 1, 0b01, FPR32Op, uimm12s4, "ldr",
+ [(set (f32 FPR32Op:$Rt),
+ (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
+defm LDRD : LoadUI<0b11, 1, 0b01, FPR64Op, uimm12s8, "ldr",
+ [(set (f64 FPR64Op:$Rt),
+ (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
+defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128Op, uimm12s16, "ldr",
+ [(set (f128 FPR128Op:$Rt),
+ (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>;
+
+// Regular loads do not have any alignment requirement, so it is safe to map
+// vector loads directly onto these unsigned-immediate addressing modes.
+// FIXME: We could do the same for bitconvert to floating point vectors.
+def : Pat <(v8i8 (scalar_to_vector (i32
+ (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
+ (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
+ (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
+def : Pat <(v16i8 (scalar_to_vector (i32
+ (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
+def : Pat <(v4i16 (scalar_to_vector (i32
+ (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
+ (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
+ (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
+def : Pat <(v8i16 (scalar_to_vector (i32
+ (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
+ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
+ (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
+def : Pat <(v2i32 (scalar_to_vector (i32
+ (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
+ (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
+ (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
+def : Pat <(v4i32 (scalar_to_vector (i32
+ (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+ (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
+def : Pat <(v1i64 (scalar_to_vector (i64
+ (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
+ (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
+def : Pat <(v2i64 (scalar_to_vector (i64
+ (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
+ (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
+ (LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>;
+
+// Match all 64-bit-wide loads whose type is compatible with FPR64
+let Predicates = [IsLE] in {
+ // We must use LD1 to perform vector loads in big-endian.
+ def : Pat<(v2f32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
+ (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
+ def : Pat<(v8i8 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
+ (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
+ def : Pat<(v4i16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
+ (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
+ def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
+ (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
+ def : Pat<(v4f16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
+ (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
+}
+def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
+ (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
+def : Pat<(v1i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
+ (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
+
+// Match all 128-bit-wide loads whose type is compatible with FPR128
+let Predicates = [IsLE] in {
+ // We must use LD1 to perform vector loads in big-endian.
+ def : Pat<(v4f32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
+ (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
+ def : Pat<(v2f64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
+ (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
+ def : Pat<(v16i8 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
+ (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
+ def : Pat<(v8i16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
+ (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
+ def : Pat<(v4i32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
+ (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
+ def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
+ (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
+ def : Pat<(v8f16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
+ (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
+}
+def : Pat<(f128 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
+ (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
+
+defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh",
+ [(set GPR32:$Rt,
+ (zextloadi16 (am_indexed16 GPR64sp:$Rn,
+ uimm12s2:$offset)))]>;
+defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb",
+ [(set GPR32:$Rt,
+ (zextloadi8 (am_indexed8 GPR64sp:$Rn,
+ uimm12s1:$offset)))]>;
+// zextload -> i64
+def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
+def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;
+
+// zextloadi1 -> zextloadi8
+def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
+ (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
+def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
+
+// extload -> zextload
+def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
+ (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>;
+def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
+ (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
+def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
+ (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
+def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
+def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;
+def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
+def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
+
+// load sign-extended half-word
+defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh",
+ [(set GPR32:$Rt,
+ (sextloadi16 (am_indexed16 GPR64sp:$Rn,
+ uimm12s2:$offset)))]>;
+defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh",
+ [(set GPR64:$Rt,
+ (sextloadi16 (am_indexed16 GPR64sp:$Rn,
+ uimm12s2:$offset)))]>;
+
+// load sign-extended byte
+defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb",
+ [(set GPR32:$Rt,
+ (sextloadi8 (am_indexed8 GPR64sp:$Rn,
+ uimm12s1:$offset)))]>;
+defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb",
+ [(set GPR64:$Rt,
+ (sextloadi8 (am_indexed8 GPR64sp:$Rn,
+ uimm12s1:$offset)))]>;
+
+// load sign-extended word
+defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
+ [(set GPR64:$Rt,
+ (sextloadi32 (am_indexed32 GPR64sp:$Rn,
+ uimm12s4:$offset)))]>;
+
+// load zero-extended word
+def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
+
+// Pre-fetch.
+def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
+ [(AArch64Prefetch imm:$Rt,
+ (am_indexed64 GPR64sp:$Rn,
+ uimm12s8:$offset))]>;
+
+def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>;
+
+//---
+// (literal)
+def LDRWl : LoadLiteral<0b00, 0, GPR32z, "ldr">;
+def LDRXl : LoadLiteral<0b01, 0, GPR64z, "ldr">;
+def LDRSl : LoadLiteral<0b00, 1, FPR32Op, "ldr">;
+def LDRDl : LoadLiteral<0b01, 1, FPR64Op, "ldr">;
+def LDRQl : LoadLiteral<0b10, 1, FPR128Op, "ldr">;
+
+// load sign-extended word
+def LDRSWl : LoadLiteral<0b10, 0, GPR64z, "ldrsw">;
+
+// prefetch
+def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>;
+// [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>;
+
+//---
+// (unscaled immediate)
+defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64z, "ldur",
+ [(set GPR64z:$Rt,
+ (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
+defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32z, "ldur",
+ [(set GPR32z:$Rt,
+ (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
+defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8Op, "ldur",
+ [(set FPR8Op:$Rt,
+ (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
+defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16Op, "ldur",
+ [(set FPR16Op:$Rt,
+ (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
+defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32Op, "ldur",
+ [(set (f32 FPR32Op:$Rt),
+ (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
+defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64Op, "ldur",
+ [(set (f64 FPR64Op:$Rt),
+ (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
+defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128Op, "ldur",
+ [(set (f128 FPR128Op:$Rt),
+ (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset)))]>;
+
+defm LDURHH
+ : LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh",
+ [(set GPR32:$Rt,
+ (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
+defm LDURBB
+ : LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb",
+ [(set GPR32:$Rt,
+                    (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
+
+// Match all 64-bit-wide loads whose type is compatible with FPR64
+let Predicates = [IsLE] in {
+ def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
+ (LDURDi GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
+ (LDURDi GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
+ (LDURDi GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
+ (LDURDi GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(v4f16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
+ (LDURDi GPR64sp:$Rn, simm9:$offset)>;
+}
+def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
+ (LDURDi GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
+ (LDURDi GPR64sp:$Rn, simm9:$offset)>;
+
+// Match all 128-bit-wide loads whose type is compatible with FPR128
+let Predicates = [IsLE] in {
+ def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
+ (LDURQi GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
+ (LDURQi GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(v4f32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
+ (LDURQi GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
+ (LDURQi GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
+ (LDURQi GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
+ (LDURQi GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(v8f16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
+ (LDURQi GPR64sp:$Rn, simm9:$offset)>;
+}
+
+// anyext -> zext
+def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
+ (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+ (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+ (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+// unscaled zext
+def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
+ (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+ (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+ (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+
+
+//---
+// LDR mnemonics fall back to LDUR for negative or unaligned offsets.
+
+// Define new assembler match classes as we want to only match these when
+// they don't otherwise match the scaled addressing mode for LDR/STR. Don't
+// associate a DiagnosticType either, as we want the diagnostic for the
+// canonical form (the scaled operand) to take precedence.
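+// For example, "ldr x0, [x1, #-8]" cannot use the scaled uimm12 form (the
+// offset is negative), so it matches these fallback operands and is encoded
+// as LDUR.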
+class SImm9OffsetOperand<int Width> : AsmOperandClass {
+ let Name = "SImm9OffsetFB" # Width;
+ let PredicateMethod = "isSImm9OffsetFB<" # Width # ">";
+ let RenderMethod = "addImmOperands";
+}
+
+def SImm9OffsetFB8Operand : SImm9OffsetOperand<8>;
+def SImm9OffsetFB16Operand : SImm9OffsetOperand<16>;
+def SImm9OffsetFB32Operand : SImm9OffsetOperand<32>;
+def SImm9OffsetFB64Operand : SImm9OffsetOperand<64>;
+def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>;
+
+def simm9_offset_fb8 : Operand<i64> {
+ let ParserMatchClass = SImm9OffsetFB8Operand;
+}
+def simm9_offset_fb16 : Operand<i64> {
+ let ParserMatchClass = SImm9OffsetFB16Operand;
+}
+def simm9_offset_fb32 : Operand<i64> {
+ let ParserMatchClass = SImm9OffsetFB32Operand;
+}
+def simm9_offset_fb64 : Operand<i64> {
+ let ParserMatchClass = SImm9OffsetFB64Operand;
+}
+def simm9_offset_fb128 : Operand<i64> {
+ let ParserMatchClass = SImm9OffsetFB128Operand;
+}
+
+def : InstAlias<"ldr $Rt, [$Rn, $offset]",
+ (LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
+def : InstAlias<"ldr $Rt, [$Rn, $offset]",
+ (LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
+def : InstAlias<"ldr $Rt, [$Rn, $offset]",
+ (LDURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
+def : InstAlias<"ldr $Rt, [$Rn, $offset]",
+ (LDURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
+def : InstAlias<"ldr $Rt, [$Rn, $offset]",
+ (LDURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
+def : InstAlias<"ldr $Rt, [$Rn, $offset]",
+ (LDURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
+def : InstAlias<"ldr $Rt, [$Rn, $offset]",
+ (LDURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;
+
+// zextload -> i64
+def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+
+// load sign-extended half-word
+defm LDURSHW
+ : LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh",
+ [(set GPR32:$Rt,
+ (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
+defm LDURSHX
+ : LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh",
+ [(set GPR64:$Rt,
+ (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
+
+// load sign-extended byte
+defm LDURSBW
+ : LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb",
+ [(set GPR32:$Rt,
+ (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
+defm LDURSBX
+ : LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb",
+ [(set GPR64:$Rt,
+ (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
+
+// load sign-extended word
+defm LDURSW
+ : LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw",
+ [(set GPR64:$Rt,
+ (sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
+
+// Zero- and sign-extending aliases from generic LDR* mnemonics to LDUR*.
+def : InstAlias<"ldrb $Rt, [$Rn, $offset]",
+ (LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
+def : InstAlias<"ldrh $Rt, [$Rn, $offset]",
+ (LDURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
+def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
+ (LDURSBWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
+def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
+ (LDURSBXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
+def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
+ (LDURSHWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
+def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
+ (LDURSHXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
+def : InstAlias<"ldrsw $Rt, [$Rn, $offset]",
+ (LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
+
+// Pre-fetch.
+defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum",
+ [(AArch64Prefetch imm:$Rt,
+ (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
+
+//---
+// (unscaled immediate, unprivileged)
+defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">;
+defm LDTRW : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">;
+
+defm LDTRH : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">;
+defm LDTRB : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">;
+
+// load sign-extended half-word
+defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">;
+defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">;
+
+// load sign-extended byte
+defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">;
+defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">;
+
+// load sign-extended word
+defm LDTRSW : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">;
+
+//---
+// (immediate pre-indexed)
+def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32z, "ldr">;
+def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64z, "ldr">;
+def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8Op, "ldr">;
+def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
+def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
+def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
+def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128Op, "ldr">;
+
+// load sign-extended half-word
+def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
+def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;
+
+// load sign-extended byte
+def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
+def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;
+
+// load zero-extended byte and half-word
+def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
+def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32z, "ldrh">;
+
+// load sign-extended word
+def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;
+
+//---
+// (immediate post-indexed)
+def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32z, "ldr">;
+def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64z, "ldr">;
+def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8Op, "ldr">;
+def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
+def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
+def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
+def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128Op, "ldr">;
+
+// load sign-extended half-word
+def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
+def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;
+
+// load sign-extended byte
+def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
+def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;
+
+// load zero-extended byte and half-word
+def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
+def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32z, "ldrh">;
+
+// load sign-extended word
+def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;
+
+//===----------------------------------------------------------------------===//
+// Store instructions.
+//===----------------------------------------------------------------------===//
+
+// Pair (indexed, offset)
+// FIXME: Use dedicated range-checked addressing mode operand here.
+defm STPW : StorePairOffset<0b00, 0, GPR32z, simm7s4, "stp">;
+defm STPX : StorePairOffset<0b10, 0, GPR64z, simm7s8, "stp">;
+defm STPS : StorePairOffset<0b00, 1, FPR32Op, simm7s4, "stp">;
+defm STPD : StorePairOffset<0b01, 1, FPR64Op, simm7s8, "stp">;
+defm STPQ : StorePairOffset<0b10, 1, FPR128Op, simm7s16, "stp">;
+
+// Pair (pre-indexed)
+def STPWpre : StorePairPreIdx<0b00, 0, GPR32z, simm7s4, "stp">;
+def STPXpre : StorePairPreIdx<0b10, 0, GPR64z, simm7s8, "stp">;
+def STPSpre : StorePairPreIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
+def STPDpre : StorePairPreIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
+def STPQpre : StorePairPreIdx<0b10, 1, FPR128Op, simm7s16, "stp">;
+
+// Pair (post-indexed)
+def STPWpost : StorePairPostIdx<0b00, 0, GPR32z, simm7s4, "stp">;
+def STPXpost : StorePairPostIdx<0b10, 0, GPR64z, simm7s8, "stp">;
+def STPSpost : StorePairPostIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
+def STPDpost : StorePairPostIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
+def STPQpost : StorePairPostIdx<0b10, 1, FPR128Op, simm7s16, "stp">;
+
+// Pair (no allocate)
+defm STNPW : StorePairNoAlloc<0b00, 0, GPR32z, simm7s4, "stnp">;
+defm STNPX : StorePairNoAlloc<0b10, 0, GPR64z, simm7s8, "stnp">;
+defm STNPS : StorePairNoAlloc<0b00, 1, FPR32Op, simm7s4, "stnp">;
+defm STNPD : StorePairNoAlloc<0b01, 1, FPR64Op, simm7s8, "stnp">;
+defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128Op, simm7s16, "stnp">;
+
+//---
+// (Register offset)
+
+// Integer
+defm STRBB : Store8RO< 0b00, 0, 0b00, GPR32, "strb", i32, truncstorei8>;
+defm STRHH : Store16RO<0b01, 0, 0b00, GPR32, "strh", i32, truncstorei16>;
+defm STRW : Store32RO<0b10, 0, 0b00, GPR32, "str", i32, store>;
+defm STRX : Store64RO<0b11, 0, 0b00, GPR64, "str", i64, store>;
+
+
+// Floating-point
+defm STRB : Store8RO< 0b00, 1, 0b00, FPR8Op, "str", untyped, store>;
+defm STRH : Store16RO<0b01, 1, 0b00, FPR16Op, "str", f16, store>;
+defm STRS : Store32RO<0b10, 1, 0b00, FPR32Op, "str", f32, store>;
+defm STRD : Store64RO<0b11, 1, 0b00, FPR64Op, "str", f64, store>;
+defm STRQ : Store128RO<0b00, 1, 0b10, FPR128Op, "str", f128, store>;
+
+let Predicates = [UseSTRQro], AddedComplexity = 10 in {
+ def : Pat<(store (f128 FPR128:$Rt),
+ (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend128:$extend)),
+ (STRQroW FPR128:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend)>;
+ def : Pat<(store (f128 FPR128:$Rt),
+ (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend128:$extend)),
+            (STRQroX FPR128:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend)>;
+}
+
+multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop,
+ Instruction STRW, Instruction STRX> {
+
+ def : Pat<(storeop GPR64:$Rt,
+ (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
+ (STRW (EXTRACT_SUBREG GPR64:$Rt, sub_32),
+ GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
+
+ def : Pat<(storeop GPR64:$Rt,
+ (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
+ (STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32),
+ GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
+}
+
+let AddedComplexity = 10 in {
+ // truncstore i64
+ defm : TruncStoreFrom64ROPat<ro8, truncstorei8, STRBBroW, STRBBroX>;
+ defm : TruncStoreFrom64ROPat<ro16, truncstorei16, STRHHroW, STRHHroX>;
+ defm : TruncStoreFrom64ROPat<ro32, truncstorei32, STRWroW, STRWroX>;
+}
+
+multiclass VecROStorePat<ROAddrMode ro, ValueType VecTy, RegisterClass FPR,
+ Instruction STRW, Instruction STRX> {
+ def : Pat<(store (VecTy FPR:$Rt),
+ (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
+ (STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
+
+ def : Pat<(store (VecTy FPR:$Rt),
+ (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
+ (STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
+}
+
+let AddedComplexity = 10 in {
+// Match all 64-bit-wide stores whose type is compatible with FPR64
+let Predicates = [IsLE] in {
+ // We must use ST1 to store vectors in big-endian.
+ defm : VecROStorePat<ro64, v2i32, FPR64, STRDroW, STRDroX>;
+ defm : VecROStorePat<ro64, v2f32, FPR64, STRDroW, STRDroX>;
+ defm : VecROStorePat<ro64, v4i16, FPR64, STRDroW, STRDroX>;
+ defm : VecROStorePat<ro64, v8i8, FPR64, STRDroW, STRDroX>;
+ defm : VecROStorePat<ro64, v4f16, FPR64, STRDroW, STRDroX>;
+}
+
+defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>;
+defm : VecROStorePat<ro64, v1f64, FPR64, STRDroW, STRDroX>;
+
+// Match all 128-bit-wide stores whose type is compatible with FPR128
+let Predicates = [IsLE, UseSTRQro] in {
+ // We must use ST1 to store vectors in big-endian.
+ defm : VecROStorePat<ro128, v2i64, FPR128, STRQroW, STRQroX>;
+ defm : VecROStorePat<ro128, v2f64, FPR128, STRQroW, STRQroX>;
+ defm : VecROStorePat<ro128, v4i32, FPR128, STRQroW, STRQroX>;
+ defm : VecROStorePat<ro128, v4f32, FPR128, STRQroW, STRQroX>;
+ defm : VecROStorePat<ro128, v8i16, FPR128, STRQroW, STRQroX>;
+ defm : VecROStorePat<ro128, v16i8, FPR128, STRQroW, STRQroX>;
+ defm : VecROStorePat<ro128, v8f16, FPR128, STRQroW, STRQroX>;
+}
+} // AddedComplexity = 10
+
+// Match stores from lane 0 to the appropriate subreg's store.
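+// For example, storing lane 0 of a v4i32 can be selected to an STRSro* of the
+// ssub sub-register, avoiding an explicit lane extract.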
+multiclass VecROStoreLane0Pat<ROAddrMode ro, SDPatternOperator storeop,
+ ValueType VecTy, ValueType STy,
+ SubRegIndex SubRegIdx,
+ Instruction STRW, Instruction STRX> {
+
+ def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
+ (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
+ (STRW (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
+ GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
+
+ def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
+ (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
+ (STRX (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
+ GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
+}
+
+let AddedComplexity = 19 in {
+ defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, hsub, STRHroW, STRHroX>;
+ defm : VecROStoreLane0Pat<ro16, store, v8f16, f16, hsub, STRHroW, STRHroX>;
+ defm : VecROStoreLane0Pat<ro32, store, v4i32, i32, ssub, STRSroW, STRSroX>;
+ defm : VecROStoreLane0Pat<ro32, store, v4f32, f32, ssub, STRSroW, STRSroX>;
+ defm : VecROStoreLane0Pat<ro64, store, v2i64, i64, dsub, STRDroW, STRDroX>;
+ defm : VecROStoreLane0Pat<ro64, store, v2f64, f64, dsub, STRDroW, STRDroX>;
+}
+
+//---
+// (unsigned immediate)
+defm STRX : StoreUIz<0b11, 0, 0b00, GPR64z, uimm12s8, "str",
+ [(store GPR64z:$Rt,
+ (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
+defm STRW : StoreUIz<0b10, 0, 0b00, GPR32z, uimm12s4, "str",
+ [(store GPR32z:$Rt,
+ (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
+defm STRB : StoreUI<0b00, 1, 0b00, FPR8Op, uimm12s1, "str",
+ [(store FPR8Op:$Rt,
+ (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>;
+defm STRH : StoreUI<0b01, 1, 0b00, FPR16Op, uimm12s2, "str",
+ [(store (f16 FPR16Op:$Rt),
+ (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>;
+defm STRS : StoreUI<0b10, 1, 0b00, FPR32Op, uimm12s4, "str",
+ [(store (f32 FPR32Op:$Rt),
+ (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
+defm STRD : StoreUI<0b11, 1, 0b00, FPR64Op, uimm12s8, "str",
+ [(store (f64 FPR64Op:$Rt),
+ (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
+defm STRQ : StoreUI<0b00, 1, 0b10, FPR128Op, uimm12s16, "str", []>;
+
+defm STRHH : StoreUIz<0b01, 0, 0b00, GPR32z, uimm12s2, "strh",
+ [(truncstorei16 GPR32z:$Rt,
+ (am_indexed16 GPR64sp:$Rn,
+ uimm12s2:$offset))]>;
+defm STRBB : StoreUIz<0b00, 0, 0b00, GPR32z, uimm12s1, "strb",
+ [(truncstorei8 GPR32z:$Rt,
+ (am_indexed8 GPR64sp:$Rn,
+ uimm12s1:$offset))]>;
+
+let AddedComplexity = 10 in {
+
+// Match all 64-bit-wide stores whose type is compatible with FPR64
+def : Pat<(store (v1i64 FPR64:$Rt),
+ (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
+ (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
+def : Pat<(store (v1f64 FPR64:$Rt),
+ (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
+ (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
+
+let Predicates = [IsLE] in {
+ // We must use ST1 to store vectors in big-endian.
+ def : Pat<(store (v2f32 FPR64:$Rt),
+ (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
+ (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
+ def : Pat<(store (v8i8 FPR64:$Rt),
+ (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
+ (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
+ def : Pat<(store (v4i16 FPR64:$Rt),
+ (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
+ (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
+ def : Pat<(store (v2i32 FPR64:$Rt),
+ (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
+ (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
+ def : Pat<(store (v4f16 FPR64:$Rt),
+ (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
+ (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
+}
+
+// Match all 128-bit-wide stores whose type is compatible with FPR128
+def : Pat<(store (f128 FPR128:$Rt),
+ (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
+ (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
+
+let Predicates = [IsLE] in {
+ // We must use ST1 to store vectors in big-endian.
+ def : Pat<(store (v4f32 FPR128:$Rt),
+ (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
+ (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
+ def : Pat<(store (v2f64 FPR128:$Rt),
+ (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
+ (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
+ def : Pat<(store (v16i8 FPR128:$Rt),
+ (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
+ (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
+ def : Pat<(store (v8i16 FPR128:$Rt),
+ (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
+ (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
+ def : Pat<(store (v4i32 FPR128:$Rt),
+ (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
+ (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
+ def : Pat<(store (v2i64 FPR128:$Rt),
+ (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
+ (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
+ def : Pat<(store (v8f16 FPR128:$Rt),
+ (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
+ (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
+}
+
+// truncstore i64
+def : Pat<(truncstorei32 GPR64:$Rt,
+ (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)),
+ (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s4:$offset)>;
+def : Pat<(truncstorei16 GPR64:$Rt,
+ (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
+ (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s2:$offset)>;
+def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)),
+ (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s1:$offset)>;
+
+} // AddedComplexity = 10
+
+// Match stores from lane 0 to the appropriate subreg's store.
+multiclass VecStoreLane0Pat<Operand UIAddrMode, SDPatternOperator storeop,
+ ValueType VTy, ValueType STy,
+ SubRegIndex SubRegIdx, Operand IndexType,
+ Instruction STR> {
+ def : Pat<(storeop (STy (vector_extract (VTy VecListOne128:$Vt), 0)),
+ (UIAddrMode GPR64sp:$Rn, IndexType:$offset)),
+ (STR (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
+ GPR64sp:$Rn, IndexType:$offset)>;
+}
+
+let AddedComplexity = 19 in {
+ defm : VecStoreLane0Pat<am_indexed16, truncstorei16, v8i16, i32, hsub, uimm12s2, STRHui>;
+ defm : VecStoreLane0Pat<am_indexed16, store, v8f16, f16, hsub, uimm12s2, STRHui>;
+ defm : VecStoreLane0Pat<am_indexed32, store, v4i32, i32, ssub, uimm12s4, STRSui>;
+ defm : VecStoreLane0Pat<am_indexed32, store, v4f32, f32, ssub, uimm12s4, STRSui>;
+ defm : VecStoreLane0Pat<am_indexed64, store, v2i64, i64, dsub, uimm12s8, STRDui>;
+ defm : VecStoreLane0Pat<am_indexed64, store, v2f64, f64, dsub, uimm12s8, STRDui>;
+}
+
+//---
+// (unscaled immediate)
+defm STURX : StoreUnscaled<0b11, 0, 0b00, GPR64z, "stur",
+ [(store GPR64z:$Rt,
+ (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
+defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32z, "stur",
+ [(store GPR32z:$Rt,
+ (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
+defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8Op, "stur",
+ [(store FPR8Op:$Rt,
+ (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
+defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16Op, "stur",
+ [(store (f16 FPR16Op:$Rt),
+ (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
+defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32Op, "stur",
+ [(store (f32 FPR32Op:$Rt),
+ (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
+defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64Op, "stur",
+ [(store (f64 FPR64Op:$Rt),
+ (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
+defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128Op, "stur",
+ [(store (f128 FPR128Op:$Rt),
+ (am_unscaled128 GPR64sp:$Rn, simm9:$offset))]>;
+defm STURHH : StoreUnscaled<0b01, 0, 0b00, GPR32z, "sturh",
+ [(truncstorei16 GPR32z:$Rt,
+ (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
+defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32z, "sturb",
+ [(truncstorei8 GPR32z:$Rt,
+ (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
+
+// Armv8.4 LDAPR & STLR with Immediate Offset instructions
+let Predicates = [HasV8_4a] in {
+defm STLURB : BaseStoreUnscaleV84<"stlurb", 0b00, 0b00, GPR32>;
+defm STLURH : BaseStoreUnscaleV84<"stlurh", 0b01, 0b00, GPR32>;
+defm STLURW : BaseStoreUnscaleV84<"stlur", 0b10, 0b00, GPR32>;
+defm STLURX : BaseStoreUnscaleV84<"stlur", 0b11, 0b00, GPR64>;
+defm LDAPURB : BaseLoadUnscaleV84<"ldapurb", 0b00, 0b01, GPR32>;
+defm LDAPURSBW : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b11, GPR32>;
+defm LDAPURSBX : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b10, GPR64>;
+defm LDAPURH : BaseLoadUnscaleV84<"ldapurh", 0b01, 0b01, GPR32>;
+defm LDAPURSHW : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b11, GPR32>;
+defm LDAPURSHX : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b10, GPR64>;
+defm LDAPUR : BaseLoadUnscaleV84<"ldapur", 0b10, 0b01, GPR32>;
+defm LDAPURSW : BaseLoadUnscaleV84<"ldapursw", 0b10, 0b10, GPR64>;
+defm LDAPURX : BaseLoadUnscaleV84<"ldapur", 0b11, 0b01, GPR64>;
+}
+
+// Match all 64-bit-wide stores whose type is compatible with FPR64
+def : Pat<(store (v1f64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
+ (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(store (v1i64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
+ (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+
+let AddedComplexity = 10 in {
+
+let Predicates = [IsLE] in {
+ // We must use ST1 to store vectors in big-endian.
+ def : Pat<(store (v2f32 FPR64:$Rt),
+ (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
+ (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(store (v8i8 FPR64:$Rt),
+ (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
+ (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(store (v4i16 FPR64:$Rt),
+ (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
+ (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(store (v2i32 FPR64:$Rt),
+ (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
+ (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(store (v4f16 FPR64:$Rt),
+ (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
+ (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+}
+
+// Match all 128-bit-wide stores whose type is compatible with FPR128
+def : Pat<(store (f128 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
+ (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+
+let Predicates = [IsLE] in {
+ // We must use ST1 to store vectors in big-endian.
+ def : Pat<(store (v4f32 FPR128:$Rt),
+ (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
+ (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(store (v2f64 FPR128:$Rt),
+ (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
+ (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(store (v16i8 FPR128:$Rt),
+ (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
+ (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(store (v8i16 FPR128:$Rt),
+ (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
+ (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(store (v4i32 FPR128:$Rt),
+ (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
+ (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(store (v2i64 FPR128:$Rt),
+ (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
+ (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(store (v8f16 FPR128:$Rt),
+ (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
+ (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+}
+
+} // AddedComplexity = 10
+
+// unscaled i64 truncating stores
+def : Pat<(truncstorei32 GPR64:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset)),
+ (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
+ (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
+ (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
+
+// Match stores from lane 0 to the appropriate subreg's store.
+multiclass VecStoreULane0Pat<SDPatternOperator StoreOp,
+ ValueType VTy, ValueType STy,
+ SubRegIndex SubRegIdx, Instruction STR> {
+ defm : VecStoreLane0Pat<am_unscaled128, StoreOp, VTy, STy, SubRegIdx, simm9, STR>;
+}
+
+let AddedComplexity = 19 in {
+ defm : VecStoreULane0Pat<truncstorei16, v8i16, i32, hsub, STURHi>;
+ defm : VecStoreULane0Pat<store, v8f16, f16, hsub, STURHi>;
+ defm : VecStoreULane0Pat<store, v4i32, i32, ssub, STURSi>;
+ defm : VecStoreULane0Pat<store, v4f32, f32, ssub, STURSi>;
+ defm : VecStoreULane0Pat<store, v2i64, i64, dsub, STURDi>;
+ defm : VecStoreULane0Pat<store, v2f64, f64, dsub, STURDi>;
+}
+
+//---
+// STR mnemonics fall back to STUR for negative or unaligned offsets.
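+// For example, "str w0, [x1, #-4]" is not a valid scaled offset and is
+// therefore encoded as STUR.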
+def : InstAlias<"str $Rt, [$Rn, $offset]",
+ (STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
+def : InstAlias<"str $Rt, [$Rn, $offset]",
+ (STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
+def : InstAlias<"str $Rt, [$Rn, $offset]",
+ (STURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
+def : InstAlias<"str $Rt, [$Rn, $offset]",
+ (STURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
+def : InstAlias<"str $Rt, [$Rn, $offset]",
+ (STURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
+def : InstAlias<"str $Rt, [$Rn, $offset]",
+ (STURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
+def : InstAlias<"str $Rt, [$Rn, $offset]",
+ (STURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;
+
+def : InstAlias<"strb $Rt, [$Rn, $offset]",
+ (STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
+def : InstAlias<"strh $Rt, [$Rn, $offset]",
+ (STURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
+
+//---
+// (unscaled immediate, unprivileged)
+defm STTRW : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">;
+defm STTRX : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">;
+
+defm STTRH : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">;
+defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">;
+
+//---
+// (immediate pre-indexed)
+def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32z, "str", pre_store, i32>;
+def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64z, "str", pre_store, i64>;
+def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op, "str", pre_store, untyped>;
+def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16Op, "str", pre_store, f16>;
+def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32Op, "str", pre_store, f32>;
+def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64Op, "str", pre_store, f64>;
+def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128Op, "str", pre_store, f128>;
+
+def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32z, "strb", pre_truncsti8, i32>;
+def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32z, "strh", pre_truncsti16, i32>;
+
+// truncstore i64
+def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
+ (STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
+ simm9:$off)>;
+def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
+ (STRHHpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
+ simm9:$off)>;
+def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
+ (STRBBpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
+ simm9:$off)>;
+
+def : Pat<(pre_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(pre_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(pre_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(pre_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(pre_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+
+def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(pre_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(pre_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(pre_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(pre_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+
+//---
+// (immediate post-indexed)
+def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32z, "str", post_store, i32>;
+def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64z, "str", post_store, i64>;
+def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op, "str", post_store, untyped>;
+def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16Op, "str", post_store, f16>;
+def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32Op, "str", post_store, f32>;
+def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64Op, "str", post_store, f64>;
+def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128Op, "str", post_store, f128>;
+
+def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32z, "strb", post_truncsti8, i32>;
+def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32z, "strh", post_truncsti16, i32>;
+
+// truncstore i64
+def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
+ (STRWpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
+ simm9:$off)>;
+def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
+ (STRHHpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
+ simm9:$off)>;
+def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
+ (STRBBpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
+ simm9:$off)>;
+
+def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(post_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+
+def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(post_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+
+//===----------------------------------------------------------------------===//
+// Load/store exclusive instructions.
+//===----------------------------------------------------------------------===//
+
+def LDARW : LoadAcquire <0b10, 1, 1, 0, 1, GPR32, "ldar">;
+def LDARX : LoadAcquire <0b11, 1, 1, 0, 1, GPR64, "ldar">;
+def LDARB : LoadAcquire <0b00, 1, 1, 0, 1, GPR32, "ldarb">;
+def LDARH : LoadAcquire <0b01, 1, 1, 0, 1, GPR32, "ldarh">;
+
+def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">;
+def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">;
+def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">;
+def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">;
+
+def LDXRW : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">;
+def LDXRX : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">;
+def LDXRB : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">;
+def LDXRH : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">;
+
+def STLRW : StoreRelease <0b10, 1, 0, 0, 1, GPR32, "stlr">;
+def STLRX : StoreRelease <0b11, 1, 0, 0, 1, GPR64, "stlr">;
+def STLRB : StoreRelease <0b00, 1, 0, 0, 1, GPR32, "stlrb">;
+def STLRH : StoreRelease <0b01, 1, 0, 0, 1, GPR32, "stlrh">;
+
+def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">;
+def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">;
+def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">;
+def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">;
+
+def STXRW : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">;
+def STXRX : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">;
+def STXRB : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">;
+def STXRH : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">;
+
+def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">;
+def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">;
+
+def LDXPW : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">;
+def LDXPX : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">;
+
+def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">;
+def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">;
+
+def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
+def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;
+
+let Predicates = [HasV8_1a] in {
+ // v8.1a "Limited Order Region" extension load-acquire instructions
+ def LDLARW : LoadAcquire <0b10, 1, 1, 0, 0, GPR32, "ldlar">;
+ def LDLARX : LoadAcquire <0b11, 1, 1, 0, 0, GPR64, "ldlar">;
+ def LDLARB : LoadAcquire <0b00, 1, 1, 0, 0, GPR32, "ldlarb">;
+ def LDLARH : LoadAcquire <0b01, 1, 1, 0, 0, GPR32, "ldlarh">;
+
+ // v8.1a "Limited Order Region" extension store-release instructions
+ def STLLRW : StoreRelease <0b10, 1, 0, 0, 0, GPR32, "stllr">;
+ def STLLRX : StoreRelease <0b11, 1, 0, 0, 0, GPR64, "stllr">;
+ def STLLRB : StoreRelease <0b00, 1, 0, 0, 0, GPR32, "stllrb">;
+ def STLLRH : StoreRelease <0b01, 1, 0, 0, 0, GPR32, "stllrh">;
+}
+
+//===----------------------------------------------------------------------===//
+// Scaled floating point to integer conversion instructions.
+//===----------------------------------------------------------------------===//
+
+defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>;
+defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>;
+defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>;
+defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>;
+defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>;
+defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>;
+defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>;
+defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>;
+defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", fp_to_sint>;
+defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", fp_to_uint>;
+defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", fp_to_sint>;
+defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", fp_to_uint>;
+
+multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
+ def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>;
+ def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # UXHr) $Rn)>;
+ def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # UWSr) $Rn)>;
+ def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # UXSr) $Rn)>;
+ def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>;
+ def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>;
+
+ def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
+ (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
+ def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
+ (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
+ def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
+ (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
+ def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
+ (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
+ def : Pat<(i32 (round (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
+ (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
+ def : Pat<(i64 (round (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
+ (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
+}
+
+defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">;
+defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">;
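+// For example, (i32 (int_aarch64_neon_fcvtzs (fmul f32:$x, 65536.0))) selects
+// as "fcvtzs w0, s0, #16": multiplying by 2^16 before truncating is exactly
+// the 16-fractional-bit fixed-point conversion.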
+
+multiclass FPToIntegerPats<SDNode to_int, SDNode round, string INST> {
+ def : Pat<(i32 (to_int (round f32:$Rn))),
+ (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+ def : Pat<(i64 (to_int (round f32:$Rn))),
+ (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+ def : Pat<(i32 (to_int (round f64:$Rn))),
+ (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+ def : Pat<(i64 (to_int (round f64:$Rn))),
+ (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+}
+
+defm : FPToIntegerPats<fp_to_sint, fceil, "FCVTPS">;
+defm : FPToIntegerPats<fp_to_uint, fceil, "FCVTPU">;
+defm : FPToIntegerPats<fp_to_sint, ffloor, "FCVTMS">;
+defm : FPToIntegerPats<fp_to_uint, ffloor, "FCVTMU">;
+defm : FPToIntegerPats<fp_to_sint, ftrunc, "FCVTZS">;
+defm : FPToIntegerPats<fp_to_uint, ftrunc, "FCVTZU">;
+defm : FPToIntegerPats<fp_to_sint, fround, "FCVTAS">;
+defm : FPToIntegerPats<fp_to_uint, fround, "FCVTAU">;
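+// For example, (i32 (fp_to_sint (ffloor f32:$x))) becomes a single FCVTMS
+// (convert with rounding toward minus infinity) instead of an FRINTM followed
+// by an FCVTZS.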
+
+//===----------------------------------------------------------------------===//
+// Scaled integer to floating point conversion instructions.
+//===----------------------------------------------------------------------===//
+
+defm SCVTF : IntegerToFP<0, "scvtf", sint_to_fp>;
+defm UCVTF : IntegerToFP<1, "ucvtf", uint_to_fp>;
+
+//===----------------------------------------------------------------------===//
+// Unscaled integer to floating point conversion instruction.
+//===----------------------------------------------------------------------===//
+
+defm FMOV : UnscaledConversion<"fmov">;
+
+// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
+let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in {
+def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>,
+ Sched<[WriteF]>, Requires<[HasFullFP16]>;
+def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
+ Sched<[WriteF]>;
+def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
+ Sched<[WriteF]>;
+}
+// Similarly add aliases
+def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>,
+ Requires<[HasFullFP16]>;
+def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>;
+def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>;
+
+//===----------------------------------------------------------------------===//
+// Floating point conversion instruction.
+//===----------------------------------------------------------------------===//
+
+defm FCVT : FPConversion<"fcvt">;
+
+//===----------------------------------------------------------------------===//
+// Floating point single operand instructions.
+//===----------------------------------------------------------------------===//
+
+defm FABS : SingleOperandFPData<0b0001, "fabs", fabs>;
+defm FMOV : SingleOperandFPData<0b0000, "fmov">;
+defm FNEG : SingleOperandFPData<0b0010, "fneg", fneg>;
+defm FRINTA : SingleOperandFPData<0b1100, "frinta", fround>;
+defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>;
+defm FRINTM : SingleOperandFPData<0b1010, "frintm", ffloor>;
+defm FRINTN : SingleOperandFPData<0b1000, "frintn", int_aarch64_neon_frintn>;
+defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>;
+
+def : Pat<(v1f64 (int_aarch64_neon_frintn (v1f64 FPR64:$Rn))),
+ (FRINTNDr FPR64:$Rn)>;
+
+defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>;
+defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>;
+
+let SchedRW = [WriteFDiv] in {
+defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", fsqrt>;
+}
+
+//===----------------------------------------------------------------------===//
+// Floating point two operand instructions.
+//===----------------------------------------------------------------------===//
+
+defm FADD : TwoOperandFPData<0b0010, "fadd", fadd>;
+let SchedRW = [WriteFDiv] in {
+defm FDIV : TwoOperandFPData<0b0001, "fdiv", fdiv>;
+}
+defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", fmaxnum>;
+defm FMAX : TwoOperandFPData<0b0100, "fmax", fmaxnan>;
+defm FMINNM : TwoOperandFPData<0b0111, "fminnm", fminnum>;
+defm FMIN : TwoOperandFPData<0b0101, "fmin", fminnan>;
+let SchedRW = [WriteFMul] in {
+defm FMUL : TwoOperandFPData<0b0000, "fmul", fmul>;
+defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>;
+}
+defm FSUB : TwoOperandFPData<0b0011, "fsub", fsub>;
+
+def : Pat<(v1f64 (fmaxnan (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+ (FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
+def : Pat<(v1f64 (fminnan (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+ (FMINDrr FPR64:$Rn, FPR64:$Rm)>;
+def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+ (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>;
+def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+ (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>;
+
+//===----------------------------------------------------------------------===//
+// Floating point three operand instructions.
+//===----------------------------------------------------------------------===//
+
+defm FMADD : ThreeOperandFPData<0, 0, "fmadd", fma>;
+defm FMSUB : ThreeOperandFPData<0, 1, "fmsub",
+ TriOpFrag<(fma node:$LHS, (fneg node:$MHS), node:$RHS)> >;
+defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd",
+ TriOpFrag<(fneg (fma node:$LHS, node:$MHS, node:$RHS))> >;
+defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub",
+ TriOpFrag<(fma node:$LHS, node:$MHS, (fneg node:$RHS))> >;
+
+// The following def pats catch the case where the LHS of an FMA is negated.
+// The TriOpFrag above catches the case where the middle operand is negated.
+
+// N.b. FMSUB etc. have the accumulator at the *end* of the operand list,
+// unlike the NEON variant, which takes it first as the tied destination.
+def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)),
+ (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
+
+def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)),
+ (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
+
+// We handled -(a + b*c) for FNMADD above, now it's time for "(-a) + (-b)*c" and
+// "(-a) + b*(-c)".
+def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))),
+ (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
+
+def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
+ (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
+
+def : Pat<(f32 (fma FPR32:$Rn, (fneg FPR32:$Rm), (fneg FPR32:$Ra))),
+ (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
+
+def : Pat<(f64 (fma FPR64:$Rn, (fneg FPR64:$Rm), (fneg FPR64:$Ra))),
+ (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
+
+//===----------------------------------------------------------------------===//
+// Floating point comparison instructions.
+//===----------------------------------------------------------------------===//
+
+defm FCMPE : FPComparison<1, "fcmpe">;
+defm FCMP : FPComparison<0, "fcmp", AArch64fcmp>;
+
+//===----------------------------------------------------------------------===//
+// Floating point conditional comparison instructions.
+//===----------------------------------------------------------------------===//
+
+defm FCCMPE : FPCondComparison<1, "fccmpe">;
+defm FCCMP : FPCondComparison<0, "fccmp", AArch64fccmp>;
+
+//===----------------------------------------------------------------------===//
+// Floating point conditional select instruction.
+//===----------------------------------------------------------------------===//
+
+defm FCSEL : FPCondSelect<"fcsel">;
+
+// CSEL instructions providing f128 types need to be handled by a
+// pseudo-instruction since the eventual code will need to introduce basic
+// blocks and control flow.
+def F128CSEL : Pseudo<(outs FPR128:$Rd),
+ (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond),
+ [(set (f128 FPR128:$Rd),
+ (AArch64csel FPR128:$Rn, FPR128:$Rm,
+ (i32 imm:$cond), NZCV))]> {
+ let Uses = [NZCV];
+ let usesCustomInserter = 1;
+ let hasNoSchedulingInfo = 1;
+}
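+// The custom inserter expands this after selection by splitting the block:
+// a conditional branch on the tested flags picks one of the two incoming f128
+// values via a PHI, since there is no 128-bit FCSEL instruction.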
+
+
+//===----------------------------------------------------------------------===//
+// Floating point immediate move.
+//===----------------------------------------------------------------------===//
+
+let isReMaterializable = 1 in {
+defm FMOV : FPMoveImmediate<"fmov">;
+}
+
+//===----------------------------------------------------------------------===//
+// Advanced SIMD two vector instructions.
+//===----------------------------------------------------------------------===//
+
+defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
+ int_aarch64_neon_uabd>;
+// Match UABDL in log2-shuffle patterns.
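+// The xor/add/ashr patterns below are the branchless-abs expansion,
+// abs(x) == (x + (x >> 15)) ^ (x >> 15), which is what reaches selection once
+// the abs of the widened difference has already been lowered.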
+def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)),
+ (zext (v8i8 V64:$opB))))),
+ (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
+def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
+ (v8i16 (add (sub (zext (v8i8 V64:$opA)),
+ (zext (v8i8 V64:$opB))),
+ (AArch64vashr v8i16:$src, (i32 15))))),
+ (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
+def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 V128:$opA)),
+ (zext (extract_high_v16i8 V128:$opB))))),
+ (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
+def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
+ (v8i16 (add (sub (zext (extract_high_v16i8 V128:$opA)),
+ (zext (extract_high_v16i8 V128:$opB))),
+ (AArch64vashr v8i16:$src, (i32 15))))),
+ (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
+def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)),
+ (zext (v4i16 V64:$opB))))),
+ (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>;
+def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 V128:$opA)),
+ (zext (extract_high_v8i16 V128:$opB))))),
+ (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>;
+def : Pat<(abs (v2i64 (sub (zext (v2i32 V64:$opA)),
+ (zext (v2i32 V64:$opB))))),
+ (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>;
+def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 V128:$opA)),
+ (zext (extract_high_v4i32 V128:$opB))))),
+ (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>;
+
+defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", abs>;
+defm CLS : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>;
+defm CLZ : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>;
+defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>;
+defm CMGE : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>;
+defm CMGT : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>;
+defm CMLE : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>;
+defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>;
+defm CNT : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>;
+defm FABS : SIMDTwoVectorFP<0, 1, 0b01111, "fabs", fabs>;
+
+defm FCMEQ : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
+defm FCMGE : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
+defm FCMGT : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
+defm FCMLE : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
+defm FCMLT : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
+defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>;
+defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>;
+defm FCVTL : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">;
+def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))),
+ (FCVTLv4i16 V64:$Rn)>;
+def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn),
+ (i64 4)))),
+ (FCVTLv8i16 V128:$Rn)>;
+def : Pat<(v2f64 (fpextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>;
+def : Pat<(v2f64 (fpextend (v2f32 (extract_subvector (v4f32 V128:$Rn),
+ (i64 2))))),
+ (FCVTLv4i32 V128:$Rn)>;
+
+def : Pat<(v4f32 (fpextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>;
+def : Pat<(v4f32 (fpextend (v4f16 (extract_subvector (v8f16 V128:$Rn),
+ (i64 4))))),
+ (FCVTLv8i16 V128:$Rn)>;
+
+defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>;
+defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>;
+defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>;
+defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>;
+defm FCVTN : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">;
+def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))),
+ (FCVTNv4i16 V128:$Rn)>;
+def : Pat<(concat_vectors V64:$Rd,
+ (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))),
+ (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
+def : Pat<(v2f32 (fpround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>;
+def : Pat<(v4f16 (fpround (v4f32 V128:$Rn))), (FCVTNv4i16 V128:$Rn)>;
+def : Pat<(concat_vectors V64:$Rd, (v2f32 (fpround (v2f64 V128:$Rn)))),
+ (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
+defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>;
+defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>;
+defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn",
+ int_aarch64_neon_fcvtxn>;
+defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", fp_to_sint>;
+defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", fp_to_uint>;
+
+def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>;
+def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>;
+def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>;
+def : Pat<(v4i32 (int_aarch64_neon_fcvtzs v4f32:$Rn)), (FCVTZSv4f32 $Rn)>;
+def : Pat<(v2i64 (int_aarch64_neon_fcvtzs v2f64:$Rn)), (FCVTZSv2f64 $Rn)>;
+
+def : Pat<(v4i16 (int_aarch64_neon_fcvtzu v4f16:$Rn)), (FCVTZUv4f16 $Rn)>;
+def : Pat<(v8i16 (int_aarch64_neon_fcvtzu v8f16:$Rn)), (FCVTZUv8f16 $Rn)>;
+def : Pat<(v2i32 (int_aarch64_neon_fcvtzu v2f32:$Rn)), (FCVTZUv2f32 $Rn)>;
+def : Pat<(v4i32 (int_aarch64_neon_fcvtzu v4f32:$Rn)), (FCVTZUv4f32 $Rn)>;
+def : Pat<(v2i64 (int_aarch64_neon_fcvtzu v2f64:$Rn)), (FCVTZUv2f64 $Rn)>;
+
+defm FNEG : SIMDTwoVectorFP<1, 1, 0b01111, "fneg", fneg>;
+defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>;
+defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", fround>;
+defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>;
+defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", ffloor>;
+defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", int_aarch64_neon_frintn>;
+defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>;
+defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>;
+defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>;
+defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>;
+defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", fsqrt>;
+defm NEG : SIMDTwoVectorBHSD<1, 0b01011, "neg",
+ UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
+defm NOT : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>;
+// Aliases for MVN -> NOT.
+def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}",
+ (NOTv8i8 V64:$Vd, V64:$Vn)>;
+def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}",
+ (NOTv16i8 V128:$Vd, V128:$Vn)>;
+
+def : Pat<(AArch64neg (v8i8 V64:$Rn)), (NEGv8i8 V64:$Rn)>;
+def : Pat<(AArch64neg (v16i8 V128:$Rn)), (NEGv16i8 V128:$Rn)>;
+def : Pat<(AArch64neg (v4i16 V64:$Rn)), (NEGv4i16 V64:$Rn)>;
+def : Pat<(AArch64neg (v8i16 V128:$Rn)), (NEGv8i16 V128:$Rn)>;
+def : Pat<(AArch64neg (v2i32 V64:$Rn)), (NEGv2i32 V64:$Rn)>;
+def : Pat<(AArch64neg (v4i32 V128:$Rn)), (NEGv4i32 V128:$Rn)>;
+def : Pat<(AArch64neg (v2i64 V128:$Rn)), (NEGv2i64 V128:$Rn)>;
+
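+// Bitwise NOT is independent of element size, so every 64-bit vector type maps
+// onto NOTv8i8 and every 128-bit vector type onto NOTv16i8.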
+def : Pat<(AArch64not (v8i8 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
+def : Pat<(AArch64not (v16i8 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
+def : Pat<(AArch64not (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
+def : Pat<(AArch64not (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
+def : Pat<(AArch64not (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
+def : Pat<(AArch64not (v1i64 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
+def : Pat<(AArch64not (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
+def : Pat<(AArch64not (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
+
+def : Pat<(vnot (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
+def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
+def : Pat<(vnot (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
+def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
+def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
+
+defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", int_aarch64_neon_rbit>;
+defm REV16 : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>;
+defm REV32 : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>;
+defm REV64 : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>;
+defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp",
+ BinOpFrag<(add node:$LHS, (int_aarch64_neon_saddlp node:$RHS))> >;
+defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", int_aarch64_neon_saddlp>;
+defm SCVTF : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", sint_to_fp>;
+defm SHLL : SIMDVectorLShiftLongBySizeBHS;
+defm SQABS : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
+defm SQNEG : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
+defm SQXTN : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>;
+defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>;
+defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>;
+defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp",
+ BinOpFrag<(add node:$LHS, (int_aarch64_neon_uaddlp node:$RHS))> >;
+defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp",
+ int_aarch64_neon_uaddlp>;
+defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", uint_to_fp>;
+defm UQXTN : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>;
+defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>;
+defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>;
+defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>;
+defm XTN : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>;
+
+def : Pat<(v4f16 (AArch64rev32 V64:$Rn)), (REV32v4i16 V64:$Rn)>;
+def : Pat<(v4f16 (AArch64rev64 V64:$Rn)), (REV64v4i16 V64:$Rn)>;
+def : Pat<(v8f16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
+def : Pat<(v8f16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
+def : Pat<(v2f32 (AArch64rev64 V64:$Rn)), (REV64v2i32 V64:$Rn)>;
+def : Pat<(v4f32 (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;
+
+// Patterns for vector long shift (by element width). These need to match all
+// three of zext, sext and anyext so it's easier to pull the patterns out of the
+// definition.
+multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> {
+ def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)),
+ (SHLLv8i8 V64:$Rn)>;
+ def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 V128:$Rn))), (i32 8)),
+ (SHLLv16i8 V128:$Rn)>;
+ def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)),
+ (SHLLv4i16 V64:$Rn)>;
+ def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 V128:$Rn))), (i32 16)),
+ (SHLLv8i16 V128:$Rn)>;
+ def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)),
+ (SHLLv2i32 V64:$Rn)>;
+ def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 V128:$Rn))), (i32 32)),
+ (SHLLv4i32 V128:$Rn)>;
+}
+
+defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>;
+defm : SIMDVectorLShiftLongBySizeBHSPats<zext>;
+defm : SIMDVectorLShiftLongBySizeBHSPats<sext>;
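+// For example, shifting the extension of a v8i8 left by 8 (the element width)
+// discards whatever the extension filled in, so zext, sext and anyext all
+// select the same SHLLv8i8.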
+
+//===----------------------------------------------------------------------===//
+// Advanced SIMD three vector instructions.
+//===----------------------------------------------------------------------===//
+
+defm ADD : SIMDThreeSameVector<0, 0b10000, "add", add>;
+defm ADDP : SIMDThreeSameVector<0, 0b10111, "addp", int_aarch64_neon_addp>;
+defm CMEQ : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>;
+defm CMGE : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>;
+defm CMGT : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>;
+defm CMHI : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>;
+defm CMHS : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>;
+defm CMTST : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>;
+defm FABD : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>;
+let Predicates = [HasNEON] in {
+foreach VT = [ v2f32, v4f32, v2f64 ] in
+def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
+}
+let Predicates = [HasNEON, HasFullFP16] in {
+foreach VT = [ v4f16, v8f16 ] in
+def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
+}
+defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>;
+defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>;
+defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_addp>;
+defm FADD : SIMDThreeSameVectorFP<0,0,0b010,"fadd", fadd>;
+defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
+defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
+defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
+defm FDIV : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", fdiv>;
+defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
+defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", fmaxnum>;
+defm FMAXP : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>;
+defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", fmaxnan>;
+defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>;
+defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", fminnum>;
+defm FMINP : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>;
+defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", fminnan>;
+
+// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
+// instruction expects the addend first, while the fma intrinsic puts it last.
+defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla",
+ TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >;
+defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls",
+ TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
+
+// The following def pats catch the case where the LHS of an FMA is negated.
+// The TriOpFrag above catches the case where the middle operand is negated.
+def : Pat<(v2f32 (fma (fneg V64:$Rn), V64:$Rm, V64:$Rd)),
+ (FMLSv2f32 V64:$Rd, V64:$Rn, V64:$Rm)>;
+
+def : Pat<(v4f32 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)),
+ (FMLSv4f32 V128:$Rd, V128:$Rn, V128:$Rm)>;
+
+def : Pat<(v2f64 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)),
+ (FMLSv2f64 V128:$Rd, V128:$Rn, V128:$Rm)>;
+
+defm FMULX : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>;
+defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", fmul>;
+defm FRECPS : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>;
+defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>;
+defm FSUB : SIMDThreeSameVectorFP<0,1,0b010,"fsub", fsub>;
+defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla",
+ TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))> >;
+defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls",
+ TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))> >;
+defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
+defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
+defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
+ TriOpFrag<(add node:$LHS, (int_aarch64_neon_sabd node:$MHS, node:$RHS))> >;
+defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_aarch64_neon_sabd>;
+defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_aarch64_neon_shadd>;
+defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
+defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
+defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>;
+defm SMINP : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>;
+defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", smin>;
+defm SQADD : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>;
+defm SQDMULH : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>;
+defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>;
+defm SQRSHL : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>;
+defm SQSHL : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>;
+defm SQSUB : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>;
+defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd",int_aarch64_neon_srhadd>;
+defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
+defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
+defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>;
+defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
+ TriOpFrag<(add node:$LHS, (int_aarch64_neon_uabd node:$MHS, node:$RHS))> >;
+defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_aarch64_neon_uabd>;
+defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_aarch64_neon_uhadd>;
+defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
+defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
+defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>;
+defm UMINP : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>;
+defm UMIN : SIMDThreeSameVectorBHS<1,0b01101,"umin", umin>;
+defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>;
+defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>;
+defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>;
+defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
+defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_aarch64_neon_urhadd>;
+defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
+defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
+defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
+ int_aarch64_neon_sqadd>;
+defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
+ int_aarch64_neon_sqsub>;
+
+defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
+defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
+ BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
+defm BIF : SIMDLogicalThreeVector<1, 0b11, "bif">;
+defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", AArch64bit>;
+defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl",
+ TriOpFrag<(or (and node:$LHS, node:$MHS), (and (vnot node:$LHS), node:$RHS))>>;
+defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>;
+defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
+ BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
+defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;
+
+
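+// In the BSL TriOpFrag above the tied destination is the bit mask, i.e.
+// dst = (dst & Rn) | (~dst & Rm). The AArch64bsl node has the same operand
+// order, and since the operation is bitwise every vector type maps onto the
+// byte-wise BSL instruction.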
+def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
+ (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
+def : Pat<(AArch64bsl (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
+ (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
+def : Pat<(AArch64bsl (v2i32 V64:$Rd), V64:$Rn, V64:$Rm),
+ (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
+def : Pat<(AArch64bsl (v1i64 V64:$Rd), V64:$Rn, V64:$Rm),
+ (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
+
+def : Pat<(AArch64bsl (v16i8 V128:$Rd), V128:$Rn, V128:$Rm),
+ (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
+def : Pat<(AArch64bsl (v8i16 V128:$Rd), V128:$Rn, V128:$Rm),
+ (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
+def : Pat<(AArch64bsl (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
+ (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
+def : Pat<(AArch64bsl (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
+ (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
+
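+// "mov Vd.<T>, Vn.<T>" is an alias for ORR with both source operands equal;
+// only the .8b and .16b spellings are used when printing, the other
+// arrangements are accepted for assembly only.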
+def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
+ (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>;
+def : InstAlias<"mov{\t$dst.8h, $src.8h|.8h\t$dst, $src}",
+ (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
+def : InstAlias<"mov{\t$dst.4s, $src.4s|.4s\t$dst, $src}",
+ (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
+def : InstAlias<"mov{\t$dst.2d, $src.2d|.2d\t$dst, $src}",
+ (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
+
+def : InstAlias<"mov{\t$dst.8b, $src.8b|.8b\t$dst, $src}",
+ (ORRv8i8 V64:$dst, V64:$src, V64:$src), 1>;
+def : InstAlias<"mov{\t$dst.4h, $src.4h|.4h\t$dst, $src}",
+ (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
+def : InstAlias<"mov{\t$dst.2s, $src.2s|.2s\t$dst, $src}",
+ (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
+def : InstAlias<"mov{\t$dst.1d, $src.1d|.1d\t$dst, $src}",
+ (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
+
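+// The register-register forms of cmls, cmlo, cmle and cmlt (and of the FP
+// facle/faclt/fcmle/fcmlt aliases below) have no encodings of their own; they
+// simply swap the source operands of the corresponding >=/> comparison.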
+def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" #
+ "|cmls.8b\t$dst, $src1, $src2}",
+ (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" #
+ "|cmls.16b\t$dst, $src1, $src2}",
+ (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" #
+ "|cmls.4h\t$dst, $src1, $src2}",
+ (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" #
+ "|cmls.8h\t$dst, $src1, $src2}",
+ (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" #
+ "|cmls.2s\t$dst, $src1, $src2}",
+ (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" #
+ "|cmls.4s\t$dst, $src1, $src2}",
+ (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" #
+ "|cmls.2d\t$dst, $src1, $src2}",
+ (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
+
+def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" #
+ "|cmlo.8b\t$dst, $src1, $src2}",
+ (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" #
+ "|cmlo.16b\t$dst, $src1, $src2}",
+ (CMHIv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" #
+ "|cmlo.4h\t$dst, $src1, $src2}",
+ (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" #
+ "|cmlo.8h\t$dst, $src1, $src2}",
+ (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" #
+ "|cmlo.2s\t$dst, $src1, $src2}",
+ (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" #
+ "|cmlo.4s\t$dst, $src1, $src2}",
+ (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" #
+ "|cmlo.2d\t$dst, $src1, $src2}",
+ (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
+
+def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" #
+ "|cmle.8b\t$dst, $src1, $src2}",
+ (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" #
+ "|cmle.16b\t$dst, $src1, $src2}",
+ (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" #
+ "|cmle.4h\t$dst, $src1, $src2}",
+ (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" #
+ "|cmle.8h\t$dst, $src1, $src2}",
+ (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" #
+ "|cmle.2s\t$dst, $src1, $src2}",
+ (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" #
+ "|cmle.4s\t$dst, $src1, $src2}",
+ (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" #
+ "|cmle.2d\t$dst, $src1, $src2}",
+ (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
+
+def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" #
+ "|cmlt.8b\t$dst, $src1, $src2}",
+ (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" #
+ "|cmlt.16b\t$dst, $src1, $src2}",
+ (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" #
+ "|cmlt.4h\t$dst, $src1, $src2}",
+ (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" #
+ "|cmlt.8h\t$dst, $src1, $src2}",
+ (CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" #
+ "|cmlt.2s\t$dst, $src1, $src2}",
+ (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" #
+ "|cmlt.4s\t$dst, $src1, $src2}",
+ (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" #
+ "|cmlt.2d\t$dst, $src1, $src2}",
+ (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
+
+let Predicates = [HasNEON, HasFullFP16] in {
+def : InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" #
+ "|fcmle.4h\t$dst, $src1, $src2}",
+ (FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{fcmle\t$dst.8h, $src1.8h, $src2.8h" #
+ "|fcmle.8h\t$dst, $src1, $src2}",
+ (FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
+}
+def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" #
+ "|fcmle.2s\t$dst, $src1, $src2}",
+ (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" #
+ "|fcmle.4s\t$dst, $src1, $src2}",
+ (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" #
+ "|fcmle.2d\t$dst, $src1, $src2}",
+ (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
+
+let Predicates = [HasNEON, HasFullFP16] in {
+def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" #
+ "|fcmlt.4h\t$dst, $src1, $src2}",
+ (FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" #
+ "|fcmlt.8h\t$dst, $src1, $src2}",
+ (FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
+}
+def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" #
+ "|fcmlt.2s\t$dst, $src1, $src2}",
+ (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" #
+ "|fcmlt.4s\t$dst, $src1, $src2}",
+ (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" #
+ "|fcmlt.2d\t$dst, $src1, $src2}",
+ (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
+
+let Predicates = [HasNEON, HasFullFP16] in {
+def : InstAlias<"{facle\t$dst.4h, $src1.4h, $src2.4h" #
+ "|facle.4h\t$dst, $src1, $src2}",
+ (FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{facle\t$dst.8h, $src1.8h, $src2.8h" #
+ "|facle.8h\t$dst, $src1, $src2}",
+ (FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
+}
+def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" #
+ "|facle.2s\t$dst, $src1, $src2}",
+ (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" #
+ "|facle.4s\t$dst, $src1, $src2}",
+ (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" #
+ "|facle.2d\t$dst, $src1, $src2}",
+ (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
+
+let Predicates = [HasNEON, HasFullFP16] in {
+def : InstAlias<"{faclt\t$dst.4h, $src1.4h, $src2.4h" #
+ "|faclt.4h\t$dst, $src1, $src2}",
+ (FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{faclt\t$dst.8h, $src1.8h, $src2.8h" #
+ "|faclt.8h\t$dst, $src1, $src2}",
+ (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
+}
+def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" #
+ "|faclt.2s\t$dst, $src1, $src2}",
+ (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" #
+ "|faclt.4s\t$dst, $src1, $src2}",
+ (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" #
+ "|faclt.2d\t$dst, $src1, $src2}",
+ (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
+
+//===----------------------------------------------------------------------===//
+// Advanced SIMD three scalar instructions.
+//===----------------------------------------------------------------------===//
+
+defm ADD : SIMDThreeScalarD<0, 0b10000, "add", add>;
+defm CMEQ : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>;
+defm CMGE : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>;
+defm CMGT : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>;
+defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>;
+defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>;
+defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>;
+defm FABD : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>;
+def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+ (FABD64 FPR64:$Rn, FPR64:$Rm)>;
+let Predicates = [HasFullFP16] in {
+def : Pat<(fabs (fsub f16:$Rn, f16:$Rm)), (FABD16 f16:$Rn, f16:$Rm)>;
+}
+def : Pat<(fabs (fsub f32:$Rn, f32:$Rm)), (FABD32 f32:$Rn, f32:$Rm)>;
+def : Pat<(fabs (fsub f64:$Rn, f64:$Rm)), (FABD64 f64:$Rn, f64:$Rm)>;
+defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge",
+ int_aarch64_neon_facge>;
+defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",
+ int_aarch64_neon_facgt>;
+defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
+defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
+defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
+defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx>;
+defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps>;
+defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts>;
+defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
+defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
+defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
+defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>;
+defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>;
+defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>;
+defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", int_aarch64_neon_srshl>;
+defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_aarch64_neon_sshl>;
+defm SUB : SIMDThreeScalarD< 1, 0b10000, "sub", sub>;
+defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>;
+defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>;
+defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>;
+defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
+defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>;
+defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>;
+let Predicates = [HasRDM] in {
+ defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">;
+ defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">;
+ def : Pat<(i32 (int_aarch64_neon_sqadd
+ (i32 FPR32:$Rd),
+ (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn),
+ (i32 FPR32:$Rm))))),
+ (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
+ def : Pat<(i32 (int_aarch64_neon_sqsub
+ (i32 FPR32:$Rd),
+ (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn),
+ (i32 FPR32:$Rm))))),
+ (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
+}
+
+def : InstAlias<"cmls $dst, $src1, $src2",
+ (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
+def : InstAlias<"cmle $dst, $src1, $src2",
+ (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
+def : InstAlias<"cmlo $dst, $src1, $src2",
+ (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
+def : InstAlias<"cmlt $dst, $src1, $src2",
+ (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
+def : InstAlias<"fcmle $dst, $src1, $src2",
+ (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
+def : InstAlias<"fcmle $dst, $src1, $src2",
+ (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
+def : InstAlias<"fcmlt $dst, $src1, $src2",
+ (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
+def : InstAlias<"fcmlt $dst, $src1, $src2",
+ (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
+def : InstAlias<"facle $dst, $src1, $src2",
+ (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
+def : InstAlias<"facle $dst, $src1, $src2",
+ (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
+def : InstAlias<"faclt $dst, $src1, $src2",
+ (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
+def : InstAlias<"faclt $dst, $src1, $src2",
+ (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
+
+//===----------------------------------------------------------------------===//
+// Advanced SIMD three scalar instructions (mixed operands).
+//===----------------------------------------------------------------------===//
+defm SQDMULL : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull",
+ int_aarch64_neon_sqdmulls_scalar>;
+defm SQDMLAL : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">;
+defm SQDMLSL : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">;
+
+def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd),
+ (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
+ (i32 FPR32:$Rm))))),
+ (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
+def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd),
+ (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
+ (i32 FPR32:$Rm))))),
+ (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
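+// A saturating add or subtract of an sqdmull result folds into a single
+// sqdmlal/sqdmlsl, keeping the accumulation in the 64-bit FPR accumulator.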
+
+//===----------------------------------------------------------------------===//
+// Advanced SIMD two scalar instructions.
+//===----------------------------------------------------------------------===//
+
+defm ABS : SIMDTwoScalarD< 0, 0b01011, "abs", abs>;
+defm CMEQ : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", AArch64cmeqz>;
+defm CMGE : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>;
+defm CMGT : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>;
+defm CMLE : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>;
+defm CMLT : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>;
+defm FCMEQ : SIMDFPCmpTwoScalar<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
+defm FCMGE : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
+defm FCMGT : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
+defm FCMLE : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
+defm FCMLT : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
+defm FCVTAS : SIMDFPTwoScalar< 0, 0, 0b11100, "fcvtas">;
+defm FCVTAU : SIMDFPTwoScalar< 1, 0, 0b11100, "fcvtau">;
+defm FCVTMS : SIMDFPTwoScalar< 0, 0, 0b11011, "fcvtms">;
+defm FCVTMU : SIMDFPTwoScalar< 1, 0, 0b11011, "fcvtmu">;
+defm FCVTNS : SIMDFPTwoScalar< 0, 0, 0b11010, "fcvtns">;
+defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu">;
+defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps">;
+defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">;
+def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
+defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">;
+defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">;
+defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe">;
+defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx">;
+defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte">;
+defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg",
+ UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
+defm SCVTF : SIMDFPTwoScalarCVT< 0, 0, 0b11101, "scvtf", AArch64sitof>;
+defm SQABS : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
+defm SQNEG : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
+defm SQXTN : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>;
+defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>;
+defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd",
+ int_aarch64_neon_suqadd>;
+defm UCVTF : SIMDFPTwoScalarCVT< 1, 0, 0b11101, "ucvtf", AArch64uitof>;
+defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>;
+defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
+ int_aarch64_neon_usqadd>;
+
+def : Pat<(AArch64neg (v1i64 V64:$Rn)), (NEGv1i64 V64:$Rn)>;
+
+def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))),
+ (FCVTASv1i64 FPR64:$Rn)>;
+def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))),
+ (FCVTAUv1i64 FPR64:$Rn)>;
+def : Pat<(v1i64 (int_aarch64_neon_fcvtms (v1f64 FPR64:$Rn))),
+ (FCVTMSv1i64 FPR64:$Rn)>;
+def : Pat<(v1i64 (int_aarch64_neon_fcvtmu (v1f64 FPR64:$Rn))),
+ (FCVTMUv1i64 FPR64:$Rn)>;
+def : Pat<(v1i64 (int_aarch64_neon_fcvtns (v1f64 FPR64:$Rn))),
+ (FCVTNSv1i64 FPR64:$Rn)>;
+def : Pat<(v1i64 (int_aarch64_neon_fcvtnu (v1f64 FPR64:$Rn))),
+ (FCVTNUv1i64 FPR64:$Rn)>;
+def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))),
+ (FCVTPSv1i64 FPR64:$Rn)>;
+def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))),
+ (FCVTPUv1i64 FPR64:$Rn)>;
+
+def : Pat<(f16 (int_aarch64_neon_frecpe (f16 FPR16:$Rn))),
+ (FRECPEv1f16 FPR16:$Rn)>;
+def : Pat<(f32 (int_aarch64_neon_frecpe (f32 FPR32:$Rn))),
+ (FRECPEv1i32 FPR32:$Rn)>;
+def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))),
+ (FRECPEv1i64 FPR64:$Rn)>;
+def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))),
+ (FRECPEv1i64 FPR64:$Rn)>;
+
+def : Pat<(f32 (AArch64frecpe (f32 FPR32:$Rn))),
+ (FRECPEv1i32 FPR32:$Rn)>;
+def : Pat<(v2f32 (AArch64frecpe (v2f32 V64:$Rn))),
+ (FRECPEv2f32 V64:$Rn)>;
+def : Pat<(v4f32 (AArch64frecpe (v4f32 FPR128:$Rn))),
+ (FRECPEv4f32 FPR128:$Rn)>;
+def : Pat<(f64 (AArch64frecpe (f64 FPR64:$Rn))),
+ (FRECPEv1i64 FPR64:$Rn)>;
+def : Pat<(v1f64 (AArch64frecpe (v1f64 FPR64:$Rn))),
+ (FRECPEv1i64 FPR64:$Rn)>;
+def : Pat<(v2f64 (AArch64frecpe (v2f64 FPR128:$Rn))),
+ (FRECPEv2f64 FPR128:$Rn)>;
+
+def : Pat<(f32 (AArch64frecps (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
+ (FRECPS32 FPR32:$Rn, FPR32:$Rm)>;
+def : Pat<(v2f32 (AArch64frecps (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
+ (FRECPSv2f32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4f32 (AArch64frecps (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
+ (FRECPSv4f32 FPR128:$Rn, FPR128:$Rm)>;
+def : Pat<(f64 (AArch64frecps (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
+ (FRECPS64 FPR64:$Rn, FPR64:$Rm)>;
+def : Pat<(v2f64 (AArch64frecps (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
+ (FRECPSv2f64 FPR128:$Rn, FPR128:$Rm)>;
+
+def : Pat<(f16 (int_aarch64_neon_frecpx (f16 FPR16:$Rn))),
+ (FRECPXv1f16 FPR16:$Rn)>;
+def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))),
+ (FRECPXv1i32 FPR32:$Rn)>;
+def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))),
+ (FRECPXv1i64 FPR64:$Rn)>;
+
+def : Pat<(f16 (int_aarch64_neon_frsqrte (f16 FPR16:$Rn))),
+ (FRSQRTEv1f16 FPR16:$Rn)>;
+def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))),
+ (FRSQRTEv1i32 FPR32:$Rn)>;
+def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))),
+ (FRSQRTEv1i64 FPR64:$Rn)>;
+def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))),
+ (FRSQRTEv1i64 FPR64:$Rn)>;
+
+def : Pat<(f32 (AArch64frsqrte (f32 FPR32:$Rn))),
+ (FRSQRTEv1i32 FPR32:$Rn)>;
+def : Pat<(v2f32 (AArch64frsqrte (v2f32 V64:$Rn))),
+ (FRSQRTEv2f32 V64:$Rn)>;
+def : Pat<(v4f32 (AArch64frsqrte (v4f32 FPR128:$Rn))),
+ (FRSQRTEv4f32 FPR128:$Rn)>;
+def : Pat<(f64 (AArch64frsqrte (f64 FPR64:$Rn))),
+ (FRSQRTEv1i64 FPR64:$Rn)>;
+def : Pat<(v1f64 (AArch64frsqrte (v1f64 FPR64:$Rn))),
+ (FRSQRTEv1i64 FPR64:$Rn)>;
+def : Pat<(v2f64 (AArch64frsqrte (v2f64 FPR128:$Rn))),
+ (FRSQRTEv2f64 FPR128:$Rn)>;
+
+def : Pat<(f32 (AArch64frsqrts (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
+ (FRSQRTS32 FPR32:$Rn, FPR32:$Rm)>;
+def : Pat<(v2f32 (AArch64frsqrts (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
+ (FRSQRTSv2f32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4f32 (AArch64frsqrts (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
+ (FRSQRTSv4f32 FPR128:$Rn, FPR128:$Rm)>;
+def : Pat<(f64 (AArch64frsqrts (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
+ (FRSQRTS64 FPR64:$Rn, FPR64:$Rm)>;
+def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
+ (FRSQRTSv2f64 FPR128:$Rn, FPR128:$Rm)>;
+
+// If an integer is about to be converted to a floating point value,
+// just load it on the floating point unit.
+// Here are the patterns for 8 and 16-bits to float.
+// 8-bits -> float.
+multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy,
+ SDPatternOperator loadop, Instruction UCVTF,
+ ROAddrMode ro, Instruction LDRW, Instruction LDRX,
+ SubRegIndex sub> {
+ def : Pat<(DstTy (uint_to_fp (SrcTy
+ (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm,
+ ro.Wext:$extend))))),
+ (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
+ (LDRW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
+ sub))>;
+
+ def : Pat<(DstTy (uint_to_fp (SrcTy
+ (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm,
+ ro.Wext:$extend))))),
+ (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
+ (LDRX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
+ sub))>;
+}
+
+defm : UIntToFPROLoadPat<f32, i32, zextloadi8,
+ UCVTFv1i32, ro8, LDRBroW, LDRBroX, bsub>;
+def : Pat <(f32 (uint_to_fp (i32
+ (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
+ (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
+ (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
+def : Pat <(f32 (uint_to_fp (i32
+ (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
+ (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
+ (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
+// 16-bits -> float.
+defm : UIntToFPROLoadPat<f32, i32, zextloadi16,
+ UCVTFv1i32, ro16, LDRHroW, LDRHroX, hsub>;
+def : Pat <(f32 (uint_to_fp (i32
+ (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
+ (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
+ (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
+def : Pat <(f32 (uint_to_fp (i32
+ (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
+ (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
+ (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
+// 32-bit integers are handled in the target-specific DAG combine
+// performIntToFpCombine.
+// 64-bit integer to 32-bit floating point is not possible with
+// UCVTF on floating-point registers (source and destination
+// must have the same size).
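+
+// Illustrative example (assumed-typical selection, not taken from this file):
+// for
+//   float f(const unsigned char *p) { return *p; }
+// the zextloadi8 feeding uint_to_fp matches the patterns above, so the byte is
+// loaded straight into a SIMD register and converted there, e.g.
+//   ldr   b0, [x0]
+//   ucvtf s0, s0
+// rather than a GPR load followed by a move into the FP register file.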
+
+// Here are the patterns for 8, 16, 32, and 64-bits to double.
+// 8-bits -> double.
+defm : UIntToFPROLoadPat<f64, i32, zextloadi8,
+ UCVTFv1i64, ro8, LDRBroW, LDRBroX, bsub>;
+def : Pat <(f64 (uint_to_fp (i32
+ (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
+ (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+ (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
+def : Pat <(f64 (uint_to_fp (i32
+ (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
+ (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+ (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
+// 16-bits -> double.
+defm : UIntToFPROLoadPat<f64, i32, zextloadi16,
+ UCVTFv1i64, ro16, LDRHroW, LDRHroX, hsub>;
+def : Pat <(f64 (uint_to_fp (i32
+ (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
+ (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+ (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
+def : Pat <(f64 (uint_to_fp (i32
+ (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
+ (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+ (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
+// 32-bits -> double.
+defm : UIntToFPROLoadPat<f64, i32, load,
+ UCVTFv1i64, ro32, LDRSroW, LDRSroX, ssub>;
+def : Pat <(f64 (uint_to_fp (i32
+ (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
+ (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+ (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>;
+def : Pat <(f64 (uint_to_fp (i32
+ (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
+ (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+ (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
+// 64-bit integer -> double is handled in the target-specific DAG combine:
+// performIntToFpCombine.
+
+//===----------------------------------------------------------------------===//
+// Advanced SIMD three different-sized vector instructions.
+//===----------------------------------------------------------------------===//
+
+defm ADDHN : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>;
+defm SUBHN : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>;
+defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>;
+defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
+defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_aarch64_neon_pmull>;
+defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
+ int_aarch64_neon_sabd>;
+defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
+ int_aarch64_neon_sabd>;
+defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl",
+ BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
+defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw",
+ BinOpFrag<(add node:$LHS, (sext node:$RHS))>>;
+defm SMLAL : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal",
+ TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
+defm SMLSL : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl",
+ TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
+defm SMULL : SIMDLongThreeVectorBHS<0, 0b1100, "smull", int_aarch64_neon_smull>;
+defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal",
+ int_aarch64_neon_sqadd>;
+defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl",
+ int_aarch64_neon_sqsub>;
+defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull",
+ int_aarch64_neon_sqdmull>;
+defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
+ BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>;
+defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
+ BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
+defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
+ int_aarch64_neon_uabd>;
+defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
+ BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>;
+defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
+ BinOpFrag<(add node:$LHS, (zext node:$RHS))>>;
+defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
+ TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
+defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
+ TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
+defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", int_aarch64_neon_umull>;
+defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
+ BinOpFrag<(sub (zext node:$LHS), (zext node:$RHS))>>;
+defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw",
+ BinOpFrag<(sub node:$LHS, (zext node:$RHS))>>;
+
+// Additional patterns for SMULL and UMULL
+multiclass Neon_mul_widen_patterns<SDPatternOperator opnode,
+ Instruction INST8B, Instruction INST4H, Instruction INST2S> {
+ def : Pat<(v8i16 (opnode (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
+ (INST8B V64:$Rn, V64:$Rm)>;
+ def : Pat<(v4i32 (opnode (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
+ (INST4H V64:$Rn, V64:$Rm)>;
+ def : Pat<(v2i64 (opnode (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
+ (INST2S V64:$Rn, V64:$Rm)>;
+}
+
+defm : Neon_mul_widen_patterns<AArch64smull, SMULLv8i8_v8i16,
+ SMULLv4i16_v4i32, SMULLv2i32_v2i64>;
+defm : Neon_mul_widen_patterns<AArch64umull, UMULLv8i8_v8i16,
+ UMULLv4i16_v4i32, UMULLv2i32_v2i64>;
+
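+// Illustrative note (an assumption about how the node arises, not stated in
+// this file): AArch64smull/AArch64umull are typically formed from a
+// (mul (sext ...), (sext ...)) / (mul (zext ...), (zext ...)) DAG, so e.g.
+// multiplying two v8i8 values widened to v8i16 should select
+// "smull v0.8h, v1.8b, v2.8b" through the patterns above.
+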
+// Patterns for smull2/umull2.
+multiclass Neon_mul_high_patterns<SDPatternOperator opnode,
+ Instruction INST8B, Instruction INST4H, Instruction INST2S> {
+ def : Pat<(v8i16 (opnode (extract_high_v16i8 V128:$Rn),
+ (extract_high_v16i8 V128:$Rm))),
+ (INST8B V128:$Rn, V128:$Rm)>;
+ def : Pat<(v4i32 (opnode (extract_high_v8i16 V128:$Rn),
+ (extract_high_v8i16 V128:$Rm))),
+ (INST4H V128:$Rn, V128:$Rm)>;
+ def : Pat<(v2i64 (opnode (extract_high_v4i32 V128:$Rn),
+ (extract_high_v4i32 V128:$Rm))),
+ (INST2S V128:$Rn, V128:$Rm)>;
+}
+
+defm : Neon_mul_high_patterns<AArch64smull, SMULLv16i8_v8i16,
+ SMULLv8i16_v4i32, SMULLv4i32_v2i64>;
+defm : Neon_mul_high_patterns<AArch64umull, UMULLv16i8_v8i16,
+ UMULLv8i16_v4i32, UMULLv4i32_v2i64>;
+
+// Additional patterns for SMLAL/SMLSL and UMLAL/UMLSL
+multiclass Neon_mulacc_widen_patterns<SDPatternOperator opnode,
+ Instruction INST8B, Instruction INST4H, Instruction INST2S> {
+ def : Pat<(v8i16 (opnode (v8i16 V128:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
+ (INST8B V128:$Rd, V64:$Rn, V64:$Rm)>;
+ def : Pat<(v4i32 (opnode (v4i32 V128:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
+ (INST4H V128:$Rd, V64:$Rn, V64:$Rm)>;
+ def : Pat<(v2i64 (opnode (v2i64 V128:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
+ (INST2S V128:$Rd, V64:$Rn, V64:$Rm)>;
+}
+
+defm : Neon_mulacc_widen_patterns<
+ TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>,
+ SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>;
+defm : Neon_mulacc_widen_patterns<
+ TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>,
+ UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>;
+defm : Neon_mulacc_widen_patterns<
+ TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>,
+ SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;
+defm : Neon_mulacc_widen_patterns<
+ TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>,
+ UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>;
+
+// Patterns for 64-bit pmull
+def : Pat<(int_aarch64_neon_pmull64 V64:$Rn, V64:$Rm),
+ (PMULLv1i64 V64:$Rn, V64:$Rm)>;
+def : Pat<(int_aarch64_neon_pmull64 (extractelt (v2i64 V128:$Rn), (i64 1)),
+ (extractelt (v2i64 V128:$Rm), (i64 1))),
+ (PMULLv2i64 V128:$Rn, V128:$Rm)>;
+
+// CodeGen patterns for addhn and subhn instructions, which can actually be
+// written in LLVM IR without too much difficulty.
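+//
+// For example, hand-written IR along these lines (illustrative only)
+//   %sum = add <8 x i16> %a, %b
+//   %shr = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8,
+//                                i16 8, i16 8, i16 8, i16 8>
+//   %res = trunc <8 x i16> %shr to <8 x i8>
+// matches the first ADDHN pattern below and should select
+// "addhn v0.8b, v1.8h, v2.8h".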
+
+// ADDHN
+def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
+ (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
+ (i32 16))))),
+ (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
+ (i32 32))))),
+ (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
+def : Pat<(concat_vectors (v8i8 V64:$Rd),
+ (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm),
+ (i32 8))))),
+ (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
+ V128:$Rn, V128:$Rm)>;
+def : Pat<(concat_vectors (v4i16 V64:$Rd),
+ (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
+ (i32 16))))),
+ (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
+ V128:$Rn, V128:$Rm)>;
+def : Pat<(concat_vectors (v2i32 V64:$Rd),
+ (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
+ (i32 32))))),
+ (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
+ V128:$Rn, V128:$Rm)>;
+
+// SUBHN
+def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
+ (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
+ (i32 16))))),
+ (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
+ (i32 32))))),
+ (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
+def : Pat<(concat_vectors (v8i8 V64:$Rd),
+ (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
+ (i32 8))))),
+ (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
+ V128:$Rn, V128:$Rm)>;
+def : Pat<(concat_vectors (v4i16 V64:$Rd),
+ (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
+ (i32 16))))),
+ (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
+ V128:$Rn, V128:$Rm)>;
+def : Pat<(concat_vectors (v2i32 V64:$Rd),
+ (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
+ (i32 32))))),
+ (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
+ V128:$Rn, V128:$Rm)>;
+
+//----------------------------------------------------------------------------
+// AdvSIMD bitwise extract from vector instruction.
+//----------------------------------------------------------------------------
+
+defm EXT : SIMDBitwiseExtract<"ext">;
+
+def : Pat<(v4i16 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
+ (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
+def : Pat<(v8i16 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
+ (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
+def : Pat<(v2i32 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
+ (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
+def : Pat<(v2f32 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
+ (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
+def : Pat<(v4i32 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
+ (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
+def : Pat<(v4f32 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
+ (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
+def : Pat<(v2i64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
+ (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
+def : Pat<(v2f64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
+ (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
+def : Pat<(v4f16 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
+ (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
+def : Pat<(v8f16 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
+ (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
+
+// We use EXT to handle extract_subvector to copy the upper 64-bits of a
+// 128-bit vector.
+def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 8))),
+ (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
+def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 4))),
+ (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
+def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 2))),
+ (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
+def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 1))),
+ (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
+def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 4))),
+ (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
+def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 2))),
+ (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
+def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 1))),
+ (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
+
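+// E.g. (illustrative, roughly what a vget_high_s8-style operation lowers to):
+// taking the upper half of a v16i8 becomes
+//   ext v0.16b, v1.16b, v1.16b, #8
+// followed by using the low D subregister of the result.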
+
+//----------------------------------------------------------------------------
+// AdvSIMD zip vector
+//----------------------------------------------------------------------------
+
+defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>;
+defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>;
+defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>;
+defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>;
+defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
+defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>;
+
+//----------------------------------------------------------------------------
+// AdvSIMD TBL/TBX instructions
+//----------------------------------------------------------------------------
+
+defm TBL : SIMDTableLookup< 0, "tbl">;
+defm TBX : SIMDTableLookupTied<1, "tbx">;
+
+def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
+ (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>;
+def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
+ (TBLv16i8One V128:$Ri, V128:$Rn)>;
+
+def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd),
+ (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
+ (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>;
+def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd),
+ (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
+ (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>;
+
+
+//----------------------------------------------------------------------------
+// AdvSIMD scalar CPY instruction
+//----------------------------------------------------------------------------
+
+defm CPY : SIMDScalarCPY<"cpy">;
+
+//----------------------------------------------------------------------------
+// AdvSIMD scalar pairwise instructions
+//----------------------------------------------------------------------------
+
+defm ADDP : SIMDPairwiseScalarD<0, 0b11011, "addp">;
+defm FADDP : SIMDFPPairwiseScalar<0, 0b01101, "faddp">;
+defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">;
+defm FMAXP : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">;
+defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">;
+defm FMINP : SIMDFPPairwiseScalar<1, 0b01111, "fminp">;
+def : Pat<(v2i64 (AArch64saddv V128:$Rn)),
+ (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
+def : Pat<(v2i64 (AArch64uaddv V128:$Rn)),
+ (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
+def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))),
+ (FADDPv2i32p V64:$Rn)>;
+def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))),
+ (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>;
+def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))),
+ (FADDPv2i64p V128:$Rn)>;
+def : Pat<(f32 (int_aarch64_neon_fmaxnmv (v2f32 V64:$Rn))),
+ (FMAXNMPv2i32p V64:$Rn)>;
+def : Pat<(f64 (int_aarch64_neon_fmaxnmv (v2f64 V128:$Rn))),
+ (FMAXNMPv2i64p V128:$Rn)>;
+def : Pat<(f32 (int_aarch64_neon_fmaxv (v2f32 V64:$Rn))),
+ (FMAXPv2i32p V64:$Rn)>;
+def : Pat<(f64 (int_aarch64_neon_fmaxv (v2f64 V128:$Rn))),
+ (FMAXPv2i64p V128:$Rn)>;
+def : Pat<(f32 (int_aarch64_neon_fminnmv (v2f32 V64:$Rn))),
+ (FMINNMPv2i32p V64:$Rn)>;
+def : Pat<(f64 (int_aarch64_neon_fminnmv (v2f64 V128:$Rn))),
+ (FMINNMPv2i64p V128:$Rn)>;
+def : Pat<(f32 (int_aarch64_neon_fminv (v2f32 V64:$Rn))),
+ (FMINPv2i32p V64:$Rn)>;
+def : Pat<(f64 (int_aarch64_neon_fminv (v2f64 V128:$Rn))),
+ (FMINPv2i64p V128:$Rn)>;
+
+//----------------------------------------------------------------------------
+// AdvSIMD INS/DUP instructions
+//----------------------------------------------------------------------------
+
+def DUPv8i8gpr : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>;
+def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>;
+def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>;
+def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>;
+def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>;
+def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>;
+def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>;
+
+def DUPv2i64lane : SIMDDup64FromElement;
+def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>;
+def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>;
+def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>;
+def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>;
+def DUPv8i8lane : SIMDDup8FromElement <0, ".8b", v8i8, V64>;
+def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>;
+
+def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))),
+ (v2f32 (DUPv2i32lane
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
+ (i64 0)))>;
+def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))),
+ (v4f32 (DUPv4i32lane
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
+ (i64 0)))>;
+def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))),
+ (v2f64 (DUPv2i64lane
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub),
+ (i64 0)))>;
+def : Pat<(v4f16 (AArch64dup (f16 FPR16:$Rn))),
+ (v4f16 (DUPv4i16lane
+ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
+ (i64 0)))>;
+def : Pat<(v8f16 (AArch64dup (f16 FPR16:$Rn))),
+ (v8f16 (DUPv8i16lane
+ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
+ (i64 0)))>;
+
+def : Pat<(v4f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
+ (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
+def : Pat<(v8f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
+ (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;
+
+def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
+ (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>;
+def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
+ (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>;
+def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)),
+ (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>;
+
+// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane
+// instruction even if the types don't match: we just have to remap the lane
+// carefully. N.b. this trick only applies to truncations.
+def VecIndex_x2 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64);
+}]>;
+def VecIndex_x4 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64);
+}]>;
+def VecIndex_x8 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64);
+}]>;
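+
+// Worked example (illustrative): dup'ing the i8 truncation of halfword lane 1
+// of a v8i16 reuses the byte-granular DUP below, with the lane rescaled by
+// VecIndex_x2 to byte lane 2, i.e. "dup v0.8b, v1.b[2]" (little-endian lane
+// numbering assumed).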
+
+multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT,
+ ValueType Src128VT, ValueType ScalVT,
+ Instruction DUP, SDNodeXForm IdxXFORM> {
+ def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src128VT V128:$Rn),
+ imm:$idx)))),
+ (DUP V128:$Rn, (IdxXFORM imm:$idx))>;
+
+ def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src64VT V64:$Rn),
+ imm:$idx)))),
+ (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
+}
+
+defm : DUPWithTruncPats<v8i8, v4i16, v8i16, i32, DUPv8i8lane, VecIndex_x2>;
+defm : DUPWithTruncPats<v8i8, v2i32, v4i32, i32, DUPv8i8lane, VecIndex_x4>;
+defm : DUPWithTruncPats<v4i16, v2i32, v4i32, i32, DUPv4i16lane, VecIndex_x2>;
+
+defm : DUPWithTruncPats<v16i8, v4i16, v8i16, i32, DUPv16i8lane, VecIndex_x2>;
+defm : DUPWithTruncPats<v16i8, v2i32, v4i32, i32, DUPv16i8lane, VecIndex_x4>;
+defm : DUPWithTruncPats<v8i16, v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>;
+
+multiclass DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP,
+ SDNodeXForm IdxXFORM> {
+ def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v2i64 V128:$Rn),
+ imm:$idx))))),
+ (DUP V128:$Rn, (IdxXFORM imm:$idx))>;
+
+ def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v1i64 V64:$Rn),
+ imm:$idx))))),
+ (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
+}
+
+defm : DUPWithTrunci64Pats<v8i8, DUPv8i8lane, VecIndex_x8>;
+defm : DUPWithTrunci64Pats<v4i16, DUPv4i16lane, VecIndex_x4>;
+defm : DUPWithTrunci64Pats<v2i32, DUPv2i32lane, VecIndex_x2>;
+
+defm : DUPWithTrunci64Pats<v16i8, DUPv16i8lane, VecIndex_x8>;
+defm : DUPWithTrunci64Pats<v8i16, DUPv8i16lane, VecIndex_x4>;
+defm : DUPWithTrunci64Pats<v4i32, DUPv4i32lane, VecIndex_x2>;
+
+// SMOV and UMOV definitions, with some extra patterns for convenience
+defm SMOV : SMov;
+defm UMOV : UMov;
+
+def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
+ (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>;
+def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
+ (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
+def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
+ (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>;
+def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
+ (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
+def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
+ (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>;
+def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
+ (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;
+
+def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
+ VectorIndexB:$idx)))), i8),
+ (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
+def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
+ VectorIndexH:$idx)))), i16),
+ (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
+
+// Extracting i8 or i16 elements will have the zero-extend transformed to
+// an 'and' mask by type legalization since neither i8 nor i16 are legal types
+// for AArch64. Match these patterns here since UMOV already zeroes out the high
+// bits of the destination register.
+def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx),
+ (i32 0xff)),
+ (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>;
+def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),
+ (i32 0xffff)),
+ (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>;
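+
+// For example (illustrative), IR such as
+//   %e = extractelement <16 x i8> %v, i64 3
+//   %z = zext i8 %e to i32
+// is legalized into the (and (vector_extract ...), 0xff) form above and should
+// select a single "umov w0, v0.b[3]".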
+
+defm INS : SIMDIns;
+
+def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
+ (SUBREG_TO_REG (i32 0),
+ (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
+def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
+ (SUBREG_TO_REG (i32 0),
+ (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
+
+def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
+ (SUBREG_TO_REG (i32 0),
+ (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
+def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
+ (SUBREG_TO_REG (i32 0),
+ (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
+
+def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
+ (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
+def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
+ (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
+
+def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))),
+ (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
+ (i32 FPR32:$Rn), ssub))>;
+def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))),
+ (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+ (i32 FPR32:$Rn), ssub))>;
+
+def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))),
+ (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
+ (i64 FPR64:$Rn), dsub))>;
+
+def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
+ (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
+def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
+ (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
+
+def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
+def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
+
+def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>;
+
+def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn),
+ (f16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
+ (EXTRACT_SUBREG
+ (INSvi16lane
+ (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
+ VectorIndexS:$imm,
+ (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
+ (i64 0)),
+ dsub)>;
+
+def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn),
+ (f16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
+ (INSvi16lane
+ V128:$Rn, VectorIndexH:$imm,
+ (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
+ (i64 0))>;
+
+def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn),
+ (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
+ (EXTRACT_SUBREG
+ (INSvi32lane
+ (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
+ VectorIndexS:$imm,
+ (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
+ (i64 0)),
+ dsub)>;
+def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn),
+ (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
+ (INSvi32lane
+ V128:$Rn, VectorIndexS:$imm,
+ (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
+ (i64 0))>;
+def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn),
+ (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))),
+ (INSvi64lane
+ V128:$Rn, VectorIndexD:$imm,
+ (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)),
+ (i64 0))>;
+
+// Copy an element at a constant index in one vector into a constant indexed
+// element of another.
+// FIXME: refactor to a shared class/def parameterized on vector type, vector
+// index type and INS extension.
+def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane
+ (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs),
+ VectorIndexB:$idx2)),
+ (v16i8 (INSvi8lane
+ V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2)
+ )>;
+def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane
+ (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs),
+ VectorIndexH:$idx2)),
+ (v8i16 (INSvi16lane
+ V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2)
+ )>;
+def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane
+ (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs),
+ VectorIndexS:$idx2)),
+ (v4i32 (INSvi32lane
+ V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2)
+ )>;
+def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane
+ (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs),
+ VectorIndexD:$idx2)),
+ (v2i64 (INSvi64lane
+ V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2)
+ )>;
+
+multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
+ ValueType VTScal, Instruction INS> {
+ def : Pat<(VT128 (vector_insert V128:$src,
+ (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
+ imm:$Immd)),
+ (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>;
+
+ def : Pat<(VT128 (vector_insert V128:$src,
+ (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
+ imm:$Immd)),
+ (INS V128:$src, imm:$Immd,
+ (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;
+
+ def : Pat<(VT64 (vector_insert V64:$src,
+ (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
+ imm:$Immd)),
+ (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub),
+ imm:$Immd, V128:$Rn, imm:$Immn),
+ dsub)>;
+
+ def : Pat<(VT64 (vector_insert V64:$src,
+ (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
+ imm:$Immd)),
+ (EXTRACT_SUBREG
+ (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd,
+ (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn),
+ dsub)>;
+}
+
+defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>;
+defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>;
+defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>;
+
+
+// Floating point vector extractions are codegen'd as either a sequence of
+// subregister extractions, or a MOV (aka CPY here, alias for DUP) if
+// the lane number is anything other than zero.
+def : Pat<(vector_extract (v2f64 V128:$Rn), 0),
+ (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
+def : Pat<(vector_extract (v4f32 V128:$Rn), 0),
+ (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
+def : Pat<(vector_extract (v8f16 V128:$Rn), 0),
+ (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
+
+def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
+ (f64 (CPYi64 V128:$Rn, VectorIndexD:$idx))>;
+def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
+ (f32 (CPYi32 V128:$Rn, VectorIndexS:$idx))>;
+def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx),
+ (f16 (CPYi16 V128:$Rn, VectorIndexH:$idx))>;
+
+// All concat_vectors operations are canonicalised to act on i64 vectors for
+// AArch64. In the general case we need an instruction, which may just as well
+// be INS.
+class ConcatPat<ValueType DstTy, ValueType SrcTy>
+ : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)),
+ (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1,
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>;
+
+def : ConcatPat<v2i64, v1i64>;
+def : ConcatPat<v2f64, v1f64>;
+def : ConcatPat<v4i32, v2i32>;
+def : ConcatPat<v4f32, v2f32>;
+def : ConcatPat<v8i16, v4i16>;
+def : ConcatPat<v8f16, v4f16>;
+def : ConcatPat<v16i8, v8i8>;
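+
+// E.g. (illustrative, roughly what a vcombine_f32-style concatenation
+// becomes): the low v2f32 half is placed with a subregister copy and the high
+// half with "mov v0.d[1], v1.d[0]" (the INSvi64lane used by ConcatPat).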
+
+// If the high lanes are undef, though, we can just ignore them:
+class ConcatUndefPat<ValueType DstTy, ValueType SrcTy>
+ : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)),
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>;
+
+def : ConcatUndefPat<v2i64, v1i64>;
+def : ConcatUndefPat<v2f64, v1f64>;
+def : ConcatUndefPat<v4i32, v2i32>;
+def : ConcatUndefPat<v4f32, v2f32>;
+def : ConcatUndefPat<v8i16, v4i16>;
+def : ConcatUndefPat<v16i8, v8i8>;
+
+//----------------------------------------------------------------------------
+// AdvSIMD across lanes instructions
+//----------------------------------------------------------------------------
+
+defm ADDV : SIMDAcrossLanesBHS<0, 0b11011, "addv">;
+defm SMAXV : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">;
+defm SMINV : SIMDAcrossLanesBHS<0, 0b11010, "sminv">;
+defm UMAXV : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">;
+defm UMINV : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;
+defm SADDLV : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">;
+defm UADDLV : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;
+defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>;
+defm FMAXV : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>;
+defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>;
+defm FMINV : SIMDFPAcrossLanes<0b01111, 1, "fminv", int_aarch64_neon_fminv>;
+
+// Patterns for across-vector intrinsics that have a node equivalent which
+// returns a vector (with only the low lane defined) instead of a scalar.
+// In effect, opNode is the same as (scalar_to_vector (IntNode)).
+multiclass SIMDAcrossLanesIntrinsic<string baseOpc,
+ SDPatternOperator opNode> {
+// If a lane instruction caught the vector_extract around opNode, we can
+// directly match the latter to the instruction.
+def : Pat<(v8i8 (opNode V64:$Rn)),
+ (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>;
+def : Pat<(v16i8 (opNode V128:$Rn)),
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>;
+def : Pat<(v4i16 (opNode V64:$Rn)),
+ (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>;
+def : Pat<(v8i16 (opNode V128:$Rn)),
+ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>;
+def : Pat<(v4i32 (opNode V128:$Rn)),
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>;
+
+
+// If none did, fall back to the explicit patterns, consuming the vector_extract.
+def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)),
+ (i32 0)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn),
+ bsub), ssub)>;
+def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn),
+ bsub), ssub)>;
+def : Pat<(i32 (vector_extract (insert_subvector undef,
+ (v4i16 (opNode V64:$Rn)), (i32 0)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn),
+ hsub), ssub)>;
+def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn),
+ hsub), ssub)>;
+def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn),
+ ssub), ssub)>;
+
+}
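+
+// Illustrative example (assumed-typical lowering): a vaddv_u8-style reduction
+// becomes an AArch64uaddv node whose lane-0 vector_extract is consumed by the
+// fallback patterns above, so it selects a single "addv b0, v0.8b" plus a read
+// of the low lane rather than an extra element move.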
+
+multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc,
+ SDPatternOperator opNode>
+ : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
+// If there is a sign extension after this intrinsic, consume it, as SMOV
+// already performs the extension.
+def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
+ (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), i8)),
+ (i32 (SMOVvi8to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
+ (i64 0)))>;
+def : Pat<(i32 (sext_inreg (i32 (vector_extract
+ (opNode (v16i8 V128:$Rn)), (i64 0))), i8)),
+ (i32 (SMOVvi8to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
+ (i64 0)))>;
+def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
+ (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), i16)),
+ (i32 (SMOVvi16to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
+ (i64 0)))>;
+def : Pat<(i32 (sext_inreg (i32 (vector_extract
+ (opNode (v8i16 V128:$Rn)), (i64 0))), i16)),
+ (i32 (SMOVvi16to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
+ (i64 0)))>;
+}
+
+multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc,
+ SDPatternOperator opNode>
+ : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
+// If there is a masking operation that keeps only the bits actually
+// generated, consume it.
+def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
+ (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), maski8_or_more)),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
+ ssub))>;
+def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))),
+ maski8_or_more)),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
+ ssub))>;
+def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
+ (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), maski16_or_more)),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
+ ssub))>;
+def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))),
+ maski16_or_more)),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
+ ssub))>;
+}
+
+defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", AArch64saddv>;
+// vaddv_[su]32 is special: it lowers to "addp Vd.2s, Vn.2s, Vm.2s" with
+// Vn == Vm and returns Vd.s[0].
+def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))),
+ (ADDPv2i32 V64:$Rn, V64:$Rn)>;
+
+defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>;
+// vaddv_[su]32 is special: it lowers to "addp Vd.2s, Vn.2s, Vm.2s" with
+// Vn == Vm and returns Vd.s[0].
+def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))),
+ (ADDPv2i32 V64:$Rn, V64:$Rn)>;
+
+defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>;
+def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))),
+ (SMAXPv2i32 V64:$Rn, V64:$Rn)>;
+
+defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>;
+def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))),
+ (SMINPv2i32 V64:$Rn, V64:$Rn)>;
+
+defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>;
+def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))),
+ (UMAXPv2i32 V64:$Rn, V64:$Rn)>;
+
+defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>;
+def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))),
+ (UMINPv2i32 V64:$Rn, V64:$Rn)>;
+
+multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> {
+ def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
+ (i32 (SMOVvi16to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
+ (i64 0)))>;
+def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
+ (i32 (SMOVvi16to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
+ (i64 0)))>;
+
+def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
+ ssub))>;
+def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
+ ssub))>;
+
+def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
+ (i64 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
+ dsub))>;
+}
+
+multiclass SIMDAcrossLanesUnsignedLongIntrinsic<string baseOpc,
+ Intrinsic intOp> {
+ def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
+ ssub))>;
+def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
+ ssub))>;
+
+def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
+ ssub))>;
+def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
+ ssub))>;
+
+def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
+ (i64 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
+ dsub))>;
+}
+
+defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>;
+defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>;
+
+// The vaddlv_s32 intrinsic gets mapped to SADDLP.
+def : Pat<(i64 (int_aarch64_neon_saddlv (v2i32 V64:$Rn))),
+ (i64 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (SADDLPv2i32_v1i64 V64:$Rn), dsub),
+ dsub))>;
+// The vaddlv_u32 intrinsic gets mapped to UADDLP.
+def : Pat<(i64 (int_aarch64_neon_uaddlv (v2i32 V64:$Rn))),
+ (i64 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (UADDLPv2i32_v1i64 V64:$Rn), dsub),
+ dsub))>;
+
+//------------------------------------------------------------------------------
+// AdvSIMD modified immediate instructions
+//------------------------------------------------------------------------------
+
+// AdvSIMD BIC
+defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>;
+// AdvSIMD ORR
+defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>;
+
+def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>;
+def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
+def : InstAlias<"bic $Vd.2s, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>;
+def : InstAlias<"bic $Vd.4s, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;
+
+def : InstAlias<"bic.4h $Vd, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>;
+def : InstAlias<"bic.8h $Vd, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
+def : InstAlias<"bic.2s $Vd, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>;
+def : InstAlias<"bic.4s $Vd, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;
+
+def : InstAlias<"orr $Vd.4h, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>;
+def : InstAlias<"orr $Vd.8h, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
+def : InstAlias<"orr $Vd.2s, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>;
+def : InstAlias<"orr $Vd.4s, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;
+
+def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>;
+def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
+def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>;
+def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;
+
+// AdvSIMD FMOV
+def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8,
+ "fmov", ".2d",
+ [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
+def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64, fpimm8,
+ "fmov", ".2s",
+ [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
+def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8,
+ "fmov", ".4s",
+ [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64, fpimm8,
+ "fmov", ".4h",
+ [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
+def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8,
+ "fmov", ".8h",
+ [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
+} // Predicates = [HasNEON, HasFullFP16]
+
+// AdvSIMD MOVI
+
+// EDIT byte mask: scalar
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi",
+ [(set FPR64:$Rd, simdimmtype10:$imm8)]>;
+// The movi_edit node has the immediate value already encoded, so we use
+// a plain imm0_255 here.
+def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)),
+ (MOVID imm0_255:$shift)>;
+
+def : Pat<(v1i64 immAllZerosV), (MOVID (i32 0))>;
+def : Pat<(v2i32 immAllZerosV), (MOVID (i32 0))>;
+def : Pat<(v4i16 immAllZerosV), (MOVID (i32 0))>;
+def : Pat<(v8i8 immAllZerosV), (MOVID (i32 0))>;
+
+def : Pat<(v1i64 immAllOnesV), (MOVID (i32 255))>;
+def : Pat<(v2i32 immAllOnesV), (MOVID (i32 255))>;
+def : Pat<(v4i16 immAllOnesV), (MOVID (i32 255))>;
+def : Pat<(v8i8 immAllOnesV), (MOVID (i32 255))>;
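+
+// Note (explanatory, not from the original): the byte-mask form replicates
+// each of the 8 bits of imm8 into a full byte of the 64-bit result, so imm 0
+// gives all-zeros and imm 255 gives all-ones, e.g. (v8i8 immAllOnesV) becomes
+//   movi d0, #0xffffffffffffffff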
+
+// EDIT byte mask: 2d
+
+// The movi_edit node has the immediate value already encoded, so we use
+// a plain imm0_255 in the pattern
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128,
+ simdimmtype10,
+ "movi", ".2d",
+ [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;
+
+def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
+def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
+def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
+def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>;
+
+def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>;
+def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>;
+def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>;
+def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>;
+
+// EDIT per word & halfword: 2s, 4h, 4s, & 8h
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;
+
+def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"movi $Vd.4s, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
+
+def : InstAlias<"movi.4h $Vd, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
+
+def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
+ (MOVIv2i32 imm0_255:$imm8, imm:$shift)>;
+def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
+ (MOVIv4i32 imm0_255:$imm8, imm:$shift)>;
+def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
+ (MOVIv4i16 imm0_255:$imm8, imm:$shift)>;
+def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
+ (MOVIv8i16 imm0_255:$imm8, imm:$shift)>;
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+// EDIT per word: 2s & 4s with MSL shifter
+def MOVIv2s_msl : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s",
+ [(set (v2i32 V64:$Rd),
+ (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
+def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s",
+ [(set (v4i32 V128:$Rd),
+ (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
+
+// Per byte: 8b & 16b
+def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64, imm0_255,
+ "movi", ".8b",
+ [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>;
+
+def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255,
+ "movi", ".16b",
+ [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>;
+}
+
+// AdvSIMD MVNI
+
+// EDIT per word & halfword: 2s, 4h, 4s, & 8h
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+defm MVNI : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">;
+
+def : InstAlias<"mvni $Vd.4h, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"mvni $Vd.8h, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"mvni $Vd.2s, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"mvni $Vd.4s, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
+
+def : InstAlias<"mvni.4h $Vd, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
+
+def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
+ (MVNIv2i32 imm0_255:$imm8, imm:$shift)>;
+def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
+ (MVNIv4i32 imm0_255:$imm8, imm:$shift)>;
+def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
+ (MVNIv4i16 imm0_255:$imm8, imm:$shift)>;
+def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
+ (MVNIv8i16 imm0_255:$imm8, imm:$shift)>;
+
+// EDIT per word: 2s & 4s with MSL shifter
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+def MVNIv2s_msl : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s",
+ [(set (v2i32 V64:$Rd),
+ (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
+def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s",
+ [(set (v4i32 V128:$Rd),
+ (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
+}
+
+//----------------------------------------------------------------------------
+// AdvSIMD indexed element
+//----------------------------------------------------------------------------
+
+let hasSideEffects = 0 in {
+ defm FMLA : SIMDFPIndexedTied<0, 0b0001, "fmla">;
+ defm FMLS : SIMDFPIndexedTied<0, 0b0101, "fmls">;
+}
+
+// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the
+// instruction expects the addend first, while the intrinsic expects it last.
+
+// On the other hand, there are quite a few valid combinatorial options due to
+// the commutativity of multiplication and the fact that (-x) * y = x * (-y).
+defm : SIMDFPIndexedTiedPatterns<"FMLA",
+ TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)>>;
+defm : SIMDFPIndexedTiedPatterns<"FMLA",
+ TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)>>;
+
+defm : SIMDFPIndexedTiedPatterns<"FMLS",
+ TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
+defm : SIMDFPIndexedTiedPatterns<"FMLS",
+ TriOpFrag<(fma node:$RHS, (fneg node:$MHS), node:$LHS)> >;
+defm : SIMDFPIndexedTiedPatterns<"FMLS",
+ TriOpFrag<(fma (fneg node:$RHS), node:$MHS, node:$LHS)> >;
+defm : SIMDFPIndexedTiedPatterns<"FMLS",
+ TriOpFrag<(fma (fneg node:$MHS), node:$RHS, node:$LHS)> >;
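+
+// For instance, the four FMLS spellings above all describe acc - x*y:
+// fma(x, -y, acc), fma(-y, x, acc), fma(-x, y, acc) and fma(y, -x, acc) are
+// algebraically identical, so whichever form the combiner happens to produce
+// has a pattern.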
+
+multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
+ // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit
+ // and DUP scalar.
+ def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
+ (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
+ VectorIndexS:$idx))),
+ (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>;
+ def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
+ (v2f32 (AArch64duplane32
+ (v4f32 (insert_subvector undef,
+ (v2f32 (fneg V64:$Rm)),
+ (i32 0))),
+ VectorIndexS:$idx)))),
+ (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
+ (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
+ VectorIndexS:$idx)>;
+ def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
+ (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
+ (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
+ (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;
+
+ // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit
+ // and DUP scalar.
+ def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
+ (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
+ VectorIndexS:$idx))),
+ (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm,
+ VectorIndexS:$idx)>;
+ def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
+ (v4f32 (AArch64duplane32
+ (v4f32 (insert_subvector undef,
+ (v2f32 (fneg V64:$Rm)),
+ (i32 0))),
+ VectorIndexS:$idx)))),
+ (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
+ (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
+ VectorIndexS:$idx)>;
+ def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
+ (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
+ (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
+ (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;
+
+ // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar
+ // (DUPLANE from 64-bit would be trivial).
+ def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
+ (AArch64duplane64 (v2f64 (fneg V128:$Rm)),
+ VectorIndexD:$idx))),
+ (FMLSv2i64_indexed
+ V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>;
+ def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
+ (AArch64dup (f64 (fneg FPR64Op:$Rm))))),
+ (FMLSv2i64_indexed V128:$Rd, V128:$Rn,
+ (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>;
+
+ // 2 variants for 32-bit scalar version: extract from .2s or from .4s
+ def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
+ (vector_extract (v4f32 (fneg V128:$Rm)),
+ VectorIndexS:$idx))),
+ (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
+ V128:$Rm, VectorIndexS:$idx)>;
+ def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
+ (vector_extract (v4f32 (insert_subvector undef,
+ (v2f32 (fneg V64:$Rm)),
+ (i32 0))),
+ VectorIndexS:$idx))),
+ (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
+ (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>;
+
+ // 1 variant for 64-bit scalar version: extract from .1d or from .2d
+ def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn),
+ (vector_extract (v2f64 (fneg V128:$Rm)),
+ VectorIndexS:$idx))),
+ (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn,
+ V128:$Rm, VectorIndexS:$idx)>;
+}
+
+defm : FMLSIndexedAfterNegPatterns<
+ TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >;
+defm : FMLSIndexedAfterNegPatterns<
+ TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)> >;
+
+defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
+defm FMUL : SIMDFPIndexed<0, 0b1001, "fmul", fmul>;
+
+def : Pat<(v2f32 (fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
+ (FMULv2i32_indexed V64:$Rn,
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
+ (i64 0))>;
+def : Pat<(v4f32 (fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
+ (FMULv4i32_indexed V128:$Rn,
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
+ (i64 0))>;
+def : Pat<(v2f64 (fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))),
+ (FMULv2i64_indexed V128:$Rn,
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub),
+ (i64 0))>;
+
+defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>;
+defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
+defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla",
+ TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))>>;
+defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls",
+ TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))>>;
+defm MUL : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>;
+defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal",
+ TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
+defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl",
+ TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
+defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull",
+ int_aarch64_neon_smull>;
+defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal",
+ int_aarch64_neon_sqadd>;
+defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
+ int_aarch64_neon_sqsub>;
+defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
+ int_aarch64_neon_sqadd>;
+defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh",
+ int_aarch64_neon_sqsub>;
+defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>;
+defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal",
+ TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
+defm UMLSL : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl",
+ TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
+defm UMULL : SIMDVectorIndexedLongSD<1, 0b1010, "umull",
+ int_aarch64_neon_umull>;
+
+// A scalar sqdmull with the second operand being a vector lane can be
+// handled directly with the indexed instruction encoding.
+def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
+ (vector_extract (v4i32 V128:$Vm),
+ VectorIndexS:$idx)),
+ (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>;
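+
+// E.g. (illustrative) a scalar sqdmull whose second operand is extracted from
+// lane 1 of a .4s vector - roughly what vqdmulls_laneq_s32 produces - selects
+// "sqdmull d0, s1, v2.s[1]" directly, with no separate lane extract.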
+
+//----------------------------------------------------------------------------
+// AdvSIMD scalar shift instructions
+//----------------------------------------------------------------------------
+defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">;
+defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">;
+defm SCVTF : SIMDFPScalarRShift<0, 0b11100, "scvtf">;
+defm UCVTF : SIMDFPScalarRShift<1, 0b11100, "ucvtf">;
+// Codegen patterns for the above. We don't put these directly on the
+// instructions because TableGen's type inference can't handle the truth.
+// Having the same base pattern for fp <--> int totally freaks it out.
+def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm),
+ (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>;
+def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm),
+ (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>;
+def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)),
+ (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
+def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)),
+ (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
+def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn),
+ vecshiftR64:$imm)),
+ (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
+def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn),
+ vecshiftR64:$imm)),
+ (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
+def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm),
+ (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
+def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
+ (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
+def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn),
+ vecshiftR64:$imm)),
+ (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
+def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
+ (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
+def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn),
+ vecshiftR64:$imm)),
+ (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
+def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
+ (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
+
+// Patterns for FP16 intrinsics - these require a reg copy to/from FPR16, as i16 is not a supported type.
+
+def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), vecshiftR16:$imm)),
+ (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
+def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
+ (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
+def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp
+ (and FPR32:$Rn, (i32 65535)),
+ vecshiftR16:$imm)),
+ (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
+def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)),
+ (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
+def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
+ (UCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>;
+def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR32:$imm)),
+ (i32 (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)),
+ (FCVTZSh FPR16:$Rn, vecshiftR32:$imm),
+ hsub))>;
+def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)),
+ (i64 (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)),
+ (FCVTZSh FPR16:$Rn, vecshiftR64:$imm),
+ hsub))>;
+def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR32:$imm)),
+ (i32 (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)),
+ (FCVTZUh FPR16:$Rn, vecshiftR32:$imm),
+ hsub))>;
+def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR64:$imm)),
+ (i64 (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)),
+ (FCVTZUh FPR16:$Rn, vecshiftR64:$imm),
+ hsub))>;
+
+defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", AArch64vshl>;
+defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli">;
+defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn",
+ int_aarch64_neon_sqrshrn>;
+defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun",
+ int_aarch64_neon_sqrshrun>;
+defm SQSHLU : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
+defm SQSHL : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
+defm SQSHRN : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn",
+ int_aarch64_neon_sqshrn>;
+defm SQSHRUN : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun",
+ int_aarch64_neon_sqshrun>;
+defm SRI : SIMDScalarRShiftDTied< 1, 0b01000, "sri">;
+defm SRSHR : SIMDScalarRShiftD< 0, 0b00100, "srshr", AArch64srshri>;
+defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra",
+ TriOpFrag<(add node:$LHS,
+ (AArch64srshri node:$MHS, node:$RHS))>>;
+defm SSHR : SIMDScalarRShiftD< 0, 0b00000, "sshr", AArch64vashr>;
+defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra",
+ TriOpFrag<(add node:$LHS,
+ (AArch64vashr node:$MHS, node:$RHS))>>;
+defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn",
+ int_aarch64_neon_uqrshrn>;
+defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
+defm UQSHRN : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn",
+ int_aarch64_neon_uqshrn>;
+defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", AArch64urshri>;
+defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra",
+ TriOpFrag<(add node:$LHS,
+ (AArch64urshri node:$MHS, node:$RHS))>>;
+defm USHR : SIMDScalarRShiftD< 1, 0b00000, "ushr", AArch64vlshr>;
+defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra",
+ TriOpFrag<(add node:$LHS,
+ (AArch64vlshr node:$MHS, node:$RHS))>>;
+
+//----------------------------------------------------------------------------
+// AdvSIMD vector shift instructions
+//----------------------------------------------------------------------------
+defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>;
+defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>;
+defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf",
+ int_aarch64_neon_vcvtfxs2fp>;
+defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn",
+ int_aarch64_neon_rshrn>;
+defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;
+defm SHRN : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn",
+ BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>;
+defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", int_aarch64_neon_vsli>;
+def : Pat<(v1i64 (int_aarch64_neon_vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
+ (i32 vecshiftL64:$imm))),
+ (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>;
+defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn",
+ int_aarch64_neon_sqrshrn>;
+defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun",
+ int_aarch64_neon_sqrshrun>;
+defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
+defm SQSHL : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
+defm SQSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn",
+ int_aarch64_neon_sqshrn>;
+defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun",
+ int_aarch64_neon_sqshrun>;
+defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", int_aarch64_neon_vsri>;
+def : Pat<(v1i64 (int_aarch64_neon_vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
+ (i32 vecshiftR64:$imm))),
+ (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>;
+defm SRSHR : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>;
+defm SRSRA : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra",
+ TriOpFrag<(add node:$LHS,
+ (AArch64srshri node:$MHS, node:$RHS))> >;
+defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll",
+ BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>;
+
+defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>;
+defm SSRA : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra",
+ TriOpFrag<(add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>;
+defm UCVTF : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf",
+ int_aarch64_neon_vcvtfxu2fp>;
+defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
+ int_aarch64_neon_uqrshrn>;
+defm UQSHL : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
+defm UQSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn",
+ int_aarch64_neon_uqshrn>;
+defm URSHR : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>;
+defm URSRA : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra",
+ TriOpFrag<(add node:$LHS,
+ (AArch64urshri node:$MHS, node:$RHS))> >;
+defm USHLL : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll",
+ BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>;
+defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;
+defm USRA : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
+ TriOpFrag<(add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >;
+
+// SHRN patterns for when a logical right shift was used instead of arithmetic
+// (the immediate guarantees no sign bits actually end up in the result so it
+// doesn't matter).
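+// For example, for (v8i8 (trunc (AArch64vlshr (v8i16 X), imm))) the narrowing
+// immediate is at most 8, so the low 8 bits kept by the truncate are always
+// original bits of the lane, never bits shifted in at the top; an arithmetic
+// and a logical shift therefore narrow to the same value.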
+def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))),
+ (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>;
+def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))),
+ (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>;
+def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))),
+ (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>;
+
+def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd),
+ (trunc (AArch64vlshr (v8i16 V128:$Rn),
+ vecshiftR16Narrow:$imm)))),
+ (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
+ V128:$Rn, vecshiftR16Narrow:$imm)>;
+def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd),
+ (trunc (AArch64vlshr (v4i32 V128:$Rn),
+ vecshiftR32Narrow:$imm)))),
+ (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
+ V128:$Rn, vecshiftR32Narrow:$imm)>;
+def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd),
+ (trunc (AArch64vlshr (v2i64 V128:$Rn),
+ vecshiftR64Narrow:$imm)))),
+          (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
+                           V128:$Rn, vecshiftR64Narrow:$imm)>;
+
+// Vector sign and zero extensions are implemented with SSHLL and USHLL.
+// Anyexts are implemented as zexts.
+def : Pat<(v8i16 (sext (v8i8 V64:$Rn))), (SSHLLv8i8_shift V64:$Rn, (i32 0))>;
+def : Pat<(v8i16 (zext (v8i8 V64:$Rn))), (USHLLv8i8_shift V64:$Rn, (i32 0))>;
+def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))), (USHLLv8i8_shift V64:$Rn, (i32 0))>;
+def : Pat<(v4i32 (sext (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>;
+def : Pat<(v4i32 (zext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
+def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
+def : Pat<(v2i64 (sext (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>;
+def : Pat<(v2i64 (zext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
+def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
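+// For example, (v8i16 (zext (v8i8 V64:$Rn))) selects to
+// "ushll v0.8h, v0.8b, #0"; the uxtl aliases defined further below let the
+// assembler also write this as "uxtl v0.8h, v0.8b".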
+// Also match an extend from the upper half of a 128 bit source register.
+def : Pat<(v8i16 (anyext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
+ (USHLLv16i8_shift V128:$Rn, (i32 0))>;
+def : Pat<(v8i16 (zext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
+ (USHLLv16i8_shift V128:$Rn, (i32 0))>;
+def : Pat<(v8i16 (sext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
+ (SSHLLv16i8_shift V128:$Rn, (i32 0))>;
+def : Pat<(v4i32 (anyext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
+ (USHLLv8i16_shift V128:$Rn, (i32 0))>;
+def : Pat<(v4i32 (zext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
+ (USHLLv8i16_shift V128:$Rn, (i32 0))>;
+def : Pat<(v4i32 (sext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
+ (SSHLLv8i16_shift V128:$Rn, (i32 0))>;
+def : Pat<(v2i64 (anyext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
+ (USHLLv4i32_shift V128:$Rn, (i32 0))>;
+def : Pat<(v2i64 (zext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
+ (USHLLv4i32_shift V128:$Rn, (i32 0))>;
+def : Pat<(v2i64 (sext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
+ (SSHLLv4i32_shift V128:$Rn, (i32 0))>;
+
+// Vector shift sxtl aliases
+def : InstAlias<"sxtl.8h $dst, $src1",
+ (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
+def : InstAlias<"sxtl $dst.8h, $src1.8b",
+ (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
+def : InstAlias<"sxtl.4s $dst, $src1",
+ (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
+def : InstAlias<"sxtl $dst.4s, $src1.4h",
+ (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
+def : InstAlias<"sxtl.2d $dst, $src1",
+ (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
+def : InstAlias<"sxtl $dst.2d, $src1.2s",
+ (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
+
+// Vector shift sxtl2 aliases
+def : InstAlias<"sxtl2.8h $dst, $src1",
+ (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
+def : InstAlias<"sxtl2 $dst.8h, $src1.16b",
+ (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
+def : InstAlias<"sxtl2.4s $dst, $src1",
+ (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
+def : InstAlias<"sxtl2 $dst.4s, $src1.8h",
+ (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
+def : InstAlias<"sxtl2.2d $dst, $src1",
+ (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
+def : InstAlias<"sxtl2 $dst.2d, $src1.4s",
+ (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
+
+// Vector shift uxtl aliases
+def : InstAlias<"uxtl.8h $dst, $src1",
+ (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
+def : InstAlias<"uxtl $dst.8h, $src1.8b",
+ (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
+def : InstAlias<"uxtl.4s $dst, $src1",
+ (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
+def : InstAlias<"uxtl $dst.4s, $src1.4h",
+ (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
+def : InstAlias<"uxtl.2d $dst, $src1",
+ (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
+def : InstAlias<"uxtl $dst.2d, $src1.2s",
+ (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
+
+// Vector shift uxtl2 aliases
+def : InstAlias<"uxtl2.8h $dst, $src1",
+ (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
+def : InstAlias<"uxtl2 $dst.8h, $src1.16b",
+ (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
+def : InstAlias<"uxtl2.4s $dst, $src1",
+ (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
+def : InstAlias<"uxtl2 $dst.4s, $src1.8h",
+ (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
+def : InstAlias<"uxtl2.2d $dst, $src1",
+ (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
+def : InstAlias<"uxtl2 $dst.2d, $src1.4s",
+ (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
+
+// If an integer is about to be converted to a floating point value,
+// just load it on the floating point unit.
+// These patterns are more complex because floating point loads do not
+// support sign extension.
+// The sign extension has to be explicitly added and is only supported for
+// one step: byte-to-half, half-to-word, word-to-doubleword.
+// SCVTF GPR -> FPR is 9 cycles.
+// SCVTF FPR -> FPR is 4 cycles.
+// (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles.
+// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR
+// and still be faster.
+// However, this is not good for code size.
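+//
+// As an illustration, with the patterns below a (sint_to_fp (sextloadi8 ...))
+// is expected to lower to roughly (registers are illustrative):
+//   ldr   b0, [x0]            // 8-bit load into the FP unit
+//   sshll v0.8h, v0.8b, #0    // sign extend to 16 bits
+//   sshll v0.4s, v0.4h, #0    // sign extend to 32 bits
+//   scvtf s0, s0              // FPR -> FPR convert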
+// 8-bits -> float. 2 sizes step-up.
+class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
+ : Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))),
+ (SCVTFv1i32 (f32 (EXTRACT_SUBREG
+ (SSHLLv4i16_shift
+ (f64
+ (EXTRACT_SUBREG
+ (SSHLLv8i8_shift
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+ INST,
+ bsub),
+ 0),
+ dsub)),
+ 0),
+ ssub)))>,
+ Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;
+
+def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
+ (LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
+def : SExtLoadi8CVTf32Pat<(ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext),
+ (LDRBroX GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext)>;
+def : SExtLoadi8CVTf32Pat<(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset),
+ (LDRBui GPR64sp:$Rn, uimm12s1:$offset)>;
+def : SExtLoadi8CVTf32Pat<(am_unscaled8 GPR64sp:$Rn, simm9:$offset),
+ (LDURBi GPR64sp:$Rn, simm9:$offset)>;
+
+// 16-bits -> float. 1 size step-up.
+class SExtLoadi16CVTf32Pat<dag addrmode, dag INST>
+ : Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))),
+ (SCVTFv1i32 (f32 (EXTRACT_SUBREG
+ (SSHLLv4i16_shift
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+ INST,
+ hsub),
+ 0),
+ ssub)))>, Requires<[NotForCodeSize]>;
+
+def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
+ (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
+def : SExtLoadi16CVTf32Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
+ (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
+def : SExtLoadi16CVTf32Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
+ (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
+def : SExtLoadi16CVTf32Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
+ (LDURHi GPR64sp:$Rn, simm9:$offset)>;
+
+// 32-bit to 32-bit conversions are handled in the target-specific dag combine:
+// performIntToFpCombine.
+// 64-bit integer to 32-bit floating point is not possible with
+// SCVTF on floating point registers (both source and destination
+// must have the same size).
+
+// Here are the patterns for 8, 16, 32, and 64-bits to double.
+// 8-bits -> double. 3 size step-up: give up.
+// 16-bits -> double. 2 size step-ups.
+class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
+ : Pat <(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))),
+ (SCVTFv1i64 (f64 (EXTRACT_SUBREG
+ (SSHLLv2i32_shift
+ (f64
+ (EXTRACT_SUBREG
+ (SSHLLv4i16_shift
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+ INST,
+ hsub),
+ 0),
+ dsub)),
+ 0),
+ dsub)))>,
+ Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;
+
+def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
+ (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
+def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
+ (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
+def : SExtLoadi16CVTf64Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
+ (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
+def : SExtLoadi16CVTf64Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
+ (LDURHi GPR64sp:$Rn, simm9:$offset)>;
+// 32-bits -> double. 1 size step-up.
+class SExtLoadi32CVTf64Pat<dag addrmode, dag INST>
+ : Pat <(f64 (sint_to_fp (i32 (load addrmode)))),
+ (SCVTFv1i64 (f64 (EXTRACT_SUBREG
+ (SSHLLv2i32_shift
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+ INST,
+ ssub),
+ 0),
+ dsub)))>, Requires<[NotForCodeSize]>;
+
+def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext),
+ (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>;
+def : SExtLoadi32CVTf64Pat<(ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext),
+ (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext)>;
+def : SExtLoadi32CVTf64Pat<(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset),
+ (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>;
+def : SExtLoadi32CVTf64Pat<(am_unscaled32 GPR64sp:$Rn, simm9:$offset),
+ (LDURSi GPR64sp:$Rn, simm9:$offset)>;
+
+// 64-bit -> double is handled in the target-specific dag combine:
+// performIntToFpCombine.
+
+
+//----------------------------------------------------------------------------
+// AdvSIMD Load-Store Structure
+//----------------------------------------------------------------------------
+defm LD1 : SIMDLd1Multiple<"ld1">;
+defm LD2 : SIMDLd2Multiple<"ld2">;
+defm LD3 : SIMDLd3Multiple<"ld3">;
+defm LD4 : SIMDLd4Multiple<"ld4">;
+
+defm ST1 : SIMDSt1Multiple<"st1">;
+defm ST2 : SIMDSt2Multiple<"st2">;
+defm ST3 : SIMDSt3Multiple<"st3">;
+defm ST4 : SIMDSt4Multiple<"st4">;
+
+class Ld1Pat<ValueType ty, Instruction INST>
+ : Pat<(ty (load GPR64sp:$Rn)), (INST GPR64sp:$Rn)>;
+
+def : Ld1Pat<v16i8, LD1Onev16b>;
+def : Ld1Pat<v8i16, LD1Onev8h>;
+def : Ld1Pat<v4i32, LD1Onev4s>;
+def : Ld1Pat<v2i64, LD1Onev2d>;
+def : Ld1Pat<v8i8, LD1Onev8b>;
+def : Ld1Pat<v4i16, LD1Onev4h>;
+def : Ld1Pat<v2i32, LD1Onev2s>;
+def : Ld1Pat<v1i64, LD1Onev1d>;
+
+class St1Pat<ValueType ty, Instruction INST>
+ : Pat<(store ty:$Vt, GPR64sp:$Rn),
+ (INST ty:$Vt, GPR64sp:$Rn)>;
+
+def : St1Pat<v16i8, ST1Onev16b>;
+def : St1Pat<v8i16, ST1Onev8h>;
+def : St1Pat<v4i32, ST1Onev4s>;
+def : St1Pat<v2i64, ST1Onev2d>;
+def : St1Pat<v8i8, ST1Onev8b>;
+def : St1Pat<v4i16, ST1Onev4h>;
+def : St1Pat<v2i32, ST1Onev2s>;
+def : St1Pat<v1i64, ST1Onev1d>;
+
+//---
+// Single-element
+//---
+
+defm LD1R : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>;
+defm LD2R : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>;
+defm LD3R : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>;
+defm LD4R : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>;
+let mayLoad = 1, hasSideEffects = 0 in {
+defm LD1 : SIMDLdSingleBTied<0, 0b000, "ld1", VecListOneb, GPR64pi1>;
+defm LD1 : SIMDLdSingleHTied<0, 0b010, 0, "ld1", VecListOneh, GPR64pi2>;
+defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes, GPR64pi4>;
+defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned, GPR64pi8>;
+defm LD2 : SIMDLdSingleBTied<1, 0b000, "ld2", VecListTwob, GPR64pi2>;
+defm LD2 : SIMDLdSingleHTied<1, 0b010, 0, "ld2", VecListTwoh, GPR64pi4>;
+defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos, GPR64pi8>;
+defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod, GPR64pi16>;
+defm LD3 : SIMDLdSingleBTied<0, 0b001, "ld3", VecListThreeb, GPR64pi3>;
+defm LD3 : SIMDLdSingleHTied<0, 0b011, 0, "ld3", VecListThreeh, GPR64pi6>;
+defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>;
+defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>;
+defm LD4 : SIMDLdSingleBTied<1, 0b001, "ld4", VecListFourb, GPR64pi4>;
+defm LD4 : SIMDLdSingleHTied<1, 0b011, 0, "ld4", VecListFourh, GPR64pi8>;
+defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours, GPR64pi16>;
+defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd, GPR64pi32>;
+}
+
+def : Pat<(v8i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
+ (LD1Rv8b GPR64sp:$Rn)>;
+def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
+ (LD1Rv16b GPR64sp:$Rn)>;
+def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
+ (LD1Rv4h GPR64sp:$Rn)>;
+def : Pat<(v8i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
+ (LD1Rv8h GPR64sp:$Rn)>;
+def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
+ (LD1Rv2s GPR64sp:$Rn)>;
+def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
+ (LD1Rv4s GPR64sp:$Rn)>;
+def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
+ (LD1Rv2d GPR64sp:$Rn)>;
+def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
+ (LD1Rv1d GPR64sp:$Rn)>;
+// Grab the floating point version too
+def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
+ (LD1Rv2s GPR64sp:$Rn)>;
+def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
+ (LD1Rv4s GPR64sp:$Rn)>;
+def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
+ (LD1Rv2d GPR64sp:$Rn)>;
+def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
+ (LD1Rv1d GPR64sp:$Rn)>;
+def : Pat<(v4f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
+ (LD1Rv4h GPR64sp:$Rn)>;
+def : Pat<(v8f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
+ (LD1Rv8h GPR64sp:$Rn)>;
+
+class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex,
+ ValueType VTy, ValueType STy, Instruction LD1>
+ : Pat<(vector_insert (VTy VecListOne128:$Rd),
+ (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
+ (LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>;
+
+def : Ld1Lane128Pat<extloadi8, VectorIndexB, v16i8, i32, LD1i8>;
+def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>;
+def : Ld1Lane128Pat<load, VectorIndexS, v4i32, i32, LD1i32>;
+def : Ld1Lane128Pat<load, VectorIndexS, v4f32, f32, LD1i32>;
+def : Ld1Lane128Pat<load, VectorIndexD, v2i64, i64, LD1i64>;
+def : Ld1Lane128Pat<load, VectorIndexD, v2f64, f64, LD1i64>;
+def : Ld1Lane128Pat<load, VectorIndexH, v8f16, f16, LD1i16>;
+
+class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex,
+ ValueType VTy, ValueType STy, Instruction LD1>
+ : Pat<(vector_insert (VTy VecListOne64:$Rd),
+ (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
+ (EXTRACT_SUBREG
+ (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
+ VecIndex:$idx, GPR64sp:$Rn),
+ dsub)>;
+
+def : Ld1Lane64Pat<extloadi8, VectorIndexB, v8i8, i32, LD1i8>;
+def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>;
+def : Ld1Lane64Pat<load, VectorIndexS, v2i32, i32, LD1i32>;
+def : Ld1Lane64Pat<load, VectorIndexS, v2f32, f32, LD1i32>;
+def : Ld1Lane64Pat<load, VectorIndexH, v4f16, f16, LD1i16>;
+
+
+defm LD1 : SIMDLdSt1SingleAliases<"ld1">;
+defm LD2 : SIMDLdSt2SingleAliases<"ld2">;
+defm LD3 : SIMDLdSt3SingleAliases<"ld3">;
+defm LD4 : SIMDLdSt4SingleAliases<"ld4">;
+
+// Stores
+defm ST1 : SIMDStSingleB<0, 0b000, "st1", VecListOneb, GPR64pi1>;
+defm ST1 : SIMDStSingleH<0, 0b010, 0, "st1", VecListOneh, GPR64pi2>;
+defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>;
+defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>;
+
+let AddedComplexity = 19 in
+class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex,
+ ValueType VTy, ValueType STy, Instruction ST1>
+ : Pat<(scalar_store
+ (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
+ GPR64sp:$Rn),
+ (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn)>;
+
+def : St1Lane128Pat<truncstorei8, VectorIndexB, v16i8, i32, ST1i8>;
+def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16, i32, ST1i16>;
+def : St1Lane128Pat<store, VectorIndexS, v4i32, i32, ST1i32>;
+def : St1Lane128Pat<store, VectorIndexS, v4f32, f32, ST1i32>;
+def : St1Lane128Pat<store, VectorIndexD, v2i64, i64, ST1i64>;
+def : St1Lane128Pat<store, VectorIndexD, v2f64, f64, ST1i64>;
+def : St1Lane128Pat<store, VectorIndexH, v8f16, f16, ST1i16>;
+
+let AddedComplexity = 19 in
+class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex,
+ ValueType VTy, ValueType STy, Instruction ST1>
+ : Pat<(scalar_store
+ (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
+ GPR64sp:$Rn),
+ (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
+ VecIndex:$idx, GPR64sp:$Rn)>;
+
+def : St1Lane64Pat<truncstorei8, VectorIndexB, v8i8, i32, ST1i8>;
+def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>;
+def : St1Lane64Pat<store, VectorIndexS, v2i32, i32, ST1i32>;
+def : St1Lane64Pat<store, VectorIndexS, v2f32, f32, ST1i32>;
+def : St1Lane64Pat<store, VectorIndexH, v4f16, f16, ST1i16>;
+
+multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex,
+ ValueType VTy, ValueType STy, Instruction ST1,
+ int offset> {
+ def : Pat<(scalar_store
+ (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
+ GPR64sp:$Rn, offset),
+ (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
+ VecIndex:$idx, GPR64sp:$Rn, XZR)>;
+
+ def : Pat<(scalar_store
+ (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
+ GPR64sp:$Rn, GPR64:$Rm),
+ (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
+ VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
+}
+
+defm : St1LanePost64Pat<post_truncsti8, VectorIndexB, v8i8, i32, ST1i8_POST, 1>;
+defm : St1LanePost64Pat<post_truncsti16, VectorIndexH, v4i16, i32, ST1i16_POST,
+ 2>;
+defm : St1LanePost64Pat<post_store, VectorIndexS, v2i32, i32, ST1i32_POST, 4>;
+defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>;
+defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>;
+defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>;
+defm : St1LanePost64Pat<post_store, VectorIndexH, v4f16, f16, ST1i16_POST, 2>;
+
+multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex,
+ ValueType VTy, ValueType STy, Instruction ST1,
+ int offset> {
+ def : Pat<(scalar_store
+ (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
+ GPR64sp:$Rn, offset),
+ (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, XZR)>;
+
+ def : Pat<(scalar_store
+ (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
+ GPR64sp:$Rn, GPR64:$Rm),
+ (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
+}
+
+defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i32, ST1i8_POST,
+ 1>;
+defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i32, ST1i16_POST,
+ 2>;
+defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>;
+defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>;
+defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>;
+defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>;
+defm : St1LanePost128Pat<post_store, VectorIndexH, v8f16, f16, ST1i16_POST, 2>;
+
+let mayStore = 1, hasSideEffects = 0 in {
+defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, GPR64pi2>;
+defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, GPR64pi4>;
+defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos, GPR64pi8>;
+defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod, GPR64pi16>;
+defm ST3 : SIMDStSingleB<0, 0b001, "st3", VecListThreeb, GPR64pi3>;
+defm ST3 : SIMDStSingleH<0, 0b011, 0, "st3", VecListThreeh, GPR64pi6>;
+defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>;
+defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>;
+defm ST4 : SIMDStSingleB<1, 0b001, "st4", VecListFourb, GPR64pi4>;
+defm ST4 : SIMDStSingleH<1, 0b011, 0, "st4", VecListFourh, GPR64pi8>;
+defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours, GPR64pi16>;
+defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd, GPR64pi32>;
+}
+
+defm ST1 : SIMDLdSt1SingleAliases<"st1">;
+defm ST2 : SIMDLdSt2SingleAliases<"st2">;
+defm ST3 : SIMDLdSt3SingleAliases<"st3">;
+defm ST4 : SIMDLdSt4SingleAliases<"st4">;
+
+//----------------------------------------------------------------------------
+// Crypto extensions
+//----------------------------------------------------------------------------
+
+let Predicates = [HasAES] in {
+def AESErr : AESTiedInst<0b0100, "aese", int_aarch64_crypto_aese>;
+def AESDrr : AESTiedInst<0b0101, "aesd", int_aarch64_crypto_aesd>;
+def AESMCrr : AESInst< 0b0110, "aesmc", int_aarch64_crypto_aesmc>;
+def AESIMCrr : AESInst< 0b0111, "aesimc", int_aarch64_crypto_aesimc>;
+}
+
+// Pseudo instructions for AESMCrr/AESIMCrr with a register constraint required
+// for AES fusion on some CPUs.
+let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
+def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
+ Sched<[WriteV]>;
+def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
+ Sched<[WriteV]>;
+}
+
+// Only use constrained versions of AES(I)MC instructions if they are paired with
+// AESE/AESD.
+def : Pat<(v16i8 (int_aarch64_crypto_aesmc
+ (v16i8 (int_aarch64_crypto_aese (v16i8 V128:$src1),
+ (v16i8 V128:$src2))))),
+ (v16i8 (AESMCrrTied (v16i8 (AESErr (v16i8 V128:$src1),
+ (v16i8 V128:$src2)))))>,
+ Requires<[HasFuseAES]>;
+
+def : Pat<(v16i8 (int_aarch64_crypto_aesimc
+ (v16i8 (int_aarch64_crypto_aesd (v16i8 V128:$src1),
+ (v16i8 V128:$src2))))),
+ (v16i8 (AESIMCrrTied (v16i8 (AESDrr (v16i8 V128:$src1),
+ (v16i8 V128:$src2)))))>,
+ Requires<[HasFuseAES]>;
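+
+// Illustratively, the tied pseudos force sequences such as
+//   aese  v0.16b, v1.16b
+//   aesmc v0.16b, v0.16b
+// in which AESMC consumes the register AESE just wrote - the adjacent,
+// dependent pair that the fusing CPUs recognize.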
+
+let Predicates = [HasSHA2] in {
+def SHA1Crrr : SHATiedInstQSV<0b000, "sha1c", int_aarch64_crypto_sha1c>;
+def SHA1Prrr : SHATiedInstQSV<0b001, "sha1p", int_aarch64_crypto_sha1p>;
+def SHA1Mrrr : SHATiedInstQSV<0b010, "sha1m", int_aarch64_crypto_sha1m>;
+def SHA1SU0rrr : SHATiedInstVVV<0b011, "sha1su0", int_aarch64_crypto_sha1su0>;
+def SHA256Hrrr : SHATiedInstQQV<0b100, "sha256h", int_aarch64_crypto_sha256h>;
+def SHA256H2rrr : SHATiedInstQQV<0b101, "sha256h2",int_aarch64_crypto_sha256h2>;
+def SHA256SU1rrr :SHATiedInstVVV<0b110, "sha256su1",int_aarch64_crypto_sha256su1>;
+
+def SHA1Hrr : SHAInstSS< 0b0000, "sha1h", int_aarch64_crypto_sha1h>;
+def SHA1SU1rr : SHATiedInstVV<0b0001, "sha1su1", int_aarch64_crypto_sha1su1>;
+def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_aarch64_crypto_sha256su0>;
+}
+
+//----------------------------------------------------------------------------
+// Compiler-pseudos
+//----------------------------------------------------------------------------
+// FIXME: Like for X86, these should go in their own separate .td file.
+
+def def32 : PatLeaf<(i32 GPR32:$src), [{
+ return isDef32(*N);
+}]>;
+
+// In the case of a 32-bit def that is known to implicitly zero-extend,
+// we can use a SUBREG_TO_REG.
+def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>;
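+// For example, an i32 add selected to ADDWrr already zeroes the upper 32 bits
+// of the X register when it executes, so the zext above folds to a plain
+// SUBREG_TO_REG with no extra instruction.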
+
+// For an anyext, we don't care what the high bits are, so we can perform an
+// INSERT_SUBREG into an IMPLICIT_DEF.
+def : Pat<(i64 (anyext GPR32:$src)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>;
+
+// When we need to explicitly zero-extend, we use a 32-bit MOV instruction and
+// then assert the extension has happened.
+def : Pat<(i64 (zext GPR32:$src)),
+ (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
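+// For example, (i64 (zext GPR32:$src)) becomes "mov wN, wM" (ORR with WZR);
+// the 32-bit write implicitly clears the upper 32 bits, and SUBREG_TO_REG
+// records that the value is already extended.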
+
+// To sign extend, we use a signed bitfield move instruction (SBFM) on the
+// containing super-reg.
+def : Pat<(i64 (sext GPR32:$src)),
+ (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>;
+def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>;
+def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>;
+def : Pat<(i64 (sext_inreg GPR64:$src, i8)), (SBFMXri GPR64:$src, 0, 7)>;
+def : Pat<(i64 (sext_inreg GPR64:$src, i1)), (SBFMXri GPR64:$src, 0, 0)>;
+def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>;
+def : Pat<(i32 (sext_inreg GPR32:$src, i8)), (SBFMWri GPR32:$src, 0, 7)>;
+def : Pat<(i32 (sext_inreg GPR32:$src, i1)), (SBFMWri GPR32:$src, 0, 0)>;
+
+def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)),
+ (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
+ (i64 (i32shift_sext_i8 imm0_31:$imm)))>;
+def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)),
+ (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
+ (i64 (i64shift_sext_i8 imm0_63:$imm)))>;
+
+def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)),
+ (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
+ (i64 (i32shift_sext_i16 imm0_31:$imm)))>;
+def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)),
+ (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
+ (i64 (i64shift_sext_i16 imm0_63:$imm)))>;
+
+def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)),
+ (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
+ (i64 (i64shift_a imm0_63:$imm)),
+ (i64 (i64shift_sext_i32 imm0_63:$imm)))>;
+
+// sra patterns have an AddedComplexity of 10, so make sure we have a higher
+// AddedComplexity for the following patterns since we want to match sext + sra
+// patterns before we attempt to match a single sra node.
+let AddedComplexity = 20 in {
+// We support all sext + sra combinations that preserve at least one bit of the
+// original value being sign extended, i.e. we support shifts up to
+// bitwidth-1 bits.
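+// For instance, (sra (sext_inreg GPR32:$Rn, i8), 3) matches the first pattern
+// below and becomes SBFMWri $Rn, 3, 7, i.e. a signed bitfield extract of
+// bits [7:3] sign extended to 32 bits.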
+def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)),
+ (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>;
+def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)),
+ (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>;
+
+def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)),
+ (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>;
+def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)),
+ (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>;
+
+def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)),
+ (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
+ (i64 imm0_31:$imm), 31)>;
+} // AddedComplexity = 20
+
+// To truncate, we can simply extract from a subregister.
+def : Pat<(i32 (trunc GPR64sp:$src)),
+ (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>;
+
+// __builtin_trap() uses the BRK instruction on AArch64.
+def : Pat<(trap), (BRK 1)>;
+
+// Conversions within AdvSIMD types in the same register size are free.
+// But because we need a consistent lane ordering, in big endian many
+// conversions require one or more REV instructions.
+//
+// Consider a simple memory load followed by a bitconvert then a store.
+// v0 = load v2i32
+// v1 = BITCAST v2i32 v0 to v4i16
+// store v4i16 v1
+//
+// In big endian mode every memory access has an implicit byte swap. LDR and
+// STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that
+// is, they treat the vector as a sequence of elements to be byte-swapped.
+// The two pairs of instructions are fundamentally incompatible. We've decided
+// to use LD1/ST1 only to simplify compiler implementation.
+//
+// LD1/ST1 perform the equivalent of a sequence of LDR/STR + REV. This makes
+// the original code sequence:
+// v0 = load v2i32
+// v1 = REV v2i32 (implicit)
+// v2 = BITCAST v2i32 v1 to v4i16
+// v3 = REV v4i16 v2 (implicit)
+// store v4i16 v3
+//
+// But this is now broken - the value stored is different to the value loaded
+// due to lane reordering. To fix this, on every BITCAST we must perform two
+// other REVs:
+// v0 = load v2i32
+// v1 = REV v2i32 (implicit)
+// v2 = REV v2i32 v1
+// v3 = BITCAST v2i32 v2 to v4i16
+// v4 = REV v4i16 v3
+// v5 = REV v4i16 v4 (implicit)
+// store v4i16 v5
+//
+// This means an extra two instructions, but actually in most cases the two REV
+// instructions can be combined into one. For example:
+// (REV64_2s (REV64_4h X)) === (REV32_4h X)
+//
+// There is also no 128-bit REV instruction. This must be synthesized with an
+// EXT instruction.
+//
+// Most bitconverts require some sort of conversion. The only exceptions are:
+// a) Identity conversions - vNfX <-> vNiX
+// b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX
+//
+
+// Natural vector casts (64 bit)
+def : Pat<(v8i8 (AArch64NvCast (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v4i16 (AArch64NvCast (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4f16 (AArch64NvCast (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v2i32 (AArch64NvCast (v2i32 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v2f32 (AArch64NvCast (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v1i64 (AArch64NvCast (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
+
+def : Pat<(v8i8 (AArch64NvCast (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v4i16 (AArch64NvCast (v4i16 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4f16 (AArch64NvCast (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v2i32 (AArch64NvCast (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v1i64 (AArch64NvCast (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
+
+def : Pat<(v8i8 (AArch64NvCast (v8i8 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v4i16 (AArch64NvCast (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4f16 (AArch64NvCast (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v2i32 (AArch64NvCast (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v2f32 (AArch64NvCast (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v1i64 (AArch64NvCast (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
+
+def : Pat<(v8i8 (AArch64NvCast (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v4i16 (AArch64NvCast (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4f16 (AArch64NvCast (f64 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v2i32 (AArch64NvCast (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v2f32 (AArch64NvCast (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v1i64 (AArch64NvCast (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
+def : Pat<(v1f64 (AArch64NvCast (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
+
+def : Pat<(v8i8 (AArch64NvCast (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v4i16 (AArch64NvCast (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v2i32 (AArch64NvCast (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v2f32 (AArch64NvCast (v2f32 FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v1i64 (AArch64NvCast (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
+
+// Natural vector casts (128 bit)
+def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v8i16 (AArch64NvCast (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8f16 (AArch64NvCast (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v2f64 (AArch64NvCast (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
+
+def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8f16 (AArch64NvCast (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v4f32 (AArch64NvCast (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v2f64 (AArch64NvCast (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
+
+def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8f16 (AArch64NvCast (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v4f32 (AArch64NvCast (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v2f64 (AArch64NvCast (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
+
+def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8f16 (AArch64NvCast (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v4i32 (AArch64NvCast (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v2i64 (AArch64NvCast (v2i64 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v4f32 (AArch64NvCast (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v2f64 (AArch64NvCast (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;
+
+def : Pat<(v16i8 (AArch64NvCast (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v8i16 (AArch64NvCast (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v4i32 (AArch64NvCast (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v4f32 (AArch64NvCast (v4f32 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v2i64 (AArch64NvCast (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v8f16 (AArch64NvCast (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v2f64 (AArch64NvCast (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
+
+def : Pat<(v16i8 (AArch64NvCast (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v8i16 (AArch64NvCast (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v4i32 (AArch64NvCast (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v2i64 (AArch64NvCast (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v2f64 (AArch64NvCast (v2f64 FPR128:$src))), (v2f64 FPR128:$src)>;
+def : Pat<(v8f16 (AArch64NvCast (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v4f32 (AArch64NvCast (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
+
+let Predicates = [IsLE] in {
+def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+def : Pat<(v4f16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+
+def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))),
+ (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
+def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
+ (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
+def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
+ (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
+def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
+ (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
+def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
+ (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
+def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
+ (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v8i8 (bitconvert GPR64:$Xn)),
+ (REV64v8i8 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
+def : Pat<(v4i16 (bitconvert GPR64:$Xn)),
+ (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
+def : Pat<(v2i32 (bitconvert GPR64:$Xn)),
+ (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
+def : Pat<(v4f16 (bitconvert GPR64:$Xn)),
+ (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
+def : Pat<(v2f32 (bitconvert GPR64:$Xn)),
+ (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
+
+def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))),
+ (REV64v8i8 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
+def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
+ (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
+def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
+ (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
+def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
+ (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
+def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
+ (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
+}
+def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))),
+ (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
+def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)),
+ (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)),
+ (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>;
+
+def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))),
+ (COPY_TO_REGCLASS GPR32:$Xn, FPR32)>;
+def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))),
+ (COPY_TO_REGCLASS FPR32:$Xn, GPR32)>;
+def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))),
+ (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))),
+ (COPY_TO_REGCLASS FPR64:$Xn, GPR64)>;
+def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
+ (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
+
+let Predicates = [IsLE] in {
+def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
+def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
+def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
+def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), (v1i64 FPR64:$src)>;
+def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))),
+ (v1i64 (REV64v2i32 FPR64:$src))>;
+def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))),
+ (v1i64 (REV64v4i16 FPR64:$src))>;
+def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))),
+ (v1i64 (REV64v8i8 FPR64:$src))>;
+def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))),
+ (v1i64 (REV64v4i16 FPR64:$src))>;
+def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))),
+ (v1i64 (REV64v2i32 FPR64:$src))>;
+}
+def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>;
+def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
+
+let Predicates = [IsLE] in {
+def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), (v2i32 FPR64:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))),
+ (v2i32 (REV64v2i32 FPR64:$src))>;
+def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))),
+ (v2i32 (REV32v4i16 FPR64:$src))>;
+def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))),
+ (v2i32 (REV32v8i8 FPR64:$src))>;
+def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))),
+ (v2i32 (REV64v2i32 FPR64:$src))>;
+def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))),
+ (v2i32 (REV64v2i32 FPR64:$src))>;
+def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))),
+ (v2i32 (REV32v4i16 FPR64:$src))>;
+}
+def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
+
+let Predicates = [IsLE] in {
+def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))),
+ (v4i16 (REV64v4i16 FPR64:$src))>;
+def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))),
+ (v4i16 (REV32v4i16 FPR64:$src))>;
+def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))),
+ (v4i16 (REV16v8i8 FPR64:$src))>;
+def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))),
+ (v4i16 (REV64v4i16 FPR64:$src))>;
+def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))),
+ (v4i16 (REV32v4i16 FPR64:$src))>;
+def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))),
+ (v4i16 (REV64v4i16 FPR64:$src))>;
+}
+def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>;
+
+let Predicates = [IsLE] in {
+def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 FPR64:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))),
+ (v4f16 (REV64v4i16 FPR64:$src))>;
+def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))),
+ (v4f16 (REV32v4i16 FPR64:$src))>;
+def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))),
+ (v4f16 (REV16v8i8 FPR64:$src))>;
+def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))),
+ (v4f16 (REV64v4i16 FPR64:$src))>;
+def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))),
+ (v4f16 (REV32v4i16 FPR64:$src))>;
+def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))),
+ (v4f16 (REV64v4i16 FPR64:$src))>;
+}
+def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
+
+let Predicates = [IsLE] in {
+def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))), (v8i8 FPR64:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))),
+ (v8i8 (REV64v8i8 FPR64:$src))>;
+def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))),
+ (v8i8 (REV32v8i8 FPR64:$src))>;
+def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))),
+ (v8i8 (REV16v8i8 FPR64:$src))>;
+def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))),
+ (v8i8 (REV64v8i8 FPR64:$src))>;
+def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))),
+ (v8i8 (REV32v8i8 FPR64:$src))>;
+def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))),
+ (v8i8 (REV64v8i8 FPR64:$src))>;
+def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))),
+ (v8i8 (REV16v8i8 FPR64:$src))>;
+}
+
+let Predicates = [IsLE] in {
+def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), (f64 FPR64:$src)>;
+def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), (f64 FPR64:$src)>;
+def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), (f64 FPR64:$src)>;
+def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), (f64 FPR64:$src)>;
+def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))), (f64 FPR64:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))),
+ (f64 (REV64v2i32 FPR64:$src))>;
+def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))),
+ (f64 (REV64v4i16 FPR64:$src))>;
+def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))),
+ (f64 (REV64v2i32 FPR64:$src))>;
+def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))),
+ (f64 (REV64v8i8 FPR64:$src))>;
+def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))),
+ (f64 (REV64v4i16 FPR64:$src))>;
+}
+def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
+def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;
+
+let Predicates = [IsLE] in {
+def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>;
+def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>;
+def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), (v1f64 FPR64:$src)>;
+def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>;
+def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), (v1f64 FPR64:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))),
+ (v1f64 (REV64v2i32 FPR64:$src))>;
+def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))),
+ (v1f64 (REV64v4i16 FPR64:$src))>;
+def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))),
+ (v1f64 (REV64v8i8 FPR64:$src))>;
+def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))),
+ (v1f64 (REV64v2i32 FPR64:$src))>;
+def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))),
+ (v1f64 (REV64v4i16 FPR64:$src))>;
+}
+def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>;
+def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
+
+let Predicates = [IsLE] in {
+def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), (v2f32 FPR64:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))),
+ (v2f32 (REV64v2i32 FPR64:$src))>;
+def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))),
+ (v2f32 (REV32v4i16 FPR64:$src))>;
+def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))),
+ (v2f32 (REV32v8i8 FPR64:$src))>;
+def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))),
+ (v2f32 (REV64v2i32 FPR64:$src))>;
+def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))),
+ (v2f32 (REV64v2i32 FPR64:$src))>;
+def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))),
+ (v2f32 (REV32v4i16 FPR64:$src))>;
+}
+def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
+
+let Predicates = [IsLE] in {
+def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>;
+def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>;
+def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>;
+def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>;
+def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>;
+def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), (f128 FPR128:$src)>;
+def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))),
+ (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
+def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))),
+ (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
+ (REV64v4i32 FPR128:$src), (i32 8)))>;
+def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))),
+ (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
+ (REV64v8i16 FPR128:$src), (i32 8)))>;
+def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))),
+ (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
+ (REV64v8i16 FPR128:$src), (i32 8)))>;
+def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))),
+ (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
+def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))),
+ (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
+ (REV64v4i32 FPR128:$src), (i32 8)))>;
+def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))),
+ (f128 (EXTv16i8 (REV64v16i8 FPR128:$src),
+ (REV64v16i8 FPR128:$src), (i32 8)))>;
+}
+
+let Predicates = [IsLE] in {
+def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 FPR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))),
+ (v2f64 (EXTv16i8 FPR128:$src,
+ FPR128:$src, (i32 8)))>;
+def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))),
+ (v2f64 (REV64v4i32 FPR128:$src))>;
+def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))),
+ (v2f64 (REV64v8i16 FPR128:$src))>;
+def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))),
+ (v2f64 (REV64v8i16 FPR128:$src))>;
+def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))),
+ (v2f64 (REV64v16i8 FPR128:$src))>;
+def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))),
+ (v2f64 (REV64v4i32 FPR128:$src))>;
+}
+def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;
+
+let Predicates = [IsLE] in {
+def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))),
+ (v4f32 (EXTv16i8 (REV64v4i32 FPR128:$src),
+ (REV64v4i32 FPR128:$src), (i32 8)))>;
+def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))),
+ (v4f32 (REV32v8i16 FPR128:$src))>;
+def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))),
+ (v4f32 (REV32v8i16 FPR128:$src))>;
+def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))),
+ (v4f32 (REV32v16i8 FPR128:$src))>;
+def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))),
+ (v4f32 (REV64v4i32 FPR128:$src))>;
+def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))),
+ (v4f32 (REV64v4i32 FPR128:$src))>;
+}
+def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
+
+let Predicates = [IsLE] in {
+def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), (v2i64 FPR128:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))),
+ (v2i64 (EXTv16i8 FPR128:$src,
+ FPR128:$src, (i32 8)))>;
+def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))),
+ (v2i64 (REV64v4i32 FPR128:$src))>;
+def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))),
+ (v2i64 (REV64v8i16 FPR128:$src))>;
+def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))),
+ (v2i64 (REV64v16i8 FPR128:$src))>;
+def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))),
+ (v2i64 (REV64v4i32 FPR128:$src))>;
+def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))),
+ (v2i64 (REV64v8i16 FPR128:$src))>;
+}
+def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;
+
+let Predicates = [IsLE] in {
+def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), (v4i32 FPR128:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))),
+ (v4i32 (EXTv16i8 (REV64v4i32 FPR128:$src),
+ (REV64v4i32 FPR128:$src),
+ (i32 8)))>;
+def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))),
+ (v4i32 (REV64v4i32 FPR128:$src))>;
+def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))),
+ (v4i32 (REV32v8i16 FPR128:$src))>;
+def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))),
+ (v4i32 (REV32v16i8 FPR128:$src))>;
+def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))),
+ (v4i32 (REV64v4i32 FPR128:$src))>;
+def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))),
+ (v4i32 (REV32v8i16 FPR128:$src))>;
+}
+def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;
+
+let Predicates = [IsLE] in {
+def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))),
+ (v8i16 (EXTv16i8 (REV64v8i16 FPR128:$src),
+ (REV64v8i16 FPR128:$src),
+ (i32 8)))>;
+def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))),
+ (v8i16 (REV64v8i16 FPR128:$src))>;
+def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))),
+ (v8i16 (REV32v8i16 FPR128:$src))>;
+def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))),
+ (v8i16 (REV16v16i8 FPR128:$src))>;
+def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))),
+ (v8i16 (REV64v8i16 FPR128:$src))>;
+def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))),
+ (v8i16 (REV32v8i16 FPR128:$src))>;
+}
+def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>;
+
+let Predicates = [IsLE] in {
+def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))),
+ (v8f16 (EXTv16i8 (REV64v8i16 FPR128:$src),
+ (REV64v8i16 FPR128:$src),
+ (i32 8)))>;
+def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))),
+ (v8f16 (REV64v8i16 FPR128:$src))>;
+def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))),
+ (v8f16 (REV32v8i16 FPR128:$src))>;
+def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))),
+ (v8f16 (REV16v16i8 FPR128:$src))>;
+def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))),
+ (v8f16 (REV64v8i16 FPR128:$src))>;
+def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))),
+ (v8f16 (REV32v8i16 FPR128:$src))>;
+}
+def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
+
+let Predicates = [IsLE] in {
+def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 FPR128:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))),
+ (v16i8 (EXTv16i8 (REV64v16i8 FPR128:$src),
+ (REV64v16i8 FPR128:$src),
+ (i32 8)))>;
+def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))),
+ (v16i8 (REV64v16i8 FPR128:$src))>;
+def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))),
+ (v16i8 (REV32v16i8 FPR128:$src))>;
+def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))),
+ (v16i8 (REV16v16i8 FPR128:$src))>;
+def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))),
+ (v16i8 (REV64v16i8 FPR128:$src))>;
+def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))),
+ (v16i8 (REV32v16i8 FPR128:$src))>;
+def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))),
+ (v16i8 (REV16v16i8 FPR128:$src))>;
+}
+
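+// Extracting the low 64-bit half of a 128-bit vector is just a dsub
+// subregister copy and needs no instruction.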
+def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 0))),
+ (EXTRACT_SUBREG V128:$Rn, dsub)>;
+def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 0))),
+ (EXTRACT_SUBREG V128:$Rn, dsub)>;
+def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 0))),
+ (EXTRACT_SUBREG V128:$Rn, dsub)>;
+def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 0))),
+ (EXTRACT_SUBREG V128:$Rn, dsub)>;
+def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 0))),
+ (EXTRACT_SUBREG V128:$Rn, dsub)>;
+def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 0))),
+ (EXTRACT_SUBREG V128:$Rn, dsub)>;
+def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 0))),
+ (EXTRACT_SUBREG V128:$Rn, dsub)>;
+
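+// Extracting the high 64-bit half: DUP lane 1 of the equivalent v2i64 value
+// across a full vector, then take its low D subregister.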
+def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))),
+ (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
+def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))),
+ (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
+def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))),
+ (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
+def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))),
+ (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
+
+// A 64-bit subvector insert to the first 128-bit vector position
+// is a subregister copy that needs no instruction.
+multiclass InsertSubvectorUndef<ValueType Ty> {
+ def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (Ty 0)),
+ (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
+ def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (Ty 0)),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
+ def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (Ty 0)),
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
+ def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (Ty 0)),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
+ def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (Ty 0)),
+ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
+ def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (Ty 0)),
+ (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
+ def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (Ty 0)),
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
+}
+
+defm : InsertSubvectorUndef<i32>;
+defm : InsertSubvectorUndef<i64>;
+
+// Use pair-wise add instructions when summing up the lanes for v2f64, v2i64
+// or v2f32.
+def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)),
+ (vector_extract (v2i64 FPR128:$Rn), (i64 1)))),
+ (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>;
+def : Pat<(f64 (fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)),
+ (vector_extract (v2f64 FPR128:$Rn), (i64 1)))),
+ (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>;
+ // vector_extract on 64-bit vectors gets promoted to a 128 bit vector,
+ // so we match on v4f32 here, not v2f32. This will also catch adding
+ // the low two lanes of a true v4f32 vector.
+def : Pat<(fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)),
+ (vector_extract (v4f32 FPR128:$Rn), (i64 1))),
+ (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
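+// For example, extracting and adding lanes 0 and 1 of a v2i64 value in Qn
+// should collapse to a single "addp dN, vN.2d" (the FADDP variants cover the
+// floating-point cases) instead of two lane moves plus an add.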
+
+// Scalar 64-bit shifts in FPR64 registers.
+def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
+ (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
+def : Pat<(i64 (int_aarch64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
+ (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
+def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
+ (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
+def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
+ (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
+
+// Patterns for nontemporal/no-allocate stores.
+// We have to resort to tricks to turn a single-input store into a store pair,
+// because there is no single-input nontemporal store, only STNP.
+let Predicates = [IsLE] in {
+let AddedComplexity = 15 in {
+class NTStore128Pat<ValueType VT> :
+ Pat<(nontemporalstore (VT FPR128:$Rt),
+ (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
+ (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub),
+ (CPYi64 FPR128:$Rt, (i64 1)),
+ GPR64sp:$Rn, simm7s8:$offset)>;
+
+def : NTStore128Pat<v2i64>;
+def : NTStore128Pat<v4i32>;
+def : NTStore128Pat<v8i16>;
+def : NTStore128Pat<v16i8>;
+
+class NTStore64Pat<ValueType VT> :
+ Pat<(nontemporalstore (VT FPR64:$Rt),
+ (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
+ (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub),
+ (CPYi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)),
+ GPR64sp:$Rn, simm7s4:$offset)>;
+
+// FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64?
+def : NTStore64Pat<v1f64>;
+def : NTStore64Pat<v1i64>;
+def : NTStore64Pat<v2i32>;
+def : NTStore64Pat<v4i16>;
+def : NTStore64Pat<v8i8>;
+
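+// A nontemporal store of a 64-bit GPR is split the same way: the low half via
+// the sub_32 subregister, the high half via a logical shift right by 32
+// (UBFM), emitted as a W-register STNP pair.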
+def : Pat<(nontemporalstore GPR64:$Rt,
+ (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
+ (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32),
+ (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 32, 63), sub_32),
+ GPR64sp:$Rn, simm7s4:$offset)>;
+} // AddedComplexity=15
+} // Predicates = [IsLE]
+
+// Tail call return handling. These are all compiler pseudo-instructions,
+// so no encoding information or anything like that.
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
+ def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff), []>,
+ Sched<[WriteBrReg]>;
+ def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>,
+ Sched<[WriteBrReg]>;
+}
+
+def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)),
+ (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>;
+def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)),
+ (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
+def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
+ (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
+
+include "AArch64InstrAtomics.td"
+include "AArch64SVEInstrInfo.td"
diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64RegisterBanks.td b/capstone/suite/synctools/tablegen/AArch64/AArch64RegisterBanks.td
new file mode 100644
index 000000000..eee584708
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/AArch64/AArch64RegisterBanks.td
@@ -0,0 +1,20 @@
+//=- AArch64RegisterBank.td - Describe the AArch64 Banks -----*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+/// General Purpose Registers: W, X.
+def GPRRegBank : RegisterBank<"GPR", [GPR64all]>;
+
+/// Floating Point/Vector Registers: B, H, S, D, Q.
+def FPRRegBank : RegisterBank<"FPR", [QQQQ]>;
+
+/// Conditional register: NZCV.
+def CCRegBank : RegisterBank<"CC", [CCR]>;
diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64RegisterInfo.td b/capstone/suite/synctools/tablegen/AArch64/AArch64RegisterInfo.td
new file mode 100644
index 000000000..bbf401b47
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/AArch64/AArch64RegisterInfo.td
@@ -0,0 +1,1113 @@
+//=- AArch64RegisterInfo.td - Describe the AArch64 Registers -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+
+class AArch64Reg<bits<16> enc, string n, list<Register> subregs = [],
+ list<string> altNames = []>
+ : Register<n, altNames> {
+ let HWEncoding = enc;
+ let Namespace = "AArch64";
+ let SubRegs = subregs;
+}
+
+let Namespace = "AArch64" in {
+ def sub_32 : SubRegIndex<32>;
+
+ def bsub : SubRegIndex<8>;
+ def hsub : SubRegIndex<16>;
+ def ssub : SubRegIndex<32>;
+ def dsub : SubRegIndex<32>;
+ def sube32 : SubRegIndex<32>;
+ def subo32 : SubRegIndex<32>;
+ def qhisub : SubRegIndex<64>;
+ def qsub : SubRegIndex<64>;
+ def sube64 : SubRegIndex<64>;
+ def subo64 : SubRegIndex<64>;
+ // SVE
+ def zsub : SubRegIndex<128>;
+ // Note: zsub_hi should never be used directly because it represents
+ // the scalable part of the SVE vector and cannot be manipulated as a
+  // subvector in the same way the lower 128 bits can.
+ def zsub_hi : SubRegIndex<128>;
+ // Note: Code depends on these having consecutive numbers
+ def dsub0 : SubRegIndex<64>;
+ def dsub1 : SubRegIndex<64>;
+ def dsub2 : SubRegIndex<64>;
+ def dsub3 : SubRegIndex<64>;
+ // Note: Code depends on these having consecutive numbers
+ def qsub0 : SubRegIndex<128>;
+ def qsub1 : SubRegIndex<128>;
+ def qsub2 : SubRegIndex<128>;
+ def qsub3 : SubRegIndex<128>;
+}
+
+let Namespace = "AArch64" in {
+ def vreg : RegAltNameIndex;
+ def vlist1 : RegAltNameIndex;
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+def W0 : AArch64Reg<0, "w0" >, DwarfRegNum<[0]>;
+def W1 : AArch64Reg<1, "w1" >, DwarfRegNum<[1]>;
+def W2 : AArch64Reg<2, "w2" >, DwarfRegNum<[2]>;
+def W3 : AArch64Reg<3, "w3" >, DwarfRegNum<[3]>;
+def W4 : AArch64Reg<4, "w4" >, DwarfRegNum<[4]>;
+def W5 : AArch64Reg<5, "w5" >, DwarfRegNum<[5]>;
+def W6 : AArch64Reg<6, "w6" >, DwarfRegNum<[6]>;
+def W7 : AArch64Reg<7, "w7" >, DwarfRegNum<[7]>;
+def W8 : AArch64Reg<8, "w8" >, DwarfRegNum<[8]>;
+def W9 : AArch64Reg<9, "w9" >, DwarfRegNum<[9]>;
+def W10 : AArch64Reg<10, "w10">, DwarfRegNum<[10]>;
+def W11 : AArch64Reg<11, "w11">, DwarfRegNum<[11]>;
+def W12 : AArch64Reg<12, "w12">, DwarfRegNum<[12]>;
+def W13 : AArch64Reg<13, "w13">, DwarfRegNum<[13]>;
+def W14 : AArch64Reg<14, "w14">, DwarfRegNum<[14]>;
+def W15 : AArch64Reg<15, "w15">, DwarfRegNum<[15]>;
+def W16 : AArch64Reg<16, "w16">, DwarfRegNum<[16]>;
+def W17 : AArch64Reg<17, "w17">, DwarfRegNum<[17]>;
+def W18 : AArch64Reg<18, "w18">, DwarfRegNum<[18]>;
+def W19 : AArch64Reg<19, "w19">, DwarfRegNum<[19]>;
+def W20 : AArch64Reg<20, "w20">, DwarfRegNum<[20]>;
+def W21 : AArch64Reg<21, "w21">, DwarfRegNum<[21]>;
+def W22 : AArch64Reg<22, "w22">, DwarfRegNum<[22]>;
+def W23 : AArch64Reg<23, "w23">, DwarfRegNum<[23]>;
+def W24 : AArch64Reg<24, "w24">, DwarfRegNum<[24]>;
+def W25 : AArch64Reg<25, "w25">, DwarfRegNum<[25]>;
+def W26 : AArch64Reg<26, "w26">, DwarfRegNum<[26]>;
+def W27 : AArch64Reg<27, "w27">, DwarfRegNum<[27]>;
+def W28 : AArch64Reg<28, "w28">, DwarfRegNum<[28]>;
+def W29 : AArch64Reg<29, "w29">, DwarfRegNum<[29]>;
+def W30 : AArch64Reg<30, "w30">, DwarfRegNum<[30]>;
+def WSP : AArch64Reg<31, "wsp">, DwarfRegNum<[31]>;
+def WZR : AArch64Reg<31, "wzr">, DwarfRegAlias<WSP>;
+
+let SubRegIndices = [sub_32] in {
+def X0 : AArch64Reg<0, "x0", [W0]>, DwarfRegAlias<W0>;
+def X1 : AArch64Reg<1, "x1", [W1]>, DwarfRegAlias<W1>;
+def X2 : AArch64Reg<2, "x2", [W2]>, DwarfRegAlias<W2>;
+def X3 : AArch64Reg<3, "x3", [W3]>, DwarfRegAlias<W3>;
+def X4 : AArch64Reg<4, "x4", [W4]>, DwarfRegAlias<W4>;
+def X5 : AArch64Reg<5, "x5", [W5]>, DwarfRegAlias<W5>;
+def X6 : AArch64Reg<6, "x6", [W6]>, DwarfRegAlias<W6>;
+def X7 : AArch64Reg<7, "x7", [W7]>, DwarfRegAlias<W7>;
+def X8 : AArch64Reg<8, "x8", [W8]>, DwarfRegAlias<W8>;
+def X9 : AArch64Reg<9, "x9", [W9]>, DwarfRegAlias<W9>;
+def X10 : AArch64Reg<10, "x10", [W10]>, DwarfRegAlias<W10>;
+def X11 : AArch64Reg<11, "x11", [W11]>, DwarfRegAlias<W11>;
+def X12 : AArch64Reg<12, "x12", [W12]>, DwarfRegAlias<W12>;
+def X13 : AArch64Reg<13, "x13", [W13]>, DwarfRegAlias<W13>;
+def X14 : AArch64Reg<14, "x14", [W14]>, DwarfRegAlias<W14>;
+def X15 : AArch64Reg<15, "x15", [W15]>, DwarfRegAlias<W15>;
+def X16 : AArch64Reg<16, "x16", [W16]>, DwarfRegAlias<W16>;
+def X17 : AArch64Reg<17, "x17", [W17]>, DwarfRegAlias<W17>;
+def X18 : AArch64Reg<18, "x18", [W18]>, DwarfRegAlias<W18>;
+def X19 : AArch64Reg<19, "x19", [W19]>, DwarfRegAlias<W19>;
+def X20 : AArch64Reg<20, "x20", [W20]>, DwarfRegAlias<W20>;
+def X21 : AArch64Reg<21, "x21", [W21]>, DwarfRegAlias<W21>;
+def X22 : AArch64Reg<22, "x22", [W22]>, DwarfRegAlias<W22>;
+def X23 : AArch64Reg<23, "x23", [W23]>, DwarfRegAlias<W23>;
+def X24 : AArch64Reg<24, "x24", [W24]>, DwarfRegAlias<W24>;
+def X25 : AArch64Reg<25, "x25", [W25]>, DwarfRegAlias<W25>;
+def X26 : AArch64Reg<26, "x26", [W26]>, DwarfRegAlias<W26>;
+def X27 : AArch64Reg<27, "x27", [W27]>, DwarfRegAlias<W27>;
+def X28 : AArch64Reg<28, "x28", [W28]>, DwarfRegAlias<W28>;
+def FP : AArch64Reg<29, "x29", [W29]>, DwarfRegAlias<W29>;
+def LR : AArch64Reg<30, "x30", [W30]>, DwarfRegAlias<W30>;
+def SP : AArch64Reg<31, "sp", [WSP]>, DwarfRegAlias<WSP>;
+def XZR : AArch64Reg<31, "xzr", [WZR]>, DwarfRegAlias<WSP>;
+}
+
+// Condition code register.
+def NZCV : AArch64Reg<0, "nzcv">;
+
+// First fault status register
+def FFR : AArch64Reg<0, "ffr">, DwarfRegNum<[47]>;
+
+// GPR register classes with the intersections of GPR32/GPR32sp and
+// GPR64/GPR64sp for use by the coalescer.
+def GPR32common : RegisterClass<"AArch64", [i32], 32, (sequence "W%u", 0, 30)> {
+ let AltOrders = [(rotl GPR32common, 8)];
+ let AltOrderSelect = [{ return 1; }];
+}
+def GPR64common : RegisterClass<"AArch64", [i64], 64,
+ (add (sequence "X%u", 0, 28), FP, LR)> {
+ let AltOrders = [(rotl GPR64common, 8)];
+ let AltOrderSelect = [{ return 1; }];
+}
+// GPR register classes which exclude SP/WSP.
+def GPR32 : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WZR)> {
+ let AltOrders = [(rotl GPR32, 8)];
+ let AltOrderSelect = [{ return 1; }];
+}
+def GPR64 : RegisterClass<"AArch64", [i64], 64, (add GPR64common, XZR)> {
+ let AltOrders = [(rotl GPR64, 8)];
+ let AltOrderSelect = [{ return 1; }];
+}
+
+// GPR register classes which include SP/WSP.
+def GPR32sp : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WSP)> {
+ let AltOrders = [(rotl GPR32sp, 8)];
+ let AltOrderSelect = [{ return 1; }];
+}
+def GPR64sp : RegisterClass<"AArch64", [i64], 64, (add GPR64common, SP)> {
+ let AltOrders = [(rotl GPR64sp, 8)];
+ let AltOrderSelect = [{ return 1; }];
+}
+
+def GPR32sponly : RegisterClass<"AArch64", [i32], 32, (add WSP)>;
+def GPR64sponly : RegisterClass<"AArch64", [i64], 64, (add SP)>;
+
+def GPR64spPlus0Operand : AsmOperandClass {
+ let Name = "GPR64sp0";
+ let RenderMethod = "addRegOperands";
+ let PredicateMethod = "isGPR64<AArch64::GPR64spRegClassID>";
+ let ParserMethod = "tryParseGPR64sp0Operand";
+}
+
+def GPR64sp0 : RegisterOperand<GPR64sp> {
+ let ParserMatchClass = GPR64spPlus0Operand;
+}
+
+// GPR32/GPR64 but with zero-register substitution enabled.
+// TODO: Roll this out to GPR32/GPR64/GPR32all/GPR64all.
+def GPR32z : RegisterOperand<GPR32> {
+ let GIZeroRegister = WZR;
+}
+def GPR64z : RegisterOperand<GPR64> {
+ let GIZeroRegister = XZR;
+}
+
+// GPR register classes which include WZR/XZR AND SP/WSP. This is not a
+// constraint used by any instructions; it is used as a common super-class.
+def GPR32all : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WZR, WSP)>;
+def GPR64all : RegisterClass<"AArch64", [i64], 64, (add GPR64common, XZR, SP)>;
+
+// For tail calls, we can't use callee-saved registers, as they are restored
+// to the saved value before the tail call, which would clobber a call address.
+// This class is used by indirect tail calls to hold the destination address.
+def tcGPR64 : RegisterClass<"AArch64", [i64], 64, (sub GPR64common, X19, X20, X21,
+ X22, X23, X24, X25, X26,
+ X27, X28, FP, LR)>;
+
+// GPR register operands for the post-increment amount of vector loads/stores.
+// They have alternate printing when Rm=31: a constant immediate value equal
+// to the total number of bytes transferred is printed instead.
+
+// FIXME: TableGen *should* be able to do these itself now. There appears to be
+// a bug in counting how many operands a Post-indexed MCInst should have,
+// which means the aliases don't trigger.
+def GPR64pi1 : RegisterOperand<GPR64, "printPostIncOperand<1>">;
+def GPR64pi2 : RegisterOperand<GPR64, "printPostIncOperand<2>">;
+def GPR64pi3 : RegisterOperand<GPR64, "printPostIncOperand<3>">;
+def GPR64pi4 : RegisterOperand<GPR64, "printPostIncOperand<4>">;
+def GPR64pi6 : RegisterOperand<GPR64, "printPostIncOperand<6>">;
+def GPR64pi8 : RegisterOperand<GPR64, "printPostIncOperand<8>">;
+def GPR64pi12 : RegisterOperand<GPR64, "printPostIncOperand<12>">;
+def GPR64pi16 : RegisterOperand<GPR64, "printPostIncOperand<16>">;
+def GPR64pi24 : RegisterOperand<GPR64, "printPostIncOperand<24>">;
+def GPR64pi32 : RegisterOperand<GPR64, "printPostIncOperand<32>">;
+def GPR64pi48 : RegisterOperand<GPR64, "printPostIncOperand<48>">;
+def GPR64pi64 : RegisterOperand<GPR64, "printPostIncOperand<64>">;
+
+// Condition code regclass.
+def CCR : RegisterClass<"AArch64", [i32], 32, (add NZCV)> {
+ let CopyCost = -1; // Don't allow copying of status registers.
+
+ // CCR is not allocatable.
+ let isAllocatable = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Floating Point Scalar Registers
+//===----------------------------------------------------------------------===//
+
+def B0 : AArch64Reg<0, "b0">, DwarfRegNum<[64]>;
+def B1 : AArch64Reg<1, "b1">, DwarfRegNum<[65]>;
+def B2 : AArch64Reg<2, "b2">, DwarfRegNum<[66]>;
+def B3 : AArch64Reg<3, "b3">, DwarfRegNum<[67]>;
+def B4 : AArch64Reg<4, "b4">, DwarfRegNum<[68]>;
+def B5 : AArch64Reg<5, "b5">, DwarfRegNum<[69]>;
+def B6 : AArch64Reg<6, "b6">, DwarfRegNum<[70]>;
+def B7 : AArch64Reg<7, "b7">, DwarfRegNum<[71]>;
+def B8 : AArch64Reg<8, "b8">, DwarfRegNum<[72]>;
+def B9 : AArch64Reg<9, "b9">, DwarfRegNum<[73]>;
+def B10 : AArch64Reg<10, "b10">, DwarfRegNum<[74]>;
+def B11 : AArch64Reg<11, "b11">, DwarfRegNum<[75]>;
+def B12 : AArch64Reg<12, "b12">, DwarfRegNum<[76]>;
+def B13 : AArch64Reg<13, "b13">, DwarfRegNum<[77]>;
+def B14 : AArch64Reg<14, "b14">, DwarfRegNum<[78]>;
+def B15 : AArch64Reg<15, "b15">, DwarfRegNum<[79]>;
+def B16 : AArch64Reg<16, "b16">, DwarfRegNum<[80]>;
+def B17 : AArch64Reg<17, "b17">, DwarfRegNum<[81]>;
+def B18 : AArch64Reg<18, "b18">, DwarfRegNum<[82]>;
+def B19 : AArch64Reg<19, "b19">, DwarfRegNum<[83]>;
+def B20 : AArch64Reg<20, "b20">, DwarfRegNum<[84]>;
+def B21 : AArch64Reg<21, "b21">, DwarfRegNum<[85]>;
+def B22 : AArch64Reg<22, "b22">, DwarfRegNum<[86]>;
+def B23 : AArch64Reg<23, "b23">, DwarfRegNum<[87]>;
+def B24 : AArch64Reg<24, "b24">, DwarfRegNum<[88]>;
+def B25 : AArch64Reg<25, "b25">, DwarfRegNum<[89]>;
+def B26 : AArch64Reg<26, "b26">, DwarfRegNum<[90]>;
+def B27 : AArch64Reg<27, "b27">, DwarfRegNum<[91]>;
+def B28 : AArch64Reg<28, "b28">, DwarfRegNum<[92]>;
+def B29 : AArch64Reg<29, "b29">, DwarfRegNum<[93]>;
+def B30 : AArch64Reg<30, "b30">, DwarfRegNum<[94]>;
+def B31 : AArch64Reg<31, "b31">, DwarfRegNum<[95]>;
+
+let SubRegIndices = [bsub] in {
+def H0 : AArch64Reg<0, "h0", [B0]>, DwarfRegAlias<B0>;
+def H1 : AArch64Reg<1, "h1", [B1]>, DwarfRegAlias<B1>;
+def H2 : AArch64Reg<2, "h2", [B2]>, DwarfRegAlias<B2>;
+def H3 : AArch64Reg<3, "h3", [B3]>, DwarfRegAlias<B3>;
+def H4 : AArch64Reg<4, "h4", [B4]>, DwarfRegAlias<B4>;
+def H5 : AArch64Reg<5, "h5", [B5]>, DwarfRegAlias<B5>;
+def H6 : AArch64Reg<6, "h6", [B6]>, DwarfRegAlias<B6>;
+def H7 : AArch64Reg<7, "h7", [B7]>, DwarfRegAlias<B7>;
+def H8 : AArch64Reg<8, "h8", [B8]>, DwarfRegAlias<B8>;
+def H9 : AArch64Reg<9, "h9", [B9]>, DwarfRegAlias<B9>;
+def H10 : AArch64Reg<10, "h10", [B10]>, DwarfRegAlias<B10>;
+def H11 : AArch64Reg<11, "h11", [B11]>, DwarfRegAlias<B11>;
+def H12 : AArch64Reg<12, "h12", [B12]>, DwarfRegAlias<B12>;
+def H13 : AArch64Reg<13, "h13", [B13]>, DwarfRegAlias<B13>;
+def H14 : AArch64Reg<14, "h14", [B14]>, DwarfRegAlias<B14>;
+def H15 : AArch64Reg<15, "h15", [B15]>, DwarfRegAlias<B15>;
+def H16 : AArch64Reg<16, "h16", [B16]>, DwarfRegAlias<B16>;
+def H17 : AArch64Reg<17, "h17", [B17]>, DwarfRegAlias<B17>;
+def H18 : AArch64Reg<18, "h18", [B18]>, DwarfRegAlias<B18>;
+def H19 : AArch64Reg<19, "h19", [B19]>, DwarfRegAlias<B19>;
+def H20 : AArch64Reg<20, "h20", [B20]>, DwarfRegAlias<B20>;
+def H21 : AArch64Reg<21, "h21", [B21]>, DwarfRegAlias<B21>;
+def H22 : AArch64Reg<22, "h22", [B22]>, DwarfRegAlias<B22>;
+def H23 : AArch64Reg<23, "h23", [B23]>, DwarfRegAlias<B23>;
+def H24 : AArch64Reg<24, "h24", [B24]>, DwarfRegAlias<B24>;
+def H25 : AArch64Reg<25, "h25", [B25]>, DwarfRegAlias<B25>;
+def H26 : AArch64Reg<26, "h26", [B26]>, DwarfRegAlias<B26>;
+def H27 : AArch64Reg<27, "h27", [B27]>, DwarfRegAlias<B27>;
+def H28 : AArch64Reg<28, "h28", [B28]>, DwarfRegAlias<B28>;
+def H29 : AArch64Reg<29, "h29", [B29]>, DwarfRegAlias<B29>;
+def H30 : AArch64Reg<30, "h30", [B30]>, DwarfRegAlias<B30>;
+def H31 : AArch64Reg<31, "h31", [B31]>, DwarfRegAlias<B31>;
+}
+
+let SubRegIndices = [hsub] in {
+def S0 : AArch64Reg<0, "s0", [H0]>, DwarfRegAlias<B0>;
+def S1 : AArch64Reg<1, "s1", [H1]>, DwarfRegAlias<B1>;
+def S2 : AArch64Reg<2, "s2", [H2]>, DwarfRegAlias<B2>;
+def S3 : AArch64Reg<3, "s3", [H3]>, DwarfRegAlias<B3>;
+def S4 : AArch64Reg<4, "s4", [H4]>, DwarfRegAlias<B4>;
+def S5 : AArch64Reg<5, "s5", [H5]>, DwarfRegAlias<B5>;
+def S6 : AArch64Reg<6, "s6", [H6]>, DwarfRegAlias<B6>;
+def S7 : AArch64Reg<7, "s7", [H7]>, DwarfRegAlias<B7>;
+def S8 : AArch64Reg<8, "s8", [H8]>, DwarfRegAlias<B8>;
+def S9 : AArch64Reg<9, "s9", [H9]>, DwarfRegAlias<B9>;
+def S10 : AArch64Reg<10, "s10", [H10]>, DwarfRegAlias<B10>;
+def S11 : AArch64Reg<11, "s11", [H11]>, DwarfRegAlias<B11>;
+def S12 : AArch64Reg<12, "s12", [H12]>, DwarfRegAlias<B12>;
+def S13 : AArch64Reg<13, "s13", [H13]>, DwarfRegAlias<B13>;
+def S14 : AArch64Reg<14, "s14", [H14]>, DwarfRegAlias<B14>;
+def S15 : AArch64Reg<15, "s15", [H15]>, DwarfRegAlias<B15>;
+def S16 : AArch64Reg<16, "s16", [H16]>, DwarfRegAlias<B16>;
+def S17 : AArch64Reg<17, "s17", [H17]>, DwarfRegAlias<B17>;
+def S18 : AArch64Reg<18, "s18", [H18]>, DwarfRegAlias<B18>;
+def S19 : AArch64Reg<19, "s19", [H19]>, DwarfRegAlias<B19>;
+def S20 : AArch64Reg<20, "s20", [H20]>, DwarfRegAlias<B20>;
+def S21 : AArch64Reg<21, "s21", [H21]>, DwarfRegAlias<B21>;
+def S22 : AArch64Reg<22, "s22", [H22]>, DwarfRegAlias<B22>;
+def S23 : AArch64Reg<23, "s23", [H23]>, DwarfRegAlias<B23>;
+def S24 : AArch64Reg<24, "s24", [H24]>, DwarfRegAlias<B24>;
+def S25 : AArch64Reg<25, "s25", [H25]>, DwarfRegAlias<B25>;
+def S26 : AArch64Reg<26, "s26", [H26]>, DwarfRegAlias<B26>;
+def S27 : AArch64Reg<27, "s27", [H27]>, DwarfRegAlias<B27>;
+def S28 : AArch64Reg<28, "s28", [H28]>, DwarfRegAlias<B28>;
+def S29 : AArch64Reg<29, "s29", [H29]>, DwarfRegAlias<B29>;
+def S30 : AArch64Reg<30, "s30", [H30]>, DwarfRegAlias<B30>;
+def S31 : AArch64Reg<31, "s31", [H31]>, DwarfRegAlias<B31>;
+}
+
+let SubRegIndices = [ssub], RegAltNameIndices = [vreg, vlist1] in {
+def D0 : AArch64Reg<0, "d0", [S0], ["v0", ""]>, DwarfRegAlias<B0>;
+def D1 : AArch64Reg<1, "d1", [S1], ["v1", ""]>, DwarfRegAlias<B1>;
+def D2 : AArch64Reg<2, "d2", [S2], ["v2", ""]>, DwarfRegAlias<B2>;
+def D3 : AArch64Reg<3, "d3", [S3], ["v3", ""]>, DwarfRegAlias<B3>;
+def D4 : AArch64Reg<4, "d4", [S4], ["v4", ""]>, DwarfRegAlias<B4>;
+def D5 : AArch64Reg<5, "d5", [S5], ["v5", ""]>, DwarfRegAlias<B5>;
+def D6 : AArch64Reg<6, "d6", [S6], ["v6", ""]>, DwarfRegAlias<B6>;
+def D7 : AArch64Reg<7, "d7", [S7], ["v7", ""]>, DwarfRegAlias<B7>;
+def D8 : AArch64Reg<8, "d8", [S8], ["v8", ""]>, DwarfRegAlias<B8>;
+def D9 : AArch64Reg<9, "d9", [S9], ["v9", ""]>, DwarfRegAlias<B9>;
+def D10 : AArch64Reg<10, "d10", [S10], ["v10", ""]>, DwarfRegAlias<B10>;
+def D11 : AArch64Reg<11, "d11", [S11], ["v11", ""]>, DwarfRegAlias<B11>;
+def D12 : AArch64Reg<12, "d12", [S12], ["v12", ""]>, DwarfRegAlias<B12>;
+def D13 : AArch64Reg<13, "d13", [S13], ["v13", ""]>, DwarfRegAlias<B13>;
+def D14 : AArch64Reg<14, "d14", [S14], ["v14", ""]>, DwarfRegAlias<B14>;
+def D15 : AArch64Reg<15, "d15", [S15], ["v15", ""]>, DwarfRegAlias<B15>;
+def D16 : AArch64Reg<16, "d16", [S16], ["v16", ""]>, DwarfRegAlias<B16>;
+def D17 : AArch64Reg<17, "d17", [S17], ["v17", ""]>, DwarfRegAlias<B17>;
+def D18 : AArch64Reg<18, "d18", [S18], ["v18", ""]>, DwarfRegAlias<B18>;
+def D19 : AArch64Reg<19, "d19", [S19], ["v19", ""]>, DwarfRegAlias<B19>;
+def D20 : AArch64Reg<20, "d20", [S20], ["v20", ""]>, DwarfRegAlias<B20>;
+def D21 : AArch64Reg<21, "d21", [S21], ["v21", ""]>, DwarfRegAlias<B21>;
+def D22 : AArch64Reg<22, "d22", [S22], ["v22", ""]>, DwarfRegAlias<B22>;
+def D23 : AArch64Reg<23, "d23", [S23], ["v23", ""]>, DwarfRegAlias<B23>;
+def D24 : AArch64Reg<24, "d24", [S24], ["v24", ""]>, DwarfRegAlias<B24>;
+def D25 : AArch64Reg<25, "d25", [S25], ["v25", ""]>, DwarfRegAlias<B25>;
+def D26 : AArch64Reg<26, "d26", [S26], ["v26", ""]>, DwarfRegAlias<B26>;
+def D27 : AArch64Reg<27, "d27", [S27], ["v27", ""]>, DwarfRegAlias<B27>;
+def D28 : AArch64Reg<28, "d28", [S28], ["v28", ""]>, DwarfRegAlias<B28>;
+def D29 : AArch64Reg<29, "d29", [S29], ["v29", ""]>, DwarfRegAlias<B29>;
+def D30 : AArch64Reg<30, "d30", [S30], ["v30", ""]>, DwarfRegAlias<B30>;
+def D31 : AArch64Reg<31, "d31", [S31], ["v31", ""]>, DwarfRegAlias<B31>;
+}
+
+let SubRegIndices = [dsub], RegAltNameIndices = [vreg, vlist1] in {
+def Q0 : AArch64Reg<0, "q0", [D0], ["v0", ""]>, DwarfRegAlias<B0>;
+def Q1 : AArch64Reg<1, "q1", [D1], ["v1", ""]>, DwarfRegAlias<B1>;
+def Q2 : AArch64Reg<2, "q2", [D2], ["v2", ""]>, DwarfRegAlias<B2>;
+def Q3 : AArch64Reg<3, "q3", [D3], ["v3", ""]>, DwarfRegAlias<B3>;
+def Q4 : AArch64Reg<4, "q4", [D4], ["v4", ""]>, DwarfRegAlias<B4>;
+def Q5 : AArch64Reg<5, "q5", [D5], ["v5", ""]>, DwarfRegAlias<B5>;
+def Q6 : AArch64Reg<6, "q6", [D6], ["v6", ""]>, DwarfRegAlias<B6>;
+def Q7 : AArch64Reg<7, "q7", [D7], ["v7", ""]>, DwarfRegAlias<B7>;
+def Q8 : AArch64Reg<8, "q8", [D8], ["v8", ""]>, DwarfRegAlias<B8>;
+def Q9 : AArch64Reg<9, "q9", [D9], ["v9", ""]>, DwarfRegAlias<B9>;
+def Q10 : AArch64Reg<10, "q10", [D10], ["v10", ""]>, DwarfRegAlias<B10>;
+def Q11 : AArch64Reg<11, "q11", [D11], ["v11", ""]>, DwarfRegAlias<B11>;
+def Q12 : AArch64Reg<12, "q12", [D12], ["v12", ""]>, DwarfRegAlias<B12>;
+def Q13 : AArch64Reg<13, "q13", [D13], ["v13", ""]>, DwarfRegAlias<B13>;
+def Q14 : AArch64Reg<14, "q14", [D14], ["v14", ""]>, DwarfRegAlias<B14>;
+def Q15 : AArch64Reg<15, "q15", [D15], ["v15", ""]>, DwarfRegAlias<B15>;
+def Q16 : AArch64Reg<16, "q16", [D16], ["v16", ""]>, DwarfRegAlias<B16>;
+def Q17 : AArch64Reg<17, "q17", [D17], ["v17", ""]>, DwarfRegAlias<B17>;
+def Q18 : AArch64Reg<18, "q18", [D18], ["v18", ""]>, DwarfRegAlias<B18>;
+def Q19 : AArch64Reg<19, "q19", [D19], ["v19", ""]>, DwarfRegAlias<B19>;
+def Q20 : AArch64Reg<20, "q20", [D20], ["v20", ""]>, DwarfRegAlias<B20>;
+def Q21 : AArch64Reg<21, "q21", [D21], ["v21", ""]>, DwarfRegAlias<B21>;
+def Q22 : AArch64Reg<22, "q22", [D22], ["v22", ""]>, DwarfRegAlias<B22>;
+def Q23 : AArch64Reg<23, "q23", [D23], ["v23", ""]>, DwarfRegAlias<B23>;
+def Q24 : AArch64Reg<24, "q24", [D24], ["v24", ""]>, DwarfRegAlias<B24>;
+def Q25 : AArch64Reg<25, "q25", [D25], ["v25", ""]>, DwarfRegAlias<B25>;
+def Q26 : AArch64Reg<26, "q26", [D26], ["v26", ""]>, DwarfRegAlias<B26>;
+def Q27 : AArch64Reg<27, "q27", [D27], ["v27", ""]>, DwarfRegAlias<B27>;
+def Q28 : AArch64Reg<28, "q28", [D28], ["v28", ""]>, DwarfRegAlias<B28>;
+def Q29 : AArch64Reg<29, "q29", [D29], ["v29", ""]>, DwarfRegAlias<B29>;
+def Q30 : AArch64Reg<30, "q30", [D30], ["v30", ""]>, DwarfRegAlias<B30>;
+def Q31 : AArch64Reg<31, "q31", [D31], ["v31", ""]>, DwarfRegAlias<B31>;
+}
+
+def FPR8 : RegisterClass<"AArch64", [untyped], 8, (sequence "B%u", 0, 31)> {
+ let Size = 8;
+}
+def FPR16 : RegisterClass<"AArch64", [f16], 16, (sequence "H%u", 0, 31)> {
+ let Size = 16;
+}
+def FPR32 : RegisterClass<"AArch64", [f32, i32], 32, (sequence "S%u", 0, 31)>;
+def FPR64 : RegisterClass<"AArch64", [f64, i64, v2f32, v1f64, v8i8, v4i16, v2i32,
+ v1i64, v4f16],
+ 64, (sequence "D%u", 0, 31)>;
+// We don't (yet) have an f128 legal type, so don't use that here. We
+// normalize 128-bit vectors to v2f64 for arg passing and such, so use
+// that here.
+def FPR128 : RegisterClass<"AArch64",
+ [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, f128,
+ v8f16],
+ 128, (sequence "Q%u", 0, 31)>;
+
+// The lower 16 vector registers. Some instructions can only take registers
+// in this range.
+def FPR128_lo : RegisterClass<"AArch64",
+ [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16],
+ 128, (trunc FPR128, 16)>;
+
+// Pairs, triples, and quads of 64-bit vector registers.
+def DSeqPairs : RegisterTuples<[dsub0, dsub1], [(rotl FPR64, 0), (rotl FPR64, 1)]>;
+def DSeqTriples : RegisterTuples<[dsub0, dsub1, dsub2],
+ [(rotl FPR64, 0), (rotl FPR64, 1),
+ (rotl FPR64, 2)]>;
+def DSeqQuads : RegisterTuples<[dsub0, dsub1, dsub2, dsub3],
+ [(rotl FPR64, 0), (rotl FPR64, 1),
+ (rotl FPR64, 2), (rotl FPR64, 3)]>;
+def DD : RegisterClass<"AArch64", [untyped], 64, (add DSeqPairs)> {
+ let Size = 128;
+}
+def DDD : RegisterClass<"AArch64", [untyped], 64, (add DSeqTriples)> {
+ let Size = 192;
+}
+def DDDD : RegisterClass<"AArch64", [untyped], 64, (add DSeqQuads)> {
+ let Size = 256;
+}
+
+// Pairs, triples, and quads of 128-bit vector registers.
+def QSeqPairs : RegisterTuples<[qsub0, qsub1], [(rotl FPR128, 0), (rotl FPR128, 1)]>;
+def QSeqTriples : RegisterTuples<[qsub0, qsub1, qsub2],
+ [(rotl FPR128, 0), (rotl FPR128, 1),
+ (rotl FPR128, 2)]>;
+def QSeqQuads : RegisterTuples<[qsub0, qsub1, qsub2, qsub3],
+ [(rotl FPR128, 0), (rotl FPR128, 1),
+ (rotl FPR128, 2), (rotl FPR128, 3)]>;
+def QQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqPairs)> {
+ let Size = 256;
+}
+def QQQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqTriples)> {
+ let Size = 384;
+}
+def QQQQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqQuads)> {
+ let Size = 512;
+}
+
+
+// Vector operand versions of the FP registers. Alternate name printing and
+// assembler matching.
+def VectorReg64AsmOperand : AsmOperandClass {
+ let Name = "VectorReg64";
+ let PredicateMethod = "isNeonVectorReg";
+}
+def VectorReg128AsmOperand : AsmOperandClass {
+ let Name = "VectorReg128";
+ let PredicateMethod = "isNeonVectorReg";
+}
+
+def V64 : RegisterOperand<FPR64, "printVRegOperand"> {
+ let ParserMatchClass = VectorReg64AsmOperand;
+}
+
+def V128 : RegisterOperand<FPR128, "printVRegOperand"> {
+ let ParserMatchClass = VectorReg128AsmOperand;
+}
+
+def VectorRegLoAsmOperand : AsmOperandClass {
+ let Name = "VectorRegLo";
+ let PredicateMethod = "isNeonVectorRegLo";
+}
+def V128_lo : RegisterOperand<FPR128_lo, "printVRegOperand"> {
+ let ParserMatchClass = VectorRegLoAsmOperand;
+}
+
+class TypedVecListAsmOperand<int count, string vecty, int lanes, int eltsize>
+ : AsmOperandClass {
+ let Name = "TypedVectorList" # count # "_" # lanes # eltsize;
+
+ let PredicateMethod
+ = "isTypedVectorList<RegKind::NeonVector, " # count # ", " # lanes # ", " # eltsize # ">";
+ let RenderMethod = "addVectorListOperands<" # vecty # ", " # count # ">";
+}
+
+class TypedVecListRegOperand<RegisterClass Reg, int lanes, string eltsize>
+ : RegisterOperand<Reg, "printTypedVectorList<" # lanes # ", '"
+ # eltsize # "'>">;
+
+multiclass VectorList<int count, RegisterClass Reg64, RegisterClass Reg128> {
+  // Lists with implicit element types (probably carried on the instruction instead). E.g. { v0, v1 }
+ def _64AsmOperand : AsmOperandClass {
+ let Name = NAME # "64";
+ let PredicateMethod = "isImplicitlyTypedVectorList<RegKind::NeonVector, " # count # ">";
+ let RenderMethod = "addVectorListOperands<AArch64Operand::VecListIdx_DReg, " # count # ">";
+ }
+
+ def "64" : RegisterOperand<Reg64, "printImplicitlyTypedVectorList"> {
+ let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_64AsmOperand");
+ }
+
+ def _128AsmOperand : AsmOperandClass {
+ let Name = NAME # "128";
+ let PredicateMethod = "isImplicitlyTypedVectorList<RegKind::NeonVector, " # count # ">";
+ let RenderMethod = "addVectorListOperands<AArch64Operand::VecListIdx_QReg, " # count # ">";
+ }
+
+ def "128" : RegisterOperand<Reg128, "printImplicitlyTypedVectorList"> {
+ let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_128AsmOperand");
+ }
+
+ // 64-bit register lists with explicit type.
+
+ // { v0.8b, v1.8b }
+ def _8bAsmOperand : TypedVecListAsmOperand<count, "AArch64Operand::VecListIdx_DReg", 8, 8>;
+ def "8b" : TypedVecListRegOperand<Reg64, 8, "b"> {
+ let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_8bAsmOperand");
+ }
+
+ // { v0.4h, v1.4h }
+ def _4hAsmOperand : TypedVecListAsmOperand<count, "AArch64Operand::VecListIdx_DReg", 4, 16>;
+ def "4h" : TypedVecListRegOperand<Reg64, 4, "h"> {
+ let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_4hAsmOperand");
+ }
+
+ // { v0.2s, v1.2s }
+ def _2sAsmOperand : TypedVecListAsmOperand<count, "AArch64Operand::VecListIdx_DReg", 2, 32>;
+ def "2s" : TypedVecListRegOperand<Reg64, 2, "s"> {
+ let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_2sAsmOperand");
+ }
+
+ // { v0.1d, v1.1d }
+ def _1dAsmOperand : TypedVecListAsmOperand<count, "AArch64Operand::VecListIdx_DReg", 1, 64>;
+ def "1d" : TypedVecListRegOperand<Reg64, 1, "d"> {
+ let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_1dAsmOperand");
+ }
+
+ // 128-bit register lists with explicit type
+
+ // { v0.16b, v1.16b }
+ def _16bAsmOperand : TypedVecListAsmOperand<count, "AArch64Operand::VecListIdx_QReg", 16, 8>;
+ def "16b" : TypedVecListRegOperand<Reg128, 16, "b"> {
+ let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_16bAsmOperand");
+ }
+
+ // { v0.8h, v1.8h }
+ def _8hAsmOperand : TypedVecListAsmOperand<count, "AArch64Operand::VecListIdx_QReg", 8, 16>;
+ def "8h" : TypedVecListRegOperand<Reg128, 8, "h"> {
+ let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_8hAsmOperand");
+ }
+
+ // { v0.4s, v1.4s }
+ def _4sAsmOperand : TypedVecListAsmOperand<count, "AArch64Operand::VecListIdx_QReg", 4, 32>;
+ def "4s" : TypedVecListRegOperand<Reg128, 4, "s"> {
+ let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_4sAsmOperand");
+ }
+
+ // { v0.2d, v1.2d }
+ def _2dAsmOperand : TypedVecListAsmOperand<count, "AArch64Operand::VecListIdx_QReg", 2, 64>;
+ def "2d" : TypedVecListRegOperand<Reg128, 2, "d"> {
+ let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_2dAsmOperand");
+ }
+
+ // { v0.b, v1.b }
+ def _bAsmOperand : TypedVecListAsmOperand<count, "AArch64Operand::VecListIdx_QReg", 0, 8>;
+ def "b" : TypedVecListRegOperand<Reg128, 0, "b"> {
+ let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_bAsmOperand");
+ }
+
+ // { v0.h, v1.h }
+ def _hAsmOperand : TypedVecListAsmOperand<count, "AArch64Operand::VecListIdx_QReg", 0, 16>;
+ def "h" : TypedVecListRegOperand<Reg128, 0, "h"> {
+ let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_hAsmOperand");
+ }
+
+ // { v0.s, v1.s }
+ def _sAsmOperand : TypedVecListAsmOperand<count, "AArch64Operand::VecListIdx_QReg", 0, 32>;
+ def "s" : TypedVecListRegOperand<Reg128, 0, "s"> {
+ let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_sAsmOperand");
+ }
+
+ // { v0.d, v1.d }
+ def _dAsmOperand : TypedVecListAsmOperand<count, "AArch64Operand::VecListIdx_QReg", 0, 64>;
+ def "d" : TypedVecListRegOperand<Reg128, 0, "d"> {
+ let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_dAsmOperand");
+ }
+
+
+}
+
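+// Each defm below instantiates the multiclass for one list length, producing
+// the whole operand family, e.g. VecListTwo64, VecListTwo16b, ..., VecListTwo2d.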
+defm VecListOne : VectorList<1, FPR64, FPR128>;
+defm VecListTwo : VectorList<2, DD, QQ>;
+defm VecListThree : VectorList<3, DDD, QQQ>;
+defm VecListFour : VectorList<4, DDDD, QQQQ>;
+
+class FPRAsmOperand<string RC> : AsmOperandClass {
+ let Name = "FPRAsmOperand" # RC;
+ let PredicateMethod = "isGPR64<AArch64::" # RC # "RegClassID>";
+ let RenderMethod = "addRegOperands";
+}
+
+// Register operand versions of the scalar FP registers.
+def FPR8Op : RegisterOperand<FPR8, "printOperand"> {
+ let ParserMatchClass = FPRAsmOperand<"FPR8">;
+}
+
+def FPR16Op : RegisterOperand<FPR16, "printOperand"> {
+ let ParserMatchClass = FPRAsmOperand<"FPR16">;
+}
+
+def FPR32Op : RegisterOperand<FPR32, "printOperand"> {
+ let ParserMatchClass = FPRAsmOperand<"FPR32">;
+}
+
+def FPR64Op : RegisterOperand<FPR64, "printOperand"> {
+ let ParserMatchClass = FPRAsmOperand<"FPR64">;
+}
+
+def FPR128Op : RegisterOperand<FPR128, "printOperand"> {
+ let ParserMatchClass = FPRAsmOperand<"FPR128">;
+}
+
+//===----------------------------------------------------------------------===//
+// ARMv8.1a atomic CASP register operands
+
+
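+// Tuples of consecutive W/X registers, used as the register-pair operands of
+// the v8.1a CASP instructions.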
+def WSeqPairs : RegisterTuples<[sube32, subo32],
+ [(rotl GPR32, 0), (rotl GPR32, 1)]>;
+def XSeqPairs : RegisterTuples<[sube64, subo64],
+ [(rotl GPR64, 0), (rotl GPR64, 1)]>;
+
+def WSeqPairsClass : RegisterClass<"AArch64", [untyped], 32,
+ (add WSeqPairs)>{
+ let Size = 64;
+}
+def XSeqPairsClass : RegisterClass<"AArch64", [untyped], 64,
+ (add XSeqPairs)>{
+ let Size = 128;
+}
+
+
+let RenderMethod = "addRegOperands", ParserMethod="tryParseGPRSeqPair" in {
+ def WSeqPairsAsmOperandClass : AsmOperandClass { let Name = "WSeqPair"; }
+ def XSeqPairsAsmOperandClass : AsmOperandClass { let Name = "XSeqPair"; }
+}
+
+def WSeqPairClassOperand :
+ RegisterOperand<WSeqPairsClass, "printGPRSeqPairsClassOperand<32>"> {
+ let ParserMatchClass = WSeqPairsAsmOperandClass;
+}
+def XSeqPairClassOperand :
+ RegisterOperand<XSeqPairsClass, "printGPRSeqPairsClassOperand<64>"> {
+ let ParserMatchClass = XSeqPairsAsmOperandClass;
+}
+
+
+//===----- END: v8.1a atomic CASP register operands -----------------------===//
+
+// SVE predicate registers
+def P0 : AArch64Reg<0, "p0">, DwarfRegNum<[48]>;
+def P1 : AArch64Reg<1, "p1">, DwarfRegNum<[49]>;
+def P2 : AArch64Reg<2, "p2">, DwarfRegNum<[50]>;
+def P3 : AArch64Reg<3, "p3">, DwarfRegNum<[51]>;
+def P4 : AArch64Reg<4, "p4">, DwarfRegNum<[52]>;
+def P5 : AArch64Reg<5, "p5">, DwarfRegNum<[53]>;
+def P6 : AArch64Reg<6, "p6">, DwarfRegNum<[54]>;
+def P7 : AArch64Reg<7, "p7">, DwarfRegNum<[55]>;
+def P8 : AArch64Reg<8, "p8">, DwarfRegNum<[56]>;
+def P9 : AArch64Reg<9, "p9">, DwarfRegNum<[57]>;
+def P10 : AArch64Reg<10, "p10">, DwarfRegNum<[58]>;
+def P11 : AArch64Reg<11, "p11">, DwarfRegNum<[59]>;
+def P12 : AArch64Reg<12, "p12">, DwarfRegNum<[60]>;
+def P13 : AArch64Reg<13, "p13">, DwarfRegNum<[61]>;
+def P14 : AArch64Reg<14, "p14">, DwarfRegNum<[62]>;
+def P15 : AArch64Reg<15, "p15">, DwarfRegNum<[63]>;
+
+// The parts of the SVE registers that don't overlap the Neon registers.
+// These are only used as part of clobber lists.
+def Z0_HI : AArch64Reg<0, "z0_hi">;
+def Z1_HI : AArch64Reg<1, "z1_hi">;
+def Z2_HI : AArch64Reg<2, "z2_hi">;
+def Z3_HI : AArch64Reg<3, "z3_hi">;
+def Z4_HI : AArch64Reg<4, "z4_hi">;
+def Z5_HI : AArch64Reg<5, "z5_hi">;
+def Z6_HI : AArch64Reg<6, "z6_hi">;
+def Z7_HI : AArch64Reg<7, "z7_hi">;
+def Z8_HI : AArch64Reg<8, "z8_hi">;
+def Z9_HI : AArch64Reg<9, "z9_hi">;
+def Z10_HI : AArch64Reg<10, "z10_hi">;
+def Z11_HI : AArch64Reg<11, "z11_hi">;
+def Z12_HI : AArch64Reg<12, "z12_hi">;
+def Z13_HI : AArch64Reg<13, "z13_hi">;
+def Z14_HI : AArch64Reg<14, "z14_hi">;
+def Z15_HI : AArch64Reg<15, "z15_hi">;
+def Z16_HI : AArch64Reg<16, "z16_hi">;
+def Z17_HI : AArch64Reg<17, "z17_hi">;
+def Z18_HI : AArch64Reg<18, "z18_hi">;
+def Z19_HI : AArch64Reg<19, "z19_hi">;
+def Z20_HI : AArch64Reg<20, "z20_hi">;
+def Z21_HI : AArch64Reg<21, "z21_hi">;
+def Z22_HI : AArch64Reg<22, "z22_hi">;
+def Z23_HI : AArch64Reg<23, "z23_hi">;
+def Z24_HI : AArch64Reg<24, "z24_hi">;
+def Z25_HI : AArch64Reg<25, "z25_hi">;
+def Z26_HI : AArch64Reg<26, "z26_hi">;
+def Z27_HI : AArch64Reg<27, "z27_hi">;
+def Z28_HI : AArch64Reg<28, "z28_hi">;
+def Z29_HI : AArch64Reg<29, "z29_hi">;
+def Z30_HI : AArch64Reg<30, "z30_hi">;
+def Z31_HI : AArch64Reg<31, "z31_hi">;
+
+// SVE variable-size vector registers
+let SubRegIndices = [zsub,zsub_hi] in {
+def Z0 : AArch64Reg<0, "z0", [Q0, Z0_HI]>, DwarfRegNum<[96]>;
+def Z1 : AArch64Reg<1, "z1", [Q1, Z1_HI]>, DwarfRegNum<[97]>;
+def Z2 : AArch64Reg<2, "z2", [Q2, Z2_HI]>, DwarfRegNum<[98]>;
+def Z3 : AArch64Reg<3, "z3", [Q3, Z3_HI]>, DwarfRegNum<[99]>;
+def Z4 : AArch64Reg<4, "z4", [Q4, Z4_HI]>, DwarfRegNum<[100]>;
+def Z5 : AArch64Reg<5, "z5", [Q5, Z5_HI]>, DwarfRegNum<[101]>;
+def Z6 : AArch64Reg<6, "z6", [Q6, Z6_HI]>, DwarfRegNum<[102]>;
+def Z7 : AArch64Reg<7, "z7", [Q7, Z7_HI]>, DwarfRegNum<[103]>;
+def Z8 : AArch64Reg<8, "z8", [Q8, Z8_HI]>, DwarfRegNum<[104]>;
+def Z9 : AArch64Reg<9, "z9", [Q9, Z9_HI]>, DwarfRegNum<[105]>;
+def Z10 : AArch64Reg<10, "z10", [Q10, Z10_HI]>, DwarfRegNum<[106]>;
+def Z11 : AArch64Reg<11, "z11", [Q11, Z11_HI]>, DwarfRegNum<[107]>;
+def Z12 : AArch64Reg<12, "z12", [Q12, Z12_HI]>, DwarfRegNum<[108]>;
+def Z13 : AArch64Reg<13, "z13", [Q13, Z13_HI]>, DwarfRegNum<[109]>;
+def Z14 : AArch64Reg<14, "z14", [Q14, Z14_HI]>, DwarfRegNum<[110]>;
+def Z15 : AArch64Reg<15, "z15", [Q15, Z15_HI]>, DwarfRegNum<[111]>;
+def Z16 : AArch64Reg<16, "z16", [Q16, Z16_HI]>, DwarfRegNum<[112]>;
+def Z17 : AArch64Reg<17, "z17", [Q17, Z17_HI]>, DwarfRegNum<[113]>;
+def Z18 : AArch64Reg<18, "z18", [Q18, Z18_HI]>, DwarfRegNum<[114]>;
+def Z19 : AArch64Reg<19, "z19", [Q19, Z19_HI]>, DwarfRegNum<[115]>;
+def Z20 : AArch64Reg<20, "z20", [Q20, Z20_HI]>, DwarfRegNum<[116]>;
+def Z21 : AArch64Reg<21, "z21", [Q21, Z21_HI]>, DwarfRegNum<[117]>;
+def Z22 : AArch64Reg<22, "z22", [Q22, Z22_HI]>, DwarfRegNum<[118]>;
+def Z23 : AArch64Reg<23, "z23", [Q23, Z23_HI]>, DwarfRegNum<[119]>;
+def Z24 : AArch64Reg<24, "z24", [Q24, Z24_HI]>, DwarfRegNum<[120]>;
+def Z25 : AArch64Reg<25, "z25", [Q25, Z25_HI]>, DwarfRegNum<[121]>;
+def Z26 : AArch64Reg<26, "z26", [Q26, Z26_HI]>, DwarfRegNum<[122]>;
+def Z27 : AArch64Reg<27, "z27", [Q27, Z27_HI]>, DwarfRegNum<[123]>;
+def Z28 : AArch64Reg<28, "z28", [Q28, Z28_HI]>, DwarfRegNum<[124]>;
+def Z29 : AArch64Reg<29, "z29", [Q29, Z29_HI]>, DwarfRegNum<[125]>;
+def Z30 : AArch64Reg<30, "z30", [Q30, Z30_HI]>, DwarfRegNum<[126]>;
+def Z31 : AArch64Reg<31, "z31", [Q31, Z31_HI]>, DwarfRegNum<[127]>;
+}
+
+// Enum describing the element size for destructive
+// operations.
+class ElementSizeEnum<bits<3> val> {
+ bits<3> Value = val;
+}
+
+def ElementSizeNone : ElementSizeEnum<0>;
+def ElementSizeB : ElementSizeEnum<1>;
+def ElementSizeH : ElementSizeEnum<2>;
+def ElementSizeS : ElementSizeEnum<3>;
+def ElementSizeD : ElementSizeEnum<4>;
+def ElementSizeQ : ElementSizeEnum<5>; // Unused
+
+class SVERegOp <string Suffix, AsmOperandClass C,
+ ElementSizeEnum Size,
+ RegisterClass RC> : RegisterOperand<RC> {
+ ElementSizeEnum ElementSize;
+
+ let ElementSize = Size;
+ let PrintMethod = !if(!eq(Suffix, ""),
+ "printSVERegOp<>",
+ "printSVERegOp<'" # Suffix # "'>");
+ let ParserMatchClass = C;
+}
+
+class PPRRegOp <string Suffix, AsmOperandClass C, ElementSizeEnum Size,
+ RegisterClass RC> : SVERegOp<Suffix, C, Size, RC> {}
+class ZPRRegOp <string Suffix, AsmOperandClass C, ElementSizeEnum Size,
+ RegisterClass RC> : SVERegOp<Suffix, C, Size, RC> {}
+
+//******************************************************************************
+
+// SVE predicate register classes.
+class PPRClass<int lastreg> : RegisterClass<
+ "AArch64",
+ [ nxv16i1, nxv8i1, nxv4i1, nxv2i1 ], 16,
+ (sequence "P%u", 0, lastreg)> {
+ let Size = 16;
+}
+
+def PPR : PPRClass<15>;
+def PPR_3b : PPRClass<7>; // Restricted 3 bit SVE predicate register class.
+
+class PPRAsmOperand <string name, string RegClass, int Width>: AsmOperandClass {
+ let Name = "SVE" # name # "Reg";
+ let PredicateMethod = "isSVEPredicateVectorRegOfWidth<"
+ # Width # ", " # "AArch64::" # RegClass # "RegClassID>";
+ let DiagnosticType = "InvalidSVE" # name # "Reg";
+ let RenderMethod = "addRegOperands";
+ let ParserMethod = "tryParseSVEPredicateVector";
+}
+
+def PPRAsmOpAny : PPRAsmOperand<"PredicateAny", "PPR", 0>;
+def PPRAsmOp8 : PPRAsmOperand<"PredicateB", "PPR", 8>;
+def PPRAsmOp16 : PPRAsmOperand<"PredicateH", "PPR", 16>;
+def PPRAsmOp32 : PPRAsmOperand<"PredicateS", "PPR", 32>;
+def PPRAsmOp64 : PPRAsmOperand<"PredicateD", "PPR", 64>;
+
+def PPRAny : PPRRegOp<"", PPRAsmOpAny, ElementSizeNone, PPR>;
+def PPR8 : PPRRegOp<"b", PPRAsmOp8, ElementSizeB, PPR>;
+def PPR16 : PPRRegOp<"h", PPRAsmOp16, ElementSizeH, PPR>;
+def PPR32 : PPRRegOp<"s", PPRAsmOp32, ElementSizeS, PPR>;
+def PPR64 : PPRRegOp<"d", PPRAsmOp64, ElementSizeD, PPR>;
+
+def PPRAsmOp3bAny : PPRAsmOperand<"Predicate3bAny", "PPR_3b", 0>;
+def PPRAsmOp3b8 : PPRAsmOperand<"Predicate3bB", "PPR_3b", 8>;
+def PPRAsmOp3b16 : PPRAsmOperand<"Predicate3bH", "PPR_3b", 16>;
+def PPRAsmOp3b32 : PPRAsmOperand<"Predicate3bS", "PPR_3b", 32>;
+def PPRAsmOp3b64 : PPRAsmOperand<"Predicate3bD", "PPR_3b", 64>;
+
+def PPR3bAny : PPRRegOp<"", PPRAsmOp3bAny, ElementSizeNone, PPR_3b>;
+def PPR3b8 : PPRRegOp<"b", PPRAsmOp3b8, ElementSizeB, PPR_3b>;
+def PPR3b16 : PPRRegOp<"h", PPRAsmOp3b16, ElementSizeH, PPR_3b>;
+def PPR3b32 : PPRRegOp<"s", PPRAsmOp3b32, ElementSizeS, PPR_3b>;
+def PPR3b64 : PPRRegOp<"d", PPRAsmOp3b64, ElementSizeD, PPR_3b>;
+
+//******************************************************************************
+
+// SVE vector register class
+def ZPR : RegisterClass<"AArch64",
+ [nxv16i8, nxv8i16, nxv4i32, nxv2i64,
+ nxv2f16, nxv4f16, nxv8f16,
+ nxv1f32, nxv2f32, nxv4f32,
+ nxv1f64, nxv2f64],
+ 128, (sequence "Z%u", 0, 31)> {
+ let Size = 128;
+}
+
+// SVE restricted 4 bit scalable vector register class
+def ZPR_4b : RegisterClass<"AArch64",
+ [nxv16i8, nxv8i16, nxv4i32, nxv2i64,
+ nxv2f16, nxv4f16, nxv8f16,
+ nxv1f32, nxv2f32, nxv4f32,
+ nxv1f64, nxv2f64],
+ 128, (sequence "Z%u", 0, 15)> {
+ let Size = 128;
+}
+
+// SVE restricted 3 bit scalable vector register class
+def ZPR_3b : RegisterClass<"AArch64",
+ [nxv16i8, nxv8i16, nxv4i32, nxv2i64,
+ nxv2f16, nxv4f16, nxv8f16,
+ nxv1f32, nxv2f32, nxv4f32,
+ nxv1f64, nxv2f64],
+ 128, (sequence "Z%u", 0, 7)> {
+ let Size = 128;
+}
+
+class ZPRAsmOperand<string name, int Width, string RegClassSuffix = "">
+ : AsmOperandClass {
+ let Name = "SVE" # name # "Reg";
+ let PredicateMethod = "isSVEDataVectorRegOfWidth<"
+ # Width # ", AArch64::ZPR"
+ # RegClassSuffix # "RegClassID>";
+ let RenderMethod = "addRegOperands";
+ let DiagnosticType = "InvalidZPR" # RegClassSuffix # Width;
+ let ParserMethod = "tryParseSVEDataVector<false, "
+ # !if(!eq(Width, 0), "false", "true") # ">";
+}
+
+def ZPRAsmOpAny : ZPRAsmOperand<"VectorAny", 0>;
+def ZPRAsmOp8 : ZPRAsmOperand<"VectorB", 8>;
+def ZPRAsmOp16 : ZPRAsmOperand<"VectorH", 16>;
+def ZPRAsmOp32 : ZPRAsmOperand<"VectorS", 32>;
+def ZPRAsmOp64 : ZPRAsmOperand<"VectorD", 64>;
+def ZPRAsmOp128 : ZPRAsmOperand<"VectorQ", 128>;
+
+def ZPRAny : ZPRRegOp<"", ZPRAsmOpAny, ElementSizeNone, ZPR>;
+def ZPR8 : ZPRRegOp<"b", ZPRAsmOp8, ElementSizeB, ZPR>;
+def ZPR16 : ZPRRegOp<"h", ZPRAsmOp16, ElementSizeH, ZPR>;
+def ZPR32 : ZPRRegOp<"s", ZPRAsmOp32, ElementSizeS, ZPR>;
+def ZPR64 : ZPRRegOp<"d", ZPRAsmOp64, ElementSizeD, ZPR>;
+def ZPR128 : ZPRRegOp<"q", ZPRAsmOp128, ElementSizeQ, ZPR>;
+
+def ZPRAsmOp3b8 : ZPRAsmOperand<"Vector3bB", 8, "_3b">;
+def ZPRAsmOp3b16 : ZPRAsmOperand<"Vector3bH", 16, "_3b">;
+def ZPRAsmOp3b32 : ZPRAsmOperand<"Vector3bS", 32, "_3b">;
+
+def ZPR3b8 : ZPRRegOp<"b", ZPRAsmOp3b8, ElementSizeB, ZPR_3b>;
+def ZPR3b16 : ZPRRegOp<"h", ZPRAsmOp3b16, ElementSizeH, ZPR_3b>;
+def ZPR3b32 : ZPRRegOp<"s", ZPRAsmOp3b32, ElementSizeS, ZPR_3b>;
+
+def ZPRAsmOp4b16 : ZPRAsmOperand<"Vector4bH", 16, "_4b">;
+def ZPRAsmOp4b32 : ZPRAsmOperand<"Vector4bS", 32, "_4b">;
+def ZPRAsmOp4b64 : ZPRAsmOperand<"Vector4bD", 64, "_4b">;
+
+def ZPR4b16 : ZPRRegOp<"h", ZPRAsmOp4b16, ElementSizeH, ZPR_4b>;
+def ZPR4b32 : ZPRRegOp<"s", ZPRAsmOp4b32, ElementSizeS, ZPR_4b>;
+def ZPR4b64 : ZPRRegOp<"d", ZPRAsmOp4b64, ElementSizeD, ZPR_4b>;
+
+class FPRasZPR<int Width> : AsmOperandClass{
+ let Name = "FPR" # Width # "asZPR";
+ let PredicateMethod = "isFPRasZPR<AArch64::FPR" # Width # "RegClassID>";
+ let RenderMethod = "addFPRasZPRRegOperands<" # Width # ">";
+}
+
+class FPRasZPROperand<int Width> : RegisterOperand<ZPR> {
+ let ParserMatchClass = FPRasZPR<Width>;
+ let PrintMethod = "printZPRasFPR<" # Width # ">";
+}
+
+def FPR8asZPR : FPRasZPROperand<8>;
+def FPR16asZPR : FPRasZPROperand<16>;
+def FPR32asZPR : FPRasZPROperand<32>;
+def FPR64asZPR : FPRasZPROperand<64>;
+def FPR128asZPR : FPRasZPROperand<128>;
+
+let Namespace = "AArch64" in {
+ def zsub0 : SubRegIndex<128, -1>;
+ def zsub1 : SubRegIndex<128, -1>;
+ def zsub2 : SubRegIndex<128, -1>;
+ def zsub3 : SubRegIndex<128, -1>;
+}
+
+// Pairs, triples, and quads of SVE vector registers.
+def ZSeqPairs : RegisterTuples<[zsub0, zsub1], [(rotl ZPR, 0), (rotl ZPR, 1)]>;
+def ZSeqTriples : RegisterTuples<[zsub0, zsub1, zsub2], [(rotl ZPR, 0), (rotl ZPR, 1), (rotl ZPR, 2)]>;
+def ZSeqQuads : RegisterTuples<[zsub0, zsub1, zsub2, zsub3], [(rotl ZPR, 0), (rotl ZPR, 1), (rotl ZPR, 2), (rotl ZPR, 3)]>;
+
+def ZPR2 : RegisterClass<"AArch64", [untyped], 128, (add ZSeqPairs)> {
+ let Size = 256;
+}
+def ZPR3 : RegisterClass<"AArch64", [untyped], 128, (add ZSeqTriples)> {
+ let Size = 384;
+}
+def ZPR4 : RegisterClass<"AArch64", [untyped], 128, (add ZSeqQuads)> {
+ let Size = 512;
+}
+
+class ZPRVectorList<int ElementWidth, int NumRegs> : AsmOperandClass {
+ let Name = "SVEVectorList" # NumRegs # ElementWidth;
+ let ParserMethod = "tryParseVectorList<RegKind::SVEDataVector>";
+ let PredicateMethod =
+ "isTypedVectorList<RegKind::SVEDataVector, " #NumRegs #", 0, " #ElementWidth #">";
+ let RenderMethod = "addVectorListOperands<AArch64Operand::VecListIdx_ZReg, " # NumRegs # ">";
+}
+
+def Z_b : RegisterOperand<ZPR, "printTypedVectorList<0,'b'>"> {
+ let ParserMatchClass = ZPRVectorList<8, 1>;
+}
+
+def Z_h : RegisterOperand<ZPR, "printTypedVectorList<0,'h'>"> {
+ let ParserMatchClass = ZPRVectorList<16, 1>;
+}
+
+def Z_s : RegisterOperand<ZPR, "printTypedVectorList<0,'s'>"> {
+ let ParserMatchClass = ZPRVectorList<32, 1>;
+}
+
+def Z_d : RegisterOperand<ZPR, "printTypedVectorList<0,'d'>"> {
+ let ParserMatchClass = ZPRVectorList<64, 1>;
+}
+
+def ZZ_b : RegisterOperand<ZPR2, "printTypedVectorList<0,'b'>"> {
+ let ParserMatchClass = ZPRVectorList<8, 2>;
+}
+
+def ZZ_h : RegisterOperand<ZPR2, "printTypedVectorList<0,'h'>"> {
+ let ParserMatchClass = ZPRVectorList<16, 2>;
+}
+
+def ZZ_s : RegisterOperand<ZPR2, "printTypedVectorList<0,'s'>"> {
+ let ParserMatchClass = ZPRVectorList<32, 2>;
+}
+
+def ZZ_d : RegisterOperand<ZPR2, "printTypedVectorList<0,'d'>"> {
+ let ParserMatchClass = ZPRVectorList<64, 2>;
+}
+
+def ZZZ_b : RegisterOperand<ZPR3, "printTypedVectorList<0,'b'>"> {
+ let ParserMatchClass = ZPRVectorList<8, 3>;
+}
+
+def ZZZ_h : RegisterOperand<ZPR3, "printTypedVectorList<0,'h'>"> {
+ let ParserMatchClass = ZPRVectorList<16, 3>;
+}
+
+def ZZZ_s : RegisterOperand<ZPR3, "printTypedVectorList<0,'s'>"> {
+ let ParserMatchClass = ZPRVectorList<32, 3>;
+}
+
+def ZZZ_d : RegisterOperand<ZPR3, "printTypedVectorList<0,'d'>"> {
+ let ParserMatchClass = ZPRVectorList<64, 3>;
+}
+
+def ZZZZ_b : RegisterOperand<ZPR4, "printTypedVectorList<0,'b'>"> {
+ let ParserMatchClass = ZPRVectorList<8, 4>;
+}
+
+def ZZZZ_h : RegisterOperand<ZPR4, "printTypedVectorList<0,'h'>"> {
+ let ParserMatchClass = ZPRVectorList<16, 4>;
+}
+
+def ZZZZ_s : RegisterOperand<ZPR4, "printTypedVectorList<0,'s'>"> {
+ let ParserMatchClass = ZPRVectorList<32, 4>;
+}
+
+def ZZZZ_d : RegisterOperand<ZPR4, "printTypedVectorList<0,'d'>"> {
+ let ParserMatchClass = ZPRVectorList<64, 4>;
+}
+
+class ZPRExtendAsmOperand<string ShiftExtend, int RegWidth, int Scale,
+ bit ScaleAlwaysSame = 0b0> : AsmOperandClass {
+ let Name = "ZPRExtend" # ShiftExtend # RegWidth # Scale
+ # !if(ScaleAlwaysSame, "Only", "");
+
+ let PredicateMethod = "isSVEDataVectorRegWithShiftExtend<"
+ # RegWidth # ", AArch64::ZPRRegClassID, "
+ # "AArch64_AM::" # ShiftExtend # ", "
+ # Scale # ", "
+ # !if(ScaleAlwaysSame, "true", "false")
+ # ">";
+ let DiagnosticType = "InvalidZPR" # RegWidth # ShiftExtend # Scale;
+ let RenderMethod = "addRegOperands";
+ let ParserMethod = "tryParseSVEDataVector<true, true>";
+}
+
+class ZPRExtendRegisterOperand<bit SignExtend, bit IsLSL, string Repr,
+ int RegWidth, int Scale, string Suffix = "">
+ : RegisterOperand<ZPR> {
+ let ParserMatchClass =
+ !cast<AsmOperandClass>("ZPR" # RegWidth # "AsmOpndExt" # Repr # Scale # Suffix);
+ let PrintMethod = "printRegWithShiftExtend<"
+ # !if(SignExtend, "true", "false") # ", "
+ # Scale # ", "
+ # !if(IsLSL, "'x'", "'w'") # ", "
+ # !if(!eq(RegWidth, 32), "'s'", "'d'") # ">";
+}
+
+foreach RegWidth = [32, 64] in {
+ // UXTW(8|16|32|64)
+ def ZPR#RegWidth#AsmOpndExtUXTW8Only : ZPRExtendAsmOperand<"UXTW", RegWidth, 8, 0b1>;
+ def ZPR#RegWidth#AsmOpndExtUXTW8 : ZPRExtendAsmOperand<"UXTW", RegWidth, 8>;
+ def ZPR#RegWidth#AsmOpndExtUXTW16 : ZPRExtendAsmOperand<"UXTW", RegWidth, 16>;
+ def ZPR#RegWidth#AsmOpndExtUXTW32 : ZPRExtendAsmOperand<"UXTW", RegWidth, 32>;
+ def ZPR#RegWidth#AsmOpndExtUXTW64 : ZPRExtendAsmOperand<"UXTW", RegWidth, 64>;
+
+ def ZPR#RegWidth#ExtUXTW8Only : ZPRExtendRegisterOperand<0b0, 0b0, "UXTW", RegWidth, 8, "Only">;
+ def ZPR#RegWidth#ExtUXTW8 : ZPRExtendRegisterOperand<0b0, 0b0, "UXTW", RegWidth, 8>;
+ def ZPR#RegWidth#ExtUXTW16 : ZPRExtendRegisterOperand<0b0, 0b0, "UXTW", RegWidth, 16>;
+ def ZPR#RegWidth#ExtUXTW32 : ZPRExtendRegisterOperand<0b0, 0b0, "UXTW", RegWidth, 32>;
+ def ZPR#RegWidth#ExtUXTW64 : ZPRExtendRegisterOperand<0b0, 0b0, "UXTW", RegWidth, 64>;
+
+ // SXTW(8|16|32|64)
+ def ZPR#RegWidth#AsmOpndExtSXTW8Only : ZPRExtendAsmOperand<"SXTW", RegWidth, 8, 0b1>;
+ def ZPR#RegWidth#AsmOpndExtSXTW8 : ZPRExtendAsmOperand<"SXTW", RegWidth, 8>;
+ def ZPR#RegWidth#AsmOpndExtSXTW16 : ZPRExtendAsmOperand<"SXTW", RegWidth, 16>;
+ def ZPR#RegWidth#AsmOpndExtSXTW32 : ZPRExtendAsmOperand<"SXTW", RegWidth, 32>;
+ def ZPR#RegWidth#AsmOpndExtSXTW64 : ZPRExtendAsmOperand<"SXTW", RegWidth, 64>;
+
+ def ZPR#RegWidth#ExtSXTW8Only : ZPRExtendRegisterOperand<0b1, 0b0, "SXTW", RegWidth, 8, "Only">;
+ def ZPR#RegWidth#ExtSXTW8 : ZPRExtendRegisterOperand<0b1, 0b0, "SXTW", RegWidth, 8>;
+ def ZPR#RegWidth#ExtSXTW16 : ZPRExtendRegisterOperand<0b1, 0b0, "SXTW", RegWidth, 16>;
+ def ZPR#RegWidth#ExtSXTW32 : ZPRExtendRegisterOperand<0b1, 0b0, "SXTW", RegWidth, 32>;
+ def ZPR#RegWidth#ExtSXTW64 : ZPRExtendRegisterOperand<0b1, 0b0, "SXTW", RegWidth, 64>;
+
+ // LSL(8|16|32|64)
+ def ZPR#RegWidth#AsmOpndExtLSL8 : ZPRExtendAsmOperand<"LSL", RegWidth, 8>;
+ def ZPR#RegWidth#AsmOpndExtLSL16 : ZPRExtendAsmOperand<"LSL", RegWidth, 16>;
+ def ZPR#RegWidth#AsmOpndExtLSL32 : ZPRExtendAsmOperand<"LSL", RegWidth, 32>;
+ def ZPR#RegWidth#AsmOpndExtLSL64 : ZPRExtendAsmOperand<"LSL", RegWidth, 64>;
+ def ZPR#RegWidth#ExtLSL8 : ZPRExtendRegisterOperand<0b0, 0b1, "LSL", RegWidth, 8>;
+ def ZPR#RegWidth#ExtLSL16 : ZPRExtendRegisterOperand<0b0, 0b1, "LSL", RegWidth, 16>;
+ def ZPR#RegWidth#ExtLSL32 : ZPRExtendRegisterOperand<0b0, 0b1, "LSL", RegWidth, 32>;
+ def ZPR#RegWidth#ExtLSL64 : ZPRExtendRegisterOperand<0b0, 0b1, "LSL", RegWidth, 64>;
+}
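+
+// The loop above expands to operand definitions such as ZPR32ExtUXTW8Only,
+// ZPR64ExtSXTW16 and ZPR64ExtLSL64, which the SVE gather/scatter load, store
+// and prefetch definitions reference for their extended vector-index
+// addressing modes.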
+
+class GPR64ShiftExtendAsmOperand <string AsmOperandName, int Scale, string RegClass> : AsmOperandClass {
+ let Name = AsmOperandName # Scale;
+ let PredicateMethod = "isGPR64WithShiftExtend<AArch64::"#RegClass#"RegClassID, " # Scale # ">";
+ let DiagnosticType = "Invalid" # AsmOperandName # Scale;
+ let RenderMethod = "addRegOperands";
+ let ParserMethod = "tryParseGPROperand<true>";
+}
+
+class GPR64ExtendRegisterOperand<string Name, int Scale, RegisterClass RegClass> : RegisterOperand<RegClass>{
+ let ParserMatchClass = !cast<AsmOperandClass>(Name);
+ let PrintMethod = "printRegWithShiftExtend<false, " # Scale # ", 'x', 0>";
+}
+
+foreach Scale = [8, 16, 32, 64] in {
+ def GPR64shiftedAsmOpnd # Scale : GPR64ShiftExtendAsmOperand<"GPR64shifted", Scale, "GPR64">;
+ def GPR64shifted # Scale : GPR64ExtendRegisterOperand<"GPR64shiftedAsmOpnd" # Scale, Scale, GPR64>;
+
+ def GPR64NoXZRshiftedAsmOpnd # Scale : GPR64ShiftExtendAsmOperand<"GPR64NoXZRshifted", Scale, "GPR64common">;
+ def GPR64NoXZRshifted # Scale : GPR64ExtendRegisterOperand<"GPR64NoXZRshiftedAsmOpnd" # Scale, Scale, GPR64common>;
+}
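+
+// These expand to GPR64shifted(8|16|32|64) and GPR64NoXZRshifted(8|16|32|64),
+// the scaled register-offset operands (e.g. "x1, lsl #1" for halfword
+// accesses) used by the SVE contiguous, structured and first-faulting
+// load/store definitions.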
diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SVEInstrInfo.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SVEInstrInfo.td
new file mode 100644
index 000000000..0fde68011
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SVEInstrInfo.td
@@ -0,0 +1,1024 @@
+//=- AArch64SVEInstrInfo.td - AArch64 SVE Instructions -*- tablegen -*-----=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// AArch64 Scalable Vector Extension (SVE) Instruction definitions.
+//
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasSVE] in {
+
+ def RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr">;
+ def RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">;
+ def RDFFR_P : sve_int_rdffr_unpred<"rdffr">;
+ def SETFFR : sve_int_setffr<"setffr">;
+ def WRFFR : sve_int_wrffr<"wrffr">;
+
+ defm ADD_ZZZ : sve_int_bin_cons_arit_0<0b000, "add">;
+ defm SUB_ZZZ : sve_int_bin_cons_arit_0<0b001, "sub">;
+ defm SQADD_ZZZ : sve_int_bin_cons_arit_0<0b100, "sqadd">;
+ defm UQADD_ZZZ : sve_int_bin_cons_arit_0<0b101, "uqadd">;
+ defm SQSUB_ZZZ : sve_int_bin_cons_arit_0<0b110, "sqsub">;
+ defm UQSUB_ZZZ : sve_int_bin_cons_arit_0<0b111, "uqsub">;
+
+ def AND_ZZZ : sve_int_bin_cons_log<0b00, "and">;
+ def ORR_ZZZ : sve_int_bin_cons_log<0b01, "orr">;
+ def EOR_ZZZ : sve_int_bin_cons_log<0b10, "eor">;
+ def BIC_ZZZ : sve_int_bin_cons_log<0b11, "bic">;
+
+ defm ADD_ZPmZ : sve_int_bin_pred_arit_0<0b000, "add">;
+ defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub">;
+ defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr">;
+
+ defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr">;
+ defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor">;
+ defm AND_ZPmZ : sve_int_bin_pred_log<0b010, "and">;
+ defm BIC_ZPmZ : sve_int_bin_pred_log<0b011, "bic">;
+
+ defm ADD_ZI : sve_int_arith_imm0<0b000, "add">;
+ defm SUB_ZI : sve_int_arith_imm0<0b001, "sub">;
+ defm SUBR_ZI : sve_int_arith_imm0<0b011, "subr">;
+ defm SQADD_ZI : sve_int_arith_imm0<0b100, "sqadd">;
+ defm UQADD_ZI : sve_int_arith_imm0<0b101, "uqadd">;
+ defm SQSUB_ZI : sve_int_arith_imm0<0b110, "sqsub">;
+ defm UQSUB_ZI : sve_int_arith_imm0<0b111, "uqsub">;
+
+ defm MAD_ZPmZZ : sve_int_mladdsub_vvv_pred<0b0, "mad">;
+ defm MSB_ZPmZZ : sve_int_mladdsub_vvv_pred<0b1, "msb">;
+ defm MLA_ZPmZZ : sve_int_mlas_vvv_pred<0b0, "mla">;
+ defm MLS_ZPmZZ : sve_int_mlas_vvv_pred<0b1, "mls">;
+
+ // SVE predicated integer reductions.
+ defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv">;
+ defm UADDV_VPZ : sve_int_reduce_0_uaddv<0b001, "uaddv">;
+ defm SMAXV_VPZ : sve_int_reduce_1<0b000, "smaxv">;
+ defm UMAXV_VPZ : sve_int_reduce_1<0b001, "umaxv">;
+ defm SMINV_VPZ : sve_int_reduce_1<0b010, "sminv">;
+ defm UMINV_VPZ : sve_int_reduce_1<0b011, "uminv">;
+ defm ORV_VPZ : sve_int_reduce_2<0b000, "orv">;
+ defm EORV_VPZ : sve_int_reduce_2<0b001, "eorv">;
+ defm ANDV_VPZ : sve_int_reduce_2<0b010, "andv">;
+
+ defm ORR_ZI : sve_int_log_imm<0b00, "orr", "orn">;
+ defm EOR_ZI : sve_int_log_imm<0b01, "eor", "eon">;
+ defm AND_ZI : sve_int_log_imm<0b10, "and", "bic">;
+
+ defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", simm8>;
+ defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", simm8>;
+ defm UMAX_ZI : sve_int_arith_imm1<0b01, "umax", imm0_255>;
+ defm UMIN_ZI : sve_int_arith_imm1<0b11, "umin", imm0_255>;
+
+ defm MUL_ZI : sve_int_arith_imm2<"mul">;
+ defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul">;
+ defm SMULH_ZPmZ : sve_int_bin_pred_arit_2<0b010, "smulh">;
+ defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh">;
+
+ defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv">;
+ defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv">;
+ defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr">;
+ defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr">;
+
+ defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot">;
+ defm UDOT_ZZZ : sve_intx_dot<0b1, "udot">;
+
+ defm SDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b0, "sdot">;
+ defm UDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b1, "udot">;
+
+ defm SXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b000, "sxtb">;
+ defm UXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b001, "uxtb">;
+ defm SXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b010, "sxth">;
+ defm UXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b011, "uxth">;
+ defm SXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b100, "sxtw">;
+ defm UXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b101, "uxtw">;
+ defm ABS_ZPmZ : sve_int_un_pred_arit_0< 0b110, "abs">;
+ defm NEG_ZPmZ : sve_int_un_pred_arit_0< 0b111, "neg">;
+
+ defm CLS_ZPmZ : sve_int_un_pred_arit_1< 0b000, "cls">;
+ defm CLZ_ZPmZ : sve_int_un_pred_arit_1< 0b001, "clz">;
+ defm CNT_ZPmZ : sve_int_un_pred_arit_1< 0b010, "cnt">;
+ defm CNOT_ZPmZ : sve_int_un_pred_arit_1< 0b011, "cnot">;
+ defm NOT_ZPmZ : sve_int_un_pred_arit_1< 0b110, "not">;
+ defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs">;
+ defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg">;
+
+ defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax">;
+ defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax">;
+ defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin">;
+ defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin">;
+ defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd">;
+ defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd">;
+
+ defm FRECPE_ZZ : sve_fp_2op_u_zd<0b110, "frecpe">;
+ defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte">;
+
+ defm FADD_ZPmI : sve_fp_2op_i_p_zds<0b000, "fadd", sve_fpimm_half_one>;
+ defm FSUB_ZPmI : sve_fp_2op_i_p_zds<0b001, "fsub", sve_fpimm_half_one>;
+ defm FMUL_ZPmI : sve_fp_2op_i_p_zds<0b010, "fmul", sve_fpimm_half_two>;
+ defm FSUBR_ZPmI : sve_fp_2op_i_p_zds<0b011, "fsubr", sve_fpimm_half_one>;
+ defm FMAXNM_ZPmI : sve_fp_2op_i_p_zds<0b100, "fmaxnm", sve_fpimm_zero_one>;
+ defm FMINNM_ZPmI : sve_fp_2op_i_p_zds<0b101, "fminnm", sve_fpimm_zero_one>;
+ defm FMAX_ZPmI : sve_fp_2op_i_p_zds<0b110, "fmax", sve_fpimm_zero_one>;
+ defm FMIN_ZPmI : sve_fp_2op_i_p_zds<0b111, "fmin", sve_fpimm_zero_one>;
+
+ defm FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd">;
+ defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub">;
+ defm FMUL_ZPmZ : sve_fp_2op_p_zds<0b0010, "fmul">;
+ defm FSUBR_ZPmZ : sve_fp_2op_p_zds<0b0011, "fsubr">;
+ defm FMAXNM_ZPmZ : sve_fp_2op_p_zds<0b0100, "fmaxnm">;
+ defm FMINNM_ZPmZ : sve_fp_2op_p_zds<0b0101, "fminnm">;
+ defm FMAX_ZPmZ : sve_fp_2op_p_zds<0b0110, "fmax">;
+ defm FMIN_ZPmZ : sve_fp_2op_p_zds<0b0111, "fmin">;
+ defm FABD_ZPmZ : sve_fp_2op_p_zds<0b1000, "fabd">;
+ defm FSCALE_ZPmZ : sve_fp_2op_p_zds<0b1001, "fscale">;
+ defm FMULX_ZPmZ : sve_fp_2op_p_zds<0b1010, "fmulx">;
+ defm FDIVR_ZPmZ : sve_fp_2op_p_zds<0b1100, "fdivr">;
+ defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv">;
+
+ defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd">;
+ defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub">;
+ defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul">;
+ defm FTSMUL_ZZZ : sve_fp_3op_u_zd<0b011, "ftsmul">;
+ defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps">;
+ defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts">;
+
+ defm FTSSEL_ZZZ : sve_int_bin_cons_misc_0_b<"ftssel">;
+
+ defm FCADD_ZPmZ : sve_fp_fcadd<"fcadd">;
+ defm FCMLA_ZPmZZ : sve_fp_fcmla<"fcmla">;
+
+ defm FMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b00, "fmla">;
+ defm FMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b01, "fmls">;
+ defm FNMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b10, "fnmla">;
+ defm FNMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b11, "fnmls">;
+
+ defm FMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b00, "fmad">;
+ defm FMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b01, "fmsb">;
+ defm FNMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b10, "fnmad">;
+ defm FNMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b11, "fnmsb">;
+
+ defm FTMAD_ZZI : sve_fp_ftmad<"ftmad">;
+
+ defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b0, "fmla">;
+ defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b1, "fmls">;
+
+ defm FCMLA_ZZZI : sve_fp_fcmla_by_indexed_elem<"fcmla">;
+ defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul">;
+
+ // SVE floating point reductions.
+ defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda">;
+ defm FADDV_VPZ : sve_fp_fast_red<0b000, "faddv">;
+ defm FMAXNMV_VPZ : sve_fp_fast_red<0b100, "fmaxnmv">;
+ defm FMINNMV_VPZ : sve_fp_fast_red<0b101, "fminnmv">;
+ defm FMAXV_VPZ : sve_fp_fast_red<0b110, "fmaxv">;
+ defm FMINV_VPZ : sve_fp_fast_red<0b111, "fminv">;
+
+ // Splat immediate (unpredicated)
+ defm DUP_ZI : sve_int_dup_imm<"dup">;
+ defm FDUP_ZI : sve_int_dup_fpimm<"fdup">;
+ defm DUPM_ZI : sve_int_dup_mask_imm<"dupm">;
+
+ // Splat immediate (predicated)
+ defm CPY_ZPmI : sve_int_dup_imm_pred_merge<"cpy">;
+ defm CPY_ZPzI : sve_int_dup_imm_pred_zero<"cpy">;
+ defm FCPY_ZPmI : sve_int_dup_fpimm_pred<"fcpy">;
+
+ // Splat scalar register (unpredicated, GPR or vector + element index)
+ defm DUP_ZR : sve_int_perm_dup_r<"dup">;
+ defm DUP_ZZI : sve_int_perm_dup_i<"dup">;
+
+ // Splat scalar register (predicated)
+ defm CPY_ZPmR : sve_int_perm_cpy_r<"cpy">;
+ defm CPY_ZPmV : sve_int_perm_cpy_v<"cpy">;
+
+ // Select elements from either vector (predicated)
+ defm SEL_ZPZZ : sve_int_sel_vvv<"sel">;
+
+ defm SPLICE_ZPZ : sve_int_perm_splice<"splice">;
+ defm COMPACT_ZPZ : sve_int_perm_compact<"compact">;
+ defm INSR_ZR : sve_int_perm_insrs<"insr">;
+ defm INSR_ZV : sve_int_perm_insrv<"insr">;
+ def EXT_ZZI : sve_int_perm_extract_i<"ext">;
+
+ defm RBIT_ZPmZ : sve_int_perm_rev_rbit<"rbit">;
+ defm REVB_ZPmZ : sve_int_perm_rev_revb<"revb">;
+ defm REVH_ZPmZ : sve_int_perm_rev_revh<"revh">;
+ defm REVW_ZPmZ : sve_int_perm_rev_revw<"revw">;
+
+ defm REV_PP : sve_int_perm_reverse_p<"rev">;
+ defm REV_ZZ : sve_int_perm_reverse_z<"rev">;
+
+ defm SUNPKLO_ZZ : sve_int_perm_unpk<0b00, "sunpklo">;
+ defm SUNPKHI_ZZ : sve_int_perm_unpk<0b01, "sunpkhi">;
+ defm UUNPKLO_ZZ : sve_int_perm_unpk<0b10, "uunpklo">;
+ defm UUNPKHI_ZZ : sve_int_perm_unpk<0b11, "uunpkhi">;
+
+ def PUNPKLO_PP : sve_int_perm_punpk<0b0, "punpklo">;
+ def PUNPKHI_PP : sve_int_perm_punpk<0b1, "punpkhi">;
+
+ defm MOVPRFX_ZPzZ : sve_int_movprfx_pred_zero<0b000, "movprfx">;
+ defm MOVPRFX_ZPmZ : sve_int_movprfx_pred_merge<0b001, "movprfx">;
+ def MOVPRFX_ZZ : sve_int_bin_cons_misc_0_c<0b00000001, "movprfx", ZPRAny>;
+ def FEXPA_ZZ_H : sve_int_bin_cons_misc_0_c<0b01000000, "fexpa", ZPR16>;
+ def FEXPA_ZZ_S : sve_int_bin_cons_misc_0_c<0b10000000, "fexpa", ZPR32>;
+ def FEXPA_ZZ_D : sve_int_bin_cons_misc_0_c<0b11000000, "fexpa", ZPR64>;
+
+ def BRKPA_PPzPP : sve_int_brkp<0b00, "brkpa">;
+ def BRKPAS_PPzPP : sve_int_brkp<0b10, "brkpas">;
+ def BRKPB_PPzPP : sve_int_brkp<0b01, "brkpb">;
+ def BRKPBS_PPzPP : sve_int_brkp<0b11, "brkpbs">;
+
+ def BRKN_PPzP : sve_int_brkn<0b0, "brkn">;
+ def BRKNS_PPzP : sve_int_brkn<0b1, "brkns">;
+
+ defm BRKA_PPzP : sve_int_break_z<0b000, "brka">;
+ defm BRKA_PPmP : sve_int_break_m<0b001, "brka">;
+ defm BRKAS_PPzP : sve_int_break_z<0b010, "brkas">;
+ defm BRKB_PPzP : sve_int_break_z<0b100, "brkb">;
+ defm BRKB_PPmP : sve_int_break_m<0b101, "brkb">;
+ defm BRKBS_PPzP : sve_int_break_z<0b110, "brkbs">;
+
+ def PTEST_PP : sve_int_ptest<0b010000, "ptest">;
+ def PFALSE : sve_int_pfalse<0b000000, "pfalse">;
+ defm PFIRST : sve_int_pfirst<0b00000, "pfirst">;
+ defm PNEXT : sve_int_pnext<0b00110, "pnext">;
+
+ def AND_PPzPP : sve_int_pred_log<0b0000, "and">;
+ def BIC_PPzPP : sve_int_pred_log<0b0001, "bic">;
+ def EOR_PPzPP : sve_int_pred_log<0b0010, "eor">;
+ def SEL_PPPP : sve_int_pred_log<0b0011, "sel">;
+ def ANDS_PPzPP : sve_int_pred_log<0b0100, "ands">;
+ def BICS_PPzPP : sve_int_pred_log<0b0101, "bics">;
+ def EORS_PPzPP : sve_int_pred_log<0b0110, "eors">;
+ def ORR_PPzPP : sve_int_pred_log<0b1000, "orr">;
+ def ORN_PPzPP : sve_int_pred_log<0b1001, "orn">;
+ def NOR_PPzPP : sve_int_pred_log<0b1010, "nor">;
+ def NAND_PPzPP : sve_int_pred_log<0b1011, "nand">;
+ def ORRS_PPzPP : sve_int_pred_log<0b1100, "orrs">;
+ def ORNS_PPzPP : sve_int_pred_log<0b1101, "orns">;
+ def NORS_PPzPP : sve_int_pred_log<0b1110, "nors">;
+ def NANDS_PPzPP : sve_int_pred_log<0b1111, "nands">;
+
+ defm CLASTA_RPZ : sve_int_perm_clast_rz<0, "clasta">;
+ defm CLASTB_RPZ : sve_int_perm_clast_rz<1, "clastb">;
+ defm CLASTA_VPZ : sve_int_perm_clast_vz<0, "clasta">;
+ defm CLASTB_VPZ : sve_int_perm_clast_vz<1, "clastb">;
+ defm CLASTA_ZPZ : sve_int_perm_clast_zz<0, "clasta">;
+ defm CLASTB_ZPZ : sve_int_perm_clast_zz<1, "clastb">;
+
+ defm LASTA_RPZ : sve_int_perm_last_r<0, "lasta">;
+ defm LASTB_RPZ : sve_int_perm_last_r<1, "lastb">;
+ defm LASTA_VPZ : sve_int_perm_last_v<0, "lasta">;
+ defm LASTB_VPZ : sve_int_perm_last_v<1, "lastb">;
+
+  // Contiguous load with reg+immediate addressing, e.g.
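+  //   ld1b z0.b, p0/z, [x0, #1, mul vl]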
+ defm LD1B_IMM : sve_mem_cld_si<0b0000, "ld1b", Z_b, ZPR8>;
+ defm LD1B_H_IMM : sve_mem_cld_si<0b0001, "ld1b", Z_h, ZPR16>;
+ defm LD1B_S_IMM : sve_mem_cld_si<0b0010, "ld1b", Z_s, ZPR32>;
+ defm LD1B_D_IMM : sve_mem_cld_si<0b0011, "ld1b", Z_d, ZPR64>;
+ defm LD1SW_D_IMM : sve_mem_cld_si<0b0100, "ld1sw", Z_d, ZPR64>;
+ defm LD1H_IMM : sve_mem_cld_si<0b0101, "ld1h", Z_h, ZPR16>;
+ defm LD1H_S_IMM : sve_mem_cld_si<0b0110, "ld1h", Z_s, ZPR32>;
+ defm LD1H_D_IMM : sve_mem_cld_si<0b0111, "ld1h", Z_d, ZPR64>;
+ defm LD1SH_D_IMM : sve_mem_cld_si<0b1000, "ld1sh", Z_d, ZPR64>;
+ defm LD1SH_S_IMM : sve_mem_cld_si<0b1001, "ld1sh", Z_s, ZPR32>;
+ defm LD1W_IMM : sve_mem_cld_si<0b1010, "ld1w", Z_s, ZPR32>;
+ defm LD1W_D_IMM : sve_mem_cld_si<0b1011, "ld1w", Z_d, ZPR64>;
+ defm LD1SB_D_IMM : sve_mem_cld_si<0b1100, "ld1sb", Z_d, ZPR64>;
+ defm LD1SB_S_IMM : sve_mem_cld_si<0b1101, "ld1sb", Z_s, ZPR32>;
+ defm LD1SB_H_IMM : sve_mem_cld_si<0b1110, "ld1sb", Z_h, ZPR16>;
+ defm LD1D_IMM : sve_mem_cld_si<0b1111, "ld1d", Z_d, ZPR64>;
+
+ // LD1R loads (splat scalar to vector)
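+  // e.g. ld1rw z0.s, p0/z, [x0, #4]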
+ defm LD1RB_IMM : sve_mem_ld_dup<0b00, 0b00, "ld1rb", Z_b, ZPR8, uimm6s1>;
+ defm LD1RB_H_IMM : sve_mem_ld_dup<0b00, 0b01, "ld1rb", Z_h, ZPR16, uimm6s1>;
+ defm LD1RB_S_IMM : sve_mem_ld_dup<0b00, 0b10, "ld1rb", Z_s, ZPR32, uimm6s1>;
+ defm LD1RB_D_IMM : sve_mem_ld_dup<0b00, 0b11, "ld1rb", Z_d, ZPR64, uimm6s1>;
+ defm LD1RSW_IMM : sve_mem_ld_dup<0b01, 0b00, "ld1rsw", Z_d, ZPR64, uimm6s4>;
+ defm LD1RH_IMM : sve_mem_ld_dup<0b01, 0b01, "ld1rh", Z_h, ZPR16, uimm6s2>;
+ defm LD1RH_S_IMM : sve_mem_ld_dup<0b01, 0b10, "ld1rh", Z_s, ZPR32, uimm6s2>;
+ defm LD1RH_D_IMM : sve_mem_ld_dup<0b01, 0b11, "ld1rh", Z_d, ZPR64, uimm6s2>;
+ defm LD1RSH_D_IMM : sve_mem_ld_dup<0b10, 0b00, "ld1rsh", Z_d, ZPR64, uimm6s2>;
+ defm LD1RSH_S_IMM : sve_mem_ld_dup<0b10, 0b01, "ld1rsh", Z_s, ZPR32, uimm6s2>;
+ defm LD1RW_IMM : sve_mem_ld_dup<0b10, 0b10, "ld1rw", Z_s, ZPR32, uimm6s4>;
+ defm LD1RW_D_IMM : sve_mem_ld_dup<0b10, 0b11, "ld1rw", Z_d, ZPR64, uimm6s4>;
+ defm LD1RSB_D_IMM : sve_mem_ld_dup<0b11, 0b00, "ld1rsb", Z_d, ZPR64, uimm6s1>;
+ defm LD1RSB_S_IMM : sve_mem_ld_dup<0b11, 0b01, "ld1rsb", Z_s, ZPR32, uimm6s1>;
+ defm LD1RSB_H_IMM : sve_mem_ld_dup<0b11, 0b10, "ld1rsb", Z_h, ZPR16, uimm6s1>;
+ defm LD1RD_IMM : sve_mem_ld_dup<0b11, 0b11, "ld1rd", Z_d, ZPR64, uimm6s8>;
+
+ // LD1RQ loads (load quadword-vector and splat to scalable vector)
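+  // e.g. ld1rqd z0.d, p0/z, [x0, #16]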
+ defm LD1RQ_B_IMM : sve_mem_ldqr_si<0b00, "ld1rqb", Z_b, ZPR8>;
+ defm LD1RQ_H_IMM : sve_mem_ldqr_si<0b01, "ld1rqh", Z_h, ZPR16>;
+ defm LD1RQ_W_IMM : sve_mem_ldqr_si<0b10, "ld1rqw", Z_s, ZPR32>;
+ defm LD1RQ_D_IMM : sve_mem_ldqr_si<0b11, "ld1rqd", Z_d, ZPR64>;
+ defm LD1RQ_B : sve_mem_ldqr_ss<0b00, "ld1rqb", Z_b, ZPR8, GPR64NoXZRshifted8>;
+ defm LD1RQ_H : sve_mem_ldqr_ss<0b01, "ld1rqh", Z_h, ZPR16, GPR64NoXZRshifted16>;
+ defm LD1RQ_W : sve_mem_ldqr_ss<0b10, "ld1rqw", Z_s, ZPR32, GPR64NoXZRshifted32>;
+ defm LD1RQ_D : sve_mem_ldqr_ss<0b11, "ld1rqd", Z_d, ZPR64, GPR64NoXZRshifted64>;
+
+  // Contiguous load with reg+reg addressing, e.g.
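+  //   ld1h z0.h, p0/z, [x0, x1, lsl #1]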
+ defm LD1B : sve_mem_cld_ss<0b0000, "ld1b", Z_b, ZPR8, GPR64NoXZRshifted8>;
+ defm LD1B_H : sve_mem_cld_ss<0b0001, "ld1b", Z_h, ZPR16, GPR64NoXZRshifted8>;
+ defm LD1B_S : sve_mem_cld_ss<0b0010, "ld1b", Z_s, ZPR32, GPR64NoXZRshifted8>;
+ defm LD1B_D : sve_mem_cld_ss<0b0011, "ld1b", Z_d, ZPR64, GPR64NoXZRshifted8>;
+ defm LD1SW_D : sve_mem_cld_ss<0b0100, "ld1sw", Z_d, ZPR64, GPR64NoXZRshifted32>;
+ defm LD1H : sve_mem_cld_ss<0b0101, "ld1h", Z_h, ZPR16, GPR64NoXZRshifted16>;
+ defm LD1H_S : sve_mem_cld_ss<0b0110, "ld1h", Z_s, ZPR32, GPR64NoXZRshifted16>;
+ defm LD1H_D : sve_mem_cld_ss<0b0111, "ld1h", Z_d, ZPR64, GPR64NoXZRshifted16>;
+ defm LD1SH_D : sve_mem_cld_ss<0b1000, "ld1sh", Z_d, ZPR64, GPR64NoXZRshifted16>;
+ defm LD1SH_S : sve_mem_cld_ss<0b1001, "ld1sh", Z_s, ZPR32, GPR64NoXZRshifted16>;
+ defm LD1W : sve_mem_cld_ss<0b1010, "ld1w", Z_s, ZPR32, GPR64NoXZRshifted32>;
+ defm LD1W_D : sve_mem_cld_ss<0b1011, "ld1w", Z_d, ZPR64, GPR64NoXZRshifted32>;
+ defm LD1SB_D : sve_mem_cld_ss<0b1100, "ld1sb", Z_d, ZPR64, GPR64NoXZRshifted8>;
+ defm LD1SB_S : sve_mem_cld_ss<0b1101, "ld1sb", Z_s, ZPR32, GPR64NoXZRshifted8>;
+ defm LD1SB_H : sve_mem_cld_ss<0b1110, "ld1sb", Z_h, ZPR16, GPR64NoXZRshifted8>;
+ defm LD1D : sve_mem_cld_ss<0b1111, "ld1d", Z_d, ZPR64, GPR64NoXZRshifted64>;
+
+  // Non-faulting contiguous load with reg+immediate, e.g.
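+  //   ldnf1w z0.s, p0/z, [x0, #3, mul vl]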
+ defm LDNF1B_IMM : sve_mem_cldnf_si<0b0000, "ldnf1b", Z_b, ZPR8>;
+ defm LDNF1B_H_IMM : sve_mem_cldnf_si<0b0001, "ldnf1b", Z_h, ZPR16>;
+ defm LDNF1B_S_IMM : sve_mem_cldnf_si<0b0010, "ldnf1b", Z_s, ZPR32>;
+ defm LDNF1B_D_IMM : sve_mem_cldnf_si<0b0011, "ldnf1b", Z_d, ZPR64>;
+ defm LDNF1SW_D_IMM : sve_mem_cldnf_si<0b0100, "ldnf1sw", Z_d, ZPR64>;
+ defm LDNF1H_IMM : sve_mem_cldnf_si<0b0101, "ldnf1h", Z_h, ZPR16>;
+ defm LDNF1H_S_IMM : sve_mem_cldnf_si<0b0110, "ldnf1h", Z_s, ZPR32>;
+ defm LDNF1H_D_IMM : sve_mem_cldnf_si<0b0111, "ldnf1h", Z_d, ZPR64>;
+ defm LDNF1SH_D_IMM : sve_mem_cldnf_si<0b1000, "ldnf1sh", Z_d, ZPR64>;
+ defm LDNF1SH_S_IMM : sve_mem_cldnf_si<0b1001, "ldnf1sh", Z_s, ZPR32>;
+ defm LDNF1W_IMM : sve_mem_cldnf_si<0b1010, "ldnf1w", Z_s, ZPR32>;
+ defm LDNF1W_D_IMM : sve_mem_cldnf_si<0b1011, "ldnf1w", Z_d, ZPR64>;
+ defm LDNF1SB_D_IMM : sve_mem_cldnf_si<0b1100, "ldnf1sb", Z_d, ZPR64>;
+ defm LDNF1SB_S_IMM : sve_mem_cldnf_si<0b1101, "ldnf1sb", Z_s, ZPR32>;
+ defm LDNF1SB_H_IMM : sve_mem_cldnf_si<0b1110, "ldnf1sb", Z_h, ZPR16>;
+ defm LDNF1D_IMM : sve_mem_cldnf_si<0b1111, "ldnf1d", Z_d, ZPR64>;
+
+ // First-faulting loads with reg+reg addressing.
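+  // e.g. ldff1d z0.d, p0/z, [x0, x1, lsl #3]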
+ defm LDFF1B : sve_mem_cldff_ss<0b0000, "ldff1b", Z_b, ZPR8, GPR64shifted8>;
+ defm LDFF1B_H : sve_mem_cldff_ss<0b0001, "ldff1b", Z_h, ZPR16, GPR64shifted8>;
+ defm LDFF1B_S : sve_mem_cldff_ss<0b0010, "ldff1b", Z_s, ZPR32, GPR64shifted8>;
+ defm LDFF1B_D : sve_mem_cldff_ss<0b0011, "ldff1b", Z_d, ZPR64, GPR64shifted8>;
+ defm LDFF1SW_D : sve_mem_cldff_ss<0b0100, "ldff1sw", Z_d, ZPR64, GPR64shifted32>;
+ defm LDFF1H : sve_mem_cldff_ss<0b0101, "ldff1h", Z_h, ZPR16, GPR64shifted16>;
+ defm LDFF1H_S : sve_mem_cldff_ss<0b0110, "ldff1h", Z_s, ZPR32, GPR64shifted16>;
+ defm LDFF1H_D : sve_mem_cldff_ss<0b0111, "ldff1h", Z_d, ZPR64, GPR64shifted16>;
+ defm LDFF1SH_D : sve_mem_cldff_ss<0b1000, "ldff1sh", Z_d, ZPR64, GPR64shifted16>;
+ defm LDFF1SH_S : sve_mem_cldff_ss<0b1001, "ldff1sh", Z_s, ZPR32, GPR64shifted16>;
+ defm LDFF1W : sve_mem_cldff_ss<0b1010, "ldff1w", Z_s, ZPR32, GPR64shifted32>;
+ defm LDFF1W_D : sve_mem_cldff_ss<0b1011, "ldff1w", Z_d, ZPR64, GPR64shifted32>;
+ defm LDFF1SB_D : sve_mem_cldff_ss<0b1100, "ldff1sb", Z_d, ZPR64, GPR64shifted8>;
+ defm LDFF1SB_S : sve_mem_cldff_ss<0b1101, "ldff1sb", Z_s, ZPR32, GPR64shifted8>;
+ defm LDFF1SB_H : sve_mem_cldff_ss<0b1110, "ldff1sb", Z_h, ZPR16, GPR64shifted8>;
+ defm LDFF1D : sve_mem_cldff_ss<0b1111, "ldff1d", Z_d, ZPR64, GPR64shifted64>;
+
+ // LD(2|3|4) structured loads with reg+immediate
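+  // e.g. ld2h { z0.h, z1.h }, p0/z, [x0, #2, mul vl]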
+ defm LD2B_IMM : sve_mem_eld_si<0b00, 0b01, ZZ_b, "ld2b", simm4s2>;
+ defm LD3B_IMM : sve_mem_eld_si<0b00, 0b10, ZZZ_b, "ld3b", simm4s3>;
+ defm LD4B_IMM : sve_mem_eld_si<0b00, 0b11, ZZZZ_b, "ld4b", simm4s4>;
+ defm LD2H_IMM : sve_mem_eld_si<0b01, 0b01, ZZ_h, "ld2h", simm4s2>;
+ defm LD3H_IMM : sve_mem_eld_si<0b01, 0b10, ZZZ_h, "ld3h", simm4s3>;
+ defm LD4H_IMM : sve_mem_eld_si<0b01, 0b11, ZZZZ_h, "ld4h", simm4s4>;
+ defm LD2W_IMM : sve_mem_eld_si<0b10, 0b01, ZZ_s, "ld2w", simm4s2>;
+ defm LD3W_IMM : sve_mem_eld_si<0b10, 0b10, ZZZ_s, "ld3w", simm4s3>;
+ defm LD4W_IMM : sve_mem_eld_si<0b10, 0b11, ZZZZ_s, "ld4w", simm4s4>;
+ defm LD2D_IMM : sve_mem_eld_si<0b11, 0b01, ZZ_d, "ld2d", simm4s2>;
+ defm LD3D_IMM : sve_mem_eld_si<0b11, 0b10, ZZZ_d, "ld3d", simm4s3>;
+ defm LD4D_IMM : sve_mem_eld_si<0b11, 0b11, ZZZZ_d, "ld4d", simm4s4>;
+
+ // LD(2|3|4) structured loads (register + register)
+ def LD2B : sve_mem_eld_ss<0b00, 0b01, ZZ_b, "ld2b", GPR64NoXZRshifted8>;
+ def LD3B : sve_mem_eld_ss<0b00, 0b10, ZZZ_b, "ld3b", GPR64NoXZRshifted8>;
+ def LD4B : sve_mem_eld_ss<0b00, 0b11, ZZZZ_b, "ld4b", GPR64NoXZRshifted8>;
+ def LD2H : sve_mem_eld_ss<0b01, 0b01, ZZ_h, "ld2h", GPR64NoXZRshifted16>;
+ def LD3H : sve_mem_eld_ss<0b01, 0b10, ZZZ_h, "ld3h", GPR64NoXZRshifted16>;
+ def LD4H : sve_mem_eld_ss<0b01, 0b11, ZZZZ_h, "ld4h", GPR64NoXZRshifted16>;
+ def LD2W : sve_mem_eld_ss<0b10, 0b01, ZZ_s, "ld2w", GPR64NoXZRshifted32>;
+ def LD3W : sve_mem_eld_ss<0b10, 0b10, ZZZ_s, "ld3w", GPR64NoXZRshifted32>;
+ def LD4W : sve_mem_eld_ss<0b10, 0b11, ZZZZ_s, "ld4w", GPR64NoXZRshifted32>;
+ def LD2D : sve_mem_eld_ss<0b11, 0b01, ZZ_d, "ld2d", GPR64NoXZRshifted64>;
+ def LD3D : sve_mem_eld_ss<0b11, 0b10, ZZZ_d, "ld3d", GPR64NoXZRshifted64>;
+ def LD4D : sve_mem_eld_ss<0b11, 0b11, ZZZZ_d, "ld4d", GPR64NoXZRshifted64>;
+
+ // Gathers using unscaled 32-bit offsets, e.g.
+ // ld1h z0.s, p0/z, [x0, z0.s, uxtw]
+ defm GLD1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0000, "ld1sb", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>;
+ defm GLDFF1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0001, "ldff1sb", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>;
+ defm GLD1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0010, "ld1b", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>;
+ defm GLDFF1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0011, "ldff1b", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>;
+ defm GLD1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0100, "ld1sh", ZPR32ExtSXTW8, ZPR32ExtUXTW8>;
+ defm GLDFF1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0101, "ldff1sh", ZPR32ExtSXTW8, ZPR32ExtUXTW8>;
+ defm GLD1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0110, "ld1h", ZPR32ExtSXTW8, ZPR32ExtUXTW8>;
+ defm GLDFF1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0111, "ldff1h", ZPR32ExtSXTW8, ZPR32ExtUXTW8>;
+ defm GLD1W : sve_mem_32b_gld_vs_32_unscaled<0b1010, "ld1w", ZPR32ExtSXTW8, ZPR32ExtUXTW8>;
+ defm GLDFF1W : sve_mem_32b_gld_vs_32_unscaled<0b1011, "ldff1w", ZPR32ExtSXTW8, ZPR32ExtUXTW8>;
+
+ // Gathers using scaled 32-bit offsets, e.g.
+ // ld1h z0.s, p0/z, [x0, z0.s, uxtw #1]
+ defm GLD1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0100, "ld1sh", ZPR32ExtSXTW16, ZPR32ExtUXTW16>;
+ defm GLDFF1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0101, "ldff1sh", ZPR32ExtSXTW16, ZPR32ExtUXTW16>;
+ defm GLD1H_S : sve_mem_32b_gld_sv_32_scaled<0b0110, "ld1h", ZPR32ExtSXTW16, ZPR32ExtUXTW16>;
+ defm GLDFF1H_S : sve_mem_32b_gld_sv_32_scaled<0b0111, "ldff1h", ZPR32ExtSXTW16, ZPR32ExtUXTW16>;
+ defm GLD1W : sve_mem_32b_gld_sv_32_scaled<0b1010, "ld1w", ZPR32ExtSXTW32, ZPR32ExtUXTW32>;
+ defm GLDFF1W : sve_mem_32b_gld_sv_32_scaled<0b1011, "ldff1w", ZPR32ExtSXTW32, ZPR32ExtUXTW32>;
+
+  // Gathers using 32-bit pointers with scaled immediate offset, e.g.
+ // ld1h z0.s, p0/z, [z0.s, #16]
+ defm GLD1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0000, "ld1sb", imm0_31>;
+ defm GLDFF1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0001, "ldff1sb", imm0_31>;
+ defm GLD1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0010, "ld1b", imm0_31>;
+ defm GLDFF1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0011, "ldff1b", imm0_31>;
+ defm GLD1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0100, "ld1sh", uimm5s2>;
+ defm GLDFF1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0101, "ldff1sh", uimm5s2>;
+ defm GLD1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0110, "ld1h", uimm5s2>;
+ defm GLDFF1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0111, "ldff1h", uimm5s2>;
+ defm GLD1W : sve_mem_32b_gld_vi_32_ptrs<0b1010, "ld1w", uimm5s4>;
+ defm GLDFF1W : sve_mem_32b_gld_vi_32_ptrs<0b1011, "ldff1w", uimm5s4>;
+
+  // Gathers using 64-bit pointers with scaled immediate offset, e.g.
+ // ld1h z0.d, p0/z, [z0.d, #16]
+ defm GLD1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0000, "ld1sb", imm0_31>;
+ defm GLDFF1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0001, "ldff1sb", imm0_31>;
+ defm GLD1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0010, "ld1b", imm0_31>;
+ defm GLDFF1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0011, "ldff1b", imm0_31>;
+ defm GLD1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0100, "ld1sh", uimm5s2>;
+ defm GLDFF1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0101, "ldff1sh", uimm5s2>;
+ defm GLD1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0110, "ld1h", uimm5s2>;
+ defm GLDFF1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0111, "ldff1h", uimm5s2>;
+ defm GLD1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1000, "ld1sw", uimm5s4>;
+ defm GLDFF1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1001, "ldff1sw", uimm5s4>;
+ defm GLD1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1010, "ld1w", uimm5s4>;
+ defm GLDFF1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1011, "ldff1w", uimm5s4>;
+ defm GLD1D : sve_mem_64b_gld_vi_64_ptrs<0b1110, "ld1d", uimm5s8>;
+ defm GLDFF1D : sve_mem_64b_gld_vi_64_ptrs<0b1111, "ldff1d", uimm5s8>;
+
+ // Gathers using unscaled 64-bit offsets, e.g.
+ // ld1h z0.d, p0/z, [x0, z0.d]
+ defm GLD1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0000, "ld1sb">;
+ defm GLDFF1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0001, "ldff1sb">;
+ defm GLD1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0010, "ld1b">;
+ defm GLDFF1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0011, "ldff1b">;
+ defm GLD1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0100, "ld1sh">;
+ defm GLDFF1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0101, "ldff1sh">;
+ defm GLD1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0110, "ld1h">;
+ defm GLDFF1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0111, "ldff1h">;
+ defm GLD1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1000, "ld1sw">;
+ defm GLDFF1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1001, "ldff1sw">;
+ defm GLD1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1010, "ld1w">;
+ defm GLDFF1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1011, "ldff1w">;
+ defm GLD1D : sve_mem_64b_gld_vs2_64_unscaled<0b1110, "ld1d">;
+ defm GLDFF1D : sve_mem_64b_gld_vs2_64_unscaled<0b1111, "ldff1d">;
+
+ // Gathers using scaled 64-bit offsets, e.g.
+ // ld1h z0.d, p0/z, [x0, z0.d, lsl #1]
+ defm GLD1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0100, "ld1sh", ZPR64ExtLSL16>;
+ defm GLDFF1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0101, "ldff1sh", ZPR64ExtLSL16>;
+ defm GLD1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0110, "ld1h", ZPR64ExtLSL16>;
+ defm GLDFF1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0111, "ldff1h", ZPR64ExtLSL16>;
+ defm GLD1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1000, "ld1sw", ZPR64ExtLSL32>;
+ defm GLDFF1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1001, "ldff1sw", ZPR64ExtLSL32>;
+ defm GLD1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1010, "ld1w", ZPR64ExtLSL32>;
+ defm GLDFF1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1011, "ldff1w", ZPR64ExtLSL32>;
+ defm GLD1D : sve_mem_64b_gld_sv2_64_scaled<0b1110, "ld1d", ZPR64ExtLSL64>;
+ defm GLDFF1D : sve_mem_64b_gld_sv2_64_scaled<0b1111, "ldff1d", ZPR64ExtLSL64>;
+
+  // Gathers using unscaled 32-bit offsets unpacked into 64-bit elements, e.g.
+ // ld1h z0.d, p0/z, [x0, z0.d, uxtw]
+ defm GLD1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0000, "ld1sb", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>;
+ defm GLDFF1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0001, "ldff1sb", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>;
+ defm GLD1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0010, "ld1b", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>;
+ defm GLDFF1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0011, "ldff1b", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>;
+ defm GLD1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0100, "ld1sh", ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
+ defm GLDFF1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0101, "ldff1sh", ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
+ defm GLD1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0110, "ld1h", ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
+ defm GLDFF1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0111, "ldff1h", ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
+ defm GLD1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1000, "ld1sw", ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
+ defm GLDFF1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1001, "ldff1sw", ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
+ defm GLD1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1010, "ld1w", ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
+ defm GLDFF1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1011, "ldff1w", ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
+ defm GLD1D : sve_mem_64b_gld_vs_32_unscaled<0b1110, "ld1d", ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
+ defm GLDFF1D : sve_mem_64b_gld_vs_32_unscaled<0b1111, "ldff1d", ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
+
+  // Gathers using scaled 32-bit offsets unpacked into 64-bit elements, e.g.
+ // ld1h z0.d, p0/z, [x0, z0.d, uxtw #1]
+ defm GLD1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0100, "ld1sh", ZPR64ExtSXTW16, ZPR64ExtUXTW16>;
+  defm GLDFF1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0101, "ldff1sh", ZPR64ExtSXTW16, ZPR64ExtUXTW16>;
+ defm GLD1H_D : sve_mem_64b_gld_sv_32_scaled<0b0110, "ld1h", ZPR64ExtSXTW16, ZPR64ExtUXTW16>;
+ defm GLDFF1H_D : sve_mem_64b_gld_sv_32_scaled<0b0111, "ldff1h", ZPR64ExtSXTW16, ZPR64ExtUXTW16>;
+ defm GLD1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1000, "ld1sw", ZPR64ExtSXTW32, ZPR64ExtUXTW32>;
+  defm GLDFF1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1001, "ldff1sw", ZPR64ExtSXTW32, ZPR64ExtUXTW32>;
+ defm GLD1W_D : sve_mem_64b_gld_sv_32_scaled<0b1010, "ld1w", ZPR64ExtSXTW32, ZPR64ExtUXTW32>;
+ defm GLDFF1W_D : sve_mem_64b_gld_sv_32_scaled<0b1011, "ldff1w", ZPR64ExtSXTW32, ZPR64ExtUXTW32>;
+ defm GLD1D : sve_mem_64b_gld_sv_32_scaled<0b1110, "ld1d", ZPR64ExtSXTW64, ZPR64ExtUXTW64>;
+ defm GLDFF1D : sve_mem_64b_gld_sv_32_scaled<0b1111, "ldff1d", ZPR64ExtSXTW64, ZPR64ExtUXTW64>;
+
+ // Non-temporal contiguous loads (register + immediate)
+ defm LDNT1B_ZRI : sve_mem_cldnt_si<0b00, "ldnt1b", Z_b, ZPR8>;
+ defm LDNT1H_ZRI : sve_mem_cldnt_si<0b01, "ldnt1h", Z_h, ZPR16>;
+ defm LDNT1W_ZRI : sve_mem_cldnt_si<0b10, "ldnt1w", Z_s, ZPR32>;
+ defm LDNT1D_ZRI : sve_mem_cldnt_si<0b11, "ldnt1d", Z_d, ZPR64>;
+
+ // Non-temporal contiguous loads (register + register)
+ defm LDNT1B_ZRR : sve_mem_cldnt_ss<0b00, "ldnt1b", Z_b, ZPR8, GPR64NoXZRshifted8>;
+ defm LDNT1H_ZRR : sve_mem_cldnt_ss<0b01, "ldnt1h", Z_h, ZPR16, GPR64NoXZRshifted16>;
+ defm LDNT1W_ZRR : sve_mem_cldnt_ss<0b10, "ldnt1w", Z_s, ZPR32, GPR64NoXZRshifted32>;
+ defm LDNT1D_ZRR : sve_mem_cldnt_ss<0b11, "ldnt1d", Z_d, ZPR64, GPR64NoXZRshifted64>;
+
+ // contiguous store with immediates
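+  // e.g. st1w z0.s, p0, [x0, #2, mul vl]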
+ defm ST1B_IMM : sve_mem_cst_si<0b00, 0b00, "st1b", Z_b, ZPR8>;
+ defm ST1B_H_IMM : sve_mem_cst_si<0b00, 0b01, "st1b", Z_h, ZPR16>;
+ defm ST1B_S_IMM : sve_mem_cst_si<0b00, 0b10, "st1b", Z_s, ZPR32>;
+ defm ST1B_D_IMM : sve_mem_cst_si<0b00, 0b11, "st1b", Z_d, ZPR64>;
+ defm ST1H_IMM : sve_mem_cst_si<0b01, 0b01, "st1h", Z_h, ZPR16>;
+ defm ST1H_S_IMM : sve_mem_cst_si<0b01, 0b10, "st1h", Z_s, ZPR32>;
+ defm ST1H_D_IMM : sve_mem_cst_si<0b01, 0b11, "st1h", Z_d, ZPR64>;
+ defm ST1W_IMM : sve_mem_cst_si<0b10, 0b10, "st1w", Z_s, ZPR32>;
+ defm ST1W_D_IMM : sve_mem_cst_si<0b10, 0b11, "st1w", Z_d, ZPR64>;
+ defm ST1D_IMM : sve_mem_cst_si<0b11, 0b11, "st1d", Z_d, ZPR64>;
+
+ // contiguous store with reg+reg addressing.
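+  // e.g. st1d z0.d, p0, [x0, x1, lsl #3]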
+ defm ST1B : sve_mem_cst_ss<0b0000, "st1b", Z_b, ZPR8, GPR64NoXZRshifted8>;
+ defm ST1B_H : sve_mem_cst_ss<0b0001, "st1b", Z_h, ZPR16, GPR64NoXZRshifted8>;
+ defm ST1B_S : sve_mem_cst_ss<0b0010, "st1b", Z_s, ZPR32, GPR64NoXZRshifted8>;
+ defm ST1B_D : sve_mem_cst_ss<0b0011, "st1b", Z_d, ZPR64, GPR64NoXZRshifted8>;
+ defm ST1H : sve_mem_cst_ss<0b0101, "st1h", Z_h, ZPR16, GPR64NoXZRshifted16>;
+ defm ST1H_S : sve_mem_cst_ss<0b0110, "st1h", Z_s, ZPR32, GPR64NoXZRshifted16>;
+ defm ST1H_D : sve_mem_cst_ss<0b0111, "st1h", Z_d, ZPR64, GPR64NoXZRshifted16>;
+ defm ST1W : sve_mem_cst_ss<0b1010, "st1w", Z_s, ZPR32, GPR64NoXZRshifted32>;
+ defm ST1W_D : sve_mem_cst_ss<0b1011, "st1w", Z_d, ZPR64, GPR64NoXZRshifted32>;
+ defm ST1D : sve_mem_cst_ss<0b1111, "st1d", Z_d, ZPR64, GPR64NoXZRshifted64>;
+
+ // Scatters using unscaled 32-bit offsets, e.g.
+ // st1h z0.s, p0, [x0, z0.s, uxtw]
+ // and unpacked:
+ // st1h z0.d, p0, [x0, z0.d, uxtw]
+ defm SST1B_D : sve_mem_sst_sv_32_unscaled<0b000, "st1b", Z_d, ZPR64, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>;
+ defm SST1B_S : sve_mem_sst_sv_32_unscaled<0b001, "st1b", Z_s, ZPR32, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>;
+ defm SST1H_D : sve_mem_sst_sv_32_unscaled<0b010, "st1h", Z_d, ZPR64, ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
+ defm SST1H_S : sve_mem_sst_sv_32_unscaled<0b011, "st1h", Z_s, ZPR32, ZPR32ExtSXTW8, ZPR32ExtUXTW8>;
+ defm SST1W_D : sve_mem_sst_sv_32_unscaled<0b100, "st1w", Z_d, ZPR64, ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
+ defm SST1W : sve_mem_sst_sv_32_unscaled<0b101, "st1w", Z_s, ZPR32, ZPR32ExtSXTW8, ZPR32ExtUXTW8>;
+ defm SST1D : sve_mem_sst_sv_32_unscaled<0b110, "st1d", Z_d, ZPR64, ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
+
+ // Scatters using scaled 32-bit offsets, e.g.
+ // st1h z0.s, p0, [x0, z0.s, uxtw #1]
+ // and unpacked:
+ // st1h z0.d, p0, [x0, z0.d, uxtw #1]
+ defm SST1H_D : sve_mem_sst_sv_32_scaled<0b010, "st1h", Z_d, ZPR64, ZPR64ExtSXTW16, ZPR64ExtUXTW16>;
+ defm SST1H_S : sve_mem_sst_sv_32_scaled<0b011, "st1h", Z_s, ZPR32, ZPR32ExtSXTW16, ZPR32ExtUXTW16>;
+ defm SST1W_D : sve_mem_sst_sv_32_scaled<0b100, "st1w", Z_d, ZPR64, ZPR64ExtSXTW32, ZPR64ExtUXTW32>;
+ defm SST1W : sve_mem_sst_sv_32_scaled<0b101, "st1w", Z_s, ZPR32, ZPR32ExtSXTW32, ZPR32ExtUXTW32>;
+ defm SST1D : sve_mem_sst_sv_32_scaled<0b110, "st1d", Z_d, ZPR64, ZPR64ExtSXTW64, ZPR64ExtUXTW64>;
+
+ // Scatters using 32/64-bit pointers with offset, e.g.
+ // st1h z0.s, p0, [z0.s, #16]
+ // st1h z0.d, p0, [z0.d, #16]
+ defm SST1B_D : sve_mem_sst_vi_ptrs<0b000, "st1b", Z_d, ZPR64, imm0_31>;
+ defm SST1B_S : sve_mem_sst_vi_ptrs<0b001, "st1b", Z_s, ZPR32, imm0_31>;
+ defm SST1H_D : sve_mem_sst_vi_ptrs<0b010, "st1h", Z_d, ZPR64, uimm5s2>;
+ defm SST1H_S : sve_mem_sst_vi_ptrs<0b011, "st1h", Z_s, ZPR32, uimm5s2>;
+ defm SST1W_D : sve_mem_sst_vi_ptrs<0b100, "st1w", Z_d, ZPR64, uimm5s4>;
+ defm SST1W : sve_mem_sst_vi_ptrs<0b101, "st1w", Z_s, ZPR32, uimm5s4>;
+ defm SST1D : sve_mem_sst_vi_ptrs<0b110, "st1d", Z_d, ZPR64, uimm5s8>;
+
+ // Scatters using unscaled 64-bit offsets, e.g.
+ // st1h z0.d, p0, [x0, z0.d]
+ defm SST1B_D : sve_mem_sst_sv_64_unscaled<0b00, "st1b">;
+ defm SST1H_D : sve_mem_sst_sv_64_unscaled<0b01, "st1h">;
+ defm SST1W_D : sve_mem_sst_sv_64_unscaled<0b10, "st1w">;
+ defm SST1D : sve_mem_sst_sv_64_unscaled<0b11, "st1d">;
+
+ // Scatters using scaled 64-bit offsets, e.g.
+ // st1h z0.d, p0, [x0, z0.d, lsl #1]
+ defm SST1H_D_SCALED : sve_mem_sst_sv_64_scaled<0b01, "st1h", ZPR64ExtLSL16>;
+ defm SST1W_D_SCALED : sve_mem_sst_sv_64_scaled<0b10, "st1w", ZPR64ExtLSL32>;
+ defm SST1D_SCALED : sve_mem_sst_sv_64_scaled<0b11, "st1d", ZPR64ExtLSL64>;
+
+ // ST(2|3|4) structured stores (register + immediate)
+ defm ST2B_IMM : sve_mem_est_si<0b00, 0b01, ZZ_b, "st2b", simm4s2>;
+ defm ST3B_IMM : sve_mem_est_si<0b00, 0b10, ZZZ_b, "st3b", simm4s3>;
+ defm ST4B_IMM : sve_mem_est_si<0b00, 0b11, ZZZZ_b, "st4b", simm4s4>;
+ defm ST2H_IMM : sve_mem_est_si<0b01, 0b01, ZZ_h, "st2h", simm4s2>;
+ defm ST3H_IMM : sve_mem_est_si<0b01, 0b10, ZZZ_h, "st3h", simm4s3>;
+ defm ST4H_IMM : sve_mem_est_si<0b01, 0b11, ZZZZ_h, "st4h", simm4s4>;
+ defm ST2W_IMM : sve_mem_est_si<0b10, 0b01, ZZ_s, "st2w", simm4s2>;
+ defm ST3W_IMM : sve_mem_est_si<0b10, 0b10, ZZZ_s, "st3w", simm4s3>;
+ defm ST4W_IMM : sve_mem_est_si<0b10, 0b11, ZZZZ_s, "st4w", simm4s4>;
+ defm ST2D_IMM : sve_mem_est_si<0b11, 0b01, ZZ_d, "st2d", simm4s2>;
+ defm ST3D_IMM : sve_mem_est_si<0b11, 0b10, ZZZ_d, "st3d", simm4s3>;
+ defm ST4D_IMM : sve_mem_est_si<0b11, 0b11, ZZZZ_d, "st4d", simm4s4>;
+
+ // ST(2|3|4) structured stores (register + register)
+ def ST2B : sve_mem_est_ss<0b00, 0b01, ZZ_b, "st2b", GPR64NoXZRshifted8>;
+ def ST3B : sve_mem_est_ss<0b00, 0b10, ZZZ_b, "st3b", GPR64NoXZRshifted8>;
+ def ST4B : sve_mem_est_ss<0b00, 0b11, ZZZZ_b, "st4b", GPR64NoXZRshifted8>;
+ def ST2H : sve_mem_est_ss<0b01, 0b01, ZZ_h, "st2h", GPR64NoXZRshifted16>;
+ def ST3H : sve_mem_est_ss<0b01, 0b10, ZZZ_h, "st3h", GPR64NoXZRshifted16>;
+ def ST4H : sve_mem_est_ss<0b01, 0b11, ZZZZ_h, "st4h", GPR64NoXZRshifted16>;
+ def ST2W : sve_mem_est_ss<0b10, 0b01, ZZ_s, "st2w", GPR64NoXZRshifted32>;
+ def ST3W : sve_mem_est_ss<0b10, 0b10, ZZZ_s, "st3w", GPR64NoXZRshifted32>;
+ def ST4W : sve_mem_est_ss<0b10, 0b11, ZZZZ_s, "st4w", GPR64NoXZRshifted32>;
+ def ST2D : sve_mem_est_ss<0b11, 0b01, ZZ_d, "st2d", GPR64NoXZRshifted64>;
+ def ST3D : sve_mem_est_ss<0b11, 0b10, ZZZ_d, "st3d", GPR64NoXZRshifted64>;
+ def ST4D : sve_mem_est_ss<0b11, 0b11, ZZZZ_d, "st4d", GPR64NoXZRshifted64>;
+
+ // Non-temporal contiguous stores (register + immediate)
+ defm STNT1B_ZRI : sve_mem_cstnt_si<0b00, "stnt1b", Z_b, ZPR8>;
+ defm STNT1H_ZRI : sve_mem_cstnt_si<0b01, "stnt1h", Z_h, ZPR16>;
+ defm STNT1W_ZRI : sve_mem_cstnt_si<0b10, "stnt1w", Z_s, ZPR32>;
+ defm STNT1D_ZRI : sve_mem_cstnt_si<0b11, "stnt1d", Z_d, ZPR64>;
+
+ // Non-temporal contiguous stores (register + register)
+ defm STNT1B_ZRR : sve_mem_cstnt_ss<0b00, "stnt1b", Z_b, ZPR8, GPR64NoXZRshifted8>;
+ defm STNT1H_ZRR : sve_mem_cstnt_ss<0b01, "stnt1h", Z_h, ZPR16, GPR64NoXZRshifted16>;
+ defm STNT1W_ZRR : sve_mem_cstnt_ss<0b10, "stnt1w", Z_s, ZPR32, GPR64NoXZRshifted32>;
+ defm STNT1D_ZRR : sve_mem_cstnt_ss<0b11, "stnt1d", Z_d, ZPR64, GPR64NoXZRshifted64>;
+
+ // Fill/Spill
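+  // e.g. str z0, [x0, #1, mul vl] / ldr p0, [x0, #7, mul vl]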
+ defm LDR_ZXI : sve_mem_z_fill<"ldr">;
+ defm LDR_PXI : sve_mem_p_fill<"ldr">;
+ defm STR_ZXI : sve_mem_z_spill<"str">;
+ defm STR_PXI : sve_mem_p_spill<"str">;
+
+ // Contiguous prefetch (register + immediate)
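+  // e.g. prfb pldl1keep, p0, [x0, #1, mul vl]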
+ defm PRFB_PRI : sve_mem_prfm_si<0b00, "prfb">;
+ defm PRFH_PRI : sve_mem_prfm_si<0b01, "prfh">;
+ defm PRFW_PRI : sve_mem_prfm_si<0b10, "prfw">;
+ defm PRFD_PRI : sve_mem_prfm_si<0b11, "prfd">;
+
+ // Contiguous prefetch (register + register)
+ def PRFB_PRR : sve_mem_prfm_ss<0b001, "prfb", GPR64NoXZRshifted8>;
+ def PRFH_PRR : sve_mem_prfm_ss<0b011, "prfh", GPR64NoXZRshifted16>;
+ def PRFS_PRR : sve_mem_prfm_ss<0b101, "prfw", GPR64NoXZRshifted32>;
+ def PRFD_PRR : sve_mem_prfm_ss<0b111, "prfd", GPR64NoXZRshifted64>;
+
+ // Gather prefetch using scaled 32-bit offsets, e.g.
+ // prfh pldl1keep, p0, [x0, z0.s, uxtw #1]
+ defm PRFB_S : sve_mem_32b_prfm_sv_scaled<0b00, "prfb", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>;
+ defm PRFH_S : sve_mem_32b_prfm_sv_scaled<0b01, "prfh", ZPR32ExtSXTW16, ZPR32ExtUXTW16>;
+ defm PRFW_S : sve_mem_32b_prfm_sv_scaled<0b10, "prfw", ZPR32ExtSXTW32, ZPR32ExtUXTW32>;
+ defm PRFD_S : sve_mem_32b_prfm_sv_scaled<0b11, "prfd", ZPR32ExtSXTW64, ZPR32ExtUXTW64>;
+
+ // Gather prefetch using unpacked, scaled 32-bit offsets, e.g.
+ // prfh pldl1keep, p0, [x0, z0.d, uxtw #1]
+ defm PRFB_D : sve_mem_64b_prfm_sv_ext_scaled<0b00, "prfb", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>;
+ defm PRFH_D : sve_mem_64b_prfm_sv_ext_scaled<0b01, "prfh", ZPR64ExtSXTW16, ZPR64ExtUXTW16>;
+ defm PRFW_D : sve_mem_64b_prfm_sv_ext_scaled<0b10, "prfw", ZPR64ExtSXTW32, ZPR64ExtUXTW32>;
+ defm PRFD_D : sve_mem_64b_prfm_sv_ext_scaled<0b11, "prfd", ZPR64ExtSXTW64, ZPR64ExtUXTW64>;
+
+ // Gather prefetch using scaled 64-bit offsets, e.g.
+ // prfh pldl1keep, p0, [x0, z0.d, lsl #1]
+ defm PRFB_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b00, "prfb", ZPR64ExtLSL8>;
+ defm PRFH_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b01, "prfh", ZPR64ExtLSL16>;
+ defm PRFW_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b10, "prfw", ZPR64ExtLSL32>;
+ defm PRFD_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b11, "prfd", ZPR64ExtLSL64>;
+
+ // Gather prefetch using 32/64-bit pointers with offset, e.g.
+ // prfh pldl1keep, p0, [z0.s, #16]
+ // prfh pldl1keep, p0, [z0.d, #16]
+ defm PRFB_S_PZI : sve_mem_32b_prfm_vi<0b00, "prfb", imm0_31>;
+ defm PRFH_S_PZI : sve_mem_32b_prfm_vi<0b01, "prfh", uimm5s2>;
+ defm PRFW_S_PZI : sve_mem_32b_prfm_vi<0b10, "prfw", uimm5s4>;
+ defm PRFD_S_PZI : sve_mem_32b_prfm_vi<0b11, "prfd", uimm5s8>;
+
+ defm PRFB_D_PZI : sve_mem_64b_prfm_vi<0b00, "prfb", imm0_31>;
+ defm PRFH_D_PZI : sve_mem_64b_prfm_vi<0b01, "prfh", uimm5s2>;
+ defm PRFW_D_PZI : sve_mem_64b_prfm_vi<0b10, "prfw", uimm5s4>;
+ defm PRFD_D_PZI : sve_mem_64b_prfm_vi<0b11, "prfd", uimm5s8>;
+
+ defm ADR_SXTW_ZZZ_D : sve_int_bin_cons_misc_0_a_sxtw<0b00, "adr">;
+ defm ADR_UXTW_ZZZ_D : sve_int_bin_cons_misc_0_a_uxtw<0b01, "adr">;
+ defm ADR_LSL_ZZZ_S : sve_int_bin_cons_misc_0_a_32_lsl<0b10, "adr">;
+ defm ADR_LSL_ZZZ_D : sve_int_bin_cons_misc_0_a_64_lsl<0b11, "adr">;
+
+ defm TBL_ZZZ : sve_int_perm_tbl<"tbl">;
+
+ defm ZIP1_ZZZ : sve_int_perm_bin_perm_zz<0b000, "zip1">;
+ defm ZIP2_ZZZ : sve_int_perm_bin_perm_zz<0b001, "zip2">;
+ defm UZP1_ZZZ : sve_int_perm_bin_perm_zz<0b010, "uzp1">;
+ defm UZP2_ZZZ : sve_int_perm_bin_perm_zz<0b011, "uzp2">;
+ defm TRN1_ZZZ : sve_int_perm_bin_perm_zz<0b100, "trn1">;
+ defm TRN2_ZZZ : sve_int_perm_bin_perm_zz<0b101, "trn2">;
+
+ defm ZIP1_PPP : sve_int_perm_bin_perm_pp<0b000, "zip1">;
+ defm ZIP2_PPP : sve_int_perm_bin_perm_pp<0b001, "zip2">;
+ defm UZP1_PPP : sve_int_perm_bin_perm_pp<0b010, "uzp1">;
+ defm UZP2_PPP : sve_int_perm_bin_perm_pp<0b011, "uzp2">;
+ defm TRN1_PPP : sve_int_perm_bin_perm_pp<0b100, "trn1">;
+ defm TRN2_PPP : sve_int_perm_bin_perm_pp<0b101, "trn2">;
+
+ defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs">;
+ defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi">;
+ defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge">;
+ defm CMPGT_PPzZZ : sve_int_cmp_0<0b101, "cmpgt">;
+ defm CMPEQ_PPzZZ : sve_int_cmp_0<0b110, "cmpeq">;
+ defm CMPNE_PPzZZ : sve_int_cmp_0<0b111, "cmpne">;
+
+ defm CMPEQ_WIDE_PPzZZ : sve_int_cmp_0_wide<0b010, "cmpeq">;
+ defm CMPNE_WIDE_PPzZZ : sve_int_cmp_0_wide<0b011, "cmpne">;
+ defm CMPGE_WIDE_PPzZZ : sve_int_cmp_1_wide<0b000, "cmpge">;
+ defm CMPGT_WIDE_PPzZZ : sve_int_cmp_1_wide<0b001, "cmpgt">;
+ defm CMPLT_WIDE_PPzZZ : sve_int_cmp_1_wide<0b010, "cmplt">;
+ defm CMPLE_WIDE_PPzZZ : sve_int_cmp_1_wide<0b011, "cmple">;
+ defm CMPHS_WIDE_PPzZZ : sve_int_cmp_1_wide<0b100, "cmphs">;
+ defm CMPHI_WIDE_PPzZZ : sve_int_cmp_1_wide<0b101, "cmphi">;
+ defm CMPLO_WIDE_PPzZZ : sve_int_cmp_1_wide<0b110, "cmplo">;
+ defm CMPLS_WIDE_PPzZZ : sve_int_cmp_1_wide<0b111, "cmpls">;
+
+ defm CMPGE_PPzZI : sve_int_scmp_vi<0b000, "cmpge">;
+ defm CMPGT_PPzZI : sve_int_scmp_vi<0b001, "cmpgt">;
+ defm CMPLT_PPzZI : sve_int_scmp_vi<0b010, "cmplt">;
+ defm CMPLE_PPzZI : sve_int_scmp_vi<0b011, "cmple">;
+ defm CMPEQ_PPzZI : sve_int_scmp_vi<0b100, "cmpeq">;
+ defm CMPNE_PPzZI : sve_int_scmp_vi<0b101, "cmpne">;
+ defm CMPHS_PPzZI : sve_int_ucmp_vi<0b00, "cmphs">;
+ defm CMPHI_PPzZI : sve_int_ucmp_vi<0b01, "cmphi">;
+ defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo">;
+ defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls">;
+
+ defm FCMGE_PPzZZ : sve_fp_3op_p_pd<0b000, "fcmge">;
+ defm FCMGT_PPzZZ : sve_fp_3op_p_pd<0b001, "fcmgt">;
+ defm FCMEQ_PPzZZ : sve_fp_3op_p_pd<0b010, "fcmeq">;
+ defm FCMNE_PPzZZ : sve_fp_3op_p_pd<0b011, "fcmne">;
+ defm FCMUO_PPzZZ : sve_fp_3op_p_pd<0b100, "fcmuo">;
+ defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge">;
+ defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt">;
+
+ defm FCMGE_PPzZ0 : sve_fp_2op_p_pd<0b000, "fcmge">;
+ defm FCMGT_PPzZ0 : sve_fp_2op_p_pd<0b001, "fcmgt">;
+ defm FCMLT_PPzZ0 : sve_fp_2op_p_pd<0b010, "fcmlt">;
+ defm FCMLE_PPzZ0 : sve_fp_2op_p_pd<0b011, "fcmle">;
+ defm FCMEQ_PPzZ0 : sve_fp_2op_p_pd<0b100, "fcmeq">;
+ defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne">;
+
+ defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt">;
+ defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele">;
+ defm WHILELO_PWW : sve_int_while4_rr<0b110, "whilelo">;
+ defm WHILELS_PWW : sve_int_while4_rr<0b111, "whilels">;
+
+ defm WHILELT_PXX : sve_int_while8_rr<0b010, "whilelt">;
+ defm WHILELE_PXX : sve_int_while8_rr<0b011, "whilele">;
+ defm WHILELO_PXX : sve_int_while8_rr<0b110, "whilelo">;
+ defm WHILELS_PXX : sve_int_while8_rr<0b111, "whilels">;
+
+ def CTERMEQ_WW : sve_int_cterm<0b0, 0b0, "ctermeq", GPR32>;
+ def CTERMNE_WW : sve_int_cterm<0b0, 0b1, "ctermne", GPR32>;
+ def CTERMEQ_XX : sve_int_cterm<0b1, 0b0, "ctermeq", GPR64>;
+ def CTERMNE_XX : sve_int_cterm<0b1, 0b1, "ctermne", GPR64>;
+
+ def RDVLI_XI : sve_int_read_vl_a<0b0, 0b11111, "rdvl">;
+ def ADDVL_XXI : sve_int_arith_vl<0b0, "addvl">;
+ def ADDPL_XXI : sve_int_arith_vl<0b1, "addpl">;
+
+ defm CNTB_XPiI : sve_int_count<0b000, "cntb">;
+ defm CNTH_XPiI : sve_int_count<0b010, "cnth">;
+ defm CNTW_XPiI : sve_int_count<0b100, "cntw">;
+ defm CNTD_XPiI : sve_int_count<0b110, "cntd">;
+ defm CNTP_XPP : sve_int_pcount_pred<0b0000, "cntp">;
+
+ defm INCB_XPiI : sve_int_pred_pattern_a<0b000, "incb">;
+ defm DECB_XPiI : sve_int_pred_pattern_a<0b001, "decb">;
+ defm INCH_XPiI : sve_int_pred_pattern_a<0b010, "inch">;
+ defm DECH_XPiI : sve_int_pred_pattern_a<0b011, "dech">;
+ defm INCW_XPiI : sve_int_pred_pattern_a<0b100, "incw">;
+ defm DECW_XPiI : sve_int_pred_pattern_a<0b101, "decw">;
+ defm INCD_XPiI : sve_int_pred_pattern_a<0b110, "incd">;
+ defm DECD_XPiI : sve_int_pred_pattern_a<0b111, "decd">;
+
+ defm SQINCB_XPiWdI : sve_int_pred_pattern_b_s32<0b00000, "sqincb">;
+ defm UQINCB_WPiI : sve_int_pred_pattern_b_u32<0b00001, "uqincb">;
+ defm SQDECB_XPiWdI : sve_int_pred_pattern_b_s32<0b00010, "sqdecb">;
+ defm UQDECB_WPiI : sve_int_pred_pattern_b_u32<0b00011, "uqdecb">;
+ defm SQINCB_XPiI : sve_int_pred_pattern_b_x64<0b00100, "sqincb">;
+ defm UQINCB_XPiI : sve_int_pred_pattern_b_x64<0b00101, "uqincb">;
+ defm SQDECB_XPiI : sve_int_pred_pattern_b_x64<0b00110, "sqdecb">;
+ defm UQDECB_XPiI : sve_int_pred_pattern_b_x64<0b00111, "uqdecb">;
+
+ defm SQINCH_XPiWdI : sve_int_pred_pattern_b_s32<0b01000, "sqinch">;
+ defm UQINCH_WPiI : sve_int_pred_pattern_b_u32<0b01001, "uqinch">;
+ defm SQDECH_XPiWdI : sve_int_pred_pattern_b_s32<0b01010, "sqdech">;
+ defm UQDECH_WPiI : sve_int_pred_pattern_b_u32<0b01011, "uqdech">;
+ defm SQINCH_XPiI : sve_int_pred_pattern_b_x64<0b01100, "sqinch">;
+ defm UQINCH_XPiI : sve_int_pred_pattern_b_x64<0b01101, "uqinch">;
+ defm SQDECH_XPiI : sve_int_pred_pattern_b_x64<0b01110, "sqdech">;
+ defm UQDECH_XPiI : sve_int_pred_pattern_b_x64<0b01111, "uqdech">;
+
+ defm SQINCW_XPiWdI : sve_int_pred_pattern_b_s32<0b10000, "sqincw">;
+ defm UQINCW_WPiI : sve_int_pred_pattern_b_u32<0b10001, "uqincw">;
+ defm SQDECW_XPiWdI : sve_int_pred_pattern_b_s32<0b10010, "sqdecw">;
+ defm UQDECW_WPiI : sve_int_pred_pattern_b_u32<0b10011, "uqdecw">;
+ defm SQINCW_XPiI : sve_int_pred_pattern_b_x64<0b10100, "sqincw">;
+ defm UQINCW_XPiI : sve_int_pred_pattern_b_x64<0b10101, "uqincw">;
+ defm SQDECW_XPiI : sve_int_pred_pattern_b_x64<0b10110, "sqdecw">;
+ defm UQDECW_XPiI : sve_int_pred_pattern_b_x64<0b10111, "uqdecw">;
+
+ defm SQINCD_XPiWdI : sve_int_pred_pattern_b_s32<0b11000, "sqincd">;
+ defm UQINCD_WPiI : sve_int_pred_pattern_b_u32<0b11001, "uqincd">;
+ defm SQDECD_XPiWdI : sve_int_pred_pattern_b_s32<0b11010, "sqdecd">;
+ defm UQDECD_WPiI : sve_int_pred_pattern_b_u32<0b11011, "uqdecd">;
+ defm SQINCD_XPiI : sve_int_pred_pattern_b_x64<0b11100, "sqincd">;
+ defm UQINCD_XPiI : sve_int_pred_pattern_b_x64<0b11101, "uqincd">;
+ defm SQDECD_XPiI : sve_int_pred_pattern_b_x64<0b11110, "sqdecd">;
+ defm UQDECD_XPiI : sve_int_pred_pattern_b_x64<0b11111, "uqdecd">;
+
+ defm SQINCH_ZPiI : sve_int_countvlv<0b01000, "sqinch", ZPR16>;
+ defm UQINCH_ZPiI : sve_int_countvlv<0b01001, "uqinch", ZPR16>;
+ defm SQDECH_ZPiI : sve_int_countvlv<0b01010, "sqdech", ZPR16>;
+ defm UQDECH_ZPiI : sve_int_countvlv<0b01011, "uqdech", ZPR16>;
+ defm INCH_ZPiI : sve_int_countvlv<0b01100, "inch", ZPR16>;
+ defm DECH_ZPiI : sve_int_countvlv<0b01101, "dech", ZPR16>;
+ defm SQINCW_ZPiI : sve_int_countvlv<0b10000, "sqincw", ZPR32>;
+ defm UQINCW_ZPiI : sve_int_countvlv<0b10001, "uqincw", ZPR32>;
+ defm SQDECW_ZPiI : sve_int_countvlv<0b10010, "sqdecw", ZPR32>;
+ defm UQDECW_ZPiI : sve_int_countvlv<0b10011, "uqdecw", ZPR32>;
+ defm INCW_ZPiI : sve_int_countvlv<0b10100, "incw", ZPR32>;
+ defm DECW_ZPiI : sve_int_countvlv<0b10101, "decw", ZPR32>;
+ defm SQINCD_ZPiI : sve_int_countvlv<0b11000, "sqincd", ZPR64>;
+ defm UQINCD_ZPiI : sve_int_countvlv<0b11001, "uqincd", ZPR64>;
+ defm SQDECD_ZPiI : sve_int_countvlv<0b11010, "sqdecd", ZPR64>;
+ defm UQDECD_ZPiI : sve_int_countvlv<0b11011, "uqdecd", ZPR64>;
+ defm INCD_ZPiI : sve_int_countvlv<0b11100, "incd", ZPR64>;
+ defm DECD_ZPiI : sve_int_countvlv<0b11101, "decd", ZPR64>;
+
+ defm SQINCP_XPWd : sve_int_count_r_s32<0b00000, "sqincp">;
+ defm SQINCP_XP : sve_int_count_r_x64<0b00010, "sqincp">;
+ defm UQINCP_WP : sve_int_count_r_u32<0b00100, "uqincp">;
+ defm UQINCP_XP : sve_int_count_r_x64<0b00110, "uqincp">;
+ defm SQDECP_XPWd : sve_int_count_r_s32<0b01000, "sqdecp">;
+ defm SQDECP_XP : sve_int_count_r_x64<0b01010, "sqdecp">;
+ defm UQDECP_WP : sve_int_count_r_u32<0b01100, "uqdecp">;
+ defm UQDECP_XP : sve_int_count_r_x64<0b01110, "uqdecp">;
+ defm INCP_XP : sve_int_count_r_x64<0b10000, "incp">;
+ defm DECP_XP : sve_int_count_r_x64<0b10100, "decp">;
+
+ defm SQINCP_ZP : sve_int_count_v<0b00000, "sqincp">;
+ defm UQINCP_ZP : sve_int_count_v<0b00100, "uqincp">;
+ defm SQDECP_ZP : sve_int_count_v<0b01000, "sqdecp">;
+ defm UQDECP_ZP : sve_int_count_v<0b01100, "uqdecp">;
+ defm INCP_ZP : sve_int_count_v<0b10000, "incp">;
+ defm DECP_ZP : sve_int_count_v<0b10100, "decp">;
+
+ defm INDEX_RR : sve_int_index_rr<"index">;
+ defm INDEX_IR : sve_int_index_ir<"index">;
+ defm INDEX_RI : sve_int_index_ri<"index">;
+ defm INDEX_II : sve_int_index_ii<"index">;
+
+ // Unpredicated shifts
+ defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr">;
+ defm LSR_ZZI : sve_int_bin_cons_shift_imm_right<0b01, "lsr">;
+ defm LSL_ZZI : sve_int_bin_cons_shift_imm_left< 0b11, "lsl">;
+
+ defm ASR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b00, "asr">;
+ defm LSR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b01, "lsr">;
+ defm LSL_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b11, "lsl">;
+
+ // Predicated shifts
+ defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right<0b000, "asr">;
+ defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right<0b001, "lsr">;
+ defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b011, "lsl">;
+ defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b100, "asrd">;
+
+ defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr">;
+ defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr">;
+ defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl">;
+ defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr">;
+ defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr">;
+ defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr">;
+
+ defm ASR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b000, "asr">;
+ defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr">;
+ defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl">;
+
+ def FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, ElementSizeS>;
+ def FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, ElementSizeS>;
+ def SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16, ElementSizeH>;
+ def SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32, ElementSizeS>;
+ def UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32, ElementSizeS>;
+ def UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16, ElementSizeH>;
+ def FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16, ElementSizeH>;
+ def FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32, ElementSizeS>;
+ def FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16, ElementSizeH>;
+ def FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32, ElementSizeS>;
+ def FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16, ElementSizeD>;
+ def FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64, ElementSizeD>;
+ def FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32, ElementSizeD>;
+ def FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64, ElementSizeD>;
+ def SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64, ElementSizeD>;
+ def UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64, ElementSizeD>;
+ def UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16, ElementSizeS>;
+ def SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32, ElementSizeD>;
+ def SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16, ElementSizeS>;
+ def SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16, ElementSizeD>;
+ def UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32, ElementSizeD>;
+ def UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16, ElementSizeD>;
+ def SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64, ElementSizeD>;
+ def UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64, ElementSizeD>;
+ def FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32, ElementSizeD>;
+ def FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32, ElementSizeD>;
+ def FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64, ElementSizeD>;
+ def FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32, ElementSizeS>;
+ def FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, ElementSizeD>;
+ def FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, ElementSizeS>;
+ def FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, ElementSizeD>;
+ def FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, ElementSizeD>;
+ def FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, ElementSizeD>;
+ def FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, ElementSizeD>;
+
+ defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn">;
+ defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp">;
+ defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm">;
+ defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz">;
+ defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta">;
+ defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx">;
+ defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti">;
+ defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx">;
+ defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt">;
+
+ // InstAliases
+ def : InstAlias<"mov $Zd, $Zn",
+ (ORR_ZZZ ZPR64:$Zd, ZPR64:$Zn, ZPR64:$Zn), 1>;
+ def : InstAlias<"mov $Pd, $Pg/m, $Pn",
+ (SEL_PPPP PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, PPR8:$Pd), 1>;
+ def : InstAlias<"mov $Pd, $Pn",
+ (ORR_PPzPP PPR8:$Pd, PPR8:$Pn, PPR8:$Pn, PPR8:$Pn), 1>;
+ def : InstAlias<"mov $Pd, $Pg/z, $Pn",
+ (AND_PPzPP PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, PPR8:$Pn), 1>;
+
+ def : InstAlias<"movs $Pd, $Pn",
+ (ORRS_PPzPP PPR8:$Pd, PPR8:$Pn, PPR8:$Pn, PPR8:$Pn), 1>;
+ def : InstAlias<"movs $Pd, $Pg/z, $Pn",
+ (ANDS_PPzPP PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, PPR8:$Pn), 1>;
+
+ def : InstAlias<"not $Pd, $Pg/z, $Pn",
+ (EOR_PPzPP PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, PPRAny:$Pg), 1>;
+
+ def : InstAlias<"nots $Pd, $Pg/z, $Pn",
+ (EORS_PPzPP PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, PPRAny:$Pg), 1>;
+
+ def : InstAlias<"cmple $Zd, $Pg/z, $Zm, $Zn",
+ (CMPGE_PPzZZ_B PPR8:$Zd, PPR3bAny:$Pg, ZPR8:$Zn, ZPR8:$Zm), 0>;
+ def : InstAlias<"cmple $Zd, $Pg/z, $Zm, $Zn",
+ (CMPGE_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>;
+ def : InstAlias<"cmple $Zd, $Pg/z, $Zm, $Zn",
+ (CMPGE_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>;
+ def : InstAlias<"cmple $Zd, $Pg/z, $Zm, $Zn",
+ (CMPGE_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
+
+ def : InstAlias<"cmplo $Zd, $Pg/z, $Zm, $Zn",
+ (CMPHI_PPzZZ_B PPR8:$Zd, PPR3bAny:$Pg, ZPR8:$Zn, ZPR8:$Zm), 0>;
+ def : InstAlias<"cmplo $Zd, $Pg/z, $Zm, $Zn",
+ (CMPHI_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>;
+ def : InstAlias<"cmplo $Zd, $Pg/z, $Zm, $Zn",
+ (CMPHI_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>;
+ def : InstAlias<"cmplo $Zd, $Pg/z, $Zm, $Zn",
+ (CMPHI_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
+
+ def : InstAlias<"cmpls $Zd, $Pg/z, $Zm, $Zn",
+ (CMPHS_PPzZZ_B PPR8:$Zd, PPR3bAny:$Pg, ZPR8:$Zn, ZPR8:$Zm), 0>;
+ def : InstAlias<"cmpls $Zd, $Pg/z, $Zm, $Zn",
+ (CMPHS_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>;
+ def : InstAlias<"cmpls $Zd, $Pg/z, $Zm, $Zn",
+ (CMPHS_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>;
+ def : InstAlias<"cmpls $Zd, $Pg/z, $Zm, $Zn",
+ (CMPHS_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
+
+ def : InstAlias<"cmplt $Zd, $Pg/z, $Zm, $Zn",
+ (CMPGT_PPzZZ_B PPR8:$Zd, PPR3bAny:$Pg, ZPR8:$Zn, ZPR8:$Zm), 0>;
+ def : InstAlias<"cmplt $Zd, $Pg/z, $Zm, $Zn",
+ (CMPGT_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>;
+ def : InstAlias<"cmplt $Zd, $Pg/z, $Zm, $Zn",
+ (CMPGT_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>;
+ def : InstAlias<"cmplt $Zd, $Pg/z, $Zm, $Zn",
+ (CMPGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
+
+ def : InstAlias<"facle $Zd, $Pg/z, $Zm, $Zn",
+ (FACGE_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>;
+ def : InstAlias<"facle $Zd, $Pg/z, $Zm, $Zn",
+ (FACGE_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>;
+ def : InstAlias<"facle $Zd, $Pg/z, $Zm, $Zn",
+ (FACGE_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
+
+ def : InstAlias<"faclt $Zd, $Pg/z, $Zm, $Zn",
+ (FACGT_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>;
+ def : InstAlias<"faclt $Zd, $Pg/z, $Zm, $Zn",
+ (FACGT_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>;
+ def : InstAlias<"faclt $Zd, $Pg/z, $Zm, $Zn",
+ (FACGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
+
+ def : InstAlias<"fcmle $Zd, $Pg/z, $Zm, $Zn",
+ (FCMGE_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>;
+ def : InstAlias<"fcmle $Zd, $Pg/z, $Zm, $Zn",
+ (FCMGE_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>;
+ def : InstAlias<"fcmle $Zd, $Pg/z, $Zm, $Zn",
+ (FCMGE_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
+
+ def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn",
+ (FCMGT_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>;
+ def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn",
+ (FCMGT_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>;
+ def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn",
+ (FCMGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
+}
diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedA53.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedA53.td
new file mode 100644
index 000000000..f253a4f3e
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedA53.td
@@ -0,0 +1,295 @@
+//==- AArch64SchedA53.td - Cortex-A53 Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the ARM Cortex A53 processors.
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the simpler per-operand machine model.
+// This works with MachineScheduler. See MCSchedule.h for details.
+
+// Cortex-A53 machine model for scheduling and other instruction cost heuristics.
+def CortexA53Model : SchedMachineModel {
+ let MicroOpBufferSize = 0; // Explicitly set to zero since A53 is in-order.
+ let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
+ let LoadLatency = 3; // Optimistic load latency assuming bypass.
+                             // This is overridden by OperandCycles if the
+ // Itineraries are queried instead.
+ let MispredictPenalty = 9; // Based on "Cortex-A53 Software Optimisation
+ // Specification - Instruction Timings"
+ // v 1.0 Spreadsheet
+ let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = [HasSVE];
+}
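+
+// For illustration only (a hedged sketch, not part of the original file): a
+// SchedMachineModel such as CortexA53Model is attached to a CPU through a
+// ProcessorModel record in the target's top-level .td file. The feature list
+// shown here is an assumption chosen purely for the example.
+//
+//   def : ProcessorModel<"cortex-a53", CortexA53Model,
+//                        [FeatureFPARMv8, FeatureNEON]>;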
+
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available.
+
+// Model each pipeline as a ProcResource with BufferSize = 0, since
+// Cortex-A53 is in-order.
+
+def A53UnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU
+def A53UnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC
+def A53UnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division
+def A53UnitLdSt : ProcResource<1> { let BufferSize = 0; } // Load/Store
+def A53UnitB : ProcResource<1> { let BufferSize = 0; } // Branch
+def A53UnitFPALU : ProcResource<1> { let BufferSize = 0; } // FP ALU
+def A53UnitFPMDS : ProcResource<1> { let BufferSize = 0; } // FP Mult/Div/Sqrt
+
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedWrite types which both map the ProcResources and
+// set the latency.
+
+let SchedModel = CortexA53Model in {
+
+// ALU - Despite having a full latency of 4, most ALU instructions can
+//       forward their result one cycle early, and shift-only instructions
+//       two cycles early. These latencies will be incorrect when the
+//       result cannot be forwarded, but that case is not modeled.
+def : WriteRes<WriteImm, [A53UnitALU]> { let Latency = 3; }
+def : WriteRes<WriteI, [A53UnitALU]> { let Latency = 3; }
+def : WriteRes<WriteISReg, [A53UnitALU]> { let Latency = 3; }
+def : WriteRes<WriteIEReg, [A53UnitALU]> { let Latency = 3; }
+def : WriteRes<WriteIS, [A53UnitALU]> { let Latency = 2; }
+def : WriteRes<WriteExtr, [A53UnitALU]> { let Latency = 3; }
+
+// MAC
+def : WriteRes<WriteIM32, [A53UnitMAC]> { let Latency = 4; }
+def : WriteRes<WriteIM64, [A53UnitMAC]> { let Latency = 4; }
+
+// Div
+def : WriteRes<WriteID32, [A53UnitDiv]> { let Latency = 4; }
+def : WriteRes<WriteID64, [A53UnitDiv]> { let Latency = 4; }
+
+// Load
+def : WriteRes<WriteLD, [A53UnitLdSt]> { let Latency = 4; }
+def : WriteRes<WriteLDIdx, [A53UnitLdSt]> { let Latency = 4; }
+def : WriteRes<WriteLDHi, [A53UnitLdSt]> { let Latency = 4; }
+
+// Vector Load - Vector loads take 1-5 cycles to issue. For the WriteVLD
+//               below, we choose the median of 3, which makes the latency 6.
+//               This may be modeled more carefully in the future. The
+//               A53WriteVLD# types represent the 1-5 cycle issues explicitly.
+def : WriteRes<WriteVLD, [A53UnitLdSt]> { let Latency = 6;
+ let ResourceCycles = [3]; }
+def A53WriteVLD1 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 4; }
+def A53WriteVLD2 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 5;
+ let ResourceCycles = [2]; }
+def A53WriteVLD3 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 6;
+ let ResourceCycles = [3]; }
+def A53WriteVLD4 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 7;
+ let ResourceCycles = [4]; }
+def A53WriteVLD5 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 8;
+ let ResourceCycles = [5]; }
+
+// Pre/Post Indexing - Performed as part of address generation, which is already
+//                     accounted for in the WriteST* latencies below.
+def : WriteRes<WriteAdr, []> { let Latency = 0; }
+
+// Store
+def : WriteRes<WriteST, [A53UnitLdSt]> { let Latency = 4; }
+def : WriteRes<WriteSTP, [A53UnitLdSt]> { let Latency = 4; }
+def : WriteRes<WriteSTIdx, [A53UnitLdSt]> { let Latency = 4; }
+def : WriteRes<WriteSTX, [A53UnitLdSt]> { let Latency = 4; }
+
+// Vector Store - Similar to vector loads, can take 1-3 cycles to issue.
+def : WriteRes<WriteVST, [A53UnitLdSt]> { let Latency = 5;
+                                           let ResourceCycles = [2]; }
+def A53WriteVST1 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 4; }
+def A53WriteVST2 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 5;
+ let ResourceCycles = [2]; }
+def A53WriteVST3 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 6;
+ let ResourceCycles = [3]; }
+
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
+// Branch
+def : WriteRes<WriteBr, [A53UnitB]>;
+def : WriteRes<WriteBrReg, [A53UnitB]>;
+def : WriteRes<WriteSys, [A53UnitB]>;
+def : WriteRes<WriteBarrier, [A53UnitB]>;
+def : WriteRes<WriteHint, [A53UnitB]>;
+
+// FP ALU
+def : WriteRes<WriteF, [A53UnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteFCmp, [A53UnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteFCvt, [A53UnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteFCopy, [A53UnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteFImm, [A53UnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteV, [A53UnitFPALU]> { let Latency = 6; }
+
+// FP Mul, Div, Sqrt
+def : WriteRes<WriteFMul, [A53UnitFPMDS]> { let Latency = 6; }
+def : WriteRes<WriteFDiv, [A53UnitFPMDS]> { let Latency = 33;
+ let ResourceCycles = [29]; }
+def A53WriteFMAC : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 10; }
+def A53WriteFDivSP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 18;
+ let ResourceCycles = [14]; }
+def A53WriteFDivDP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 33;
+ let ResourceCycles = [29]; }
+def A53WriteFSqrtSP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 17;
+ let ResourceCycles = [13]; }
+def A53WriteFSqrtDP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 32;
+ let ResourceCycles = [28]; }
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedRead types.
+
+// No forwarding for these reads.
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD, 0>;
+
+// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable
+// operands are needed one cycle later if and only if they are to be
+// shifted. Otherwise, they too are needed two cycles later. This same
+// ReadAdvance applies to Extended registers as well, even though there is
+// a separate SchedPredicate for them.
+def : ReadAdvance<ReadI, 2, [WriteImm,WriteI,
+ WriteISReg, WriteIEReg,WriteIS,
+ WriteID32,WriteID64,
+ WriteIM32,WriteIM64]>;
+def A53ReadShifted : SchedReadAdvance<1, [WriteImm,WriteI,
+ WriteISReg, WriteIEReg,WriteIS,
+ WriteID32,WriteID64,
+ WriteIM32,WriteIM64]>;
+def A53ReadNotShifted : SchedReadAdvance<2, [WriteImm,WriteI,
+ WriteISReg, WriteIEReg,WriteIS,
+ WriteID32,WriteID64,
+ WriteIM32,WriteIM64]>;
+def A53ReadISReg : SchedReadVariant<[
+ SchedVar<RegShiftedPred, [A53ReadShifted]>,
+ SchedVar<NoSchedPred, [A53ReadNotShifted]>]>;
+def : SchedAlias<ReadISReg, A53ReadISReg>;
+
+def A53ReadIEReg : SchedReadVariant<[
+ SchedVar<RegExtendedPred, [A53ReadShifted]>,
+ SchedVar<NoSchedPred, [A53ReadNotShifted]>]>;
+def : SchedAlias<ReadIEReg, A53ReadIEReg>;
+
+// MAC - Operands are generally needed one cycle later in the MAC pipe.
+// Accumulator operands are needed two cycles later.
+def : ReadAdvance<ReadIM, 1, [WriteImm,WriteI,
+ WriteISReg, WriteIEReg,WriteIS,
+ WriteID32,WriteID64,
+ WriteIM32,WriteIM64]>;
+def : ReadAdvance<ReadIMA, 2, [WriteImm,WriteI,
+ WriteISReg, WriteIEReg,WriteIS,
+ WriteID32,WriteID64,
+ WriteIM32,WriteIM64]>;
+
+// Div
+def : ReadAdvance<ReadID, 1, [WriteImm,WriteI,
+ WriteISReg, WriteIEReg,WriteIS,
+ WriteID32,WriteID64,
+ WriteIM32,WriteIM64]>;
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific InstRWs.
+
+//---
+// Miscellaneous
+//---
+def : InstRW<[WriteI], (instrs COPY)>;
+
+//---
+// Vector Loads
+//---
+def : InstRW<[A53WriteVLD1], (instregex "LD1i(8|16|32|64)$")>;
+def : InstRW<[A53WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A53WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A53WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A53WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A53WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
+def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A53WriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[A53WriteVLD1], (instregex "LD2i(8|16|32|64)$")>;
+def : InstRW<[A53WriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A53WriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[A53WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;
+def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[A53WriteVLD2], (instregex "LD3i(8|16|32|64)$")>;
+def : InstRW<[A53WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A53WriteVLD4], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
+def : InstRW<[A53WriteVLD3], (instregex "LD3Threev2d$")>;
+def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
+def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[A53WriteVLD3, WriteAdr], (instregex "LD3Threev2d_POST$")>;
+
+def : InstRW<[A53WriteVLD2], (instregex "LD4i(8|16|32|64)$")>;
+def : InstRW<[A53WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A53WriteVLD5], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
+def : InstRW<[A53WriteVLD4], (instregex "LD4Fourv(2d)$")>;
+def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
+def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A53WriteVLD5, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>;
+
+//---
+// Vector Stores
+//---
+def : InstRW<[A53WriteVST1], (instregex "ST1i(8|16|32|64)$")>;
+def : InstRW<[A53WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A53WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A53WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A53WriteVST2], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
+def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[A53WriteVST1], (instregex "ST2i(8|16|32|64)$")>;
+def : InstRW<[A53WriteVST1], (instregex "ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[A53WriteVST2], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
+def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[A53WriteVST2], (instregex "ST3i(8|16|32|64)$")>;
+def : InstRW<[A53WriteVST3], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
+def : InstRW<[A53WriteVST2], (instregex "ST3Threev(2d)$")>;
+def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
+def : InstRW<[A53WriteVST3, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST3Threev(2d)_POST$")>;
+
+def : InstRW<[A53WriteVST2], (instregex "ST4i(8|16|32|64)$")>;
+def : InstRW<[A53WriteVST3], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
+def : InstRW<[A53WriteVST2], (instregex "ST4Fourv(2d)$")>;
+def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
+def : InstRW<[A53WriteVST3, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
+
+//---
+// Floating Point MAC, DIV, SQRT
+//---
+def : InstRW<[A53WriteFMAC], (instregex "^FN?M(ADD|SUB).*")>;
+def : InstRW<[A53WriteFMAC], (instregex "^FML(A|S).*")>;
+def : InstRW<[A53WriteFDivSP], (instrs FDIVSrr)>;
+def : InstRW<[A53WriteFDivDP], (instrs FDIVDrr)>;
+def : InstRW<[A53WriteFDivSP], (instregex "^FDIVv.*32$")>;
+def : InstRW<[A53WriteFDivDP], (instregex "^FDIVv.*64$")>;
+def : InstRW<[A53WriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
+def : InstRW<[A53WriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
+
+}
diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedA57.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedA57.td
new file mode 100644
index 000000000..ade03f23f
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedA57.td
@@ -0,0 +1,668 @@
+//=- AArch64SchedA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for ARM Cortex-A57 to support
+// instruction scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// The Cortex-A57 is a traditional superscalar microprocessor with a
+// conservative 3-wide in-order stage for decode and dispatch. Combined with the
+// much wider out-of-order issue stage, this creates a need to carefully
+// schedule micro-ops so that all three decoded each cycle are successfully
+// issued, as the reservation stations simply don't stay occupied for long.
+// Therefore, IssueWidth is set to the narrower of the two at three, while still
+// modeling the machine as out-of-order.
+
+def CortexA57Model : SchedMachineModel {
+ let IssueWidth = 3; // 3-way decode and dispatch
+ let MicroOpBufferSize = 128; // 128 micro-op re-order buffer
+ let LoadLatency = 4; // Optimistic load latency
+ let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch
+
+ // Enable partial & runtime unrolling. The magic number is chosen based on
+ // experiments and benchmarking data.
+ let LoopMicroOpBufferSize = 16;
+ let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = [HasSVE];
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on Cortex-A57.
+// Cortex-A57 has 8 pipelines, each with its own 8-entry queue, where
+// micro-ops wait for their operands and then issue out-of-order.
+
+def A57UnitB : ProcResource<1>; // Type B micro-ops
+def A57UnitI : ProcResource<2>; // Type I micro-ops
+def A57UnitM : ProcResource<1>; // Type M micro-ops
+def A57UnitL : ProcResource<1>; // Type L micro-ops
+def A57UnitS : ProcResource<1>; // Type S micro-ops
+def A57UnitX : ProcResource<1>; // Type X micro-ops
+def A57UnitW : ProcResource<1>; // Type W micro-ops
+let SchedModel = CortexA57Model in {
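+  // Type V micro-ops can issue to either the X or the W pipeline, so they are
+  // modeled as a ProcResGroup spanning both units.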
+ def A57UnitV : ProcResGroup<[A57UnitX, A57UnitW]>; // Type V micro-ops
+}
+
+let SchedModel = CortexA57Model in {
+
+//===----------------------------------------------------------------------===//
+// Define customized scheduler read/write types specific to the Cortex-A57.
+
+include "AArch64SchedA57WriteRes.td"
+
+//===----------------------------------------------------------------------===//
+// Map the target-defined scheduler read/write resources and latency for
+// Cortex-A57. The Cortex-A57 types are directly associated with resources, so
+// defining the aliases precludes the need for mapping them using WriteRes. The
+// aliases are sufficient for creating a coarse, working model. As the model
+// evolves, InstRWs will be used to override some of these SchedAliases.
+//
+// WARNING: Using SchedAliases is convenient and works well for latency and
+// resource lookup for instructions. However, this creates an entry in
+// AArch64WriteLatencyTable with a WriteResourceID of 0, breaking
+// any SchedReadAdvance since the lookup will fail.
+
+def : SchedAlias<WriteImm, A57Write_1cyc_1I>;
+def : SchedAlias<WriteI, A57Write_1cyc_1I>;
+def : SchedAlias<WriteISReg, A57Write_2cyc_1M>;
+def : SchedAlias<WriteIEReg, A57Write_2cyc_1M>;
+def : SchedAlias<WriteExtr, A57Write_1cyc_1I>;
+def : SchedAlias<WriteIS, A57Write_1cyc_1I>;
+def : SchedAlias<WriteID32, A57Write_19cyc_1M>;
+def : SchedAlias<WriteID64, A57Write_35cyc_1M>;
+def : WriteRes<WriteIM32, [A57UnitM]> { let Latency = 3; }
+def : WriteRes<WriteIM64, [A57UnitM]> { let Latency = 5; }
+def : SchedAlias<WriteBr, A57Write_1cyc_1B>;
+def : SchedAlias<WriteBrReg, A57Write_1cyc_1B>;
+def : SchedAlias<WriteLD, A57Write_4cyc_1L>;
+def : SchedAlias<WriteST, A57Write_1cyc_1S>;
+def : SchedAlias<WriteSTP, A57Write_1cyc_1S>;
+def : SchedAlias<WriteAdr, A57Write_1cyc_1I>;
+def : SchedAlias<WriteLDIdx, A57Write_4cyc_1I_1L>;
+def : SchedAlias<WriteSTIdx, A57Write_1cyc_1I_1S>;
+def : SchedAlias<WriteF, A57Write_3cyc_1V>;
+def : SchedAlias<WriteFCmp, A57Write_3cyc_1V>;
+def : SchedAlias<WriteFCvt, A57Write_5cyc_1V>;
+def : SchedAlias<WriteFCopy, A57Write_5cyc_1L>;
+def : SchedAlias<WriteFImm, A57Write_3cyc_1V>;
+def : SchedAlias<WriteFMul, A57Write_5cyc_1V>;
+def : SchedAlias<WriteFDiv, A57Write_17cyc_1W>;
+def : SchedAlias<WriteV, A57Write_3cyc_1V>;
+def : SchedAlias<WriteVLD, A57Write_5cyc_1L>;
+def : SchedAlias<WriteVST, A57Write_1cyc_1S>;
+
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
+def : WriteRes<WriteSys, []> { let Latency = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+
+def : WriteRes<WriteLDHi, []> { let Latency = 4; }
+
+// Forwarding logic is only modeled for multiply and accumulate
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
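+// The accumulator operand (ReadIMA) can pick up the result of a preceding
+// multiply two cycles early, so chained multiply-accumulates see a reduced
+// effective latency.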
+def : ReadAdvance<ReadIMA, 2, [WriteIM32, WriteIM64]>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD, 0>;
+
+
+//===----------------------------------------------------------------------===//
+// Specialize the coarse model by associating instruction groups with the
+// subtarget-defined types. As the model is refined, this will override most
+// of the above SchedAlias mappings.
+
+// Miscellaneous
+// -----------------------------------------------------------------------------
+
+def : InstRW<[WriteI], (instrs COPY)>;
+
+
+// Branch Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[A57Write_1cyc_1B_1I], (instrs BL)>;
+def : InstRW<[A57Write_2cyc_1B_1I], (instrs BLR)>;
+
+
+// Shifted Register with Shift == 0
+// ----------------------------------------------------------------------------
+
+def A57WriteISReg : SchedWriteVariant<[
+ SchedVar<RegShiftedPred, [WriteISReg]>,
+ SchedVar<NoSchedPred, [WriteI]>]>;
+def : InstRW<[A57WriteISReg], (instregex ".*rs$")>;
+
+
+// Divide and Multiply Instructions
+// -----------------------------------------------------------------------------
+
+// Multiply high
+def : InstRW<[A57Write_6cyc_1M], (instrs SMULHrr, UMULHrr)>;
+
+
+// Miscellaneous Data-Processing Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[A57Write_1cyc_1I], (instrs EXTRWrri)>;
+def : InstRW<[A57Write_3cyc_1I_1M], (instrs EXTRXrri)>;
+def : InstRW<[A57Write_2cyc_1M], (instregex "BFM")>;
+
+
+// Cryptography Extensions
+// -----------------------------------------------------------------------------
+
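+// The 3-cycle read advance lets AESMC/AESIMC pick up the result of a preceding
+// AESE/AESD three cycles early, effectively hiding the AES latency for
+// back-to-back AESE/AESMC (or AESD/AESIMC) pairs.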
+def A57ReadAES : SchedReadAdvance<3, [A57Write_3cyc_1W]>;
+def : InstRW<[A57Write_3cyc_1W], (instregex "^AES[DE]")>;
+def : InstRW<[A57Write_3cyc_1W, A57ReadAES], (instregex "^AESI?MC")>;
+def : InstRW<[A57Write_6cyc_2V], (instregex "^SHA1SU0")>;
+def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA1(H|SU1)")>;
+def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA1[CMP]")>;
+def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA256SU0")>;
+def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA256(H|H2|SU1)")>;
+def : InstRW<[A57Write_3cyc_1W], (instregex "^CRC32")>;
+
+
+// Vector Load
+// -----------------------------------------------------------------------------
+
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD1i(8|16|32)$")>;
+def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD1i(8|16|32)_POST$")>;
+def : InstRW<[A57Write_5cyc_1L], (instregex "LD1i(64)$")>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1i(64)_POST$")>;
+
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s)$")>;
+def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Rv(1d)$")>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Rv(1d)_POST$")>;
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_7cyc_3L], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_7cyc_3L, WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_8cyc_4L], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_8cyc_4L, WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD2i(8|16)$")>;
+def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD2i(8|16)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD2i(32)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD2i(32)_POST$")>;
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD2i(64)$")>;
+def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD2i(64)_POST$")>;
+
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD2Rv(8b|4h|2s)$")>;
+def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD2Rv(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_5cyc_1L], (instregex "LD2Rv(1d)$")>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD2Rv(1d)_POST$")>;
+def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_9cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s)$")>;
+def : InstRW<[A57Write_9cyc_2L_2V, WriteAdr], (instregex "LD2Twov(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD2Twov(2d)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD2Twov(2d)_POST$")>;
+
+def : InstRW<[A57Write_9cyc_1L_3V], (instregex "LD3i(8|16)$")>;
+def : InstRW<[A57Write_9cyc_1L_3V, WriteAdr], (instregex "LD3i(8|16)_POST$")>;
+def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD3i(32)$")>;
+def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD3i(32)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD3i(64)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD3i(64)_POST$")>;
+
+def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD3Rv(8b|4h|2s)$")>;
+def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD3Rv(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD3Rv(1d)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD3Rv(1d)_POST$")>;
+def : InstRW<[A57Write_9cyc_1L_3V], (instregex "LD3Rv(16b|8h|4s)$")>;
+def : InstRW<[A57Write_9cyc_1L_3V, WriteAdr], (instregex "LD3Rv(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD3Rv(2d)$")>;
+def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD3Rv(2d)_POST$")>;
+
+def : InstRW<[A57Write_9cyc_2L_2V], (instregex "LD3Threev(8b|4h|2s)$")>;
+def : InstRW<[A57Write_9cyc_2L_2V, WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_10cyc_3L_4V], (instregex "LD3Threev(16b|8h|4s)$")>;
+def : InstRW<[A57Write_10cyc_3L_4V, WriteAdr], (instregex "LD3Threev(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_8cyc_4L], (instregex "LD3Threev(2d)$")>;
+def : InstRW<[A57Write_8cyc_4L, WriteAdr], (instregex "LD3Threev(2d)_POST$")>;
+
+def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD4i(8|16)$")>;
+def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD4i(8|16)_POST$")>;
+def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD4i(32)$")>;
+def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD4i(32)_POST$")>;
+def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD4i(64)$")>;
+def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD4i(64)_POST$")>;
+
+def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD4Rv(8b|4h|2s)$")>;
+def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD4Rv(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD4Rv(1d)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD4Rv(1d)_POST$")>;
+def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD4Rv(16b|8h|4s)$")>;
+def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD4Rv(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_9cyc_2L_4V], (instregex "LD4Rv(2d)$")>;
+def : InstRW<[A57Write_9cyc_2L_4V, WriteAdr], (instregex "LD4Rv(2d)_POST$")>;
+
+def : InstRW<[A57Write_9cyc_2L_2V], (instregex "LD4Fourv(8b|4h|2s)$")>;
+def : InstRW<[A57Write_9cyc_2L_2V, WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_11cyc_4L_4V], (instregex "LD4Fourv(16b|8h|4s)$")>;
+def : InstRW<[A57Write_11cyc_4L_4V, WriteAdr], (instregex "LD4Fourv(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_8cyc_4L], (instregex "LD4Fourv(2d)$")>;
+def : InstRW<[A57Write_8cyc_4L, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>;
+
+// Vector Store
+// -----------------------------------------------------------------------------
+
+def : InstRW<[A57Write_1cyc_1S], (instregex "ST1i(8|16|32)$")>;
+def : InstRW<[A57Write_1cyc_1S, WriteAdr], (instregex "ST1i(8|16|32)_POST$")>;
+def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST1i(64)$")>;
+def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST1i(64)_POST$")>;
+
+def : InstRW<[A57Write_1cyc_1S], (instregex "ST1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_1cyc_1S, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_2cyc_2S], (instregex "ST1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_2cyc_2S, WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A57Write_2cyc_2S], (instregex "ST1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_2cyc_2S, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_4cyc_4S], (instregex "ST1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A57Write_3cyc_3S], (instregex "ST1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_3cyc_3S, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_6cyc_6S], (instregex "ST1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_6cyc_6S, WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A57Write_4cyc_4S], (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_8cyc_8S], (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_8cyc_8S, WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST2i(8|16|32)$")>;
+def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST2i(8|16|32)_POST$")>;
+def : InstRW<[A57Write_2cyc_2S], (instregex "ST2i(64)$")>;
+def : InstRW<[A57Write_2cyc_2S, WriteAdr], (instregex "ST2i(64)_POST$")>;
+
+def : InstRW<[A57Write_3cyc_2S_1V], (instregex "ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[A57Write_3cyc_2S_1V, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_4cyc_4S_2V], (instregex "ST2Twov(16b|8h|4s)$")>;
+def : InstRW<[A57Write_4cyc_4S_2V, WriteAdr], (instregex "ST2Twov(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_4cyc_4S], (instregex "ST2Twov(2d)$")>;
+def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST2Twov(2d)_POST$")>;
+
+def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST3i(8|16)$")>;
+def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST3i(8|16)_POST$")>;
+def : InstRW<[A57Write_3cyc_3S], (instregex "ST3i(32)$")>;
+def : InstRW<[A57Write_3cyc_3S, WriteAdr], (instregex "ST3i(32)_POST$")>;
+def : InstRW<[A57Write_3cyc_2S_1V], (instregex "ST3i(64)$")>;
+def : InstRW<[A57Write_3cyc_2S_1V, WriteAdr], (instregex "ST3i(64)_POST$")>;
+
+def : InstRW<[A57Write_3cyc_3S_2V], (instregex "ST3Threev(8b|4h|2s)$")>;
+def : InstRW<[A57Write_3cyc_3S_2V, WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_6cyc_6S_4V], (instregex "ST3Threev(16b|8h|4s)$")>;
+def : InstRW<[A57Write_6cyc_6S_4V, WriteAdr], (instregex "ST3Threev(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_6cyc_6S], (instregex "ST3Threev(2d)$")>;
+def : InstRW<[A57Write_6cyc_6S, WriteAdr], (instregex "ST3Threev(2d)_POST$")>;
+
+def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST4i(8|16)$")>;
+def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST4i(8|16)_POST$")>;
+def : InstRW<[A57Write_4cyc_4S], (instregex "ST4i(32)$")>;
+def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST4i(32)_POST$")>;
+def : InstRW<[A57Write_3cyc_2S_1V], (instregex "ST4i(64)$")>;
+def : InstRW<[A57Write_3cyc_2S_1V, WriteAdr], (instregex "ST4i(64)_POST$")>;
+
+def : InstRW<[A57Write_4cyc_4S_2V], (instregex "ST4Fourv(8b|4h|2s)$")>;
+def : InstRW<[A57Write_4cyc_4S_2V, WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_8cyc_8S_4V], (instregex "ST4Fourv(16b|8h|4s)$")>;
+def : InstRW<[A57Write_8cyc_8S_4V, WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_8cyc_8S], (instregex "ST4Fourv(2d)$")>;
+def : InstRW<[A57Write_8cyc_8S, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
+
+// Vector - Integer
+// -----------------------------------------------------------------------------
+
+// Reference for forms in this group
+// D form - v8i8, v4i16, v2i32
+// Q form - v16i8, v8i16, v4i32
+// D form - v1i8, v1i16, v1i32, v1i64
+// Q form - v16i8, v8i16, v4i32, v2i64
+// D form - v8i8_v8i16, v4i16_v4i32, v2i32_v2i64
+// Q form - v16i8_v8i16, v8i16_v4i32, v4i32_v2i64
+
+// ASIMD absolute diff accum, D-form
+def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>;
+// ASIMD absolute diff accum, Q-form
+def : InstRW<[A57Write_5cyc_2X], (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>;
+// ASIMD absolute diff accum long
+def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]ABAL")>;
+
+// ASIMD arith, reduce, 4H/4S
+def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>;
+// ASIMD arith, reduce, 8B/8H
+def : InstRW<[A57Write_7cyc_1V_1X], (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>;
+// ASIMD arith, reduce, 16B
+def : InstRW<[A57Write_8cyc_2X], (instregex "^[SU]?ADDL?Vv16i8v$")>;
+
+// ASIMD max/min, reduce, 4H/4S
+def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>;
+// ASIMD max/min, reduce, 8B/8H
+def : InstRW<[A57Write_7cyc_1V_1X], (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>;
+// ASIMD max/min, reduce, 16B
+def : InstRW<[A57Write_8cyc_2X], (instregex "^[SU](MIN|MAX)Vv16i8v$")>;
+
+// ASIMD multiply, D-form
+def : InstRW<[A57Write_5cyc_1W], (instregex "^(P?MUL|SQR?DMULH)(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)(_indexed)?$")>;
+// ASIMD multiply, Q-form
+def : InstRW<[A57Write_6cyc_2W], (instregex "^(P?MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
+
+// ASIMD multiply accumulate, D-form
+def : InstRW<[A57Write_5cyc_1W], (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>;
+// ASIMD multiply accumulate, Q-form
+def : InstRW<[A57Write_6cyc_2W], (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;
+
+// ASIMD multiply accumulate long
+// ASIMD multiply accumulate saturating long
+def A57WriteIVMA : SchedWriteRes<[A57UnitW]> { let Latency = 5; }
+def A57ReadIVMA4 : SchedReadAdvance<4, [A57WriteIVMA]>;
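+// The 4-cycle read advance against the 5-cycle write gives dependent
+// multiply-accumulate(-long) chains an effective accumulator latency of one
+// cycle.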
+def : InstRW<[A57WriteIVMA, A57ReadIVMA4], (instregex "^(S|U|SQD)ML[AS]L")>;
+
+// ASIMD multiply long
+def : InstRW<[A57Write_5cyc_1W], (instregex "^(S|U|SQD)MULL")>;
+def : InstRW<[A57Write_5cyc_1W], (instregex "^PMULL(v8i8|v16i8)")>;
+def : InstRW<[A57Write_3cyc_1W], (instregex "^PMULL(v1i64|v2i64)")>;
+
+// ASIMD pairwise add and accumulate
+// ASIMD shift accumulate
+def A57WriteIVA : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
+def A57ReadIVA3 : SchedReadAdvance<3, [A57WriteIVA]>;
+def : InstRW<[A57WriteIVA, A57ReadIVA3], (instregex "^[SU]ADALP")>;
+def : InstRW<[A57WriteIVA, A57ReadIVA3], (instregex "^(S|SR|U|UR)SRA")>;
+
+// ASIMD shift by immed, complex
+def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]?(Q|R){1,2}SHR")>;
+def : InstRW<[A57Write_4cyc_1X], (instregex "^SQSHLU")>;
+
+
+// ASIMD shift by register, basic, Q-form
+def : InstRW<[A57Write_4cyc_2X], (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
+
+// ASIMD shift by register, complex, D-form
+def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU][QR]{1,2}SHL(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>;
+
+// ASIMD shift by register, complex, Q-form
+def : InstRW<[A57Write_5cyc_2X], (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>;
+
+
+// Vector - Floating Point
+// -----------------------------------------------------------------------------
+
+// Reference for forms in this group
+// D form - v2f32
+// Q form - v4f32, v2f64
+// D form - 32, 64
+// D form - v1i32, v1i64
+// D form - v2i32
+// Q form - v4i32, v2i64
+
+// ASIMD FP arith, normal, D-form
+def : InstRW<[A57Write_5cyc_1V], (instregex "^(FABD|FADD|FSUB)(v2f32|32|64|v2i32p)")>;
+// ASIMD FP arith, normal, Q-form
+def : InstRW<[A57Write_5cyc_2V], (instregex "^(FABD|FADD|FSUB)(v4f32|v2f64|v2i64p)")>;
+
+// ASIMD FP arith, pairwise, D-form
+def : InstRW<[A57Write_5cyc_1V], (instregex "^FADDP(v2f32|32|64|v2i32)")>;
+// ASIMD FP arith, pairwise, Q-form
+def : InstRW<[A57Write_9cyc_3V], (instregex "^FADDP(v4f32|v2f64|v2i64)")>;
+
+// ASIMD FP compare, D-form
+def : InstRW<[A57Write_5cyc_1V], (instregex "^(FACGE|FACGT|FCMEQ|FCMGE|FCMGT|FCMLE|FCMLT)(v2f32|32|64|v1i32|v2i32|v1i64)")>;
+// ASIMD FP compare, Q-form
+def : InstRW<[A57Write_5cyc_2V], (instregex "^(FACGE|FACGT|FCMEQ|FCMGE|FCMGT|FCMLE|FCMLT)(v4f32|v2f64|v4i32|v2i64)")>;
+
+// ASIMD FP convert, long and narrow
+def : InstRW<[A57Write_8cyc_3V], (instregex "^FCVT(L|N|XN)v")>;
+// ASIMD FP convert, other, D-form
+def : InstRW<[A57Write_5cyc_1V], (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>;
+// ASIMD FP convert, other, Q-form
+def : InstRW<[A57Write_5cyc_2V], (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>;
+
+// ASIMD FP divide, D-form, F32
+def : InstRW<[A57Write_17cyc_1W], (instregex "FDIVv2f32")>;
+// ASIMD FP divide, Q-form, F32
+def : InstRW<[A57Write_34cyc_2W], (instregex "FDIVv4f32")>;
+// ASIMD FP divide, Q-form, F64
+def : InstRW<[A57Write_64cyc_2W], (instregex "FDIVv2f64")>;
+
+// Note: These were simply duplicated from ASIMD FDIV because of missing documentation
+// ASIMD FP square root, D-form, F32
+def : InstRW<[A57Write_17cyc_1W], (instregex "FSQRTv2f32")>;
+// ASIMD FP square root, Q-form, F32
+def : InstRW<[A57Write_34cyc_2W], (instregex "FSQRTv4f32")>;
+// ASIMD FP square root, Q-form, F64
+def : InstRW<[A57Write_64cyc_2W], (instregex "FSQRTv2f64")>;
+
+// ASIMD FP max/min, normal, D-form
+def : InstRW<[A57Write_5cyc_1V], (instregex "^(FMAX|FMIN)(NM)?(v2f32)")>;
+// ASIMD FP max/min, normal, Q-form
+def : InstRW<[A57Write_5cyc_2V], (instregex "^(FMAX|FMIN)(NM)?(v4f32|v2f64)")>;
+// ASIMD FP max/min, pairwise, D-form
+def : InstRW<[A57Write_5cyc_1V], (instregex "^(FMAX|FMIN)(NM)?P(v2f32|v2i32)")>;
+// ASIMD FP max/min, pairwise, Q-form
+def : InstRW<[A57Write_9cyc_3V], (instregex "^(FMAX|FMIN)(NM)?P(v4f32|v2f64|v2i64)")>;
+// ASIMD FP max/min, reduce
+def : InstRW<[A57Write_10cyc_3V], (instregex "^(FMAX|FMIN)(NM)?Vv")>;
+
+// ASIMD FP multiply, D-form, FZ
+def : InstRW<[A57Write_5cyc_1V], (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>;
+// ASIMD FP multiply, Q-form, FZ
+def : InstRW<[A57Write_5cyc_2V], (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;
+
+// ASIMD FP multiply accumulate, D-form, FZ
+// ASIMD FP multiply accumulate, Q-form, FZ
+def A57WriteFPVMAD : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
+def A57WriteFPVMAQ : SchedWriteRes<[A57UnitV, A57UnitV]> { let Latency = 10; }
+def A57ReadFPVMA5 : SchedReadAdvance<5, [A57WriteFPVMAD, A57WriteFPVMAQ]>;
+def : InstRW<[A57WriteFPVMAD, A57ReadFPVMA5], (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>;
+def : InstRW<[A57WriteFPVMAQ, A57ReadFPVMA5], (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;
+
+// ASIMD FP round, D-form
+def : InstRW<[A57Write_5cyc_1V], (instregex "^FRINT[AIMNPXZ](v2f32)")>;
+// ASIMD FP round, Q-form
+def : InstRW<[A57Write_5cyc_2V], (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>;
+
+
+// Vector - Miscellaneous
+// -----------------------------------------------------------------------------
+
+// Reference for forms in this group
+// D form - v8i8, v4i16, v2i32
+// Q form - v16i8, v8i16, v4i32
+// D form - v1i8, v1i16, v1i32, v1i64
+// Q form - v16i8, v8i16, v4i32, v2i64
+
+// ASIMD bitwise insert, Q-form
+def : InstRW<[A57Write_3cyc_2V], (instregex "^(BIF|BIT|BSL)v16i8")>;
+
+// ASIMD duplicate, gen reg, D-form and Q-form
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^CPY")>;
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^DUPv.+gpr")>;
+
+// ASIMD move, saturating
+def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]QXTU?N")>;
+
+// ASIMD reciprocal estimate, D-form
+def : InstRW<[A57Write_5cyc_1V], (instregex "^[FU](RECP|RSQRT)(E|X)(v2f32|v1i32|v2i32|v1i64)")>;
+// ASIMD reciprocal estimate, Q-form
+def : InstRW<[A57Write_5cyc_2V], (instregex "^[FU](RECP|RSQRT)(E|X)(v2f64|v4f32|v4i32)")>;
+
+// ASIMD reciprocal step, D-form, FZ
+def : InstRW<[A57Write_9cyc_1V], (instregex "^F(RECP|RSQRT)S(v2f32|v1i32|v2i32|v1i64|32|64)")>;
+// ASIMD reciprocal step, Q-form, FZ
+def : InstRW<[A57Write_9cyc_2V], (instregex "^F(RECP|RSQRT)S(v2f64|v4f32|v4i32)")>;
+
+// ASIMD table lookup, D-form
+def : InstRW<[A57Write_3cyc_1V], (instregex "^TB[LX]v8i8One")>;
+def : InstRW<[A57Write_6cyc_2V], (instregex "^TB[LX]v8i8Two")>;
+def : InstRW<[A57Write_9cyc_3V], (instregex "^TB[LX]v8i8Three")>;
+def : InstRW<[A57Write_12cyc_4V], (instregex "^TB[LX]v8i8Four")>;
+// ASIMD table lookup, Q-form
+def : InstRW<[A57Write_6cyc_3V], (instregex "^TB[LX]v16i8One")>;
+def : InstRW<[A57Write_9cyc_5V], (instregex "^TB[LX]v16i8Two")>;
+def : InstRW<[A57Write_12cyc_7V], (instregex "^TB[LX]v16i8Three")>;
+def : InstRW<[A57Write_15cyc_9V], (instregex "^TB[LX]v16i8Four")>;
+
+// ASIMD transfer, element to gen reg
+def : InstRW<[A57Write_6cyc_1I_1L], (instregex "^[SU]MOVv")>;
+
+// ASIMD transfer, gen reg to element
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^INSv")>;
+
+// ASIMD unzip/zip, Q-form
+def : InstRW<[A57Write_6cyc_3V], (instregex "^(UZP|ZIP)(1|2)(v16i8|v8i16|v4i32|v2i64)")>;
+
+
+// Remainder
+// -----------------------------------------------------------------------------
+
+def : InstRW<[A57Write_5cyc_1V], (instregex "^F(ADD|SUB)[DS]rr")>;
+
+def A57WriteFPMA : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
+def A57ReadFPMA5 : SchedReadAdvance<5, [A57WriteFPMA]>;
+def A57ReadFPM : SchedReadAdvance<0>;
+def : InstRW<[A57WriteFPMA, A57ReadFPM, A57ReadFPM, A57ReadFPMA5], (instregex "^FN?M(ADD|SUB)[DS]rrr")>;
+
+def : InstRW<[A57Write_10cyc_1L_1V], (instregex "^[FSU]CVT[AMNPZ][SU](_Int)?[SU]?[XW]?[DS]?[rds]i?")>;
+def : InstRW<[A57Write_10cyc_1L_1V], (instregex "^[SU]CVTF")>;
+
+def : InstRW<[A57Write_32cyc_1W], (instrs FDIVDrr)>;
+def : InstRW<[A57Write_17cyc_1W], (instrs FDIVSrr)>;
+
+def : InstRW<[A57Write_5cyc_1V], (instregex "^F(MAX|MIN).+rr")>;
+
+def : InstRW<[A57Write_5cyc_1V], (instregex "^FRINT.+r")>;
+
+def : InstRW<[A57Write_32cyc_1W], (instrs FSQRTDr)>;
+def : InstRW<[A57Write_17cyc_1W], (instrs FSQRTSr)>;
+
+def : InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDNPDi)>;
+def : InstRW<[A57Write_6cyc_2L, WriteLDHi], (instrs LDNPQi)>;
+def : InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDNPSi)>;
+def : InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDPDi)>;
+def : InstRW<[A57Write_5cyc_1L, WriteLDHi, WriteAdr], (instrs LDPDpost)>;
+def : InstRW<[A57Write_5cyc_1L, WriteLDHi, WriteAdr], (instrs LDPDpre)>;
+def : InstRW<[A57Write_6cyc_2L, WriteLDHi], (instrs LDPQi)>;
+def : InstRW<[A57Write_6cyc_2L, WriteLDHi, WriteAdr], (instrs LDPQpost)>;
+def : InstRW<[A57Write_6cyc_2L, WriteLDHi, WriteAdr], (instrs LDPQpre)>;
+def : InstRW<[A57Write_5cyc_1I_2L, WriteLDHi], (instrs LDPSWi)>;
+def : InstRW<[A57Write_5cyc_1I_2L, WriteLDHi, WriteAdr], (instrs LDPSWpost)>;
+def : InstRW<[A57Write_5cyc_1I_2L, WriteLDHi, WriteAdr], (instrs LDPSWpre)>;
+def : InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDPSi)>;
+def : InstRW<[A57Write_5cyc_1L, WriteLDHi, WriteAdr], (instrs LDPSpost)>;
+def : InstRW<[A57Write_5cyc_1L, WriteLDHi, WriteAdr], (instrs LDPSpre)>;
+def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRBpost)>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRBpre)>;
+def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRBroW)>;
+def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRBroX)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDRBui)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDRDl)>;
+def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRDpost)>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRDpre)>;
+def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRDroW)>;
+def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRDroX)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDRDui)>;
+def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRHHroW)>;
+def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRHHroX)>;
+def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRHpost)>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRHpre)>;
+def : InstRW<[A57Write_6cyc_1I_1L, ReadAdrBase], (instrs LDRHroW)>;
+def : InstRW<[A57Write_6cyc_1I_1L, ReadAdrBase], (instrs LDRHroX)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDRHui)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDRQl)>;
+def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRQpost)>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRQpre)>;
+def : InstRW<[A57Write_6cyc_1I_1L, ReadAdrBase], (instrs LDRQroW)>;
+def : InstRW<[A57Write_6cyc_1I_1L, ReadAdrBase], (instrs LDRQroX)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDRQui)>;
+def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRSHWroW)>;
+def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRSHWroX)>;
+def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRSHXroW)>;
+def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRSHXroX)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDRSl)>;
+def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRSpost)>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRSpre)>;
+def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRSroW)>;
+def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRSroX)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDRSui)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDURBi)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDURDi)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDURHi)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDURQi)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDURSi)>;
+
+def : InstRW<[A57Write_2cyc_2S], (instrs STNPDi)>;
+def : InstRW<[A57Write_4cyc_1I_4S], (instrs STNPQi)>;
+def : InstRW<[A57Write_2cyc_2S], (instrs STNPXi)>;
+def : InstRW<[A57Write_2cyc_2S], (instrs STPDi)>;
+def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S], (instrs STPDpost)>;
+def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S], (instrs STPDpre)>;
+def : InstRW<[A57Write_4cyc_1I_4S], (instrs STPQi)>;
+def : InstRW<[WriteAdr, A57Write_4cyc_1I_4S], (instrs STPQpost)>;
+def : InstRW<[WriteAdr, A57Write_4cyc_2I_4S], (instrs STPQpre)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STPSpost)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STPSpre)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STPWpost)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STPWpre)>;
+def : InstRW<[A57Write_2cyc_2S], (instrs STPXi)>;
+def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S], (instrs STPXpost)>;
+def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S], (instrs STPXpre)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRBBpost)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRBBpre)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRBpost)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STRBpre)>;
+def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRBroW)>;
+def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRBroX)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRDpost)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STRDpre)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRHHpost)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRHHpre)>;
+def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRHHroW)>;
+def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRHHroX)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRHpost)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STRHpre)>;
+def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRHroW)>;
+def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRHroX)>;
+def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S, ReadAdrBase], (instrs STRQpost)>;
+def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S], (instrs STRQpre)>;
+def : InstRW<[A57Write_2cyc_1I_2S, ReadAdrBase], (instrs STRQroW)>;
+def : InstRW<[A57Write_2cyc_1I_2S, ReadAdrBase], (instrs STRQroX)>;
+def : InstRW<[A57Write_2cyc_1I_2S], (instrs STRQui)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRSpost)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STRSpre)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRWpost)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRWpre)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRXpost)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRXpre)>;
+def : InstRW<[A57Write_2cyc_2S], (instrs STURQi)>;
+
+} // SchedModel = CortexA57Model
diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedA57WriteRes.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedA57WriteRes.td
new file mode 100644
index 000000000..55005e1d9
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedA57WriteRes.td
@@ -0,0 +1,544 @@
+//=- AArch64SchedA57WriteRes.td - ARM Cortex-A57 Write Res ---*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Contains all of the Cortex-A57 specific SchedWriteRes types. The approach
+// below is to define a generic SchedWriteRes for every combination of
+// latency and microOps. The naming convention is to use a prefix, one field
+// for latency, and one or more microOp count/type designators.
+// Prefix: A57Write
+// Latency: #cyc
+// MicroOp Count/Types: #(B|I|M|L|S|X|W|V)
+//
+// e.g. A57Write_6cyc_1I_6S_4V means the total latency is 6 and there are
+// 11 micro-ops to be issued down one I pipe, six S pipes and four V pipes.
+//
+//===----------------------------------------------------------------------===//
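+
+// For illustration only (a hedged sketch, not an actual definition from this
+// file): following the convention above, the example name would expand to a
+// SchedWriteRes over one I unit, six S-unit slots, and four V-unit slots,
+// roughly along these lines:
+//
+//   def A57Write_6cyc_1I_6S_4V : SchedWriteRes<[A57UnitI,
+//                                               A57UnitS, A57UnitS, A57UnitS,
+//                                               A57UnitS, A57UnitS, A57UnitS,
+//                                               A57UnitV, A57UnitV, A57UnitV,
+//                                               A57UnitV]> {
+//     let Latency = 6;
+//     let NumMicroOps = 11;
+//   }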
+
+//===----------------------------------------------------------------------===//
+// Define Generic 1 micro-op types
+
+def A57Write_5cyc_1L : SchedWriteRes<[A57UnitL]> { let Latency = 5; }
+def A57Write_5cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 5; }
+def A57Write_5cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
+def A57Write_5cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 5; }
+def A57Write_10cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 10; }
+def A57Write_17cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 17;
+ let ResourceCycles = [17]; }
+def A57Write_19cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 19;
+ let ResourceCycles = [19]; }
+def A57Write_1cyc_1B : SchedWriteRes<[A57UnitB]> { let Latency = 1; }
+def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1; }
+def A57Write_1cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 1; }
+def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2; }
+def A57Write_32cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 32;
+ let ResourceCycles = [32]; }
+def A57Write_35cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 35;
+ let ResourceCycles = [35]; }
+def A57Write_3cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 3; }
+def A57Write_3cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 3; }
+def A57Write_3cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 3; }
+def A57Write_3cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 3; }
+def A57Write_4cyc_1L : SchedWriteRes<[A57UnitL]> { let Latency = 4; }
+def A57Write_4cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
+def A57Write_9cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
+def A57Write_6cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 6; }
+def A57Write_6cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 6; }
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 2 micro-op types
+
+def A57Write_64cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> {
+ let Latency = 64;
+ let NumMicroOps = 2;
+ let ResourceCycles = [32, 32];
+}
+def A57Write_6cyc_1I_1L : SchedWriteRes<[A57UnitI,
+ A57UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_7cyc_1V_1X : SchedWriteRes<[A57UnitV,
+ A57UnitX]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+def A57Write_8cyc_1L_1V : SchedWriteRes<[A57UnitL,
+ A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+def A57Write_9cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+def A57Write_8cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+def A57Write_6cyc_2L : SchedWriteRes<[A57UnitL, A57UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_6cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_6cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_5cyc_1I_1L : SchedWriteRes<[A57UnitI,
+ A57UnitL]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def A57Write_5cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def A57Write_5cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def A57Write_10cyc_1L_1V : SchedWriteRes<[A57UnitL,
+ A57UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+def A57Write_10cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+def A57Write_1cyc_1B_1I : SchedWriteRes<[A57UnitB,
+ A57UnitI]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+def A57Write_1cyc_1I_1S : SchedWriteRes<[A57UnitI,
+ A57UnitS]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+def A57Write_2cyc_1B_1I : SchedWriteRes<[A57UnitB,
+ A57UnitI]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def A57Write_2cyc_2S : SchedWriteRes<[A57UnitS, A57UnitS]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def A57Write_2cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def A57Write_34cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> {
+ let Latency = 34;
+ let NumMicroOps = 2;
+ let ResourceCycles = [17, 17];
+}
+def A57Write_3cyc_1I_1M : SchedWriteRes<[A57UnitI,
+ A57UnitM]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def A57Write_3cyc_1I_1S : SchedWriteRes<[A57UnitI,
+ A57UnitS]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def A57Write_3cyc_1S_1V : SchedWriteRes<[A57UnitS,
+ A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def A57Write_3cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def A57Write_4cyc_1I_1L : SchedWriteRes<[A57UnitI,
+ A57UnitL]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def A57Write_4cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 3 micro-op types
+
+def A57Write_10cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+}
+def A57Write_2cyc_1I_2S : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+def A57Write_3cyc_1I_1S_1V : SchedWriteRes<[A57UnitI,
+ A57UnitS,
+ A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def A57Write_3cyc_1M_2S : SchedWriteRes<[A57UnitM,
+ A57UnitS, A57UnitS]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def A57Write_3cyc_3S : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def A57Write_3cyc_2S_1V : SchedWriteRes<[A57UnitS, A57UnitS,
+ A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def A57Write_5cyc_1I_2L : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+def A57Write_6cyc_1I_2L : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+def A57Write_6cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+def A57Write_7cyc_3L : SchedWriteRes<[A57UnitL, A57UnitL, A57UnitL]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+def A57Write_8cyc_1I_1L_1V : SchedWriteRes<[A57UnitI,
+ A57UnitL,
+ A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+def A57Write_8cyc_1L_2V : SchedWriteRes<[A57UnitL,
+ A57UnitV, A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+def A57Write_8cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+def A57Write_9cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 3;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 4 micro-op types
+
+def A57Write_2cyc_2I_2S : SchedWriteRes<[A57UnitI, A57UnitI,
+ A57UnitS, A57UnitS]> {
+ let Latency = 2;
+ let NumMicroOps = 4;
+}
+def A57Write_3cyc_2I_2S : SchedWriteRes<[A57UnitI, A57UnitI,
+ A57UnitS, A57UnitS]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+def A57Write_3cyc_1I_3S : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS, A57UnitS]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+def A57Write_3cyc_1I_2S_1V : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS,
+ A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+def A57Write_4cyc_4S : SchedWriteRes<[A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+def A57Write_7cyc_1I_3L : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL, A57UnitL]> {
+ let Latency = 7;
+ let NumMicroOps = 4;
+}
+def A57Write_5cyc_2I_2L : SchedWriteRes<[A57UnitI, A57UnitI,
+ A57UnitL, A57UnitL]> {
+ let Latency = 5;
+ let NumMicroOps = 4;
+}
+def A57Write_8cyc_1I_1L_2V : SchedWriteRes<[A57UnitI,
+ A57UnitL,
+ A57UnitV, A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+def A57Write_8cyc_4L : SchedWriteRes<[A57UnitL, A57UnitL,
+ A57UnitL, A57UnitL]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+def A57Write_9cyc_2L_2V : SchedWriteRes<[A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 4;
+}
+def A57Write_9cyc_1L_3V : SchedWriteRes<[A57UnitL,
+ A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 4;
+}
+def A57Write_12cyc_4V : SchedWriteRes<[A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 12;
+ let NumMicroOps = 4;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 5 micro-op types
+
+def A57Write_3cyc_3S_2V : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 5;
+}
+def A57Write_8cyc_1I_4L : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL,
+ A57UnitL, A57UnitL]> {
+ let Latency = 8;
+ let NumMicroOps = 5;
+}
+def A57Write_4cyc_1I_4S : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS]> {
+ let Latency = 4;
+ let NumMicroOps = 5;
+}
+def A57Write_9cyc_1I_2L_2V : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 5;
+}
+def A57Write_9cyc_1I_1L_3V : SchedWriteRes<[A57UnitI,
+ A57UnitL,
+ A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 5;
+}
+def A57Write_9cyc_2L_3V : SchedWriteRes<[A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 5;
+}
+def A57Write_9cyc_5V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 5;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 6 micro-op types
+
+def A57Write_3cyc_1I_3S_2V : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 6;
+}
+def A57Write_4cyc_2I_4S : SchedWriteRes<[A57UnitI, A57UnitI,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS]> {
+ let Latency = 4;
+ let NumMicroOps = 6;
+}
+def A57Write_4cyc_4S_2V : SchedWriteRes<[A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 6;
+}
+def A57Write_6cyc_6S : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS, A57UnitS]> {
+ let Latency = 6;
+ let NumMicroOps = 6;
+}
+def A57Write_9cyc_1I_2L_3V : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 6;
+}
+def A57Write_9cyc_1I_1L_4V : SchedWriteRes<[A57UnitI,
+ A57UnitL,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 6;
+}
+def A57Write_9cyc_2L_4V : SchedWriteRes<[A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 6;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 7 micro-op types
+
+def A57Write_10cyc_3L_4V : SchedWriteRes<[A57UnitL, A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 7;
+}
+def A57Write_4cyc_1I_4S_2V : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 7;
+}
+def A57Write_6cyc_1I_6S : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS, A57UnitS]> {
+ let Latency = 6;
+ let NumMicroOps = 7;
+}
+def A57Write_9cyc_1I_2L_4V : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 7;
+}
+def A57Write_12cyc_7V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 12;
+ let NumMicroOps = 7;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 8 micro-op types
+
+def A57Write_10cyc_1I_3L_4V : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 8;
+}
+def A57Write_11cyc_4L_4V : SchedWriteRes<[A57UnitL, A57UnitL,
+ A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 11;
+ let NumMicroOps = 8;
+}
+def A57Write_8cyc_8S : SchedWriteRes<[A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS]> {
+ let Latency = 8;
+ let NumMicroOps = 8;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 9 micro-op types
+
+def A57Write_8cyc_1I_8S : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS]> {
+ let Latency = 8;
+ let NumMicroOps = 9;
+}
+def A57Write_11cyc_1I_4L_4V : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL,
+ A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 11;
+ let NumMicroOps = 9;
+}
+def A57Write_15cyc_9V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 15;
+ let NumMicroOps = 9;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 10 micro-op types
+
+def A57Write_6cyc_6S_4V : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 10;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 11 micro-op types
+
+def A57Write_6cyc_1I_6S_4V : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 11;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 12 micro-op types
+
+def A57Write_8cyc_8S_4V : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS, A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 12;
+}
+
+//===----------------------------------------------------------------------===//
+// Define Generic 13 micro-op types
+
+def A57Write_8cyc_1I_8S_4V : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 13;
+}
+
diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedCyclone.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedCyclone.td
new file mode 100644
index 000000000..7a474ba8e
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedCyclone.td
@@ -0,0 +1,871 @@
+//=- AArch64SchedCyclone.td - Cyclone Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for AArch64 Cyclone to support
+// instruction scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+def CycloneModel : SchedMachineModel {
+ let IssueWidth = 6; // 6 micro-ops are dispatched per cycle.
+ let MicroOpBufferSize = 192; // Based on the reorder buffer.
+ let LoadLatency = 4; // Optimistic load latency.
+ let MispredictPenalty = 16; // 14-19 cycles are typical.
+ let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = [HasSVE];
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on Cyclone.
+
+// 4 integer pipes
+def CyUnitI : ProcResource<4> {
+ let BufferSize = 48;
+}
+
+// 2 branch units: I[0..1]
+def CyUnitB : ProcResource<2> {
+ let Super = CyUnitI;
+ let BufferSize = 24;
+}
+
+// 1 indirect-branch unit: I[0]
+def CyUnitBR : ProcResource<1> {
+ let Super = CyUnitB;
+}
+
+// 2 shifter pipes: I[2..3]
+// When an instruction consumes a CyUnitIS, it also consumes a CyUnitI
+def CyUnitIS : ProcResource<2> {
+ let Super = CyUnitI;
+ let BufferSize = 24;
+}
+
+// 1 mul pipe: I[0]
+def CyUnitIM : ProcResource<1> {
+ let Super = CyUnitBR;
+ let BufferSize = 32;
+}
+
+// 1 div pipe: I[1]
+def CyUnitID : ProcResource<1> {
+ let Super = CyUnitB;
+ let BufferSize = 16;
+}
+
+// 1 integer division unit. This is driven by the ID pipe, but only
+// consumes the pipe for one cycle at issue and another cycle at writeback.
+def CyUnitIntDiv : ProcResource<1>;
+
+// 2 ld/st pipes.
+def CyUnitLS : ProcResource<2> {
+ let BufferSize = 28;
+}
+
+// 3 fp/vector pipes.
+def CyUnitV : ProcResource<3> {
+ let BufferSize = 48;
+}
+// 2 fp/vector arithmetic and multiply pipes: V[0-1]
+def CyUnitVM : ProcResource<2> {
+ let Super = CyUnitV;
+ let BufferSize = 32;
+}
+// 1 fp/vector division/sqrt pipe: V[2]
+def CyUnitVD : ProcResource<1> {
+ let Super = CyUnitV;
+ let BufferSize = 16;
+}
+// 1 fp compare pipe: V[0]
+def CyUnitVC : ProcResource<1> {
+ let Super = CyUnitVM;
+ let BufferSize = 16;
+}
+
+// 2 fp division/square-root units. These are driven by the VD pipe,
+// but only consume the pipe for one cycle at issue and a cycle at writeback.
+def CyUnitFloatDiv : ProcResource<2>;
+
+//===----------------------------------------------------------------------===//
+// Define scheduler read/write resources and latency on Cyclone.
+// This mirrors sections 7.7-7.9 of the Tuning Guide v1.0.1.
+
+let SchedModel = CycloneModel in {
+
+//---
+// 7.8.1. Moves
+//---
+
+// A single nop micro-op (uX).
+def WriteX : SchedWriteRes<[]> { let Latency = 0; }
+
+// Move zero is a register rename (to machine register zero).
+// The move is replaced by a single nop micro-op.
+// MOVZ Rd, #0
+// AND Rd, Rzr, #imm
+def WriteZPred : SchedPredicate<[{TII->isGPRZero(*MI)}]>;
+def WriteImmZ : SchedWriteVariant<[
+ SchedVar<WriteZPred, [WriteX]>,
+ SchedVar<NoSchedPred, [WriteImm]>]>;
+def : InstRW<[WriteImmZ], (instrs MOVZWi,MOVZXi,ANDWri,ANDXri)>;
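+
+// As an illustrative reading of the variant above (a sketch): a MOVZ W0, #0
+// (MOVZWi) satisfies WriteZPred and is costed as the WriteX nop micro-op
+// (latency 0), while MOVZ W0, #1 is not a zeroing idiom and falls through to
+// WriteImm, occupying a CyUnitI pipe for one cycle.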
+
+// Move GPR is a register rename and single nop micro-op.
+// ORR Xd, XZR, Xm
+// ADD Xd, Xn, #0
+def WriteIMovPred : SchedPredicate<[{TII->isGPRCopy(*MI)}]>;
+def WriteVMovPred : SchedPredicate<[{TII->isFPRCopy(*MI)}]>;
+def WriteMov : SchedWriteVariant<[
+ SchedVar<WriteIMovPred, [WriteX]>,
+ SchedVar<WriteVMovPred, [WriteX]>,
+ SchedVar<NoSchedPred, [WriteI]>]>;
+def : InstRW<[WriteMov], (instrs COPY,ORRXrr,ADDXrr)>;
+
+// Move non-zero immediate is an integer ALU op.
+// MOVN,MOVZ,MOVK
+def : WriteRes<WriteImm, [CyUnitI]>;
+
+//---
+// 7.8.2-7.8.5. Arithmetic and Logical, Comparison, Conditional,
+// Shifts and Bitfield Operations
+//---
+
+// ADR,ADRP
+// ADD(S)ri,SUB(S)ri,AND(S)ri,EORri,ORRri
+// ADD(S)rr,SUB(S)rr,AND(S)rr,BIC(S)rr,EONrr,EORrr,ORNrr,ORRrr
+// ADC(S),SBC(S)
+// Aliases: CMN, CMP, TST
+//
+// Conditional operations.
+// CCMNi,CCMPi,CCMNr,CCMPr,
+// CSEL,CSINC,CSINV,CSNEG
+//
+// Bit counting and reversal operations.
+// CLS,CLZ,RBIT,REV,REV16,REV32
+def : WriteRes<WriteI, [CyUnitI]>;
+
+// ADD with shifted register operand is a single micro-op that
+// consumes a shift pipeline for two cycles.
+// ADD(S)rs,SUB(S)rs,AND(S)rs,BIC(S)rs,EONrs,EORrs,ORNrs,ORRrs
+// EXAMPLE: ADDrs Xn, Xm LSL #imm
+def : WriteRes<WriteISReg, [CyUnitIS]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+}
+
+// ADD with extended register operand is the same as shifted reg operand.
+// ADD(S)re,SUB(S)re
+// EXAMPLE: ADDXre Xn, Xm, UXTB #1
+def : WriteRes<WriteIEReg, [CyUnitIS]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+}
+
+// Variable shift and bitfield operations.
+// ASRV,LSLV,LSRV,RORV,BFM,SBFM,UBFM
+def : WriteRes<WriteIS, [CyUnitIS]>;
+
+// EXTR Shifts a pair of registers and requires two micro-ops.
+// The second micro-op is delayed, as modeled by ReadExtrHi.
+// EXTR Xn, Xm, #imm
+def : WriteRes<WriteExtr, [CyUnitIS, CyUnitIS]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+// EXTR's first register read is delayed by one cycle, effectively
+// shortening its writer's latency.
+// EXTR Xn, Xm, #imm
+def : ReadAdvance<ReadExtrHi, 1>;
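+
+// Worked example (a sketch): if the operand read through ReadExtrHi is
+// produced by an ADD with a shifted register operand (WriteISReg, latency 2),
+// EXTR does not need that value until one cycle after issue, so the
+// producer's effective latency on that edge is 2 - 1 = 1 cycle.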
+
+//---
+// 7.8.6. Multiplies
+//---
+
+// MUL/MNEG are aliases for MADD/MSUB.
+// MADDW,MSUBW,SMADDL,SMSUBL,UMADDL,UMSUBL
+def : WriteRes<WriteIM32, [CyUnitIM]> {
+ let Latency = 4;
+}
+// MADDX,MSUBX,SMULH,UMULH
+def : WriteRes<WriteIM64, [CyUnitIM]> {
+ let Latency = 5;
+}
+
+//---
+// 7.8.7. Divide
+//---
+
+// 32-bit divide takes 7-13 cycles. 10 cycles covers a 20-bit quotient.
+// The ID pipe is consumed for 2 cycles: issue and writeback.
+// SDIVW,UDIVW
+def : WriteRes<WriteID32, [CyUnitID, CyUnitIntDiv]> {
+ let Latency = 10;
+ let ResourceCycles = [2, 10];
+}
+// 64-bit divide takes 7-21 cycles. 13 cycles covers a 32-bit quotient.
+// The ID pipe is consumed for 2 cycles: issue and writeback.
+// SDIVX,UDIVX
+def : WriteRes<WriteID64, [CyUnitID, CyUnitIntDiv]> {
+ let Latency = 13;
+ let ResourceCycles = [2, 13];
+}
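+
+// A rough reading of the ResourceCycles above (a sketch, not vendor data):
+// each divide occupies the ID pipe for only 2 of its cycles (issue and
+// writeback) but keeps the non-pipelined CyUnitIntDiv busy for the full 10
+// or 13 cycles, so independent back-to-back divides are throughput-limited
+// by the divider itself rather than by the pipe.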
+
+//---
+// 7.8.8,7.8.10. Load/Store, single element
+//---
+
+// Integer loads take 4 cycles and use one LS unit for one cycle.
+def : WriteRes<WriteLD, [CyUnitLS]> {
+ let Latency = 4;
+}
+
+// Store-load forwarding is 4 cycles.
+//
+// Note: The store-exclusive sequence incorporates this
+// latency. However, general heuristics should not model the
+// dependence between a store and subsequent may-alias load because
+// hardware speculation works.
+def : WriteRes<WriteST, [CyUnitLS]> {
+ let Latency = 4;
+}
+
+// Load from base address plus an optionally scaled register offset.
+// Rt latency is latency WriteIS + WriteLD.
+// EXAMPLE: LDR Xn, Xm [, lsl 3]
+def CyWriteLDIdx : SchedWriteVariant<[
+ SchedVar<ScaledIdxPred, [WriteIS, WriteLD]>, // Load from scaled register.
+ SchedVar<NoSchedPred, [WriteLD]>]>; // Load from register offset.
+def : SchedAlias<WriteLDIdx, CyWriteLDIdx>; // Map AArch64->Cyclone type.
+
+// EXAMPLE: STR Xn, Xm [, lsl 3]
+def CyWriteSTIdx : SchedWriteVariant<[
+ SchedVar<ScaledIdxPred, [WriteIS, WriteST]>, // Store to scaled register.
+ SchedVar<NoSchedPred, [WriteST]>]>; // Store to register offset.
+def : SchedAlias<WriteSTIdx, CyWriteSTIdx>; // Map AArch64->Cyclone type.
+
+// Read the (unshifted) base register Xn in the second micro-op one cycle later.
+// EXAMPLE: LDR Xn, Xm [, lsl 3]
+def ReadBaseRS : SchedReadAdvance<1>;
+def CyReadAdrBase : SchedReadVariant<[
+ SchedVar<ScaledIdxPred, [ReadBaseRS]>, // Read base reg after shifting offset.
+ SchedVar<NoSchedPred, [ReadDefault]>]>; // Read base reg with no shift.
+def : SchedAlias<ReadAdrBase, CyReadAdrBase>; // Map AArch64->Cyclone type.
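+
+// A sketch of how the variants above play out (assuming the default 1-cycle
+// latency of WriteIS): LDR Xt, [Xn, Xm, lsl #3] matches ScaledIdxPred, so Xt
+// is ready after WriteIS + WriteLD = 1 + 4 = 5 cycles and the base Xn is
+// read one cycle late via ReadBaseRS; the unscaled form LDR Xt, [Xn, Xm]
+// takes the plain WriteLD path at 4 cycles.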
+
+//---
+// 7.8.9,7.8.11. Load/Store, paired
+//---
+
+// Address pre/post increment is a simple ALU op with one cycle latency.
+def : WriteRes<WriteAdr, [CyUnitI]>;
+
+// LDP high register write is fused with the load, but a nop micro-op remains.
+def : WriteRes<WriteLDHi, []> {
+ let Latency = 4;
+}
+
+// STP is a vector op and store, except for QQ, which is just two stores.
+def : SchedAlias<WriteSTP, WriteVSTShuffle>;
+def : InstRW<[WriteST, WriteST], (instrs STPQi)>;
+
+//---
+// 7.8.13. Branches
+//---
+
+// Branches take a single micro-op.
+// The misprediction penalty is defined as a SchedMachineModel property.
+def : WriteRes<WriteBr, [CyUnitB]> {let Latency = 0;}
+def : WriteRes<WriteBrReg, [CyUnitBR]> {let Latency = 0;}
+
+//---
+// 7.8.14. Never-issued Instructions, Barrier and Hint Operations
+//---
+
+// NOP,SEV,SEVL,WFE,WFI,YIELD
+def : WriteRes<WriteHint, []> {let Latency = 0;}
+// ISB
+def : InstRW<[WriteI], (instrs ISB)>;
+// CLREX,DMB,DSB
+def : WriteRes<WriteBarrier, [CyUnitLS]>;
+
+// System instructions get an invalid latency because the latency of
+// other operations across them is meaningless.
+def : WriteRes<WriteSys, []> {let Latency = -1;}
+
+//===----------------------------------------------------------------------===//
+// 7.9 Vector Unit Instructions
+
+// Simple vector operations take 2 cycles.
+def : WriteRes<WriteV, [CyUnitV]> {let Latency = 2;}
+
+// Define some longer latency vector op types for Cyclone.
+def CyWriteV3 : SchedWriteRes<[CyUnitV]> {let Latency = 3;}
+def CyWriteV4 : SchedWriteRes<[CyUnitV]> {let Latency = 4;}
+def CyWriteV5 : SchedWriteRes<[CyUnitV]> {let Latency = 5;}
+def CyWriteV6 : SchedWriteRes<[CyUnitV]> {let Latency = 6;}
+
+// Simple floating-point operations take 2 cycles.
+def : WriteRes<WriteF, [CyUnitV]> {let Latency = 2;}
+
+//---
+// 7.9.1 Vector Moves
+//---
+
+// TODO: Add Cyclone-specific zero-cycle zeros. LLVM currently
+// generates expensive int-float conversion instead:
+// FMOVDi Dd, #0.0
+// FMOVv2f64ns Vd.2d, #0.0
+
+// FMOVSi,FMOVDi
+def : WriteRes<WriteFImm, [CyUnitV]> {let Latency = 2;}
+
+// MOVI,MVNI are WriteV
+// FMOVv2f32ns,FMOVv2f64ns,FMOVv4f32ns are WriteV
+
+// Move FPR is a register rename and single nop micro-op.
+// ORR.16b Vd,Vn,Vn
+// COPY is handled above in the WriteMov Variant.
+def WriteVMov : SchedWriteVariant<[
+ SchedVar<WriteVMovPred, [WriteX]>,
+ SchedVar<NoSchedPred, [WriteV]>]>;
+def : InstRW<[WriteVMov], (instrs ORRv16i8)>;
+
+// FMOVSr,FMOVDr are WriteF.
+
+// MOV V,V is a WriteV.
+
+// CPY D,V[x] is a WriteV
+
+// INS V[x],V[y] is a WriteV.
+
+// FMOVWSr,FMOVXDr,FMOVXDHighr
+def : WriteRes<WriteFCopy, [CyUnitLS]> {
+ let Latency = 5;
+}
+
+// FMOVSWr,FMOVDXr
+def : InstRW<[WriteLD], (instrs FMOVSWr,FMOVDXr,FMOVDXHighr)>;
+
+// INS V[x],R
+def CyWriteCopyToFPR : WriteSequence<[WriteVLD, WriteV]>;
+def : InstRW<[CyWriteCopyToFPR], (instregex "INSv")>;
+
+// SMOV,UMOV R,V[x]
+def CyWriteCopyToGPR : WriteSequence<[WriteLD, WriteI]>;
+def : InstRW<[CyWriteCopyToGPR], (instregex "SMOVv","UMOVv")>;
+
+// DUP V,R
+def : InstRW<[CyWriteCopyToFPR], (instregex "DUPv")>;
+
+// DUP V,V[x] is a WriteV.
+
+//---
+// 7.9.2 Integer Arithmetic, Logical, and Comparisons
+//---
+
+// BIC,ORR V,#imm are WriteV
+
+def : InstRW<[CyWriteV3], (instregex "ABSv")>;
+
+// MVN,NEG,NOT are WriteV
+
+def : InstRW<[CyWriteV3], (instregex "SQABSv","SQNEGv")>;
+
+// ADDP is a WriteV.
+def CyWriteVADDLP : SchedWriteRes<[CyUnitV]> {let Latency = 2;}
+def : InstRW<[CyWriteVADDLP], (instregex "SADDLPv","UADDLPv")>;
+
+def : InstRW<[CyWriteV3],
+ (instregex "ADDVv","SMAXVv","UMAXVv","SMINVv","UMINVv")>;
+
+def : InstRW<[CyWriteV3], (instregex "SADDLV","UADDLV")>;
+
+// ADD,SUB are WriteV
+
+// Forward declare.
+def CyWriteVABD : SchedWriteRes<[CyUnitV]> {let Latency = 3;}
+
+// Add/Diff and accumulate uses the vector multiply unit.
+def CyWriteVAccum : SchedWriteRes<[CyUnitVM]> {let Latency = 3;}
+def CyReadVAccum : SchedReadAdvance<1,
+ [CyWriteVAccum, CyWriteVADDLP, CyWriteVABD]>;
+
+def : InstRW<[CyWriteVAccum, CyReadVAccum],
+ (instregex "SADALP","UADALP")>;
+
+def : InstRW<[CyWriteVAccum, CyReadVAccum],
+ (instregex "SABAv","UABAv","SABALv","UABALv")>;
+
+def : InstRW<[CyWriteV3], (instregex "SQADDv","SQSUBv","UQADDv","UQSUBv")>;
+
+def : InstRW<[CyWriteV3], (instregex "SUQADDv","USQADDv")>;
+
+def : InstRW<[CyWriteV4], (instregex "ADDHNv","RADDHNv", "RSUBHNv", "SUBHNv")>;
+
+// WriteV includes:
+// AND,BIC,CMTST,EOR,ORN,ORR
+// ADDP
+// SHADD,SHSUB,SRHADD,UHADD,UHSUB,URHADD
+// SADDL,SSUBL,UADDL,USUBL
+// SADDW,SSUBW,UADDW,USUBW
+
+def : InstRW<[CyWriteV3], (instregex "CMEQv","CMGEv","CMGTv",
+ "CMLEv","CMLTv",
+ "CMHIv","CMHSv")>;
+
+def : InstRW<[CyWriteV3], (instregex "SMAXv","SMINv","UMAXv","UMINv",
+ "SMAXPv","SMINPv","UMAXPv","UMINPv")>;
+
+def : InstRW<[CyWriteVABD], (instregex "SABDv","UABDv",
+ "SABDLv","UABDLv")>;
+
+//---
+// 7.9.3 Floating Point Arithmetic and Comparisons
+//---
+
+// FABS,FNEG are WriteF
+
+def : InstRW<[CyWriteV4], (instrs FADDPv2i32p)>;
+def : InstRW<[CyWriteV5], (instrs FADDPv2i64p)>;
+
+def : InstRW<[CyWriteV3], (instregex "FMAXPv2i","FMAXNMPv2i",
+ "FMINPv2i","FMINNMPv2i")>;
+
+def : InstRW<[CyWriteV4], (instregex "FMAXVv","FMAXNMVv","FMINVv","FMINNMVv")>;
+
+def : InstRW<[CyWriteV4], (instrs FADDSrr,FADDv2f32,FADDv4f32,
+ FSUBSrr,FSUBv2f32,FSUBv4f32,
+ FADDPv2f32,FADDPv4f32,
+ FABD32,FABDv2f32,FABDv4f32)>;
+def : InstRW<[CyWriteV5], (instrs FADDDrr,FADDv2f64,
+ FSUBDrr,FSUBv2f64,
+ FADDPv2f64,
+ FABD64,FABDv2f64)>;
+
+def : InstRW<[CyWriteV3], (instregex "FCMEQ","FCMGT","FCMLE","FCMLT")>;
+
+def : InstRW<[CyWriteV3], (instregex "FACGE","FACGT",
+ "FMAXS","FMAXD","FMAXv",
+ "FMINS","FMIND","FMINv",
+ "FMAXNMS","FMAXNMD","FMAXNMv",
+ "FMINNMS","FMINNMD","FMINNMv",
+ "FMAXPv2f","FMAXPv4f",
+ "FMINPv2f","FMINPv4f",
+ "FMAXNMPv2f","FMAXNMPv4f",
+ "FMINNMPv2f","FMINNMPv4f")>;
+
+// FCMP,FCMPE,FCCMP,FCCMPE
+def : WriteRes<WriteFCmp, [CyUnitVC]> {let Latency = 4;}
+
+// FCSEL is a WriteF.
+
+//---
+// 7.9.4 Shifts and Bitfield Operations
+//---
+
+// SHL is a WriteV
+
+def CyWriteVSHR : SchedWriteRes<[CyUnitV]> {let Latency = 2;}
+def : InstRW<[CyWriteVSHR], (instregex "SSHRv","USHRv")>;
+
+def CyWriteVSRSHR : SchedWriteRes<[CyUnitV]> {let Latency = 3;}
+def : InstRW<[CyWriteVSRSHR], (instregex "SRSHRv","URSHRv")>;
+
+// Shift and accumulate uses the vector multiply unit.
+def CyWriteVShiftAcc : SchedWriteRes<[CyUnitVM]> {let Latency = 3;}
+def CyReadVShiftAcc : SchedReadAdvance<1,
+ [CyWriteVShiftAcc, CyWriteVSHR, CyWriteVSRSHR]>;
+def : InstRW<[CyWriteVShiftAcc, CyReadVShiftAcc],
+ (instregex "SRSRAv","SSRAv","URSRAv","USRAv")>;
+
+// SSHL,USHL are WriteV.
+
+def : InstRW<[CyWriteV3], (instregex "SRSHLv","URSHLv")>;
+
+// SQSHL,SQSHLU,UQSHL are WriteV.
+
+def : InstRW<[CyWriteV3], (instregex "SQRSHLv","UQRSHLv")>;
+
+// WriteV includes:
+// SHLL,SSHLL,USHLL
+// SLI,SRI
+// BIF,BIT,BSL
+// EXT
+// CLS,CLZ,CNT,RBIT,REV16,REV32,REV64,XTN
+// XTN2
+
+def : InstRW<[CyWriteV4],
+ (instregex "RSHRNv","SHRNv",
+ "SQRSHRNv","SQRSHRUNv","SQSHRNv","SQSHRUNv",
+ "UQRSHRNv","UQSHRNv","SQXTNv","SQXTUNv","UQXTNv")>;
+
+//---
+// 7.9.5 Multiplication
+//---
+
+def CyWriteVMul : SchedWriteRes<[CyUnitVM]> { let Latency = 4;}
+def : InstRW<[CyWriteVMul], (instregex "MULv","SMULLv","UMULLv",
+ "SQDMULLv","SQDMULHv","SQRDMULHv")>;
+
+// FMUL,FMULX,FNMUL default to WriteFMul.
+def : WriteRes<WriteFMul, [CyUnitVM]> { let Latency = 4;}
+
+def CyWriteV64Mul : SchedWriteRes<[CyUnitVM]> { let Latency = 5;}
+def : InstRW<[CyWriteV64Mul], (instrs FMULDrr,FMULv2f64,FMULv2i64_indexed,
+ FNMULDrr,FMULX64,FMULXv2f64,FMULXv2i64_indexed)>;
+
+def CyReadVMulAcc : SchedReadAdvance<1, [CyWriteVMul, CyWriteV64Mul]>;
+def : InstRW<[CyWriteVMul, CyReadVMulAcc],
+ (instregex "MLA","MLS","SMLAL","SMLSL","UMLAL","UMLSL",
+ "SQDMLAL","SQDMLSL")>;
+
+def CyWriteSMul : SchedWriteRes<[CyUnitVM]> { let Latency = 8;}
+def CyWriteDMul : SchedWriteRes<[CyUnitVM]> { let Latency = 10;}
+def CyReadSMul : SchedReadAdvance<4, [CyWriteSMul]>;
+def CyReadDMul : SchedReadAdvance<5, [CyWriteDMul]>;
+
+def : InstRW<[CyWriteSMul, CyReadSMul],
+ (instrs FMADDSrrr,FMSUBSrrr,FNMADDSrrr,FNMSUBSrrr,
+ FMLAv2f32,FMLAv4f32,
+ FMLAv1i32_indexed,FMLAv1i64_indexed,FMLAv2i32_indexed)>;
+def : InstRW<[CyWriteDMul, CyReadDMul],
+ (instrs FMADDDrrr,FMSUBDrrr,FNMADDDrrr,FNMSUBDrrr,
+ FMLAv2f64,FMLAv2i64_indexed,
+ FMLSv2f64,FMLSv2i64_indexed)>;
+
+def CyWritePMUL : SchedWriteRes<[CyUnitVD]> { let Latency = 3; }
+def : InstRW<[CyWritePMUL], (instregex "PMULv", "PMULLv")>;
+
+//---
+// 7.9.6 Divide and Square Root
+//---
+
+// FDIV,FSQRT
+// TODO: Add 64-bit variant with 19 cycle latency.
+// TODO: Specialize FSQRT for longer latency.
+def : WriteRes<WriteFDiv, [CyUnitVD, CyUnitFloatDiv]> {
+ let Latency = 17;
+ let ResourceCycles = [2, 17];
+}
+
+def : InstRW<[CyWriteV4], (instregex "FRECPEv","FRECPXv","URECPEv","URSQRTEv")>;
+
+def WriteFRSQRTE : SchedWriteRes<[CyUnitVM]> { let Latency = 4; }
+def : InstRW<[WriteFRSQRTE], (instregex "FRSQRTEv")>;
+
+def WriteFRECPS : SchedWriteRes<[CyUnitVM]> { let Latency = 8; }
+def WriteFRSQRTS : SchedWriteRes<[CyUnitVM]> { let Latency = 10; }
+def : InstRW<[WriteFRECPS], (instregex "FRECPSv")>;
+def : InstRW<[WriteFRSQRTS], (instregex "FRSQRTSv")>;
+
+//---
+// 7.9.7 Integer-FP Conversions
+//---
+
+// FCVT lengthen f16/s32
+def : InstRW<[WriteV], (instrs FCVTSHr,FCVTDHr,FCVTDSr)>;
+
+// FCVT,FCVTN,FCVTXN
+// SCVTF,UCVTF V,V
+// FRINT(AIMNPXZ) V,V
+def : WriteRes<WriteFCvt, [CyUnitV]> {let Latency = 4;}
+
+// SCVT/UCVT S/D, Rd = VLD5+V4: 9 cycles.
+def CyWriteCvtToFPR : WriteSequence<[WriteVLD, CyWriteV4]>;
+def : InstRW<[CyWriteCopyToFPR], (instregex "FCVT[AMNPZ][SU][SU][WX][SD]r")>;
+
+// FCVT Rd, S/D = V6+LD4: 10 cycles
+def CyWriteCvtToGPR : WriteSequence<[CyWriteV6, WriteLD]>;
+def : InstRW<[CyWriteCvtToGPR], (instregex "[SU]CVTF[SU][WX][SD]r")>;
+
+// FCVTL is a WriteV
+
+//---
+// 7.9.8-7.9.10 Cryptography, Data Transposition, Table Lookup
+//---
+
+def CyWriteCrypto2 : SchedWriteRes<[CyUnitVD]> {let Latency = 2;}
+def : InstRW<[CyWriteCrypto2], (instrs AESIMCrr, AESMCrr, SHA1Hrr,
+ AESDrr, AESErr, SHA1SU1rr, SHA256SU0rr,
+ SHA1SU0rrr)>;
+
+def CyWriteCrypto3 : SchedWriteRes<[CyUnitVD]> {let Latency = 3;}
+def : InstRW<[CyWriteCrypto3], (instrs SHA256SU1rrr)>;
+
+def CyWriteCrypto6 : SchedWriteRes<[CyUnitVD]> {let Latency = 6;}
+def : InstRW<[CyWriteCrypto6], (instrs SHA1Crrr, SHA1Mrrr, SHA1Prrr,
+ SHA256Hrrr,SHA256H2rrr)>;
+
+// TRN,UZP,ZUP are WriteV.
+
+// TBL,TBX are WriteV.
+
+//---
+// 7.9.11-7.9.14 Load/Store, single element and paired
+//---
+
+// Loading into the vector unit takes 5 cycles vs 4 for integer loads.
+def : WriteRes<WriteVLD, [CyUnitLS]> {
+ let Latency = 5;
+}
+
+// Store-load forwarding is 4 cycles.
+def : WriteRes<WriteVST, [CyUnitLS]> {
+ let Latency = 4;
+}
+
+// WriteVLDPair/VSTPair sequences are expanded by the target description.
+
+//---
+// 7.9.15 Load, element operations
+//---
+
+// Only the first WriteVLD and the WriteAdr for writeback match def operands.
+// Subsequent WriteVLDs consume resources. Since all loaded values have the
+// same latency, this is acceptable.
+
+// Vd is read 5 cycles after issuing the vector load.
+def : ReadAdvance<ReadVLD, 5>;
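+
+// Worked example (a sketch): a lane load such as LD1i32 reads its input
+// vector register through ReadVLD.  With WriteVLD latency 5 and a ReadVLD
+// advance of 5, a register produced by an immediately preceding vector load
+// is ready exactly when the lane load consumes it, so no stall is modeled
+// on that edge.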
+
+def : InstRW<[WriteVLD],
+ (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLD, WriteAdr],
+ (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>;
+
+// Register writes from the load's high half are fused micro-ops.
+def : InstRW<[WriteVLD],
+ (instregex "LD1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteVLD, WriteAdr],
+ (instregex "LD1Twov(8b|4h|2s|1d)_POST")>;
+def : InstRW<[WriteVLD, WriteVLD],
+ (instregex "LD1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLD, WriteAdr, WriteVLD],
+ (instregex "LD1Twov(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[WriteVLD, WriteVLD],
+ (instregex "LD1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteVLD, WriteAdr, WriteVLD],
+ (instregex "LD1Threev(8b|4h|2s|1d)_POST")>;
+def : InstRW<[WriteVLD, WriteVLD, WriteVLD],
+ (instregex "LD1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLD, WriteAdr, WriteVLD, WriteVLD],
+ (instregex "LD1Threev(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[WriteVLD, WriteVLD],
+ (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteVLD, WriteAdr, WriteVLD],
+ (instregex "LD1Fourv(8b|4h|2s|1d)_POST")>;
+def : InstRW<[WriteVLD, WriteVLD, WriteVLD, WriteVLD],
+ (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLD, WriteAdr, WriteVLD, WriteVLD, WriteVLD],
+ (instregex "LD1Fourv(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[WriteVLDShuffle, ReadVLD],
+ (instregex "LD1i(8|16|32)$")>;
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr],
+ (instregex "LD1i(8|16|32)_POST")>;
+
+def : InstRW<[WriteVLDShuffle, ReadVLD], (instrs LD1i64)>;
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr],(instrs LD1i64_POST)>;
+
+def : InstRW<[WriteVLDShuffle],
+ (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLDShuffle, WriteAdr],
+ (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[WriteVLDShuffle, WriteV],
+ (instregex "LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV],
+ (instregex "LD2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[WriteVLDShuffle, WriteVLDShuffle],
+ (instregex "LD2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle],
+ (instregex "LD2Twov(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV],
+ (instregex "LD2i(8|16|32)$")>;
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV],
+ (instregex "LD2i(8|16|32)_POST")>;
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV],
+ (instregex "LD2i64$")>;
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV],
+ (instregex "LD2i64_POST")>;
+
+def : InstRW<[WriteVLDShuffle, WriteV],
+ (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV],
+ (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV],
+ (instregex "LD3Threev(8b|4h|2s)$")>;
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV],
+ (instregex "LD3Threev(8b|4h|2s)_POST")>;
+def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVLDShuffle],
+ (instregex "LD3Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVLDShuffle],
+ (instregex "LD3Threev(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV],
+ (instregex "LD3i(8|16|32)$")>;
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV],
+ (instregex "LD3i(8|16|32)_POST")>;
+
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV],
+ (instregex "LD3i64$")>;
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV],
+ (instregex "LD3i64_POST")>;
+
+def : InstRW<[WriteVLDShuffle, WriteV, WriteV],
+ (instregex "LD3Rv(8b|4h|2s|16b|8h|4s)$")>;
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV],
+ (instregex "LD3Rv(8b|4h|2s|16b|8h|4s)_POST")>;
+
+def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV],
+ (instrs LD3Rv1d,LD3Rv2d)>;
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV],
+ (instrs LD3Rv1d_POST,LD3Rv2d_POST)>;
+
+def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV],
+ (instregex "LD4Fourv(8b|4h|2s)$")>;
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV],
+ (instregex "LD4Fourv(8b|4h|2s)_POST")>;
+def : InstRW<[WriteVLDPairShuffle, WriteVLDPairShuffle,
+ WriteVLDPairShuffle, WriteVLDPairShuffle],
+ (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLDPairShuffle, WriteAdr, WriteVLDPairShuffle,
+ WriteVLDPairShuffle, WriteVLDPairShuffle],
+ (instregex "LD4Fourv(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV, WriteV],
+ (instregex "LD4i(8|16|32)$")>;
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV, WriteV],
+ (instregex "LD4i(8|16|32)_POST")>;
+
+
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV, WriteV],
+ (instrs LD4i64)>;
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV],
+ (instrs LD4i64_POST)>;
+
+def : InstRW<[WriteVLDShuffle, WriteV, WriteV, WriteV],
+ (instregex "LD4Rv(8b|4h|2s|16b|8h|4s)$")>;
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV, WriteV],
+ (instregex "LD4Rv(8b|4h|2s|16b|8h|4s)_POST")>;
+
+def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV],
+ (instrs LD4Rv1d,LD4Rv2d)>;
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV],
+ (instrs LD4Rv1d_POST,LD4Rv2d_POST)>;
+
+//---
+// 7.9.16 Store, element operations
+//---
+
+// Only the WriteAdr for writeback matches a def operand.
+// Subsequent WriteVSTs only consume resources.
+
+def : InstRW<[WriteVST],
+ (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, WriteVST],
+ (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[WriteVSTShuffle],
+ (instregex "ST1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, WriteVSTShuffle],
+ (instregex "ST1Twov(8b|4h|2s|1d)_POST")>;
+def : InstRW<[WriteVST, WriteVST],
+ (instregex "ST1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, WriteVST, WriteVST],
+ (instregex "ST1Twov(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[WriteVSTShuffle, WriteVST],
+ (instregex "ST1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVST],
+ (instregex "ST1Threev(8b|4h|2s|1d)_POST")>;
+def : InstRW<[WriteVST, WriteVST, WriteVST],
+ (instregex "ST1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, WriteVST, WriteVST, WriteVST],
+ (instregex "ST1Threev(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[WriteVSTShuffle, WriteVSTShuffle],
+ (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],
+ (instregex "ST1Fourv(8b|4h|2s|1d)_POST")>;
+def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST],
+ (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, WriteVST, WriteVST, WriteVST, WriteVST],
+ (instregex "ST1Fourv(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[WriteVSTShuffle], (instregex "ST1i(8|16|32)$")>;
+def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST1i(8|16|32)_POST")>;
+
+def : InstRW<[WriteVSTShuffle], (instrs ST1i64)>;
+def : InstRW<[WriteAdr, WriteVSTShuffle], (instrs ST1i64_POST)>;
+
+def : InstRW<[WriteVSTShuffle],
+ (instregex "ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[WriteAdr, WriteVSTShuffle],
+ (instregex "ST2Twov(8b|4h|2s)_POST")>;
+def : InstRW<[WriteVSTShuffle, WriteVSTShuffle],
+ (instregex "ST2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],
+ (instregex "ST2Twov(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[WriteVSTShuffle], (instregex "ST2i(8|16|32)$")>;
+def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST2i(8|16|32)_POST")>;
+def : InstRW<[WriteVSTShuffle], (instrs ST2i64)>;
+def : InstRW<[WriteAdr, WriteVSTShuffle], (instrs ST2i64_POST)>;
+
+def : InstRW<[WriteVSTShuffle, WriteVSTShuffle],
+ (instregex "ST3Threev(8b|4h|2s)$")>;
+def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],
+ (instregex "ST3Threev(8b|4h|2s)_POST")>;
+def : InstRW<[WriteVSTShuffle, WriteVSTShuffle, WriteVSTShuffle],
+ (instregex "ST3Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle, WriteVSTShuffle],
+ (instregex "ST3Threev(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[WriteVSTShuffle], (instregex "ST3i(8|16|32)$")>;
+def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST3i(8|16|32)_POST")>;
+
+def :InstRW<[WriteVSTShuffle, WriteVSTShuffle], (instrs ST3i64)>;
+def :InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], (instrs ST3i64_POST)>;
+
+def : InstRW<[WriteVSTPairShuffle, WriteVSTPairShuffle],
+ (instregex "ST4Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, WriteVSTPairShuffle, WriteVSTPairShuffle],
+ (instregex "ST4Fourv(8b|4h|2s|1d)_POST")>;
+def : InstRW<[WriteVSTPairShuffle, WriteVSTPairShuffle,
+ WriteVSTPairShuffle, WriteVSTPairShuffle],
+ (instregex "ST4Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, WriteVSTPairShuffle, WriteVSTPairShuffle,
+ WriteVSTPairShuffle, WriteVSTPairShuffle],
+ (instregex "ST4Fourv(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[WriteVSTPairShuffle], (instregex "ST4i(8|16|32)$")>;
+def : InstRW<[WriteAdr, WriteVSTPairShuffle], (instregex "ST4i(8|16|32)_POST")>;
+
+def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], (instrs ST4i64)>;
+def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],(instrs ST4i64_POST)>;
+
+// Atomic operations are not supported.
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
+//---
+// Unused SchedRead types
+//---
+
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+def : ReadAdvance<ReadIMA, 0>;
+def : ReadAdvance<ReadID, 0>;
+
+} // SchedModel = CycloneModel
diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedExynosM1.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedExynosM1.td
new file mode 100644
index 000000000..ecc68aed1
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedExynosM1.td
@@ -0,0 +1,847 @@
+//=- AArch64SchedExynosM1.td - Samsung Exynos M1 Sched Defs --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for the Samsung Exynos M1 to support
+// instruction scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// The Exynos-M1 is a traditional superscalar microprocessor with a
+// 4-wide in-order stage for decode and dispatch and a wider issue stage.
+// The execution units, loads, and stores are out of order.
+
+def ExynosM1Model : SchedMachineModel {
+ let IssueWidth = 4; // Up to 4 uops per cycle.
+ let MicroOpBufferSize = 96; // ROB size.
+ let LoopMicroOpBufferSize = 24; // Based on the instruction queue size.
+ let LoadLatency = 4; // Optimistic load cases.
+ let MispredictPenalty = 14; // Minimum branch misprediction penalty.
+ let CompleteModel = 1; // Use the default model otherwise.
+
+ list<Predicate> UnsupportedFeatures = [HasSVE];
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on the Exynos-M1,
+// which has 9 pipelines, each with its own queue with out-of-order dispatch.
+
+let SchedModel = ExynosM1Model in {
+
+def M1UnitA : ProcResource<2>; // Simple integer
+def M1UnitC : ProcResource<1>; // Simple and complex integer
+def M1UnitD : ProcResource<1>; // Integer division (inside C, serialized)
+def M1UnitB : ProcResource<2>; // Branch
+def M1UnitL : ProcResource<1>; // Load
+def M1UnitS : ProcResource<1>; // Store
+def M1PipeF0 : ProcResource<1>; // FP #0
+let Super = M1PipeF0 in {
+ def M1UnitFMAC : ProcResource<1>; // FP multiplication
+ def M1UnitNAL0 : ProcResource<1>; // Simple vector
+ def M1UnitNMISC : ProcResource<1>; // Miscellanea
+ def M1UnitFCVT : ProcResource<1>; // FP conversion
+ def M1UnitNCRYPT : ProcResource<1>; // Cryptographic
+}
+def M1PipeF1 : ProcResource<1>; // FP #1
+let Super = M1PipeF1 in {
+ def M1UnitFADD : ProcResource<1>; // Simple FP
+ def M1UnitNAL1 : ProcResource<1>; // Simple vector
+ def M1UnitFVAR : ProcResource<1>; // FP division & square root (serialized)
+ def M1UnitFST : ProcResource<1>; // FP store
+}
+
+def M1UnitALU : ProcResGroup<[M1UnitA,
+ M1UnitC]>; // All integer
+def M1UnitNALU : ProcResGroup<[M1UnitNAL0,
+ M1UnitNAL1]>; // All simple vector
+
+//===----------------------------------------------------------------------===//
+// Predicates.
+
+def M1BranchLinkFastPred : SchedPredicate<[{MI->getOpcode() == AArch64::BLR &&
+ MI->getOperand(0).getReg() != AArch64::LR}]>;
+def M1ShiftLeftFastPred : SchedPredicate<[{TII->isExynosShiftLeftFast(*MI)}]>;
+
+//===----------------------------------------------------------------------===//
+// Coarse scheduling model.
+
+def M1WriteA1 : SchedWriteRes<[M1UnitALU]> { let Latency = 1; }
+def M1WriteA2 : SchedWriteRes<[M1UnitALU]> { let Latency = 2; }
+def M1WriteAA : SchedWriteRes<[M1UnitALU]> { let Latency = 2;
+ let ResourceCycles = [2]; }
+def M1WriteAB : SchedWriteRes<[M1UnitALU,
+ M1UnitC]> { let Latency = 1;
+ let NumMicroOps = 2; }
+def M1WriteAC : SchedWriteRes<[M1UnitALU,
+ M1UnitALU,
+ M1UnitC]> { let Latency = 2;
+ let NumMicroOps = 3; }
+def M1WriteAD : SchedWriteRes<[M1UnitALU,
+ M1UnitC]> { let Latency = 2;
+ let NumMicroOps = 2; }
+def M1WriteAX : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteA1]>,
+ SchedVar<NoSchedPred, [M1WriteAA]>]>;
+def M1WriteC1 : SchedWriteRes<[M1UnitC]> { let Latency = 1; }
+def M1WriteC2 : SchedWriteRes<[M1UnitC]> { let Latency = 2; }
+
+def M1WriteB1 : SchedWriteRes<[M1UnitB]> { let Latency = 1; }
+def M1WriteBX : SchedWriteVariant<[SchedVar<M1BranchLinkFastPred, [M1WriteAB]>,
+ SchedVar<NoSchedPred, [M1WriteAC]>]>;
+
+def M1WriteL5 : SchedWriteRes<[M1UnitL]> { let Latency = 5; }
+def M1WriteL6 : SchedWriteRes<[M1UnitL]> { let Latency = 6; }
+def M1WriteLA : SchedWriteRes<[M1UnitL]> { let Latency = 6;
+ let ResourceCycles = [2]; }
+def M1WriteLB : SchedWriteRes<[M1UnitL,
+ M1UnitA]> { let Latency = 4;
+ let NumMicroOps = 2; }
+def M1WriteLC : SchedWriteRes<[M1UnitL,
+ M1UnitA]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M1WriteLD : SchedWriteRes<[M1UnitL,
+ M1UnitA]> { let Latency = 6;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2, 1]; }
+def M1WriteLH : SchedWriteRes<[]> { let Latency = 5;
+ let NumMicroOps = 0; }
+def M1WriteLX : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteL5]>,
+ SchedVar<NoSchedPred, [M1WriteLC]>]>;
+def M1WriteLY : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteL5]>,
+ SchedVar<NoSchedPred, [M1WriteLD]>]>;
+
+def M1WriteS1 : SchedWriteRes<[M1UnitS]> { let Latency = 1; }
+def M1WriteS3 : SchedWriteRes<[M1UnitS]> { let Latency = 3; }
+def M1WriteS4 : SchedWriteRes<[M1UnitS]> { let Latency = 4; }
+def M1WriteSA : SchedWriteRes<[M1UnitS,
+ M1UnitFST,
+ M1UnitS,
+ M1UnitFST]> { let Latency = 1;
+ let NumMicroOps = 2; }
+def M1WriteSB : SchedWriteRes<[M1UnitS,
+ M1UnitFST,
+ M1UnitA]> { let Latency = 3;
+ let NumMicroOps = 2; }
+def M1WriteSC : SchedWriteRes<[M1UnitS,
+ M1UnitFST,
+ M1UnitS,
+ M1UnitFST,
+ M1UnitA]> { let Latency = 3;
+ let NumMicroOps = 3; }
+def M1WriteSD : SchedWriteRes<[M1UnitS,
+ M1UnitFST,
+ M1UnitA]> { let Latency = 1;
+ let NumMicroOps = 2; }
+def M1WriteSE : SchedWriteRes<[M1UnitS,
+ M1UnitA]> { let Latency = 2;
+ let NumMicroOps = 2; }
+def M1WriteSX : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteS1]>,
+ SchedVar<NoSchedPred, [M1WriteSE]>]>;
+def M1WriteSY : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteS1]>,
+ SchedVar<NoSchedPred, [M1WriteSB]>]>;
+
+def M1ReadAdrBase : SchedReadVariant<[SchedVar<ScaledIdxPred, [ReadDefault]>,
+ SchedVar<NoSchedPred, [ReadDefault]>]>;
+
+// Branch instructions.
+def : WriteRes<WriteBr, []> { let Latency = 0; }
+def : WriteRes<WriteBrReg, [M1UnitC]> { let Latency = 1; }
+
+// Arithmetic and logical integer instructions.
+def : WriteRes<WriteI, [M1UnitALU]> { let Latency = 1; }
+def : WriteRes<WriteISReg, [M1UnitALU]> { let Latency = 1; }
+def : WriteRes<WriteIEReg, [M1UnitALU]> { let Latency = 1; }
+def : WriteRes<WriteIS, [M1UnitALU]> { let Latency = 1; }
+
+// Move instructions.
+def : WriteRes<WriteImm, [M1UnitALU]> { let Latency = 1; }
+
+// Divide and multiply instructions.
+def : WriteRes<WriteID32, [M1UnitC,
+ M1UnitD]> { let Latency = 13;
+ let ResourceCycles = [1, 13]; }
+def : WriteRes<WriteID64, [M1UnitC,
+ M1UnitD]> { let Latency = 21;
+ let ResourceCycles = [1, 21]; }
+// TODO: Long multiplication takes 5 cycles and also uses the ALU.
+def : WriteRes<WriteIM32, [M1UnitC]> { let Latency = 3; }
+def : WriteRes<WriteIM64, [M1UnitC]> { let Latency = 4;
+ let ResourceCycles = [2]; }
+
+// Miscellaneous instructions.
+def : WriteRes<WriteExtr, [M1UnitALU,
+ M1UnitALU]> { let Latency = 2;
+ let NumMicroOps = 2; }
+
+// Addressing modes.
+def : WriteRes<WriteAdr, []> { let Latency = 1;
+ let NumMicroOps = 0; }
+def : SchedAlias<ReadAdrBase, M1ReadAdrBase>;
+
+// Load instructions.
+def : WriteRes<WriteLD, [M1UnitL]> { let Latency = 4; }
+def : WriteRes<WriteLDHi, []> { let Latency = 4;
+ let NumMicroOps = 0; }
+def : SchedAlias<WriteLDIdx, M1WriteLX>;
+
+// Store instructions.
+def : WriteRes<WriteST, [M1UnitS]> { let Latency = 1; }
+def : WriteRes<WriteSTP, [M1UnitS]> { let Latency = 1; }
+def : WriteRes<WriteSTX, [M1UnitS]> { let Latency = 1; }
+def : SchedAlias<WriteSTIdx, M1WriteSX>;
+
+// FP data instructions.
+def : WriteRes<WriteF, [M1UnitFADD]> { let Latency = 3; }
+def : WriteRes<WriteFCmp, [M1UnitNMISC]> { let Latency = 4; }
+def : WriteRes<WriteFDiv, [M1UnitFVAR]> { let Latency = 15;
+ let ResourceCycles = [15]; }
+def : WriteRes<WriteFMul, [M1UnitFMAC]> { let Latency = 4; }
+
+// FP miscellaneous instructions.
+def : WriteRes<WriteFCvt, [M1UnitFCVT]> { let Latency = 3; }
+def : WriteRes<WriteFImm, [M1UnitNALU]> { let Latency = 1; }
+def : WriteRes<WriteFCopy, [M1UnitS]> { let Latency = 4; }
+
+// FP load instructions.
+def : WriteRes<WriteVLD, [M1UnitL]> { let Latency = 5; }
+
+// FP store instructions.
+def : WriteRes<WriteVST, [M1UnitS,
+ M1UnitFST]> { let Latency = 1;
+ let NumMicroOps = 1; }
+
+// ASIMD FP instructions.
+def : WriteRes<WriteV, [M1UnitFADD]> { let Latency = 3; }
+
+// Other miscellaneous instructions.
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+def : WriteRes<WriteSys, []> { let Latency = 1; }
+
+//===----------------------------------------------------------------------===//
+// Fast forwarding.
+
+// TODO: Add FP register forwarding rules.
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+// TODO: The forwarding for WriteIM32 actually saves 2 cycles; see the worked
+// note after this block.
+def : ReadAdvance<ReadIMA, 3, [WriteIM32, WriteIM64]>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD, 0>;
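+
+// Worked note for the ReadIMA advance above (a sketch, not vendor data): a
+// multiply-accumulate whose accumulator is produced by a preceding WriteIM32
+// multiply (latency 3) reads it through ReadIMA with an advance of 3, giving
+// an effective accumulator latency of 3 - 3 = 0; per the TODO above, the
+// real forwarding saves only 2 cycles, so the model is slightly optimistic.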
+
+//===----------------------------------------------------------------------===//
+// Finer scheduling model.
+
+def M1WriteNEONA : SchedWriteRes<[M1UnitNALU,
+ M1UnitNALU,
+ M1UnitFADD]> { let Latency = 9;
+ let NumMicroOps = 3; }
+def M1WriteNEONB : SchedWriteRes<[M1UnitNALU,
+ M1UnitFST]> { let Latency = 5;
+ let NumMicroOps = 2;}
+def M1WriteNEONC : SchedWriteRes<[M1UnitNALU,
+ M1UnitFST]> { let Latency = 6;
+ let NumMicroOps = 2; }
+def M1WriteNEOND : SchedWriteRes<[M1UnitNALU,
+ M1UnitFST,
+ M1UnitL]> { let Latency = 10;
+ let NumMicroOps = 3; }
+def M1WriteNEONE : SchedWriteRes<[M1UnitFCVT,
+ M1UnitFST]> { let Latency = 8;
+ let NumMicroOps = 2; }
+def M1WriteNEONF : SchedWriteRes<[M1UnitFCVT,
+ M1UnitFST,
+ M1UnitL]> { let Latency = 13;
+ let NumMicroOps = 3; }
+def M1WriteNEONG : SchedWriteRes<[M1UnitNMISC,
+ M1UnitFST]> { let Latency = 6;
+ let NumMicroOps = 2; }
+def M1WriteNEONH : SchedWriteRes<[M1UnitNALU,
+ M1UnitFST]> { let Latency = 3;
+ let NumMicroOps = 2; }
+def M1WriteNEONI : SchedWriteRes<[M1UnitFST,
+ M1UnitL]> { let Latency = 9;
+ let NumMicroOps = 2; }
+def M1WriteNEONJ : SchedWriteRes<[M1UnitNMISC,
+ M1UnitFMAC]> { let Latency = 6;
+ let NumMicroOps = 2; }
+def M1WriteNEONK : SchedWriteRes<[M1UnitNMISC,
+ M1UnitFMAC]> { let Latency = 7;
+ let NumMicroOps = 2; }
+def M1WriteNEONL : SchedWriteRes<[M1UnitNALU]> { let Latency = 2;
+ let ResourceCycles = [2]; }
+def M1WriteFADD3 : SchedWriteRes<[M1UnitFADD]> { let Latency = 3; }
+def M1WriteFCVT3 : SchedWriteRes<[M1UnitFCVT]> { let Latency = 3; }
+def M1WriteFCVT4 : SchedWriteRes<[M1UnitFCVT]> { let Latency = 4; }
+def M1WriteFMAC4 : SchedWriteRes<[M1UnitFMAC]> { let Latency = 4; }
+def M1WriteFMAC5 : SchedWriteRes<[M1UnitFMAC]> { let Latency = 5; }
+// TODO
+def M1WriteFVAR15 : SchedWriteRes<[M1UnitFVAR]> { let Latency = 15;
+ let ResourceCycles = [15]; }
+def M1WriteFVAR23 : SchedWriteRes<[M1UnitFVAR]> { let Latency = 23;
+ let ResourceCycles = [23]; }
+def M1WriteNALU1 : SchedWriteRes<[M1UnitNALU]> { let Latency = 1; }
+def M1WriteNALU2 : SchedWriteRes<[M1UnitNALU]> { let Latency = 2; }
+def M1WriteNAL11 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 1; }
+def M1WriteNAL12 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 2; }
+def M1WriteNAL13 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 3; }
+def M1WriteNCRYPT1 : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
+def M1WriteNCRYPT5 : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 5; }
+def M1WriteNMISC1 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 1; }
+def M1WriteNMISC2 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 2; }
+def M1WriteNMISC3 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 3; }
+def M1WriteNMISC4 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 4; }
+def M1WriteTB : SchedWriteRes<[M1UnitC,
+ M1UnitALU]> { let Latency = 2;
+ let NumMicroOps = 2; }
+def M1WriteVLDA : SchedWriteRes<[M1UnitL,
+ M1UnitL]> { let Latency = 6;
+ let NumMicroOps = 2; }
+def M1WriteVLDB : SchedWriteRes<[M1UnitL,
+ M1UnitL,
+ M1UnitL]> { let Latency = 7;
+ let NumMicroOps = 3; }
+def M1WriteVLDC : SchedWriteRes<[M1UnitL,
+ M1UnitL,
+ M1UnitL,
+ M1UnitL]> { let Latency = 8;
+ let NumMicroOps = 4; }
+def M1WriteVLDD : SchedWriteRes<[M1UnitL,
+ M1UnitNALU]> { let Latency = 7;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2, 1]; }
+def M1WriteVLDE : SchedWriteRes<[M1UnitL,
+ M1UnitNALU]> { let Latency = 6;
+ let NumMicroOps = 2; }
+def M1WriteVLDF : SchedWriteRes<[M1UnitL,
+ M1UnitL]> { let Latency = 10;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1]; }
+def M1WriteVLDG : SchedWriteRes<[M1UnitL,
+ M1UnitNALU,
+ M1UnitNALU]> { let Latency = 7;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1, 1]; }
+def M1WriteVLDH : SchedWriteRes<[M1UnitL,
+ M1UnitNALU,
+ M1UnitNALU]> { let Latency = 6;
+ let NumMicroOps = 3; }
+def M1WriteVLDI : SchedWriteRes<[M1UnitL,
+ M1UnitL,
+ M1UnitL]> { let Latency = 12;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 2, 2]; }
+def M1WriteVLDJ : SchedWriteRes<[M1UnitL,
+ M1UnitNALU,
+ M1UnitNALU,
+ M1UnitNALU]> { let Latency = 9;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2, 1, 1, 1]; }
+def M1WriteVLDK : SchedWriteRes<[M1UnitL,
+ M1UnitNALU,
+ M1UnitNALU,
+ M1UnitNALU,
+ M1UnitNALU]> { let Latency = 9;
+ let NumMicroOps = 5;
+ let ResourceCycles = [2, 1, 1, 1, 1]; }
+def M1WriteVLDL : SchedWriteRes<[M1UnitL,
+ M1UnitNALU,
+ M1UnitNALU,
+ M1UnitL,
+ M1UnitNALU]> { let Latency = 7;
+ let NumMicroOps = 5;
+ let ResourceCycles = [1, 1, 1, 1, 1]; }
+def M1WriteVLDM : SchedWriteRes<[M1UnitL,
+ M1UnitNALU,
+ M1UnitNALU,
+ M1UnitL,
+ M1UnitNALU,
+ M1UnitNALU]> { let Latency = 7;
+ let NumMicroOps = 6;
+ let ResourceCycles = [1, 1, 1, 1, 1, 1]; }
+def M1WriteVLDN : SchedWriteRes<[M1UnitL,
+ M1UnitL,
+ M1UnitL,
+ M1UnitL]> { let Latency = 14;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2, 1, 2, 1]; }
+def M1WriteVSTA : WriteSequence<[WriteVST], 2>;
+def M1WriteVSTB : WriteSequence<[WriteVST], 3>;
+def M1WriteVSTC : WriteSequence<[WriteVST], 4>;
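+// A WriteSequence repeats the listed SchedWrites, so M1WriteVSTA/B/C above
+// expand to two, three and four consecutive WriteVST micro-ops respectively;
+// this is the generic TableGen meaning of WriteSequence, not anything
+// specific to this model.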
+def M1WriteVSTD : SchedWriteRes<[M1UnitS,
+ M1UnitFST,
+ M1UnitFST]> { let Latency = 7;
+ let NumMicroOps = 2;
+ let ResourceCycles = [7, 1, 1]; }
+def M1WriteVSTE : SchedWriteRes<[M1UnitS,
+ M1UnitFST,
+ M1UnitS,
+ M1UnitFST,
+ M1UnitFST]> { let Latency = 8;
+ let NumMicroOps = 3;
+ let ResourceCycles = [7, 1, 1, 1, 1]; }
+def M1WriteVSTF : SchedWriteRes<[M1UnitNALU,
+ M1UnitS,
+ M1UnitFST,
+ M1UnitS,
+ M1UnitFST,
+ M1UnitFST,
+ M1UnitFST]> { let Latency = 15;
+ let NumMicroOps = 5;
+ let ResourceCycles = [1, 7, 1, 7, 1, 1, 1]; }
+def M1WriteVSTG : SchedWriteRes<[M1UnitNALU,
+ M1UnitS,
+ M1UnitFST,
+ M1UnitS,
+ M1UnitFST,
+ M1UnitS,
+ M1UnitFST,
+ M1UnitFST,
+ M1UnitFST]> { let Latency = 16;
+ let NumMicroOps = 6;
+ let ResourceCycles = [1, 7, 1, 7, 1, 1, 1, 1, 1]; }
+def M1WriteVSTH : SchedWriteRes<[M1UnitNALU,
+ M1UnitS,
+ M1UnitFST,
+ M1UnitFST,
+ M1UnitFST]> { let Latency = 14;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 7, 1, 7, 1]; }
+def M1WriteVSTI : SchedWriteRes<[M1UnitNALU,
+ M1UnitS,
+ M1UnitFST,
+ M1UnitS,
+ M1UnitFST,
+ M1UnitS,
+ M1UnitFST,
+ M1UnitS,
+ M1UnitFST,
+ M1UnitFST,
+ M1UnitFST]> { let Latency = 17;
+ let NumMicroOps = 7;
+ let ResourceCycles = [1, 7, 1, 7, 1, 1, 1, 1, 1, 1, 1]; }
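+// For the SchedWriteRes definitions above: Latency is the result latency in
+// cycles, NumMicroOps the number of micro-ops issued, and ResourceCycles the
+// number of cycles each listed unit stays busy (one entry per unit, in
+// order). This is the generic LLVM scheduling-model meaning of these fields;
+// the concrete numbers are the model's own.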
+
+// Branch instructions
+def : InstRW<[M1WriteB1], (instrs Bcc)>;
+def : InstRW<[M1WriteA1], (instrs BL)>;
+def : InstRW<[M1WriteBX], (instrs BLR)>;
+def : InstRW<[M1WriteC1], (instregex "^CBN?Z[WX]")>;
+def : InstRW<[M1WriteAD], (instregex "^TBN?Z[WX]")>;
+
+// Arithmetic and logical integer instructions.
+def : InstRW<[M1WriteA1], (instrs COPY)>;
+def : InstRW<[M1WriteAX], (instregex ".+r[sx](64)?$")>;
+
+// Divide and multiply instructions.
+
+// Miscellaneous instructions.
+
+// Load instructions.
+def : InstRW<[M1WriteLB,
+ WriteLDHi,
+ WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>;
+def : InstRW<[M1WriteLX,
+ ReadAdrBase], (instregex "^PRFMro[WX]")>;
+
+// Store instructions.
+
+// FP data instructions.
+def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)[DS]r")>;
+def : InstRW<[M1WriteFADD3], (instregex "^F(ADD|SUB)[DS]rr")>;
+def : InstRW<[M1WriteNEONG], (instregex "^FCCMPE?[DS]rr")>;
+def : InstRW<[M1WriteNMISC4], (instregex "^FCMPE?[DS]r")>;
+def : InstRW<[M1WriteFVAR15], (instrs FDIVSrr)>;
+def : InstRW<[M1WriteFVAR23], (instrs FDIVDrr)>;
+def : InstRW<[M1WriteNMISC2], (instregex "^F(MAX|MIN).+rr")>;
+def : InstRW<[M1WriteFMAC4], (instregex "^FN?MUL[DS]rr")>;
+def : InstRW<[M1WriteFMAC5], (instregex "^FN?M(ADD|SUB)[DS]rrr")>;
+def : InstRW<[M1WriteFCVT3], (instregex "^FRINT.+r")>;
+def : InstRW<[M1WriteNEONH], (instregex "^FCSEL[DS]rrr")>;
+def : InstRW<[M1WriteFVAR15], (instrs FSQRTSr)>;
+def : InstRW<[M1WriteFVAR23], (instrs FSQRTDr)>;
+
+// FP miscellaneous instructions.
+def : InstRW<[M1WriteFCVT3], (instregex "^FCVT[DS][DS]r")>;
+def : InstRW<[M1WriteNEONF], (instregex "^[FSU]CVT[AMNPZ][SU](_Int)?[SU]?[XW]?[DS]?[rds]i?")>;
+def : InstRW<[M1WriteNEONE], (instregex "^[SU]CVTF[SU]")>;
+def : InstRW<[M1WriteNALU1], (instregex "^FMOV[DS][ir]")>;
+def : InstRW<[M1WriteFCVT4], (instregex "^[FU](RECP|RSQRT)Ev1")>;
+def : InstRW<[M1WriteNMISC1], (instregex "^FRECPXv1")>;
+def : InstRW<[M1WriteFMAC5], (instregex "^F(RECP|RSQRT)S(16|32|64)")>;
+def : InstRW<[M1WriteS4], (instregex "^FMOV[WX][DS](High)?r")>;
+def : InstRW<[M1WriteNEONI], (instregex "^FMOV[DS][WX](High)?r")>;
+
+// FP load instructions.
+def : InstRW<[WriteVLD], (instregex "^LDR[DSQ]l")>;
+def : InstRW<[WriteVLD], (instregex "^LDUR[BDHSQ]i")>;
+def : InstRW<[WriteVLD,
+ WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>;
+def : InstRW<[WriteVLD], (instregex "^LDR[BDHSQ]ui")>;
+def : InstRW<[M1WriteLY,
+ ReadAdrBase], (instregex "^LDR[BDHS]ro[WX]")>;
+def : InstRW<[M1WriteLD,
+ ReadAdrBase], (instregex "^LDRQro[WX]")>;
+def : InstRW<[WriteVLD,
+ M1WriteLH], (instregex "^LDN?P[DS]i")>;
+def : InstRW<[M1WriteLA,
+ M1WriteLH], (instregex "^LDN?PQi")>;
+def : InstRW<[M1WriteLC,
+ M1WriteLH,
+ WriteAdr], (instregex "^LDP[DS](post|pre)")>;
+def : InstRW<[M1WriteLD,
+ M1WriteLH,
+ WriteAdr], (instregex "^LDPQ(post|pre)")>;
+
+// FP store instructions.
+def : InstRW<[WriteVST], (instregex "^STUR[BDHSQ]i")>;
+def : InstRW<[WriteVST,
+ WriteAdr], (instregex "^STR[BDHSQ](post|pre)")>;
+def : InstRW<[WriteVST], (instregex "^STR[BDHSQ]ui")>;
+def : InstRW<[M1WriteSY,
+ ReadAdrBase], (instregex "^STR[BDHS]ro[WX]")>;
+def : InstRW<[M1WriteSB,
+ ReadAdrBase], (instregex "^STRQro[WX]")>;
+def : InstRW<[WriteVST], (instregex "^STN?P[DSQ]i")>;
+def : InstRW<[WriteVST,
+ WriteAdr], (instregex "^STP[DS](post|pre)")>;
+def : InstRW<[M1WriteSC,
+ WriteAdr], (instregex "^STPQ(post|pre)")>;
+
+// ASIMD instructions.
+def : InstRW<[M1WriteNMISC3], (instregex "^[SU]ABAL?v")>;
+def : InstRW<[M1WriteNMISC1], (instregex "^[SU]ABDL?v")>;
+def : InstRW<[M1WriteNMISC1], (instregex "^(SQ)?ABSv")>;
+def : InstRW<[M1WriteNMISC1], (instregex "^SQNEGv")>;
+def : InstRW<[M1WriteNALU1], (instregex "^(ADD|NEG|SUB)v")>;
+def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?H(ADD|SUB)v")>;
+def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?AD[AD](L|LP|P|W)V?2?v")>;
+def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?SUB[LW]2?v")>;
+def : InstRW<[M1WriteNMISC3], (instregex "^R?(ADD|SUB)HN?2?v")>;
+def : InstRW<[M1WriteNMISC3], (instregex "^[SU]+Q(ADD|SUB)v")>;
+def : InstRW<[M1WriteNMISC3], (instregex "^[SU]RHADDv")>;
+def : InstRW<[M1WriteNMISC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>;
+def : InstRW<[M1WriteNALU1], (instregex "^CMTSTv")>;
+def : InstRW<[M1WriteNALU1], (instregex "^(AND|BIC|EOR|MVNI|NOT|ORN|ORR)v")>;
+def : InstRW<[M1WriteNMISC1], (instregex "^[SU](MIN|MAX)v")>;
+def : InstRW<[M1WriteNMISC2], (instregex "^[SU](MIN|MAX)Pv")>;
+def : InstRW<[M1WriteNMISC3], (instregex "^[SU](MIN|MAX)Vv")>;
+def : InstRW<[M1WriteNMISC4], (instregex "^(MUL|SQR?DMULH)v")>;
+def : InstRW<[M1WriteNMISC4], (instregex "^ML[AS]v")>;
+def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD|SQRD)ML[AS][HL]v")>;
+def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD)MULLv")>;
+def : InstRW<[M1WriteNAL13], (instregex "^(S|SR|U|UR)SRAv")>;
+def : InstRW<[M1WriteNALU1], (instregex "^SHL[dv]")>;
+def : InstRW<[M1WriteNALU1], (instregex "^[SU]SH[LR][dv]")>;
+def : InstRW<[M1WriteNALU1], (instregex "^S[RS]I[dv]")>;
+def : InstRW<[M1WriteNAL13], (instregex "^(([SU]Q)?R)?SHRU?N[bhsv]")>;
+def : InstRW<[M1WriteNAL13], (instregex "^[SU]RSH[LR][dv]")>;
+def : InstRW<[M1WriteNAL13], (instregex "^[SU]QR?SHLU?[bdhsv]")>;
+
+// ASIMD FP instructions.
+def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)v")>;
+def : InstRW<[M1WriteNMISC3], (instregex "^F(ABD|ADD|SUB)v")>;
+def : InstRW<[M1WriteNEONA], (instregex "^FADDP")>;
+def : InstRW<[M1WriteNMISC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v[^1]")>;
+def : InstRW<[M1WriteFCVT3], (instregex "^[FVSU]CVTX?[AFLMNPZ][SU]?(_Int)?v")>;
+def : InstRW<[M1WriteFVAR15], (instregex "FDIVv.f32")>;
+def : InstRW<[M1WriteFVAR23], (instregex "FDIVv2f64")>;
+def : InstRW<[M1WriteFVAR15], (instregex "FSQRTv.f32")>;
+def : InstRW<[M1WriteFVAR23], (instregex "FSQRTv2f64")>;
+def : InstRW<[M1WriteNMISC1], (instregex "^F(MAX|MIN)(NM)?V?v")>;
+def : InstRW<[M1WriteNMISC2], (instregex "^F(MAX|MIN)(NM)?Pv")>;
+def : InstRW<[M1WriteNEONJ], (instregex "^FMULX?v.i")>;
+def : InstRW<[M1WriteFMAC4], (instregex "^FMULX?v.f")>;
+def : InstRW<[M1WriteNEONK], (instregex "^FML[AS]v.i")>;
+def : InstRW<[M1WriteFMAC5], (instregex "^FML[AS]v.f")>;
+def : InstRW<[M1WriteFCVT3], (instregex "^FRINT[AIMNPXZ]v")>;
+
+// ASIMD miscellaneous instructions.
+def : InstRW<[M1WriteNALU1], (instregex "^RBITv")>;
+def : InstRW<[M1WriteNAL11], (instregex "^(BIF|BIT|BSL)v")>;
+def : InstRW<[M1WriteNEONB], (instregex "^DUPv.+gpr")>;
+def : InstRW<[M1WriteNALU1], (instregex "^DUPv.+lane")>;
+def : InstRW<[M1WriteNALU1], (instregex "^EXTv8")>;
+def : InstRW<[M1WriteNEONL], (instregex "^EXTv16")>;
+def : InstRW<[M1WriteNAL13], (instregex "^[SU]?Q?XTU?Nv")>;
+def : InstRW<[M1WriteNALU1], (instregex "^CPY")>;
+def : InstRW<[M1WriteNALU1], (instregex "^INSv.+lane")>;
+def : InstRW<[M1WriteNALU1], (instregex "^MOVI[Dv]")>;
+def : InstRW<[M1WriteNALU1], (instregex "^FMOVv")>;
+def : InstRW<[M1WriteFCVT4], (instregex "^[FU](RECP|RSQRT)Ev[248]")>;
+def : InstRW<[M1WriteFMAC5], (instregex "^F(RECP|RSQRT)Sv")>;
+def : InstRW<[M1WriteNALU1], (instregex "^REV(16|32|64)v")>;
+def : InstRW<[M1WriteNAL11], (instregex "^TB[LX]v8i8One")>;
+def : InstRW<[WriteSequence<[M1WriteNAL11], 2>],
+ (instregex "^TB[LX]v8i8Two")>;
+def : InstRW<[WriteSequence<[M1WriteNAL11], 3>],
+ (instregex "^TB[LX]v8i8Three")>;
+def : InstRW<[WriteSequence<[M1WriteNAL11], 4>],
+ (instregex "^TB[LX]v8i8Four")>;
+def : InstRW<[M1WriteNAL12], (instregex "^TB[LX]v16i8One")>;
+def : InstRW<[WriteSequence<[M1WriteNAL12], 2>],
+ (instregex "^TB[LX]v16i8Two")>;
+def : InstRW<[WriteSequence<[M1WriteNAL12], 3>],
+ (instregex "^TB[LX]v16i8Three")>;
+def : InstRW<[WriteSequence<[M1WriteNAL12], 4>],
+ (instregex "^TB[LX]v16i8Four")>;
+def : InstRW<[M1WriteNEOND], (instregex "^[SU]MOVv")>;
+def : InstRW<[M1WriteNEONC], (instregex "^INSv.+gpr")>;
+def : InstRW<[M1WriteNALU1], (instregex "^(TRN|UZP)[12](v8i8|v4i16|v2i32)")>;
+def : InstRW<[M1WriteNALU2], (instregex "^(TRN|UZP)[12](v16i8|v8i16|v4i32|v2i64)")>;
+def : InstRW<[M1WriteNALU1], (instregex "^ZIP[12]v")>;
+
+// ASIMD load instructions.
+def : InstRW<[M1WriteVLDD], (instregex "LD1i(8|16|32)$")>;
+def : InstRW<[M1WriteVLDD,
+ WriteAdr], (instregex "LD1i(8|16|32)_POST$")>;
+def : InstRW<[M1WriteVLDE], (instregex "LD1i(64)$")>;
+def : InstRW<[M1WriteVLDE,
+ WriteAdr], (instregex "LD1i(64)_POST$")>;
+
+def : InstRW<[M1WriteL5], (instregex "LD1Rv(8b|4h|2s)$")>;
+def : InstRW<[M1WriteL5,
+ WriteAdr], (instregex "LD1Rv(8b|4h|2s)_POST$")>;
+def : InstRW<[M1WriteL5], (instregex "LD1Rv(1d)$")>;
+def : InstRW<[M1WriteL5,
+ WriteAdr], (instregex "LD1Rv(1d)_POST$")>;
+def : InstRW<[M1WriteL5], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[M1WriteL5,
+ WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M1WriteL5], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[M1WriteL5,
+ WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M1WriteL5], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[M1WriteL5,
+ WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[M1WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[M1WriteVLDA,
+ WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M1WriteVLDA], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[M1WriteVLDA,
+ WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[M1WriteVLDB], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[M1WriteVLDB,
+ WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M1WriteVLDB], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[M1WriteVLDB,
+ WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[M1WriteVLDC], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[M1WriteVLDC,
+ WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M1WriteVLDC], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[M1WriteVLDC,
+ WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M1WriteVLDG], (instregex "LD2i(8|16)$")>;
+def : InstRW<[M1WriteVLDG,
+ WriteAdr], (instregex "LD2i(8|16)_POST$")>;
+def : InstRW<[M1WriteVLDG], (instregex "LD2i(32)$")>;
+def : InstRW<[M1WriteVLDG,
+ WriteAdr], (instregex "LD2i(32)_POST$")>;
+def : InstRW<[M1WriteVLDH], (instregex "LD2i(64)$")>;
+def : InstRW<[M1WriteVLDH,
+ WriteAdr], (instregex "LD2i(64)_POST$")>;
+
+def : InstRW<[M1WriteVLDA], (instregex "LD2Rv(8b|4h|2s)$")>;
+def : InstRW<[M1WriteVLDA,
+ WriteAdr], (instregex "LD2Rv(8b|4h|2s)_POST$")>;
+def : InstRW<[M1WriteVLDA], (instregex "LD2Rv(1d)$")>;
+def : InstRW<[M1WriteVLDA,
+ WriteAdr], (instregex "LD2Rv(1d)_POST$")>;
+def : InstRW<[M1WriteVLDA], (instregex "LD2Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[M1WriteVLDA,
+ WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M1WriteVLDF], (instregex "LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[M1WriteVLDF,
+ WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[M1WriteVLDF], (instregex "LD2Twov(16b|8h|4s)$")>;
+def : InstRW<[M1WriteVLDF,
+ WriteAdr], (instregex "LD2Twov(16b|8h|4s)_POST$")>;
+def : InstRW<[M1WriteVLDF], (instregex "LD2Twov(2d)$")>;
+def : InstRW<[M1WriteVLDF,
+ WriteAdr], (instregex "LD2Twov(2d)_POST$")>;
+
+def : InstRW<[M1WriteVLDJ], (instregex "LD3i(8|16)$")>;
+def : InstRW<[M1WriteVLDJ,
+ WriteAdr], (instregex "LD3i(8|16)_POST$")>;
+def : InstRW<[M1WriteVLDJ], (instregex "LD3i(32)$")>;
+def : InstRW<[M1WriteVLDJ,
+ WriteAdr], (instregex "LD3i(32)_POST$")>;
+def : InstRW<[M1WriteVLDL], (instregex "LD3i(64)$")>;
+def : InstRW<[M1WriteVLDL,
+ WriteAdr], (instregex "LD3i(64)_POST$")>;
+
+def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(8b|4h|2s)$")>;
+def : InstRW<[M1WriteVLDB,
+ WriteAdr], (instregex "LD3Rv(8b|4h|2s)_POST$")>;
+def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(1d)$")>;
+def : InstRW<[M1WriteVLDB,
+ WriteAdr], (instregex "LD3Rv(1d)_POST$")>;
+def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(16b|8h|4s)$")>;
+def : InstRW<[M1WriteVLDB,
+ WriteAdr], (instregex "LD3Rv(16b|8h|4s)_POST$")>;
+def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(2d)$")>;
+def : InstRW<[M1WriteVLDB,
+ WriteAdr], (instregex "LD3Rv(2d)_POST$")>;
+
+def : InstRW<[M1WriteVLDI], (instregex "LD3Threev(8b|4h|2s)$")>;
+def : InstRW<[M1WriteVLDI,
+ WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>;
+def : InstRW<[M1WriteVLDI], (instregex "LD3Threev(16b|8h|4s)$")>;
+def : InstRW<[M1WriteVLDI,
+ WriteAdr], (instregex "LD3Threev(16b|8h|4s)_POST$")>;
+def : InstRW<[M1WriteVLDI], (instregex "LD3Threev(2d)$")>;
+def : InstRW<[M1WriteVLDI,
+ WriteAdr], (instregex "LD3Threev(2d)_POST$")>;
+
+def : InstRW<[M1WriteVLDK], (instregex "LD4i(8|16)$")>;
+def : InstRW<[M1WriteVLDK,
+ WriteAdr], (instregex "LD4i(8|16)_POST$")>;
+def : InstRW<[M1WriteVLDK], (instregex "LD4i(32)$")>;
+def : InstRW<[M1WriteVLDK,
+ WriteAdr], (instregex "LD4i(32)_POST$")>;
+def : InstRW<[M1WriteVLDM], (instregex "LD4i(64)$")>;
+def : InstRW<[M1WriteVLDM,
+ WriteAdr], (instregex "LD4i(64)_POST$")>;
+
+def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(8b|4h|2s)$")>;
+def : InstRW<[M1WriteVLDC,
+ WriteAdr], (instregex "LD4Rv(8b|4h|2s)_POST$")>;
+def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(1d)$")>;
+def : InstRW<[M1WriteVLDC,
+ WriteAdr], (instregex "LD4Rv(1d)_POST$")>;
+def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(16b|8h|4s)$")>;
+def : InstRW<[M1WriteVLDC,
+ WriteAdr], (instregex "LD4Rv(16b|8h|4s)_POST$")>;
+def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(2d)$")>;
+def : InstRW<[M1WriteVLDC,
+ WriteAdr], (instregex "LD4Rv(2d)_POST$")>;
+
+def : InstRW<[M1WriteVLDN], (instregex "LD4Fourv(8b|4h|2s)$")>;
+def : InstRW<[M1WriteVLDN,
+ WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
+def : InstRW<[M1WriteVLDN], (instregex "LD4Fourv(16b|8h|4s)$")>;
+def : InstRW<[M1WriteVLDN,
+ WriteAdr], (instregex "LD4Fourv(16b|8h|4s)_POST$")>;
+def : InstRW<[M1WriteVLDN], (instregex "LD4Fourv(2d)$")>;
+def : InstRW<[M1WriteVLDN,
+ WriteAdr], (instregex "LD4Fourv(2d)_POST$")>;
+
+// ASIMD store instructions.
+def : InstRW<[M1WriteVSTD], (instregex "ST1i(8|16|32)$")>;
+def : InstRW<[M1WriteVSTD,
+ WriteAdr], (instregex "ST1i(8|16|32)_POST$")>;
+def : InstRW<[M1WriteVSTD], (instregex "ST1i(64)$")>;
+def : InstRW<[M1WriteVSTD,
+ WriteAdr], (instregex "ST1i(64)_POST$")>;
+
+def : InstRW<[WriteVST], (instregex "ST1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteVST,
+ WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[WriteVST], (instregex "ST1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVST,
+ WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[M1WriteVSTA], (instregex "ST1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[M1WriteVSTA,
+ WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M1WriteVSTA], (instregex "ST1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[M1WriteVSTA,
+ WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[M1WriteVSTB], (instregex "ST1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[M1WriteVSTB,
+ WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M1WriteVSTB], (instregex "ST1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[M1WriteVSTB,
+ WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[M1WriteVSTC], (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[M1WriteVSTC,
+ WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[M1WriteVSTC], (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[M1WriteVSTC,
+ WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[M1WriteVSTD], (instregex "ST2i(8|16|32)$")>;
+def : InstRW<[M1WriteVSTD,
+ WriteAdr], (instregex "ST2i(8|16|32)_POST$")>;
+def : InstRW<[M1WriteVSTD], (instregex "ST2i(64)$")>;
+def : InstRW<[M1WriteVSTD,
+ WriteAdr], (instregex "ST2i(64)_POST$")>;
+
+def : InstRW<[M1WriteVSTD], (instregex "ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[M1WriteVSTD,
+ WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[M1WriteVSTE], (instregex "ST2Twov(16b|8h|4s)$")>;
+def : InstRW<[M1WriteVSTE,
+ WriteAdr], (instregex "ST2Twov(16b|8h|4s)_POST$")>;
+def : InstRW<[M1WriteVSTE], (instregex "ST2Twov(2d)$")>;
+def : InstRW<[M1WriteVSTE,
+ WriteAdr], (instregex "ST2Twov(2d)_POST$")>;
+
+def : InstRW<[M1WriteVSTH], (instregex "ST3i(8|16)$")>;
+def : InstRW<[M1WriteVSTH,
+ WriteAdr], (instregex "ST3i(8|16)_POST$")>;
+def : InstRW<[M1WriteVSTH], (instregex "ST3i(32)$")>;
+def : InstRW<[M1WriteVSTH,
+ WriteAdr], (instregex "ST3i(32)_POST$")>;
+def : InstRW<[M1WriteVSTF], (instregex "ST3i(64)$")>;
+def : InstRW<[M1WriteVSTF,
+ WriteAdr], (instregex "ST3i(64)_POST$")>;
+
+def : InstRW<[M1WriteVSTF], (instregex "ST3Threev(8b|4h|2s)$")>;
+def : InstRW<[M1WriteVSTF,
+ WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>;
+def : InstRW<[M1WriteVSTG], (instregex "ST3Threev(16b|8h|4s)$")>;
+def : InstRW<[M1WriteVSTG,
+ WriteAdr], (instregex "ST3Threev(16b|8h|4s)_POST$")>;
+def : InstRW<[M1WriteVSTG], (instregex "ST3Threev(2d)$")>;
+def : InstRW<[M1WriteVSTG,
+ WriteAdr], (instregex "ST3Threev(2d)_POST$")>;
+
+def : InstRW<[M1WriteVSTH], (instregex "ST4i(8|16)$")>;
+def : InstRW<[M1WriteVSTH,
+ WriteAdr], (instregex "ST4i(8|16)_POST$")>;
+def : InstRW<[M1WriteVSTH], (instregex "ST4i(32)$")>;
+def : InstRW<[M1WriteVSTH,
+ WriteAdr], (instregex "ST4i(32)_POST$")>;
+def : InstRW<[M1WriteVSTF], (instregex "ST4i(64)$")>;
+def : InstRW<[M1WriteVSTF,
+ WriteAdr], (instregex "ST4i(64)_POST$")>;
+
+def : InstRW<[M1WriteVSTF], (instregex "ST4Fourv(8b|4h|2s)$")>;
+def : InstRW<[M1WriteVSTF,
+ WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
+def : InstRW<[M1WriteVSTI], (instregex "ST4Fourv(16b|8h|4s)$")>;
+def : InstRW<[M1WriteVSTI,
+ WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>;
+def : InstRW<[M1WriteVSTI], (instregex "ST4Fourv(2d)$")>;
+def : InstRW<[M1WriteVSTI,
+ WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
+
+// Cryptography instructions.
+def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
+def M1ReadAES : SchedReadAdvance<1, [M1WriteAES]>;
+def : InstRW<[M1WriteAES], (instregex "^AES[DE]")>;
+def : InstRW<[M1WriteAES, M1ReadAES], (instregex "^AESI?MC")>;
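+// The SchedReadAdvance above is assumed to model AES forwarding: the source
+// operand of AESMC/AESIMC reads the result of a preceding AESE/AESD one cycle
+// early, so a dependent AESE+AESMC pair behaves as if back to back.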
+
+def : InstRW<[M1WriteNCRYPT1], (instregex "^PMUL")>;
+def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA1(H|SU)")>;
+def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA1[CMP]")>;
+def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA256SU0")>;
+def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA256(H|SU1)")>;
+
+// CRC instructions.
+def : InstRW<[M1WriteC2], (instregex "^CRC32")>;
+
+} // SchedModel = ExynosM1Model
diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedExynosM3.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedExynosM3.td
new file mode 100644
index 000000000..5e5369a5a
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedExynosM3.td
@@ -0,0 +1,860 @@
+//=- AArch64SchedExynosM3.td - Samsung Exynos M3 Sched Defs --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for the Samsung Exynos M3 to support
+// instruction scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// The Exynos-M3 is an advanced superscalar microprocessor with a 6-wide
+// in-order stage for decode and dispatch and a wider issue stage.
+// Execution in the functional units, as well as loads and stores, is out of order.
+
+def ExynosM3Model : SchedMachineModel {
+ let IssueWidth = 6; // Up to 6 uops per cycle.
+ let MicroOpBufferSize = 228; // ROB size.
+ let LoopMicroOpBufferSize = 40; // Based on the instruction queue size.
+ let LoadLatency = 4; // Optimistic load cases.
+ let MispredictPenalty = 16; // Minimum branch misprediction penalty.
+ let CompleteModel = 1; // Use the default model otherwise.
+
+ list<Predicate> UnsupportedFeatures = [HasSVE];
+
+ // FIXME: Remove when all errors have been fixed.
+ let FullInstRWOverlapCheck = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on the Exynos-M3,
+// which has 12 pipelines, each with its own queue with out-of-order dispatch.
+
+let SchedModel = ExynosM3Model in {
+
+def M3UnitA : ProcResource<2>; // Simple integer
+def M3UnitC : ProcResource<2>; // Simple and complex integer
+def M3UnitD : ProcResource<1>; // Integer division (inside C0, serialized)
+def M3UnitB : ProcResource<2>; // Branch
+def M3UnitL : ProcResource<2>; // Load
+def M3UnitS : ProcResource<1>; // Store
+def M3PipeF0 : ProcResource<1>; // FP #0
+let Super = M3PipeF0 in {
+ def M3UnitFMAC0 : ProcResource<1>; // FP multiplication
+ def M3UnitFADD0 : ProcResource<1>; // Simple FP
+ def M3UnitFCVT0 : ProcResource<1>; // FP conversion
+ def M3UnitFSQR : ProcResource<2>; // FP square root (serialized)
+ def M3UnitNALU0 : ProcResource<1>; // Simple vector
+ def M3UnitNMSC : ProcResource<1>; // FP and vector miscellanea
+ def M3UnitNSHT0 : ProcResource<1>; // Vector shifting
+ def M3UnitNSHF0 : ProcResource<1>; // Vector shuffling
+}
+def M3PipeF1 : ProcResource<1>; // FP #1
+let Super = M3PipeF1 in {
+ def M3UnitFMAC1 : ProcResource<1>; // FP multiplication
+ def M3UnitFADD1 : ProcResource<1>; // Simple FP
+ def M3UnitFDIV0 : ProcResource<2>; // FP division (serialized)
+ def M3UnitFCVT1 : ProcResource<1>; // FP conversion
+ def M3UnitFST0 : ProcResource<1>; // FP store
+ def M3UnitNALU1 : ProcResource<1>; // Simple vector
+ def M3UnitNCRY0 : ProcResource<1>; // Cryptographic
+ def M3UnitNMUL : ProcResource<1>; // Vector multiplication
+ def M3UnitNSHT1 : ProcResource<1>; // Vector shifting
+ def M3UnitNSHF1 : ProcResource<1>; // Vector shuffling
+}
+def M3PipeF2 : ProcResource<1>; // FP #2
+let Super = M3PipeF2 in {
+ def M3UnitFMAC2 : ProcResource<1>; // FP multiplication
+ def M3UnitFADD2 : ProcResource<1>; // Simple FP
+ def M3UnitFDIV1 : ProcResource<2>; // FP division (serialized)
+ def M3UnitFST1 : ProcResource<1>; // FP store
+ def M3UnitNALU2 : ProcResource<1>; // Simple vector
+ def M3UnitNCRY1 : ProcResource<1>; // Cryptographic
+ def M3UnitNSHT2 : ProcResource<1>; // Vector shifting
+ def M3UnitNSHF2 : ProcResource<1>; // Vector shuffling
+}
+
+
+def M3UnitALU : ProcResGroup<[M3UnitA,
+ M3UnitC]>;
+def M3UnitFMAC : ProcResGroup<[M3UnitFMAC0,
+ M3UnitFMAC1,
+ M3UnitFMAC2]>;
+def M3UnitFADD : ProcResGroup<[M3UnitFADD0,
+ M3UnitFADD1,
+ M3UnitFADD2]>;
+def M3UnitFDIV : ProcResGroup<[M3UnitFDIV0,
+ M3UnitFDIV1]>;
+def M3UnitFCVT : ProcResGroup<[M3UnitFCVT0,
+ M3UnitFCVT1]>;
+def M3UnitFST : ProcResGroup<[M3UnitFST0,
+ M3UnitFST1]>;
+def M3UnitNALU : ProcResGroup<[M3UnitNALU0,
+ M3UnitNALU1,
+ M3UnitNALU2]>;
+def M3UnitNCRY : ProcResGroup<[M3UnitNCRY0,
+ M3UnitNCRY1]>;
+def M3UnitNSHT : ProcResGroup<[M3UnitNSHT0,
+ M3UnitNSHT1,
+ M3UnitNSHT2]>;
+def M3UnitNSHF : ProcResGroup<[M3UnitNSHF0,
+ M3UnitNSHF1,
+ M3UnitNSHF2]>;
+
+//===----------------------------------------------------------------------===//
+// Predicates.
+
+def M3BranchLinkFastPred : SchedPredicate<[{MI->getOpcode() == AArch64::BLR &&
+ MI->getOperand(0).isReg() &&
+ MI->getOperand(0).getReg() != AArch64::LR}]>;
+def M3ResetFastPred : SchedPredicate<[{TII->isExynosResetFast(*MI)}]>;
+def M3RotateRightFastPred : SchedPredicate<[{(MI->getOpcode() == AArch64::EXTRWrri ||
+ MI->getOpcode() == AArch64::EXTRXrri) &&
+ MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
+ MI->getOperand(1).getReg() == MI->getOperand(2).getReg()}]>;
+def M3ShiftLeftFastPred : SchedPredicate<[{TII->isExynosShiftLeftFast(*MI)}]>;
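+// The [{...}] bodies above are C++ expressions evaluated against the
+// candidate MachineInstr (MI) when the scheduler resolves a SchedWriteVariant;
+// for example, M3RotateRightFastPred matches an EXTR whose two source
+// registers are equal, i.e. the ROR-immediate alias. (This is general
+// SchedPredicate behaviour, not specific to this file.)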
+
+//===----------------------------------------------------------------------===//
+// Coarse scheduling model.
+
+def M3WriteZ0 : SchedWriteRes<[]> { let Latency = 0;
+ let NumMicroOps = 1; }
+
+def M3WriteA1 : SchedWriteRes<[M3UnitALU]> { let Latency = 1; }
+def M3WriteAA : SchedWriteRes<[M3UnitALU]> { let Latency = 2;
+ let ResourceCycles = [2]; }
+def M3WriteAB : SchedWriteRes<[M3UnitALU,
+ M3UnitC]> { let Latency = 1;
+ let NumMicroOps = 2; }
+def M3WriteAC : SchedWriteRes<[M3UnitALU,
+ M3UnitALU,
+ M3UnitC]> { let Latency = 2;
+ let NumMicroOps = 3; }
+def M3WriteAD : SchedWriteRes<[M3UnitALU,
+ M3UnitC]> { let Latency = 2;
+ let NumMicroOps = 2; }
+def M3WriteC1 : SchedWriteRes<[M3UnitC]> { let Latency = 1; }
+def M3WriteC2 : SchedWriteRes<[M3UnitC]> { let Latency = 2; }
+def M3WriteAX : SchedWriteVariant<[SchedVar<M3ResetFastPred, [M3WriteZ0]>,
+ SchedVar<M3ShiftLeftFastPred, [M3WriteA1]>,
+ SchedVar<NoSchedPred, [M3WriteAA]>]>;
+def M3WriteAY : SchedWriteVariant<[SchedVar<M3RotateRightFastPred, [M3WriteA1]>,
+ SchedVar<NoSchedPred, [M3WriteAA]>]>;
+
+def M3WriteB1 : SchedWriteRes<[M3UnitB]> { let Latency = 1; }
+def M3WriteBX : SchedWriteVariant<[SchedVar<M3BranchLinkFastPred, [M3WriteAB]>,
+ SchedVar<NoSchedPred, [M3WriteAC]>]>;
+
+def M3WriteL4 : SchedWriteRes<[M3UnitL]> { let Latency = 4; }
+def M3WriteL5 : SchedWriteRes<[M3UnitL]> { let Latency = 5; }
+def M3WriteLA : SchedWriteRes<[M3UnitL,
+ M3UnitL]> { let Latency = 5;
+ let NumMicroOps = 1; }
+def M3WriteLB : SchedWriteRes<[M3UnitA,
+ M3UnitL]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M3WriteLC : SchedWriteRes<[M3UnitA,
+ M3UnitL,
+ M3UnitL]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M3WriteLD : SchedWriteRes<[M3UnitA,
+ M3UnitL]> { let Latency = 4;
+ let NumMicroOps = 2; }
+def M3WriteLH : SchedWriteRes<[]> { let Latency = 5;
+ let NumMicroOps = 0; }
+
+def M3WriteLX : SchedWriteVariant<[SchedVar<M3ShiftLeftFastPred, [M3WriteL5]>,
+ SchedVar<NoSchedPred, [M3WriteLB]>]>;
+
+def M3WriteS1 : SchedWriteRes<[M3UnitS]> { let Latency = 1; }
+def M3WriteSA : SchedWriteRes<[M3UnitA,
+ M3UnitS,
+ M3UnitFST]> { let Latency = 2;
+ let NumMicroOps = 2; }
+def M3WriteSB : SchedWriteRes<[M3UnitA,
+ M3UnitS]> { let Latency = 1;
+ let NumMicroOps = 2; }
+def M3WriteSC : SchedWriteRes<[M3UnitA,
+ M3UnitS]> { let Latency = 2;
+ let NumMicroOps = 2; }
+
+def M3WriteSX : SchedWriteVariant<[SchedVar<M3ShiftLeftFastPred, [M3WriteS1]>,
+ SchedVar<NoSchedPred, [M3WriteSB]>]>;
+def M3WriteSY : SchedWriteVariant<[SchedVar<M3ShiftLeftFastPred, [M3WriteS1]>,
+ SchedVar<NoSchedPred, [M3WriteSC]>]>;
+
+def M3ReadAdrBase : SchedReadVariant<[SchedVar<ScaledIdxPred, [ReadDefault]>,
+ SchedVar<NoSchedPred, [ReadDefault]>]>;
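+// Note that both variants of M3ReadAdrBase resolve to ReadDefault, so it
+// currently acts as a plain placeholder for the base-register read.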
+
+// Branch instructions.
+def : SchedAlias<WriteBr, M3WriteZ0>;
+def : WriteRes<WriteBrReg, [M3UnitC]> { let Latency = 1; }
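+// SchedAlias redirects a generic SchedWrite (here WriteBr) to a processor
+// specific write, whereas a bare WriteRes defines the resources and latency
+// for the generic class directly; both idioms are mixed throughout this file.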
+
+// Arithmetic and logical integer instructions.
+def : WriteRes<WriteI, [M3UnitALU]> { let Latency = 1; }
+def : WriteRes<WriteISReg, [M3UnitALU]> { let Latency = 1; }
+def : WriteRes<WriteIEReg, [M3UnitALU]> { let Latency = 1; }
+def : WriteRes<WriteIS, [M3UnitALU]> { let Latency = 1; }
+
+// Move instructions.
+def : WriteRes<WriteImm, [M3UnitALU]> { let Latency = 1; }
+
+// Divide and multiply instructions.
+def : WriteRes<WriteID32, [M3UnitC,
+ M3UnitD]> { let Latency = 12;
+ let ResourceCycles = [1, 12]; }
+def : WriteRes<WriteID64, [M3UnitC,
+ M3UnitD]> { let Latency = 21;
+ let ResourceCycles = [1, 21]; }
+def : WriteRes<WriteIM32, [M3UnitC]> { let Latency = 3; }
+def : WriteRes<WriteIM64, [M3UnitC]> { let Latency = 4;
+ let ResourceCycles = [2]; }
+
+// Miscellaneous instructions.
+def : WriteRes<WriteExtr, [M3UnitALU,
+ M3UnitALU]> { let Latency = 1;
+ let NumMicroOps = 2; }
+
+// Addressing modes.
+def : WriteRes<WriteAdr, []> { let Latency = 1;
+ let NumMicroOps = 0; }
+def : SchedAlias<ReadAdrBase, M3ReadAdrBase>;
+
+// Load instructions.
+def : SchedAlias<WriteLD, M3WriteL4>;
+def : WriteRes<WriteLDHi, []> { let Latency = 4;
+ let NumMicroOps = 0; }
+def : SchedAlias<WriteLDIdx, M3WriteLX>;
+
+// Store instructions.
+def : SchedAlias<WriteST, M3WriteS1>;
+def : SchedAlias<WriteSTP, M3WriteS1>;
+def : SchedAlias<WriteSTX, M3WriteS1>;
+def : SchedAlias<WriteSTIdx, M3WriteSX>;
+
+// FP data instructions.
+def : WriteRes<WriteF, [M3UnitFADD]> { let Latency = 2; }
+def : WriteRes<WriteFCmp, [M3UnitNMSC]> { let Latency = 2; }
+def : WriteRes<WriteFDiv, [M3UnitFDIV]> { let Latency = 12;
+ let ResourceCycles = [12]; }
+def : WriteRes<WriteFMul, [M3UnitFMAC]> { let Latency = 4; }
+
+// FP miscellaneous instructions.
+// TODO: Conversion between register files is much different.
+def : WriteRes<WriteFCvt, [M3UnitFCVT]> { let Latency = 3; }
+def : WriteRes<WriteFImm, [M3UnitNALU]> { let Latency = 1; }
+def : WriteRes<WriteFCopy, [M3UnitNALU]> { let Latency = 1; }
+
+// FP load instructions.
+def : SchedAlias<WriteVLD, M3WriteL5>;
+
+// FP store instructions.
+def : WriteRes<WriteVST, [M3UnitS,
+ M3UnitFST]> { let Latency = 1;
+ let NumMicroOps = 1; }
+
+// ASIMD FP instructions.
+def : WriteRes<WriteV, [M3UnitNALU]> { let Latency = 3; }
+
+// Other miscellaneous instructions.
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+def : WriteRes<WriteSys, []> { let Latency = 1; }
+
+//===----------------------------------------------------------------------===//
+// Generic fast forwarding.
+
+// TODO: Add FP register forwarding rules.
+
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+// TODO: The forwarding for 32 bits actually saves 2 cycles.
+def : ReadAdvance<ReadIMA, 3, [WriteIM32, WriteIM64]>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD, 0>;
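+// ReadAdvance<ReadIMA, 3, ...> above means the accumulator operand of a
+// multiply-accumulate is assumed to be forwarded: when it is produced by a
+// WriteIM32/WriteIM64 multiply, its effective latency is reduced by 3 cycles,
+// so dependent MADD/MSUB chains can issue nearly back to back.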
+
+//===----------------------------------------------------------------------===//
+// Finer scheduling model.
+
+def M3WriteNEONA : SchedWriteRes<[M3UnitNSHF,
+ M3UnitFADD]> { let Latency = 3;
+ let NumMicroOps = 2; }
+def M3WriteNEONB : SchedWriteRes<[M3UnitNALU,
+ M3UnitFST]> { let Latency = 10;
+ let NumMicroOps = 2; }
+def M3WriteNEOND : SchedWriteRes<[M3UnitNSHF,
+ M3UnitFST]> { let Latency = 6;
+ let NumMicroOps = 2; }
+def M3WriteNEONH : SchedWriteRes<[M3UnitNALU,
+ M3UnitS]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M3WriteNEONI : SchedWriteRes<[M3UnitNSHF,
+ M3UnitS]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M3WriteNEONV : SchedWriteRes<[M3UnitFDIV0,
+ M3UnitFDIV1]> { let Latency = 7;
+ let NumMicroOps = 2;
+ let ResourceCycles = [8, 8]; }
+def M3WriteNEONW : SchedWriteRes<[M3UnitFDIV0,
+ M3UnitFDIV1]> { let Latency = 12;
+ let NumMicroOps = 2;
+ let ResourceCycles = [13, 13]; }
+def M3WriteNEONX : SchedWriteRes<[M3UnitFSQR,
+ M3UnitFSQR]> { let Latency = 18;
+ let NumMicroOps = 2;
+ let ResourceCycles = [19, 19]; }
+def M3WriteNEONY : SchedWriteRes<[M3UnitFSQR,
+ M3UnitFSQR]> { let Latency = 25;
+ let NumMicroOps = 2;
+ let ResourceCycles = [26, 26]; }
+def M3WriteNEONZ : SchedWriteRes<[M3UnitNMSC,
+ M3UnitNMSC]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M3WriteFADD2 : SchedWriteRes<[M3UnitFADD]> { let Latency = 2; }
+def M3WriteFCVT2 : SchedWriteRes<[M3UnitFCVT]> { let Latency = 2; }
+def M3WriteFCVT3 : SchedWriteRes<[M3UnitFCVT]> { let Latency = 3; }
+def M3WriteFCVT3A : SchedWriteRes<[M3UnitFCVT0]> { let Latency = 3; }
+def M3WriteFCVT4A : SchedWriteRes<[M3UnitFCVT0]> { let Latency = 4; }
+def M3WriteFCVT4 : SchedWriteRes<[M3UnitFCVT]> { let Latency = 4; }
+def M3WriteFDIV10 : SchedWriteRes<[M3UnitFDIV]> { let Latency = 7;
+ let ResourceCycles = [8]; }
+def M3WriteFDIV12 : SchedWriteRes<[M3UnitFDIV]> { let Latency = 12;
+ let ResourceCycles = [13]; }
+def M3WriteFMAC3 : SchedWriteRes<[M3UnitFMAC]> { let Latency = 3; }
+def M3WriteFMAC4 : SchedWriteRes<[M3UnitFMAC]> { let Latency = 4; }
+def M3WriteFMAC5 : SchedWriteRes<[M3UnitFMAC]> { let Latency = 5; }
+def M3WriteFSQR17 : SchedWriteRes<[M3UnitFSQR]> { let Latency = 18;
+ let ResourceCycles = [19]; }
+def M3WriteFSQR25 : SchedWriteRes<[M3UnitFSQR]> { let Latency = 25;
+ let ResourceCycles = [26]; }
+def M3WriteNALU1 : SchedWriteRes<[M3UnitNALU]> { let Latency = 1; }
+def M3WriteNCRY1A : SchedWriteRes<[M3UnitNCRY0]> { let Latency = 1; }
+def M3WriteNCRY3A : SchedWriteRes<[M3UnitNCRY0]> { let Latency = 3; }
+def M3WriteNCRY5A : SchedWriteRes<[M3UnitNCRY]> { let Latency = 5; }
+def M3WriteNMSC1 : SchedWriteRes<[M3UnitNMSC]> { let Latency = 1; }
+def M3WriteNMSC2 : SchedWriteRes<[M3UnitNMSC]> { let Latency = 2; }
+def M3WriteNMSC3 : SchedWriteRes<[M3UnitNMSC]> { let Latency = 3; }
+def M3WriteNMUL3 : SchedWriteRes<[M3UnitNMUL]> { let Latency = 3; }
+def M3WriteNSHF1 : SchedWriteRes<[M3UnitNSHF]> { let Latency = 1; }
+def M3WriteNSHF3 : SchedWriteRes<[M3UnitNSHF]> { let Latency = 3; }
+def M3WriteNSHT1 : SchedWriteRes<[M3UnitNSHT]> { let Latency = 1; }
+def M3WriteNSHT2 : SchedWriteRes<[M3UnitNSHT]> { let Latency = 2; }
+def M3WriteNSHT3 : SchedWriteRes<[M3UnitNSHT]> { let Latency = 3; }
+def M3WriteVLDA : SchedWriteRes<[M3UnitL,
+ M3UnitL]> { let Latency = 5;
+ let NumMicroOps = 2; }
+def M3WriteVLDB : SchedWriteRes<[M3UnitL,
+ M3UnitL,
+ M3UnitL]> { let Latency = 6;
+ let NumMicroOps = 3; }
+def M3WriteVLDC : SchedWriteRes<[M3UnitL,
+ M3UnitL,
+ M3UnitL,
+ M3UnitL]> { let Latency = 6;
+ let NumMicroOps = 4; }
+def M3WriteVLDD : SchedWriteRes<[M3UnitL,
+ M3UnitNALU]> { let Latency = 7;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2, 1]; }
+def M3WriteVLDE : SchedWriteRes<[M3UnitL,
+ M3UnitNALU]> { let Latency = 6;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2, 1]; }
+def M3WriteVLDF : SchedWriteRes<[M3UnitL,
+ M3UnitL]> { let Latency = 10;
+ let NumMicroOps = 2;
+ let ResourceCycles = [5, 5]; }
+def M3WriteVLDG : SchedWriteRes<[M3UnitL,
+ M3UnitNALU,
+ M3UnitNALU]> { let Latency = 7;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1, 1]; }
+def M3WriteVLDH : SchedWriteRes<[M3UnitL,
+ M3UnitNALU,
+ M3UnitNALU]> { let Latency = 6;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1, 1]; }
+def M3WriteVLDI : SchedWriteRes<[M3UnitL,
+ M3UnitL,
+ M3UnitL]> { let Latency = 12;
+ let NumMicroOps = 3;
+ let ResourceCycles = [6, 6, 6]; }
+def M3WriteVLDJ : SchedWriteRes<[M3UnitL,
+ M3UnitNALU,
+ M3UnitNALU,
+ M3UnitNALU]> { let Latency = 7;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2, 1, 1, 1]; }
+def M3WriteVLDK : SchedWriteRes<[M3UnitL,
+ M3UnitNALU,
+ M3UnitNALU,
+ M3UnitNALU,
+ M3UnitNALU]> { let Latency = 9;
+ let NumMicroOps = 5;
+ let ResourceCycles = [4, 1, 1, 1, 1]; }
+def M3WriteVLDL : SchedWriteRes<[M3UnitL,
+ M3UnitNALU,
+ M3UnitNALU,
+ M3UnitL,
+ M3UnitNALU]> { let Latency = 6;
+ let NumMicroOps = 5;
+ let ResourceCycles = [6, 1, 1, 6, 1]; }
+def M3WriteVLDM : SchedWriteRes<[M3UnitL,
+ M3UnitNALU,
+ M3UnitNALU,
+ M3UnitL,
+ M3UnitNALU,
+ M3UnitNALU]> { let Latency = 7;
+ let NumMicroOps = 6;
+ let ResourceCycles = [6, 1, 1, 6, 1, 1]; }
+def M3WriteVLDN : SchedWriteRes<[M3UnitL,
+ M3UnitL,
+ M3UnitL,
+ M3UnitL]> { let Latency = 14;
+ let NumMicroOps = 4;
+ let ResourceCycles = [6, 6, 6, 6]; }
+def M3WriteVSTA : WriteSequence<[WriteVST], 2>;
+def M3WriteVSTB : WriteSequence<[WriteVST], 3>;
+def M3WriteVSTC : WriteSequence<[WriteVST], 4>;
+def M3WriteVSTD : SchedWriteRes<[M3UnitS,
+ M3UnitFST,
+ M3UnitS,
+ M3UnitFST]> { let Latency = 7;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 3, 1, 3]; }
+def M3WriteVSTE : SchedWriteRes<[M3UnitS,
+ M3UnitFST,
+ M3UnitS,
+ M3UnitFST,
+ M3UnitS,
+ M3UnitFST]> { let Latency = 8;
+ let NumMicroOps = 6;
+ let ResourceCycles = [1, 3, 1, 3, 1, 3]; }
+def M3WriteVSTF : SchedWriteRes<[M3UnitNALU,
+ M3UnitFST,
+ M3UnitFST,
+ M3UnitS,
+ M3UnitFST,
+ M3UnitS,
+ M3UnitFST]> { let Latency = 15;
+ let NumMicroOps = 7;
+ let ResourceCycles = [1, 3, 3, 1, 3, 1, 3]; }
+def M3WriteVSTG : SchedWriteRes<[M3UnitNALU,
+ M3UnitFST,
+ M3UnitFST,
+ M3UnitS,
+ M3UnitFST,
+ M3UnitS,
+ M3UnitFST,
+ M3UnitS,
+ M3UnitFST]> { let Latency = 16;
+ let NumMicroOps = 9;
+ let ResourceCycles = [1, 3, 3, 1, 3, 1, 3, 1, 3]; }
+def M3WriteVSTH : SchedWriteRes<[M3UnitNALU,
+ M3UnitFST,
+ M3UnitFST,
+ M3UnitS,
+ M3UnitFST]> { let Latency = 14;
+ let NumMicroOps = 5;
+ let ResourceCycles = [1, 3, 3, 1, 3]; }
+def M3WriteVSTI : SchedWriteRes<[M3UnitNALU,
+ M3UnitFST,
+ M3UnitFST,
+ M3UnitS,
+ M3UnitFST,
+ M3UnitS,
+ M3UnitFST,
+ M3UnitS,
+ M3UnitFST]> { let Latency = 17;
+ let NumMicroOps = 9;
+ let ResourceCycles = [1, 3, 3, 1, 3, 1, 3, 1, 3]; }
+
+// Special cases.
+def M3WriteAES : SchedWriteRes<[M3UnitNCRY]> { let Latency = 1; }
+def M3ReadAES : SchedReadAdvance<1, [M3WriteAES]>;
+def M3ReadFMAC : SchedReadAdvance<1, [M3WriteFMAC4,
+ M3WriteFMAC5]>;
+def M3WriteMOVI : SchedWriteVariant<[SchedVar<M3ResetFastPred, [M3WriteZ0]>,
+ SchedVar<NoSchedPred, [M3WriteNALU1]>]>;
+def M3ReadNMUL : SchedReadAdvance<1, [M3WriteNMUL3]>;
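+// M3ReadNMUL plays the same forwarding role for vector multiplies: an ML[AS]
+// accumulator fed by a preceding M3WriteNMUL3 result is assumed to see one
+// cycle less latency, and M3ReadAES does likewise for chained AES operations.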
+
+// Branch instructions
+def : InstRW<[M3WriteB1], (instrs Bcc)>;
+def : InstRW<[M3WriteA1], (instrs BL)>;
+def : InstRW<[M3WriteBX], (instrs BLR)>;
+def : InstRW<[M3WriteC1], (instregex "^CBN?Z[WX]")>;
+def : InstRW<[M3WriteAD], (instregex "^TBN?Z[WX]")>;
+
+// Arithmetic and logical integer instructions.
+def : InstRW<[M3WriteA1], (instrs COPY)>;
+def : InstRW<[M3WriteAX], (instregex "^(ADD|SUB)S?Xrx64")>;
+def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)[WX]r[sx]$")>;
+def : InstRW<[M3WriteAX], (instregex "^(ADD|BIC|SUB)S[WX]r[sx]$")>;
+def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|EOR|ORR|SUB)[WX]ri")>;
+
+// Move instructions.
+def : InstRW<[M3WriteZ0], (instrs ADR, ADRP)>;
+def : InstRW<[M3WriteZ0], (instregex "^MOV[NZ][WX]i")>;
+
+// Divide and multiply instructions.
+
+// Miscellaneous instructions.
+def : InstRW<[M3WriteAY], (instrs EXTRWrri, EXTRXrri)>;
+
+// Load instructions.
+def : InstRW<[M3WriteLD,
+ WriteLDHi,
+ WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>;
+def : InstRW<[M3WriteLX,
+ ReadAdrBase], (instregex "^PRFMro[WX]")>;
+
+// Store instructions.
+
+// FP data instructions.
+def : InstRW<[M3WriteNSHF1], (instregex "^FABS[DS]r")>;
+def : InstRW<[M3WriteFADD2], (instregex "^F(ADD|SUB)[DS]rr")>;
+def : InstRW<[M3WriteFDIV10], (instrs FDIVSrr)>;
+def : InstRW<[M3WriteFDIV12], (instrs FDIVDrr)>;
+def : InstRW<[M3WriteNMSC1], (instregex "^F(MAX|MIN).+rr")>;
+def : InstRW<[M3WriteFMAC3], (instregex "^FN?MUL[DS]rr")>;
+def : InstRW<[M3WriteFMAC4,
+ M3ReadFMAC], (instregex "^FN?M(ADD|SUB)[DS]rrr")>;
+def : InstRW<[M3WriteNALU1], (instregex "^FNEG[DS]r")>;
+def : InstRW<[M3WriteFCVT3A], (instregex "^FRINT.+r")>;
+def : InstRW<[M3WriteNEONH], (instregex "^FCSEL[DS]rrr")>;
+def : InstRW<[M3WriteFSQR17], (instrs FSQRTSr)>;
+def : InstRW<[M3WriteFSQR25], (instrs FSQRTDr)>;
+
+// FP miscellaneous instructions.
+def : InstRW<[M3WriteFCVT3], (instregex "^FCVT[DHS][DHS]r")>;
+def : InstRW<[M3WriteFCVT4A], (instregex "^[SU]CVTF[SU][XW][DHS]ri")>;
+def : InstRW<[M3WriteFCVT3A], (instregex "^FCVT[AMNPZ][SU]U[XW][DHS]r")>;
+def : InstRW<[M3WriteFCVT3A], (instregex "^FCVTZ[SU][dhs]")>;
+def : InstRW<[M3WriteNALU1], (instregex "^FMOV[DS][ir]")>;
+def : InstRW<[M3WriteFCVT4], (instregex "^[FU](RECP|RSQRT)Ev1")>;
+def : InstRW<[M3WriteNMSC1], (instregex "^FRECPXv1")>;
+def : InstRW<[M3WriteFMAC4,
+ M3ReadFMAC], (instregex "^F(RECP|RSQRT)S(16|32|64)")>;
+def : InstRW<[M3WriteNALU1], (instregex "^FMOV[WX][DS]r")>;
+def : InstRW<[M3WriteNALU1], (instregex "^FMOV[DS][WX]r")>;
+def : InstRW<[M3WriteNEONI], (instregex "^FMOV(DX|XD)Highr")>;
+
+// FP load instructions.
+def : InstRW<[WriteVLD], (instregex "^LDR[DSQ]l")>;
+def : InstRW<[WriteVLD], (instregex "^LDUR[BDHSQ]i")>;
+def : InstRW<[WriteVLD,
+ WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>;
+def : InstRW<[WriteVLD], (instregex "^LDR[BDHSQ]ui")>;
+def : InstRW<[M3WriteLX,
+ ReadAdrBase], (instregex "^LDR[BDHS]ro[WX]")>;
+def : InstRW<[M3WriteLB,
+ ReadAdrBase], (instregex "^LDRQro[WX]")>;
+def : InstRW<[WriteVLD,
+ M3WriteLH], (instregex "^LDN?P[DS]i")>;
+def : InstRW<[M3WriteLA,
+ M3WriteLH], (instregex "^LDN?PQi")>;
+def : InstRW<[M3WriteLB,
+ M3WriteLH,
+ WriteAdr], (instregex "^LDP[DS](post|pre)")>;
+def : InstRW<[M3WriteLC,
+ M3WriteLH,
+ WriteAdr], (instregex "^LDPQ(post|pre)")>;
+
+// FP store instructions.
+def : InstRW<[WriteVST], (instregex "^STUR[BDHSQ]i")>;
+def : InstRW<[WriteVST,
+ WriteAdr], (instregex "^STR[BDHSQ](post|pre)")>;
+def : InstRW<[WriteVST], (instregex "^STR[BDHSQ]ui")>;
+def : InstRW<[M3WriteSY,
+ ReadAdrBase], (instregex "^STR[BDHS]ro[WX]")>;
+def : InstRW<[M3WriteSA,
+ ReadAdrBase], (instregex "^STRQro[WX]")>;
+def : InstRW<[WriteVST], (instregex "^STN?P[DSQ]i")>;
+def : InstRW<[WriteVST,
+ WriteAdr], (instregex "^STP[DS](post|pre)")>;
+def : InstRW<[M3WriteSA,
+ WriteAdr], (instregex "^STPQ(post|pre)")>;
+
+// ASIMD instructions.
+def : InstRW<[M3WriteNMSC3], (instregex "^[SU]ABAL?v")>;
+def : InstRW<[M3WriteNMSC1], (instregex "^[SU]ABDL?v")>;
+def : InstRW<[M3WriteNMSC1], (instregex "^(SQ)?(ABS|NEG)v")>;
+def : InstRW<[M3WriteNALU1], (instregex "^(ADD|NEG|SUB)v")>;
+def : InstRW<[M3WriteNMSC3], (instregex "^[SU]?ADDL?Pv")>;
+def : InstRW<[M3WriteNMSC3], (instregex "^[SU]H(ADD|SUB)v")>;
+def : InstRW<[M3WriteNMSC3], (instregex "^[SU](ADD|SUB)[LW]V?v")>;
+def : InstRW<[M3WriteNMSC3], (instregex "^R?(ADD|SUB)HN2?v")>;
+def : InstRW<[M3WriteNMSC3], (instregex "^[SU]Q(ADD|SUB)v")>;
+def : InstRW<[M3WriteNMSC3], (instregex "^(SU|US)QADDv")>;
+def : InstRW<[M3WriteNMSC3], (instregex "^[SU]RHADDv")>;
+def : InstRW<[M3WriteNMSC3], (instregex "^[SU]?ADDL?Vv")>;
+def : InstRW<[M3WriteNMSC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>;
+def : InstRW<[M3WriteNALU1], (instregex "^CMTSTv")>;
+def : InstRW<[M3WriteNALU1], (instregex "^(AND|BIC|EOR|MVNI|NOT|ORN|ORR)v")>;
+def : InstRW<[M3WriteNMSC1], (instregex "^[SU](MIN|MAX)v")>;
+def : InstRW<[M3WriteNMSC2], (instregex "^[SU](MIN|MAX)Pv")>;
+def : InstRW<[M3WriteNMSC3], (instregex "^[SU](MIN|MAX)Vv")>;
+def : InstRW<[M3WriteNMUL3], (instregex "^(MUL|SQR?DMULH)v")>;
+def : InstRW<[M3WriteNMUL3,
+ M3ReadNMUL], (instregex "^ML[AS]v")>;
+def : InstRW<[M3WriteNMUL3], (instregex "^[SU]ML[AS]Lv")>;
+def : InstRW<[M3WriteNMUL3], (instregex "^SQDML[AS]L")>;
+def : InstRW<[M3WriteNMUL3], (instregex "^(S|U|SQD)MULLv")>;
+def : InstRW<[M3WriteNMSC3], (instregex "^[SU]ADALPv")>;
+def : InstRW<[M3WriteNSHT3], (instregex "^[SU]R?SRAv")>;
+def : InstRW<[M3WriteNSHT1], (instregex "^SHL[dv]")>;
+def : InstRW<[M3WriteNSHT1], (instregex "^[SU]SH[LR][dv]")>;
+def : InstRW<[M3WriteNSHT1], (instregex "^S[RS]I[dv]")>;
+def : InstRW<[M3WriteNSHT2], (instregex "^[SU]?SHLLv")>;
+def : InstRW<[M3WriteNSHT3], (instregex "^(([SU]Q)?R)?SHRU?N[bhsv]")>;
+def : InstRW<[M3WriteNSHT3], (instregex "^[SU]RSH[LR][dv]")>;
+def : InstRW<[M3WriteNSHT3], (instregex "^[SU]QR?SHLU?[bdhsv]")>;
+
+// ASIMD FP instructions.
+def : InstRW<[M3WriteNSHF1], (instregex "^FABSv")>;
+def : InstRW<[M3WriteFADD2], (instregex "^F(ABD|ADD|SUB)v")>;
+def : InstRW<[M3WriteNEONA], (instregex "^FADDP")>;
+def : InstRW<[M3WriteNMSC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v[^1]")>;
+def : InstRW<[M3WriteFCVT3], (instregex "^FCVT(L|N|XN)v")>;
+def : InstRW<[M3WriteFCVT2], (instregex "^FCVT[AMNPZ][SU]v")>;
+def : InstRW<[M3WriteFCVT2], (instregex "^[SU]CVTFv")>;
+def : InstRW<[M3WriteFDIV10], (instrs FDIVv2f32)>;
+def : InstRW<[M3WriteNEONV], (instrs FDIVv4f32)>;
+def : InstRW<[M3WriteNEONW], (instrs FDIVv2f64)>;
+def : InstRW<[M3WriteNMSC1], (instregex "^F(MAX|MIN)(NM)?v")>;
+def : InstRW<[M3WriteNMSC2], (instregex "^F(MAX|MIN)(NM)?Pv")>;
+def : InstRW<[M3WriteNEONZ], (instregex "^F(MAX|MIN)(NM)?Vv")>;
+def : InstRW<[M3WriteFMAC3], (instregex "^FMULX?v.[fi]")>;
+def : InstRW<[M3WriteFMAC4,
+ M3ReadFMAC], (instregex "^FML[AS]v.f")>;
+def : InstRW<[M3WriteFMAC5,
+ M3ReadFMAC], (instregex "^FML[AS]v.i")>;
+def : InstRW<[M3WriteNALU1], (instregex "^FNEGv")>;
+def : InstRW<[M3WriteFCVT3A], (instregex "^FRINT[AIMNPXZ]v")>;
+def : InstRW<[M3WriteFSQR17], (instrs FSQRTv2f32)>;
+def : InstRW<[M3WriteNEONX], (instrs FSQRTv4f32)>;
+def : InstRW<[M3WriteNEONY], (instrs FSQRTv2f64)>;
+
+// ASIMD miscellaneous instructions.
+def : InstRW<[M3WriteNALU1], (instregex "^RBITv")>;
+def : InstRW<[M3WriteNALU1], (instregex "^(BIF|BIT|BSL)v")>;
+def : InstRW<[M3WriteNEONB], (instregex "^DUPv.+gpr")>;
+def : InstRW<[M3WriteNSHF1], (instregex "^DUPv.+lane")>;
+def : InstRW<[M3WriteNSHF1], (instregex "^EXTv")>;
+def : InstRW<[M3WriteNSHF1], (instregex "^[SU]?Q?XTU?Nv")>;
+def : InstRW<[M3WriteNSHF1], (instregex "^CPY")>;
+def : InstRW<[M3WriteNSHF1], (instregex "^INSv.+lane")>;
+def : InstRW<[M3WriteMOVI], (instregex "^MOVI")>;
+def : InstRW<[M3WriteNALU1], (instregex "^FMOVv")>;
+def : InstRW<[M3WriteFCVT4], (instregex "^[FU](RECP|RSQRT)Ev[248]")>;
+def : InstRW<[M3WriteFMAC4,
+ M3ReadFMAC], (instregex "^F(RECP|RSQRT)Sv")>;
+def : InstRW<[M3WriteNSHF1], (instregex "^REV(16|32|64)v")>;
+def : InstRW<[M3WriteNSHF1], (instregex "^TB[LX]v")>;
+def : InstRW<[M3WriteNEOND], (instregex "^[SU]MOVv")>;
+def : InstRW<[M3WriteNSHF3], (instregex "^INSv.+gpr")>;
+def : InstRW<[M3WriteNSHF1], (instregex "^(TRN|UZP|ZIP)[12]v")>;
+
+// ASIMD load instructions.
+def : InstRW<[M3WriteL5], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[M3WriteL5,
+ WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST")>;
+def : InstRW<[M3WriteL5], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[M3WriteL5,
+ WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[M3WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[M3WriteVLDA,
+ WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST")>;
+def : InstRW<[M3WriteVLDA], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[M3WriteVLDA,
+ WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[M3WriteVLDB], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[M3WriteVLDB,
+ WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST")>;
+def : InstRW<[M3WriteVLDB], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[M3WriteVLDB,
+ WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[M3WriteVLDC], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[M3WriteVLDC,
+ WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST")>;
+def : InstRW<[M3WriteVLDC], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[M3WriteVLDC,
+ WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[M3WriteVLDD], (instregex "LD1i(8|16|32)$")>;
+def : InstRW<[M3WriteVLDD,
+ WriteAdr], (instregex "LD1i(8|16|32)_POST")>;
+def : InstRW<[M3WriteVLDE], (instregex "LD1i(64)$")>;
+def : InstRW<[M3WriteVLDE,
+ WriteAdr], (instregex "LD1i(64)_POST")>;
+
+def : InstRW<[M3WriteL5], (instregex "LD1Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[M3WriteL5,
+ WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d)_POST")>;
+def : InstRW<[M3WriteL5], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[M3WriteL5,
+ WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[M3WriteVLDF], (instregex "LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[M3WriteVLDF,
+ WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST")>;
+def : InstRW<[M3WriteVLDF], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[M3WriteVLDF,
+ WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[M3WriteVLDG], (instregex "LD2i(8|16|32)$")>;
+def : InstRW<[M3WriteVLDG,
+ WriteAdr], (instregex "LD2i(8|16|32)_POST")>;
+def : InstRW<[M3WriteVLDH], (instregex "LD2i(64)$")>;
+def : InstRW<[M3WriteVLDH,
+ WriteAdr], (instregex "LD2i(64)_POST")>;
+
+def : InstRW<[M3WriteVLDA], (instregex "LD2Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[M3WriteVLDA,
+ WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d)_POST")>;
+def : InstRW<[M3WriteVLDA], (instregex "LD2Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[M3WriteVLDA,
+ WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[M3WriteVLDI], (instregex "LD3Threev(8b|4h|2s)$")>;
+def : InstRW<[M3WriteVLDI,
+ WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST")>;
+def : InstRW<[M3WriteVLDI], (instregex "LD3Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[M3WriteVLDI,
+ WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[M3WriteVLDJ], (instregex "LD3i(8|16|32)$")>;
+def : InstRW<[M3WriteVLDJ,
+ WriteAdr], (instregex "LD3i(8|16|32)_POST")>;
+def : InstRW<[M3WriteVLDL], (instregex "LD3i(64)$")>;
+def : InstRW<[M3WriteVLDL,
+ WriteAdr], (instregex "LD3i(64)_POST")>;
+
+def : InstRW<[M3WriteVLDB], (instregex "LD3Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[M3WriteVLDB,
+ WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d)_POST")>;
+def : InstRW<[M3WriteVLDB], (instregex "LD3Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[M3WriteVLDB,
+ WriteAdr], (instregex "LD3Rv(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[M3WriteVLDN], (instregex "LD4Fourv(8b|4h|2s)$")>;
+def : InstRW<[M3WriteVLDN,
+ WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST")>;
+def : InstRW<[M3WriteVLDN], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[M3WriteVLDN,
+ WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[M3WriteVLDK], (instregex "LD4i(8|16|32)$")>;
+def : InstRW<[M3WriteVLDK,
+ WriteAdr], (instregex "LD4i(8|16|32)_POST")>;
+def : InstRW<[M3WriteVLDM], (instregex "LD4i(64)$")>;
+def : InstRW<[M3WriteVLDM,
+ WriteAdr], (instregex "LD4i(64)_POST")>;
+
+def : InstRW<[M3WriteVLDC], (instregex "LD4Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[M3WriteVLDC,
+ WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d)_POST")>;
+def : InstRW<[M3WriteVLDC], (instregex "LD4Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[M3WriteVLDC,
+ WriteAdr], (instregex "LD4Rv(16b|8h|4s|2d)_POST")>;
+
+// ASIMD store instructions.
+def : InstRW<[WriteVST], (instregex "ST1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteVST,
+ WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST")>;
+def : InstRW<[WriteVST], (instregex "ST1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVST,
+ WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[M3WriteVSTA], (instregex "ST1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[M3WriteVSTA,
+ WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST")>;
+def : InstRW<[M3WriteVSTA], (instregex "ST1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[M3WriteVSTA,
+ WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[M3WriteVSTB], (instregex "ST1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[M3WriteVSTB,
+ WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST")>;
+def : InstRW<[M3WriteVSTB], (instregex "ST1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[M3WriteVSTB,
+ WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[M3WriteVSTC], (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[M3WriteVSTC,
+ WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST")>;
+def : InstRW<[M3WriteVSTC], (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[M3WriteVSTC,
+ WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[M3WriteVSTD], (instregex "ST1i(8|16|32|64)$")>;
+def : InstRW<[M3WriteVSTD,
+ WriteAdr], (instregex "ST1i(8|16|32|64)_POST")>;
+
+def : InstRW<[M3WriteVSTD], (instregex "ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[M3WriteVSTD,
+ WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST")>;
+def : InstRW<[M3WriteVSTE], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[M3WriteVSTE,
+ WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[M3WriteVSTD], (instregex "ST2i(8|16|32)$")>;
+def : InstRW<[M3WriteVSTD,
+ WriteAdr], (instregex "ST2i(8|16|32)_POST")>;
+def : InstRW<[M3WriteVSTD], (instregex "ST2i(64)$")>;
+def : InstRW<[M3WriteVSTD,
+ WriteAdr], (instregex "ST2i(64)_POST")>;
+
+def : InstRW<[M3WriteVSTF], (instregex "ST3Threev(8b|4h|2s)$")>;
+def : InstRW<[M3WriteVSTF,
+ WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST")>;
+def : InstRW<[M3WriteVSTG], (instregex "ST3Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[M3WriteVSTG,
+ WriteAdr], (instregex "ST3Threev(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[M3WriteVSTH], (instregex "ST3i(8|16|32)$")>;
+def : InstRW<[M3WriteVSTH,
+ WriteAdr], (instregex "ST3i(8|16|32)_POST")>;
+def : InstRW<[M3WriteVSTF], (instregex "ST3i(64)$")>;
+def : InstRW<[M3WriteVSTF,
+ WriteAdr], (instregex "ST3i(64)_POST")>;
+
+def : InstRW<[M3WriteVSTF], (instregex "ST4Fourv(8b|4h|2s)$")>;
+def : InstRW<[M3WriteVSTF,
+ WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST")>;
+def : InstRW<[M3WriteVSTI], (instregex "ST4Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[M3WriteVSTI,
+ WriteAdr], (instregex "ST4Fourv(16b|8h|4s|2d)_POST")>;
+
+def : InstRW<[M3WriteVSTF], (instregex "ST4i(8|16|32|64)$")>;
+def : InstRW<[M3WriteVSTF,
+ WriteAdr], (instregex "ST4i(8|16|32|64)_POST")>;
+
+// Cryptography instructions.
+def : InstRW<[M3WriteAES], (instregex "^AES[DE]")>;
+def : InstRW<[M3WriteAES,
+ M3ReadAES], (instregex "^AESI?MC")>;
+
+def : InstRW<[M3WriteNCRY3A], (instregex "^PMULL?v")>;
+
+def : InstRW<[M3WriteNCRY1A], (instregex "^SHA1([CHMP]|SU[01])")>;
+def : InstRW<[M3WriteNCRY1A], (instregex "^SHA256SU0")>;
+def : InstRW<[M3WriteNCRY5A], (instregex "^SHA256(H2?|SU1)")>;
+
+// CRC instructions.
+def : InstRW<[M3WriteC2], (instregex "^CRC32")>;
+
+} // SchedModel = ExynosM3Model
diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedFalkor.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedFalkor.td
new file mode 100644
index 000000000..84825458e
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedFalkor.td
@@ -0,0 +1,119 @@
+//==- AArch64SchedFalkor.td - Falkor Scheduling Definitions -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Qualcomm Falkor to support
+// instruction scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Define the SchedMachineModel and provide basic properties for coarse grained
+// instruction cost model.
+
+def FalkorModel : SchedMachineModel {
+ let IssueWidth = 8; // 8 uops are dispatched per cycle.
+ let MicroOpBufferSize = 128; // Out-of-order with temporary unified issue buffer.
+ let LoopMicroOpBufferSize = 16;
+ let LoadLatency = 3; // Optimistic load latency.
+ let MispredictPenalty = 11; // Minimum branch misprediction penalty.
+ let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = [HasSVE];
+
+ // FIXME: Remove when all errors have been fixed.
+ let FullInstRWOverlapCheck = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on Falkor.
+
+let SchedModel = FalkorModel in {
+
+ def FalkorUnitB : ProcResource<1>; // Branch
+ def FalkorUnitLD : ProcResource<1>; // Load pipe
+ def FalkorUnitSD : ProcResource<1>; // Store data
+ def FalkorUnitST : ProcResource<1>; // Store pipe
+ def FalkorUnitX : ProcResource<1>; // Complex arithmetic
+ def FalkorUnitY : ProcResource<1>; // Simple arithmetic
+ def FalkorUnitZ : ProcResource<1>; // Simple arithmetic
+
+ def FalkorUnitVSD : ProcResource<1>; // Vector store data
+ def FalkorUnitVX : ProcResource<1>; // Vector X-pipe
+ def FalkorUnitVY : ProcResource<1>; // Vector Y-pipe
+
+ def FalkorUnitGTOV : ProcResource<1>; // Scalar to Vector
+ def FalkorUnitVTOG : ProcResource<1>; // Vector to Scalar
+
+ // Define the resource groups.
+ def FalkorUnitXY : ProcResGroup<[FalkorUnitX, FalkorUnitY]>;
+ def FalkorUnitXYZ : ProcResGroup<[FalkorUnitX, FalkorUnitY, FalkorUnitZ]>;
+ def FalkorUnitXYZB : ProcResGroup<[FalkorUnitX, FalkorUnitY, FalkorUnitZ,
+ FalkorUnitB]>;
+ def FalkorUnitZB : ProcResGroup<[FalkorUnitZ, FalkorUnitB]>;
+ def FalkorUnitVXVY : ProcResGroup<[FalkorUnitVX, FalkorUnitVY]>;
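+
+  // A SchedWriteRes that names one of these groups (e.g. FalkorUnitXYZ)
+  // issues its micro-op on any single member pipe of that group.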
+
+}
+
+//===----------------------------------------------------------------------===//
+// Map the target-defined scheduler read/write resources and latency for
+// Falkor.
+
+let SchedModel = FalkorModel in {
+
+// These WriteRes entries are not used in the Falkor sched model.
+def : WriteRes<WriteImm, []> { let Unsupported = 1; }
+def : WriteRes<WriteI, []> { let Unsupported = 1; }
+def : WriteRes<WriteISReg, []> { let Unsupported = 1; }
+def : WriteRes<WriteIEReg, []> { let Unsupported = 1; }
+def : WriteRes<WriteExtr, []> { let Unsupported = 1; }
+def : WriteRes<WriteIS, []> { let Unsupported = 1; }
+def : WriteRes<WriteID32, []> { let Unsupported = 1; }
+def : WriteRes<WriteID64, []> { let Unsupported = 1; }
+def : WriteRes<WriteIM32, []> { let Unsupported = 1; }
+def : WriteRes<WriteIM64, []> { let Unsupported = 1; }
+def : WriteRes<WriteBr, []> { let Unsupported = 1; }
+def : WriteRes<WriteBrReg, []> { let Unsupported = 1; }
+def : WriteRes<WriteLD, []> { let Unsupported = 1; }
+def : WriteRes<WriteST, []> { let Unsupported = 1; }
+def : WriteRes<WriteSTP, []> { let Unsupported = 1; }
+def : WriteRes<WriteAdr, []> { let Unsupported = 1; }
+def : WriteRes<WriteLDIdx, []> { let Unsupported = 1; }
+def : WriteRes<WriteSTIdx, []> { let Unsupported = 1; }
+def : WriteRes<WriteF, []> { let Unsupported = 1; }
+def : WriteRes<WriteFCmp, []> { let Unsupported = 1; }
+def : WriteRes<WriteFCvt, []> { let Unsupported = 1; }
+def : WriteRes<WriteFCopy, []> { let Unsupported = 1; }
+def : WriteRes<WriteFImm, []> { let Unsupported = 1; }
+def : WriteRes<WriteFMul, []> { let Unsupported = 1; }
+def : WriteRes<WriteFDiv, []> { let Unsupported = 1; }
+def : WriteRes<WriteV, []> { let Unsupported = 1; }
+def : WriteRes<WriteVLD, []> { let Unsupported = 1; }
+def : WriteRes<WriteVST, []> { let Unsupported = 1; }
+def : WriteRes<WriteSys, []> { let Unsupported = 1; }
+def : WriteRes<WriteBarrier, []> { let Unsupported = 1; }
+def : WriteRes<WriteHint, []> { let Unsupported = 1; }
+def : WriteRes<WriteLDHi, []> { let Unsupported = 1; }
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
+// These ReadAdvance entries are not used in the Falkor sched model.
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+def : ReadAdvance<ReadIMA, 0>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD, 0>;
+
+// Detailed Refinements
+// -----------------------------------------------------------------------------
+include "AArch64SchedFalkorDetails.td"
+
+}
diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedFalkorDetails.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedFalkorDetails.td
new file mode 100644
index 000000000..ff14e639d
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedFalkorDetails.td
@@ -0,0 +1,1292 @@
+//==- AArch64SchedFalkorDetails.td - Falkor Scheduling Defs -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the uop and latency details for the machine model for the
+// Qualcomm Falkor subtarget.
+//
+//===----------------------------------------------------------------------===//
+
+// Contains all of the Falkor specific SchedWriteRes types. The approach
+// below is to define a generic SchedWriteRes for every combination of
+// latency and microOps. The naming convention is to use a prefix, one or more
+// microOp count/type designators, and one field for latency.
+// Prefix: FalkorWr
+// MicroOp Count/Types: #(B|X|Y|Z|LD|ST|SD|VX|VY|VSD)
+// Latency: #cyc
+//
+// e.g. FalkorWr_1Z_6SD_4VX_6cyc means there are 11 micro-ops to be issued
+// down one Z pipe, six SD pipes, four VX pipes and the total latency is
+// six cycles.
+//
+// Contains all of the Falkor specific ReadAdvance types for forwarding logic.
+//
+// Contains all of the Falkor specific WriteVariant types for immediate zero
+// and LSLFast.
+//===----------------------------------------------------------------------===//
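+
+// As a worked example of this convention, FalkorWr_2LD_1Z_3cyc (defined
+// below) breaks down as:
+//   FalkorWr - prefix
+//   2LD_1Z   - two LD micro-ops plus one Z micro-op (NumMicroOps = 3)
+//   3cyc     - Latency = 3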
+
+//===----------------------------------------------------------------------===//
+// Define 0 micro-op types
+def FalkorWr_LdInc_none_2cyc : SchedWriteRes<[]> {
+ let Latency = 2;
+ let NumMicroOps = 0;
+}
+def FalkorWr_StInc_none_2cyc : SchedWriteRes<[]> {
+ let Latency = 2;
+ let NumMicroOps = 0;
+}
+def FalkorWr_none_3cyc : SchedWriteRes<[]> {
+ let Latency = 3;
+ let NumMicroOps = 0;
+}
+def FalkorWr_none_4cyc : SchedWriteRes<[]> {
+ let Latency = 4;
+ let NumMicroOps = 0;
+}
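+
+// The *Inc_none_2cyc writes above carry no micro-ops of their own; they are
+// listed below as the first write of the pre/post-indexed load and store
+// forms (the updated base register), and the FalkorReadIncLd/FalkorReadIncSt
+// advances defined later forward against them.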
+
+//===----------------------------------------------------------------------===//
+// Define 1 micro-op types
+
+def FalkorWr_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 2; }
+def FalkorWr_IMUL32_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; }
+def FalkorWr_IMUL64_1X_4cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; }
+def FalkorWr_IMUL64_1X_5cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 5; }
+def FalkorWr_1Z_0cyc : SchedWriteRes<[FalkorUnitZ]> { let Latency = 0; }
+def FalkorWr_1ZB_0cyc : SchedWriteRes<[FalkorUnitZB]> { let Latency = 0; }
+def FalkorWr_1LD_3cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 3; }
+def FalkorWr_1LD_4cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 4; }
+def FalkorWr_1XYZ_0cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 0; }
+def FalkorWr_1XYZ_1cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 1; }
+def FalkorWr_1XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 2; }
+def FalkorWr_1XYZB_0cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 0; }
+def FalkorWr_1XYZB_1cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 1; }
+def FalkorWr_1none_0cyc : SchedWriteRes<[]> { let Latency = 0; }
+
+def FalkorWr_1VXVY_0cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 0; }
+def FalkorWr_1VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 1; }
+def FalkorWr_1VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 2; }
+def FalkorWr_1VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 3; }
+def FalkorWr_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; }
+def FalkorWr_VMUL32_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; }
+def FalkorWr_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; }
+def FalkorWr_FMUL32_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; }
+def FalkorWr_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; }
+def FalkorWr_FMUL64_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; }
+
+def FalkorWr_1LD_0cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 0; }
+def FalkorWr_1ST_0cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 0; }
+def FalkorWr_1ST_3cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 3; }
+
+def FalkorWr_1GTOV_0cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 0; }
+def FalkorWr_1GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 1; }
+def FalkorWr_1GTOV_4cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 4; }
+def FalkorWr_1VTOG_1cyc : SchedWriteRes<[FalkorUnitVTOG]>{ let Latency = 1; }
+
+//===----------------------------------------------------------------------===//
+// Define 2 micro-op types
+
+def FalkorWr_2VXVY_0cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 0;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def FalkorWr_VMUL32_2VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def FalkorWr_FMUL32_2VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def FalkorWr_FMUL64_2VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1LD_1VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def FalkorWr_1XYZ_1LD_4cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1VX_1VY_5cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1VX_1VY_2cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1VX_1VY_4cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1VX_1VY_10cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1VX_1VY_12cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 12;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1VX_1VY_14cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 14;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1VX_1VY_21cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 21;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1GTOV_1VXVY_2cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitVXVY]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_2GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitGTOV]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1XYZ_1ST_4cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def FalkorWr_1XYZ_1LD_5cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_2XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitXYZ]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1Z_1XY_0cyc : SchedWriteRes<[FalkorUnitZ, FalkorUnitXY]> {
+ let Latency = 0;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1X_1Z_8cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2, 8];
+}
+
+def FalkorWr_1X_1Z_11cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
+ let Latency = 11;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2, 11];
+}
+
+def FalkorWr_1LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitZ]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitSD, FalkorUnitST]> {
+ let Latency = 0;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1VSD_1ST_0cyc: SchedWriteRes<[FalkorUnitVSD, FalkorUnitST]> {
+ let Latency = 0;
+ let NumMicroOps = 2;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 3 micro-op types
+
+def FalkorWr_1ST_1SD_1LD_0cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD,
+ FalkorUnitLD]> {
+ let Latency = 0;
+ let NumMicroOps = 3;
+}
+
+def FalkorWr_1ST_1SD_1LD_3cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD,
+ FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+
+def FalkorWr_3VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+
+def FalkorWr_3VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+
+def FalkorWr_3VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
+def FalkorWr_3VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+def FalkorWr_1LD_2VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+
+def FalkorWr_2LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+
+def FalkorWr_3LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+
+def FalkorWr_2LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitZ]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+
+def FalkorWr_1XYZ_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitSD, FalkorUnitST]> {
+ let Latency = 0;
+ let NumMicroOps = 3;
+}
+def FalkorWr_1XYZ_1VSD_1ST_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitVSD, FalkorUnitST]> {
+ let Latency = 0;
+ let NumMicroOps = 3;
+}
+//===----------------------------------------------------------------------===//
+// Define 4 micro-op types
+
+def FalkorWr_2VX_2VY_14cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
+ FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 14;
+ let NumMicroOps = 4;
+}
+
+def FalkorWr_2VX_2VY_20cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
+ FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 20;
+ let NumMicroOps = 4;
+}
+
+def FalkorWr_2VX_2VY_21cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
+ FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 21;
+ let NumMicroOps = 4;
+}
+
+def FalkorWr_2VX_2VY_24cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
+ FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 24;
+ let NumMicroOps = 4;
+}
+
+def FalkorWr_4VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 2;
+ let NumMicroOps = 4;
+}
+def FalkorWr_4VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+def FalkorWr_4VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+def FalkorWr_4VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+
+def FalkorWr_4LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitLD, FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+
+def FalkorWr_1LD_3VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+
+def FalkorWr_2LD_2none_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+
+def FalkorWr_2LD_1ST_1SD_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitST,
+ FalkorUnitSD, FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+
+def FalkorWr_2VSD_2ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD,
+ FalkorUnitST, FalkorUnitVSD]> {
+ let Latency = 0;
+ let NumMicroOps = 4;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 5 micro-op types
+
+def FalkorWr_1LD_4VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 5;
+}
+def FalkorWr_2LD_2VXVY_1none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 5;
+}
+def FalkorWr_5VXVY_7cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY]> {
+ let Latency = 7;
+ let NumMicroOps = 5;
+}
+def FalkorWr_1XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST,
+ FalkorUnitVSD, FalkorUnitST,
+ FalkorUnitVSD]> {
+ let Latency = 0;
+ let NumMicroOps = 5;
+}
+def FalkorWr_1VXVY_2ST_2VSD_0cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
+ FalkorUnitVSD, FalkorUnitST,
+ FalkorUnitVSD]> {
+ let Latency = 0;
+ let NumMicroOps = 5;
+}
+//===----------------------------------------------------------------------===//
+// Define 6 micro-op types
+
+def FalkorWr_2LD_2VXVY_2none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 6;
+}
+
+def FalkorWr_2XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST,
+ FalkorUnitVSD, FalkorUnitXYZ,
+ FalkorUnitST, FalkorUnitVSD]> {
+ let Latency = 0;
+ let NumMicroOps = 6;
+}
+
+def FalkorWr_2VXVY_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
+ FalkorUnitVSD, FalkorUnitVXVY,
+ FalkorUnitST, FalkorUnitVSD]> {
+ let Latency = 0;
+ let NumMicroOps = 6;
+}
+
+def FalkorWr_3VSD_3ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD,
+ FalkorUnitST, FalkorUnitVSD,
+ FalkorUnitST, FalkorUnitVSD]> {
+ let Latency = 0;
+ let NumMicroOps = 6;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 8 micro-op types
+
+def FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 8;
+}
+
+def FalkorWr_4VSD_4ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD,
+ FalkorUnitST, FalkorUnitVSD,
+ FalkorUnitST, FalkorUnitVSD,
+ FalkorUnitST, FalkorUnitVSD]> {
+ let Latency = 0;
+ let NumMicroOps = 8;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 9 micro-op types
+
+def FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD,
+ FalkorUnitLD, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitLD,
+ FalkorUnitLD, FalkorUnitXYZ,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 9;
+}
+
+def FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD,
+ FalkorUnitLD, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitXYZ,
+ FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 9;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 10 micro-op types
+
+def FalkorWr_2VXVY_4ST_4VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
+ FalkorUnitVSD, FalkorUnitVXVY,
+ FalkorUnitST, FalkorUnitVSD,
+ FalkorUnitST, FalkorUnitVSD,
+ FalkorUnitST, FalkorUnitVSD]> {
+ let Latency = 0;
+ let NumMicroOps = 10;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 12 micro-op types
+
+def FalkorWr_4VXVY_4ST_4VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
+ FalkorUnitVSD, FalkorUnitVXVY,
+ FalkorUnitST, FalkorUnitVSD,
+ FalkorUnitVXVY, FalkorUnitST,
+ FalkorUnitVSD, FalkorUnitVXVY,
+ FalkorUnitST, FalkorUnitVSD]> {
+ let Latency = 0;
+ let NumMicroOps = 12;
+}
+
+// Forwarding logic is modeled for multiply add/accumulate and
+// load/store base register increment.
+// -----------------------------------------------------------------------------
+def FalkorReadIMA32 : SchedReadAdvance<3, [FalkorWr_IMUL32_1X_2cyc]>;
+def FalkorReadIMA64 : SchedReadAdvance<4, [FalkorWr_IMUL64_1X_4cyc, FalkorWr_IMUL64_1X_5cyc]>;
+def FalkorReadVMA : SchedReadAdvance<3, [FalkorWr_VMUL32_1VXVY_4cyc, FalkorWr_VMUL32_2VXVY_4cyc]>;
+def FalkorReadFMA32 : SchedReadAdvance<1, [FalkorWr_FMUL32_1VXVY_5cyc, FalkorWr_FMUL32_2VXVY_5cyc]>;
+def FalkorReadFMA64 : SchedReadAdvance<2, [FalkorWr_FMUL64_1VXVY_6cyc, FalkorWr_FMUL64_2VXVY_6cyc]>;
+
+def FalkorReadIncLd : SchedReadAdvance<1, [FalkorWr_LdInc_none_2cyc]>;
+def FalkorReadIncSt : SchedReadAdvance<1, [FalkorWr_StInc_none_2cyc]>;
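+
+// As a worked example, MADDXrrr below maps to FalkorWr_IMUL64_1X_5cyc with
+// FalkorReadIMA64 on its accumulator operand; when that operand is produced
+// by one of the IMUL64 writes, the read is advanced by 4 cycles, so the
+// accumulate chain sees an effective latency of 5 - 4 = 1 cycle.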
+
+// SchedPredicates and WriteVariants for Immediate Zero and LSLFast/ASRFast
+// -----------------------------------------------------------------------------
+def FalkorImmZPred : SchedPredicate<[{MI->getOperand(1).isImm() &&
+ MI->getOperand(1).getImm() == 0}]>;
+def FalkorOp1ZrReg : SchedPredicate<[{MI->getOperand(1).getReg() == AArch64::WZR ||
+                                      MI->getOperand(1).getReg() == AArch64::XZR}]>;
+def FalkorShiftExtFastPred : SchedPredicate<[{TII->isFalkorShiftExtFast(*MI)}]>;
+
+def FalkorWr_FMOV : SchedWriteVariant<[
+ SchedVar<FalkorOp1ZrReg, [FalkorWr_1none_0cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_1GTOV_1cyc]>]>;
+
+def FalkorWr_MOVZ : SchedWriteVariant<[
+ SchedVar<FalkorImmZPred, [FalkorWr_1none_0cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_1XYZB_0cyc]>]>; // imm fwd
+
+
+def FalkorWr_ADDSUBsx : SchedWriteVariant<[
+ SchedVar<FalkorShiftExtFastPred, [FalkorWr_1XYZ_1cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_2XYZ_2cyc]>]>;
+
+def FalkorWr_LDRro : SchedWriteVariant<[
+ SchedVar<FalkorShiftExtFastPred, [FalkorWr_1LD_3cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_1XYZ_1LD_4cyc]>]>;
+
+def FalkorWr_LDRSro : SchedWriteVariant<[
+ SchedVar<FalkorShiftExtFastPred, [FalkorWr_1LD_4cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_1XYZ_1LD_5cyc]>]>;
+
+def FalkorWr_ORRi : SchedWriteVariant<[
+ SchedVar<FalkorOp1ZrReg, [FalkorWr_1XYZ_0cyc]>, // imm fwd
+ SchedVar<NoSchedPred, [FalkorWr_1XYZ_1cyc]>]>;
+
+def FalkorWr_PRFMro : SchedWriteVariant<[
+ SchedVar<FalkorShiftExtFastPred, [FalkorWr_1ST_3cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_1XYZ_1ST_4cyc]>]>;
+
+def FalkorWr_STRVro : SchedWriteVariant<[
+ SchedVar<FalkorShiftExtFastPred, [FalkorWr_1VSD_1ST_0cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_1XYZ_1VSD_1ST_0cyc]>]>;
+
+def FalkorWr_STRQro : SchedWriteVariant<[
+ SchedVar<FalkorShiftExtFastPred, [FalkorWr_1XYZ_2ST_2VSD_0cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_2XYZ_2ST_2VSD_0cyc]>]>;
+
+def FalkorWr_STRro : SchedWriteVariant<[
+ SchedVar<FalkorShiftExtFastPred, [FalkorWr_1SD_1ST_0cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_1XYZ_1SD_1ST_0cyc]>]>;
+
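+// For example, FalkorWr_ADDSUBsx above resolves to the single micro-op
+// FalkorWr_1XYZ_1cyc schedule when FalkorShiftExtFastPred holds
+// (TII->isFalkorShiftExtFast(*MI)), and to the two micro-op
+// FalkorWr_2XYZ_2cyc schedule otherwise.
+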
+//===----------------------------------------------------------------------===//
+// Specialize the coarse model by associating instruction groups with the
+// subtarget-defined types. As the model is refined, this will override most
+// of the earlier mappings.
+
+// Miscellaneous
+// -----------------------------------------------------------------------------
+
+// FIXME: This could be better modeled by looking at the regclasses of the operands.
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instrs COPY)>;
+
+// SIMD Floating-point Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)v2f32$")>;
+
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT))(v2f32|v2i32p)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FAC(GE|GT)(32|64)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|GE|GT)(32|64|v2f32|v2i32)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v1i32|v1i64|v2i32)rz$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)v2f32$")>;
+
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^F(MAX|MIN)(NM)?Vv4i32v$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FABD|FADD|FSUB)v2f32$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FADDP(v2i32p|v2i64p|v2f32)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v1i32|v1i64|v2f32)$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs FCVTXNv1i64)>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVTZ(S|U)v2i32(_shift)?$")>;
+
+def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
+ (instregex "^(FMUL|FMULX)(v2f32|(v1i32_indexed|v2i32_indexed))$")>;
+def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
+ (instrs FMULX32)>;
+
+def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
+ (instregex "^(FMUL|FMULX)v1i64_indexed$")>;
+def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
+ (instrs FMULX64)>;
+
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(FABS|FNEG)(v2f64|v4f32)$")>;
+
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2f64|v4f32|v2i64p)$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v2i64|v4i32)rz$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instrs FCVTLv4i16, FCVTLv2i32)>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32)$")>;
+
+def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instrs FDIVv2f32)>;
+def : InstRW<[FalkorWr_1VX_1VY_12cyc],(instrs FSQRTv2f32)>;
+
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(FABD|FADD(P)?|FSUB)(v2f64|v4f32)$")>;
+
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v2f64|v4f32)$")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs FCVTLv8i16, FCVTLv4i32)>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVTZ(S|U)(v2i64|v4i32)(_shift)?$")>;
+
+def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc],
+ (instregex "^(FMUL|FMULX)(v2f64|v4f32|v4i32_indexed)$")>;
+
+def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc],
+ (instregex "^(FMUL|FMULX)v2i64_indexed$")>;
+
+def : InstRW<[FalkorWr_3VXVY_4cyc], (instrs FCVTNv4i16, FCVTNv2i32, FCVTXNv2f32)>;
+def : InstRW<[FalkorWr_3VXVY_5cyc], (instrs FCVTNv8i16, FCVTNv4i32, FCVTXNv4f32)>;
+
+def : InstRW<[FalkorWr_2VX_2VY_14cyc],(instrs FDIVv2f64)>;
+def : InstRW<[FalkorWr_2VX_2VY_20cyc],(instrs FDIVv4f32)>;
+def : InstRW<[FalkorWr_2VX_2VY_21cyc],(instrs FSQRTv2f64)>;
+def : InstRW<[FalkorWr_2VX_2VY_24cyc],(instrs FSQRTv4f32)>;
+
+def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA],
+ (instregex "^ML(A|S)(v8i8|v4i16|v2i32)(_indexed)?$")>;
+def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
+ (instregex "^ML(A|S)(v16i8|v8i16|v4i32|v2i64)(_indexed)?$")>;
+
+def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, FalkorReadFMA32],
+ (instregex "^FML(A|S)(v2f32|(v1i32_indexed|v2i32_indexed))$")>;
+def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, FalkorReadFMA64],
+ (instregex "^FML(A|S)v1i64_indexed$")>;
+def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc, FalkorReadFMA32],
+ (instregex "^FML(A|S)(v4f32|v4i32_indexed)$")>;
+def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc, FalkorReadFMA64],
+ (instregex "^FML(A|S)(v2f64|v2i64_indexed)$")>;
+
+// SIMD Integer Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^ADD(v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs ADDPv2i64p)>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(AND|ORR|ORN|BIC|EOR)v8i8$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIC|ORR)(v2i32|v4i16)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^NEG(v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^SUB(v1i64|v2i32|v4i16|v8i8)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v2i32|v4i16|v8i8)(_v.*)?$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHLv1i64$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHR(v2i32|v4i16|v8i8)_shift$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHRd$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^((S|U)?(MAX|MIN)P?|ABS|ADDP|CM(EQ|GE|HS|GT|HI))(v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CM(EQ|GE|HS|GT|HI)(v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CM(EQ|LE|GE|GT|LT)(v1i64|v2i32|v4i16|v8i8)rz$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CMTST(v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs PMULv8i8)>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^SHL(v2i32|v4i16|v8i8)_shift$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^SHLd$")>;
+
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQNEG(v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)R?SRA(d|(v2i32|v4i16|v8i8)_shift)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)(ABD|ADALP)(v8i8|v4i16|v2i32)(_v.*)?$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)ADDLVv4i16v$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)QSHLU?(d|s|h|b|(v8i8|v4i16|v2i32)_shift)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)(QSHL|RSHL|QRSHL)(v1i8|v1i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN)(s|h|b)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)QSUB(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RHADD(v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RSHR(v2i32|v4i16|v8i8)_shift$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RSHRd$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^R?SHRN(v2i32|v4i16|v8i8)_shift$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(SU|US)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)?(MAX|MIN)V(v4i16v|v4i32v)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs ADDVv4i16v)>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^S(L|R)I(d|(v8i8|v4i16|v2i32)_shift)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQABS(v1i8|v1i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQNEG(v1i8|v1i16|v1i32|v1i64)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)ADDLVv8i8v$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)?(MAX|MIN)V(v8i8v|v8i16v)$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs ADDVv8i8v)>;
+def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc],
+ (instregex "^MUL(v2i32|v4i16|v8i8)(_indexed)?$")>;
+def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc],
+ (instregex "^SQR?DMULH(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
+def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc],
+ (instregex "^SQDMULL(i16|i32)$")>;
+def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA],
+ (instregex "^SQRDML(A|S)H(i16|i32|v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
+
+def : InstRW<[FalkorWr_1VXVY_5cyc], (instregex "^(S|U)?(MAX|MIN)Vv16i8v$")>;
+
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs ADDVv4i32v)>;
+
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs ADDVv8i16v)>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(ADD|SUB)HNv.*$")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)ABA(v2i32|v4i16|v8i8)$")>;
+
+def : InstRW<[FalkorWr_2VXVY_5cyc], (instrs ADDVv16i8v)>;
+
+def : InstRW<[FalkorWr_2VXVY_6cyc], (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32)_shift?$")>;
+def : InstRW<[FalkorWr_2VXVY_6cyc], (instregex "^R(ADD|SUB)HNv.*$")>;
+
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^ADD(v16i8|v8i16|v4i32|v2i64)$")>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs ADDPv2i64)>; // sz==11
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(AND|ORR|ORN|BIC|EOR)v16i8$")>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(BIC|ORR)(v8i16|v4i32)$")>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(NEG|SUB)(v16i8|v8i16|v4i32|v2i64)$")>;
+
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)ADDLv.*$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v16i8|v2i64|v4i32|v8i16)(_v.*)?$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)SHR(v16i8|v8i16|v4i32|v2i64)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)SUBLv.*$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^((S|U)?(MAX|MIN)P?|ABS)(v16i8|v2i64|v4i32|v8i16)$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^ADDP(v4i32|v8i16|v16i8)$")>; // sz!=11
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^CM(EQ|GE|HS|GT|HI)(v16i8|v2i64|v4i32|v8i16)$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^CM(EQ|LE|GE|GT|LT)(v16i8|v2i64|v4i32|v8i16)rz$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(CMTST|PMUL)(v16i8|v2i64|v4i32|v8i16)$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^PMULL(v8i8|v16i8)$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^SHL(v16i8|v8i16|v4i32|v2i64)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>;
+
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)R?SRA(v2i64|v4i32|v8i16|v16i8)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)ABD(v16i8|v8i16|v4i32|v2i64)$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)ABDLv.*$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)(ADALP|QADD)(v16i8|v8i16|v4i32|v2i64)(_v.*)?$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)QSHLU?(v2i64|v4i32|v8i16|v16i8)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)(QSHL|RSHL|QRSHL|QSUB|RHADD)(v16i8|v8i16|v4i32|v2i64)$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)RSHR(v2i64|v4i32|v8i16|v16i8)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^R?SHRN(v2i64|v4i32|v8i16|v16i8)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(SU|US)QADD(v16i8|v8i16|v4i32|v2i64)$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^PMULL(v1i64|v2i64)$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^S(L|R)I(v16i8|v8i16|v4i32|v2i64)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^SQ(ABS|NEG)(v16i8|v8i16|v4i32|v2i64)$")>;
+
+def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc],
+ (instregex "^(MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
+def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc],
+ (instregex "^SQDMULLv.*$")>;
+def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
+ (instregex "^SQRDML(A|S)H(v16i8|v8i16|v4i32)(_indexed)?$")>;
+
+def : InstRW<[FalkorWr_3VXVY_3cyc], (instregex "^(S|U)ADDLVv4i32v$")>;
+
+def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^(S|U)ADDLVv8i16v$")>;
+
+def : InstRW<[FalkorWr_3VXVY_6cyc], (instregex "^(S|U)ADDLVv16i8v$")>;
+
+def : InstRW<[FalkorWr_4VXVY_2cyc], (instregex "^(S|U)(ADD|SUB)Wv.*$")>;
+
+def : InstRW<[FalkorWr_4VXVY_3cyc], (instregex "^(S|U)ABALv.*$")>;
+
+def : InstRW<[FalkorWr_4VXVY_4cyc], (instregex "^(S|U)ABA(v16i8|v8i16|v4i32)$")>;
+
+def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA],
+ (instregex "^SQD(MLAL|MLSL)(i16|i32|v1i32_indexed|v1i64_indexed)$")>;
+def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
+ (instregex "^SQD(MLAL|MLSL)v[248].*$")>;
+
+// SIMD Load Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))$")>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
+ (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))_POST$")>;
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
+ (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], (instrs LD2i64)>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
+ (instrs LD2i64_POST)>;
+
+def : InstRW<[FalkorWr_1LD_1VXVY_4cyc, FalkorReadIncLd], (instregex "^LD1i(8|16|32)$")>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1VXVY_4cyc, FalkorReadIncLd],
+ (instregex "^LD1i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd],
+ (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd],
+ (instregex "^LD2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD2Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd],
+ (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
+ (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
+ (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instregex "^LD2Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
+ (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instrs LD3i64)>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
+ (instrs LD3i64_POST)>;
+def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instrs LD4i64)>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
+ (instrs LD4i64_POST)>;
+
+def : InstRW<[FalkorWr_1LD_2VXVY_4cyc, FalkorReadIncLd], (instregex "^LD2i(8|16|32)$")>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_2VXVY_4cyc, FalkorReadIncLd],
+ (instregex "^LD2i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_1none_3cyc, FalkorReadIncLd],
+ (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD3Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_1none_3cyc, FalkorReadIncLd],
+ (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd],
+ (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd], (instrs LD3Threev2d)>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd],
+ (instrs LD3Threev2d_POST)>;
+def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd], (instregex "^LD3Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd],
+ (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[FalkorWr_1LD_3VXVY_4cyc, FalkorReadIncLd], (instregex "^LD3i(8|16|32)$")>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3VXVY_4cyc, FalkorReadIncLd],
+ (instregex "^LD3i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2none_3cyc, FalkorReadIncLd],
+ (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], (instregex "^LD4Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2none_3cyc, FalkorReadIncLd],
+ (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd],
+ (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd], (instrs LD4Fourv2d)>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd],
+ (instrs LD4Fourv2d_POST)>;
+def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd], (instregex "^LD4Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd],
+ (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[FalkorWr_1LD_4VXVY_4cyc, FalkorReadIncLd], (instregex "^LD4i(8|16|32)$")>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_4VXVY_4cyc, FalkorReadIncLd],
+ (instregex "^LD4i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc, FalkorReadIncLd],
+ (instregex "^LD3Threev(8b|4h|2s)$")>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_1none_4cyc, FalkorReadIncLd],
+ (instregex "^LD3Threev(8b|4h|2s)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc, FalkorReadIncLd],
+ (instregex "^LD4Fourv(8b|4h|2s)$")>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_2none_4cyc, FalkorReadIncLd],
+ (instregex "^LD4Fourv(8b|4h|2s)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc, FalkorReadIncLd],
+ (instregex "^LD3Threev(16b|8h|4s)$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc, FalkorReadIncLd],
+ (instregex "^LD4Fourv(16b|8h|4s)$")>;
+
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc, FalkorReadIncLd],
+ (instregex "^LD3Threev(16b|8h|4s)_POST$")>;
+
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc, FalkorReadIncLd],
+ (instregex "^LD4Fourv(16b|8h|4s)_POST$")>;
+
+// Arithmetic and Logical Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(CCMN|CCMP)(W|X)(r|i)$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ADC(S)?(W|X)r$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ADD(S)?(W|X)r(r|i)$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(CSEL|CSINC|CSINV|CSNEG)(W|X)r$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^AND(S)?(W|X)r(i|r|s)$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^BIC(S)?(W|X)r(r|s)$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EON(W|X)r(r|s)$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EOR(W|X)r(i|r|s)$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORN(W|X)r(r|s)$")>;
+def : InstRW<[FalkorWr_ORRi], (instregex "^ORR(W|X)ri$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORR(W|X)r(r|s)$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^SBC(S)?(W|X)r$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^SUB(S)?(W|X)r(r|i)$")>;
+def : InstRW<[FalkorWr_ADDSUBsx], (instregex "^ADD(S)?(W|X)r(s|x|x64)$")>;
+def : InstRW<[FalkorWr_ADDSUBsx], (instregex "^SUB(S)?(W|X)r(s|x|x64)$")>;
+
+// SIMD Miscellaneous Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^DUP(v8i8|v4i16|v2i32)(gpr|lane)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^DUP(v16i8|v8i16)(gpr|lane)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^CPY(i8|i16|i32|i64)$")>;
+def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^INSv(i8|i16)(gpr|lane)$")>;
+def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^(S|U)MOVv.*$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v8i8$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs EXTv8i8)>;
+def : InstRW<[FalkorWr_1VXVY_0cyc], (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)$")>; // imm fwd
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs TBLv8i8One)>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs NOTv8i8)>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^REV(16|32|64)v.*$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(TRN1|TRN2|ZIP1|UZP1|UZP2|ZIP2|XTN)(v2i32|v2i64|v4i16|v4i32|v8i8|v8i16|v16i8)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(CLS|CLZ|CNT|RBIT)(v2i32|v4i16|v8i8)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "(S|U)QXTU?Nv.*$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FRECPEv1i32, FRECPEv1i64, FRSQRTEv1i32, FRSQRTEv1i64, FRECPEv2f32, FRSQRTEv2f32)>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FRECPXv1i32, FRECPXv1i64)>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs URECPEv2i32, URSQRTEv2i32)>;
+
+def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
+ (instrs FRECPS32, FRSQRTS32, FRECPSv2f32, FRSQRTSv2f32)>;
+
+def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
+ (instrs FRECPS64, FRSQRTS64)>;
+
+def : InstRW<[FalkorWr_1GTOV_1VXVY_2cyc],
+ (instregex "^INSv(i32|i64)(gpr|lane)$")>;
+def : InstRW<[FalkorWr_2GTOV_1cyc], (instregex "^DUP(v4i32|v2i64)(gpr|lane)$")>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v16i8$")>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs EXTv16i8)>;
+def : InstRW<[FalkorWr_2VXVY_0cyc], (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>; // imm fwd
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs NOTv16i8)>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs TBLv16i8One)>;
+
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(CLS|CLZ|CNT|RBIT)(v4i32|v8i16|v16i8)$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs FRECPEv2f64, FRECPEv4f32, FRSQRTEv2f64, FRSQRTEv4f32)>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs URECPEv4i32, URSQRTEv4i32)>;
+
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs TBLv8i8Two)>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^TBX(v8|v16)i8One$")>;
+
+def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc],
+ (instrs FRECPSv4f32, FRSQRTSv4f32)>;
+
+def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc],
+ (instrs FRECPSv2f64, FRSQRTSv2f64)>;
+
+def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^TBL(v8i8Three|v16i8Two)$")>;
+def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^TBX(v8i8Two|v16i8Two)$")>;
+
+def : InstRW<[FalkorWr_4VXVY_6cyc], (instregex "^TBL(v8i8Four|v16i8Three)$")>;
+def : InstRW<[FalkorWr_4VXVY_6cyc], (instregex "^TBX(v8i8Three|v16i8Three)$")>;
+
+def : InstRW<[FalkorWr_5VXVY_7cyc], (instrs TBLv16i8Four)>;
+def : InstRW<[FalkorWr_5VXVY_7cyc], (instregex "^TBX(v8i8Four|v16i8Four)$")>;
+
+// SIMD Store Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^STR(Q|D|S|H|B)ui$")>;
+def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^STR(Q|D|S|H|B)(post|pre)$")>;
+def : InstRW<[FalkorWr_STRVro, ReadDefault, FalkorReadIncSt],
+ (instregex "^STR(D|S|H|B)ro(W|X)$")>;
+def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
+ (instregex "^STPQi$")>;
+def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
+ (instregex "^STPQ(post|pre)$")>;
+def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
+ (instregex "^STP(D|S)(i)$")>;
+def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
+ (instregex "^STP(D|S)(post|pre)$")>;
+def : InstRW<[FalkorWr_STRQro, ReadDefault, FalkorReadIncSt],
+ (instregex "^STRQro(W|X)$")>;
+def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^STUR(Q|D|S|B|H)i$")>;
+def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
+ (instrs STNPDi, STNPSi)>;
+def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
+ (instrs STNPQi)>;
+
+def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST1(One(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64)|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>;
+def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST1(One(v8b|v4h|v2s|v1d)_POST|(i8|i16|i32|i64)_POST)$")>;
+def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST2(Two(v8b|v4h|v2s)|(i8|i16|i32|i64))$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST2(Two(v8b|v4h|v2s)|(i8|i16|i32|i64))_POST$")>;
+
+def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>;
+def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST2Two(v16b|v8h|v4s|v2d)$")>;
+def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST3(i8|i16|i32|i64)$")>;
+def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST4(i8|i16|i32|i64)$")>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))_POST$")>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST2Two(v16b|v8h|v4s|v2d)_POST$")>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST3(i8|i16|i32|i64)_POST$")>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST4(i8|i16|i32|i64)_POST$")>;
+
+def : InstRW<[FalkorWr_1VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST3Three(v8b|v4h|v2s)$")>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST3Three(v8b|v4h|v2s)_POST$")>;
+
+def : InstRW<[FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST1Three(v16b|v8h|v4s|v2d)$")>;
+def : InstRW<[FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instrs ST3Threev2d)>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST1Three(v16b|v8h|v4s|v2d)_POST$")>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instrs ST3Threev2d_POST)>;
+
+def : InstRW<[FalkorWr_2VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST4Four(v8b|v4h|v2s)$")>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST4Four(v8b|v4h|v2s)_POST$")>;
+
+def : InstRW<[FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST1Four(v16b|v8h|v4s|v2d)$")>;
+def : InstRW<[FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instrs ST4Fourv2d)>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST1Four(v16b|v8h|v4s|v2d)_POST$")>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instrs ST4Fourv2d_POST)>;
+
+def : InstRW<[FalkorWr_2VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST3Three(v16b|v8h|v4s)$")>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST3Three(v16b|v8h|v4s)_POST$")>;
+
+def : InstRW<[FalkorWr_4VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST4Four(v16b|v8h|v4s)$")>;
+// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^ST4Four(v16b|v8h|v4s)_POST$")>;
+
+// Branch Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1none_0cyc], (instrs B, TCRETURNdi)>;
+def : InstRW<[FalkorWr_1Z_0cyc], (instregex "^(BR|RET|(CBZ|CBNZ|TBZ|TBNZ)(W|X))$")>;
+def : InstRW<[FalkorWr_1Z_0cyc], (instrs RET_ReallyLR, TCRETURNri)>;
+def : InstRW<[FalkorWr_1ZB_0cyc], (instrs Bcc)>;
+def : InstRW<[FalkorWr_1XYZB_0cyc], (instrs BL)>;
+def : InstRW<[FalkorWr_1Z_1XY_0cyc], (instrs BLR)>;
+
+// Cryptography Extensions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs SHA1Hrr)>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs AESIMCrr, AESMCrr)>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs AESDrr, AESErr)>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instrs SHA1SU0rrr, SHA1SU1rr, SHA256SU0rr)>;
+def : InstRW<[FalkorWr_1VX_1VY_4cyc], (instregex "^SHA1(C|M|P)rrr$")>;
+def : InstRW<[FalkorWr_1VX_1VY_5cyc], (instrs SHA256H2rrr, SHA256Hrrr)>;
+def : InstRW<[FalkorWr_4VXVY_3cyc], (instrs SHA256SU1rrr)>;
+
+// FP Load Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
+ (instregex "^LDR((Q|D|S|H|B)ui|(Q|D|S)l)$")>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
+ (instregex "^LDR(Q|D|S|H|B)(post|pre)$")>;
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
+ (instregex "^LDUR(Q|D|S|H|B)i$")>;
+def : InstRW<[FalkorWr_LDRro, FalkorReadIncLd],
+ (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>;
+def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
+ (instrs LDNPQi)>;
+def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
+ (instrs LDPQi)>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
+ (instregex "LDNP(D|S)i$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
+ (instregex "LDP(D|S)i$")>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
+ (instregex "LDP(D|S)(pre|post)$")>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
+ (instregex "^LDPQ(pre|post)$")>;
+
+// FP Data Processing Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCCMP(E)?(S|D)rr$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCMP(E)?(S|D)r(r|i)$")>;
+def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVT(A|M|N|P|Z)(S|U)U(W|X)(S|D)r$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)(S|D)r$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCSEL(S|D)rrr$")>;
+
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?(S|D)rr$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?Pv2i(32|64)p$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs FCVTSHr, FCVTDHr)>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(S|D)r$")>;
+
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FABD(32|64)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FADD|FSUB)(S|D)rr$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FCVTHSr, FCVTHDr)>;
+
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs FCVTSDr, FCVTDSr)>;
+
+def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
+ (instregex "^F(N)?MULSrr$")>;
+
+def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
+ (instregex "^F(N)?MULDrr$")>;
+
+def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instrs FDIVSrr)>;
+def : InstRW<[FalkorWr_1VX_1VY_14cyc],(instrs FDIVDrr)>;
+def : InstRW<[FalkorWr_1VX_1VY_12cyc],(instrs FSQRTSr)>;
+def : InstRW<[FalkorWr_1VX_1VY_21cyc],(instrs FSQRTDr)>;
+
+def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, ReadDefault, ReadDefault, FalkorReadFMA32],
+ (instregex "^F(N)?M(ADD|SUB)Srrr$")>;
+def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, ReadDefault, ReadDefault, FalkorReadFMA64],
+ (instregex "^F(N)?M(ADD|SUB)Drrr$")>;
+
+// FP Miscellaneous Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_FMOV], (instregex "^FMOV(WS|XD|XDHigh)r$")>;
+def : InstRW<[FalkorWr_1GTOV_0cyc], (instregex "^FMOV(S|D)i$")>; // imm fwd
+def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)S(W|X)(D|S)ri$")>;
+def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)(d|s)$")>;
+def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FMOV(SW|DX|DXHigh)r$")>;
+def : InstRW<[FalkorWr_1VXVY_0cyc], (instregex "^FMOV(Sr|Dr|v.*_ns)$")>; // imm fwd
+// FIXME: We are currently generating movi v0.2d, #0 for these, which is worse than fmov wzr/xzr
+def : InstRW<[FalkorWr_2VXVY_0cyc], (instrs FMOVD0, FMOVS0)>; // imm fwd
+
+def : InstRW<[FalkorWr_1GTOV_4cyc], (instregex "^(S|U)CVTF(S|U)(W|X)(D|S)ri$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)CVTF(v1i32|v2i32|v1i64|v2f32|d|s)(_shift)?")>;
+
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)CVTF(v2i64|v4i32|v2f64|v4f32)(_shift)?")>;
+
+// Load Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFMui, PRFMl)>;
+def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFUMi)>;
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
+ (instregex "^LDNP(W|X)i$")>;
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
+ (instregex "^LDP(W|X)i$")>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
+ (instregex "^LDP(W|X)(post|pre)$")>;
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
+ (instregex "^LDR(BB|HH|W|X)ui$")>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
+ (instregex "^LDR(BB|HH|W|X)(post|pre)$")>;
+def : InstRW<[FalkorWr_LDRro, FalkorReadIncLd],
+ (instregex "^LDR(BB|HH|W|X)ro(W|X)$")>;
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
+ (instregex "^LDR(W|X)l$")>;
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
+ (instregex "^LDTR(B|H|W|X)i$")>;
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
+ (instregex "^LDUR(BB|HH|W|X)i$")>;
+def : InstRW<[FalkorWr_PRFMro], (instregex "^PRFMro(W|X)$")>;
+def : InstRW<[FalkorWr_1LD_4cyc, FalkorWr_none_4cyc, FalkorReadIncLd],
+ (instrs LDPSWi)>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_4cyc, FalkorWr_none_4cyc, FalkorReadIncLd],
+ (instregex "^LDPSW(post|pre)$")>;
+def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd],
+ (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>;
+def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_4cyc, FalkorReadIncLd],
+ (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>;
+def : InstRW<[FalkorWr_LDRSro, FalkorReadIncLd],
+ (instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>;
+def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd],
+ (instrs LDRSWl)>;
+def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd],
+ (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>;
+def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd],
+ (instregex "^LDURS(BW|BX|HW|HX|W)i$")>;
+
+// Miscellaneous Data-Processing Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(S|U)?BFM(W|X)ri$")>;
+def : InstRW<[FalkorWr_1X_2cyc], (instregex "^CRC32.*$")>;
+def : InstRW<[FalkorWr_1XYZ_2cyc], (instregex "^(CLS|CLZ|RBIT|REV|REV16|REV32)(W|X)r$")>;
+def : InstRW<[FalkorWr_2XYZ_2cyc], (instregex "^EXTR(W|X)rri$")>;
+
+// Divide and Multiply Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_IMUL64_1X_4cyc, ReadDefault, ReadDefault, FalkorReadIMA64],
+ (instregex "^(S|U)M(ADD|SUB)Lrrr$")>;
+def : InstRW<[FalkorWr_IMUL32_1X_2cyc, ReadDefault, ReadDefault, FalkorReadIMA32],
+ (instregex "^M(ADD|SUB)Wrrr$")>;
+
+def : InstRW<[FalkorWr_IMUL64_1X_5cyc], (instregex "^(S|U)MULHrr$")>;
+def : InstRW<[FalkorWr_IMUL64_1X_5cyc, ReadDefault, ReadDefault, FalkorReadIMA64],
+ (instregex "^M(ADD|SUB)Xrrr$")>;
+
+def : InstRW<[FalkorWr_1X_1Z_8cyc], (instregex "^(S|U)DIVWr$")>;
+def : InstRW<[FalkorWr_1X_1Z_11cyc], (instregex "^(S|U)DIVXr$")>;
+
+def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc],
+ (instregex "^(S|U)MULLv.*$")>;
+def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
+ (instregex "^(S|U)(MLAL|MLSL)v.*$")>;
+
+// Move and Shift Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(LSLV|LSRV|ASRV|RORV)(W|X)r$")>;
+def : InstRW<[FalkorWr_1XYZ_0cyc], (instregex "^MOVK(W|X)i$")>; // imm fwd
+def : InstRW<[FalkorWr_1XYZB_0cyc], (instregex "^ADRP?$")>; // imm fwd
+def : InstRW<[FalkorWr_1XYZB_0cyc], (instregex "^MOVN(W|X)i$")>; // imm fwd
+def : InstRW<[FalkorWr_MOVZ], (instregex "^MOVZ(W|X)i$")>;
+def : InstRW<[FalkorWr_1XYZ_0cyc], (instrs MOVi32imm, MOVi64imm)>; // imm fwd (approximation)
+def : InstRW<[WriteSequence<[FalkorWr_1XYZ_1cyc, FalkorWr_1XYZ_1cyc]>],
+ (instrs MOVaddr, MOVaddrBA, MOVaddrCP, MOVaddrEXT, MOVaddrJT, MOVaddrTLS)>;
+def : InstRW<[WriteSequence<[FalkorWr_1LD_3cyc, FalkorWr_1XYZ_1cyc]>],
+ (instrs LOADgot)>;
+
+// Other Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1LD_0cyc], (instrs CLREX, DMB, DSB)>;
+def : InstRW<[FalkorWr_1none_0cyc], (instrs BRK, DCPS1, DCPS2, DCPS3, HINT, HLT, HVC, ISB, SMC, SVC)>;
+def : InstRW<[FalkorWr_1ST_0cyc], (instrs SYSxt, SYSLxt)>;
+def : InstRW<[FalkorWr_1Z_0cyc], (instrs MSRpstateImm1, MSRpstateImm4)>;
+
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
+ (instregex "^(LDAR(B|H|W|X)|LDAXR(B|H|W|X)|LDXR(B|H|W|X))$")>;
+def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
+ (instregex "^(LDAXP(W|X)|LDXP(W|X))$")>;
+def : InstRW<[FalkorWr_1LD_3cyc], (instrs MRS, MOVbaseTLS)>;
+
+def : InstRW<[FalkorWr_1LD_1Z_3cyc], (instrs DRPS)>;
+
+def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs MSR)>;
+def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
+ (instrs STNPWi, STNPXi)>;
+def : InstRW<[FalkorWr_2LD_1Z_3cyc], (instrs ERET)>;
+
+def : InstRW<[FalkorWr_1ST_1SD_1LD_3cyc], (instregex "^LDC.*$")>;
+def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^STLR(B|H|W|X)$")>;
+def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
+ (instregex "^STXP(W|X)$")>;
+def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
+ (instregex "^STXR(B|H|W|X)$")>;
+
+def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc, ReadDefault, ReadDefault, ReadDefault, FalkorReadIncSt],
+ (instregex "^STLXP(W|X)$")>;
+def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
+ (instregex "^STLXR(B|H|W|X)$")>;
+
+// Store Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
+ (instregex "^STP(W|X)i$")>;
+def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
+ (instregex "^STP(W|X)(post|pre)$")>;
+def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^STR(BB|HH|W|X)ui$")>;
+def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^STR(BB|HH|W|X)(post|pre)$")>;
+def : InstRW<[FalkorWr_STRro, ReadDefault, FalkorReadIncSt],
+ (instregex "^STR(BB|HH|W|X)ro(W|X)$")>;
+def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^STTR(B|H|W|X)i$")>;
+def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
+ (instregex "^STUR(BB|HH|W|X)i$")>;
+
diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedKryo.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedKryo.td
new file mode 100644
index 000000000..68de3e077
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedKryo.td
@@ -0,0 +1,138 @@
+//==- AArch64SchedKryo.td - Qualcomm Kryo Scheduling Defs ---*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Qualcomm Kryo to support
+// instruction scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// The issue width is set to five, matching the five issue queues for expanded
+// uops. The latency spreadsheet has information based on fragmented uops, but
+// those do not actually take up an issue queue.
+
+def KryoModel : SchedMachineModel {
+ let IssueWidth = 5; // 5-wide issue for expanded uops
+ let MicroOpBufferSize = 128; // Out-of-order with temporary unified issue buffer
+ let LoadLatency = 4; // Optimistic load latency
+ let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch
+
+ // Enable partial & runtime unrolling. The magic number is chosen based on
+ // experiments and benchmarking data.
+ let LoopMicroOpBufferSize = 16;
+ let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = [HasSVE];
+
+ // FIXME: Remove when all errors have been fixed.
+ let FullInstRWOverlapCheck = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on Kryo.
+
+let SchedModel = KryoModel in {
+ def KryoUnitXA : ProcResource<1>; // Type X(A) micro-ops
+ def KryoUnitXB : ProcResource<1>; // Type X(B) micro-ops
+ def KryoUnitYA : ProcResource<1>; // Type Y(A) micro-ops
+ def KryoUnitYB : ProcResource<1>; // Type Y(B) micro-ops
+ def KryoUnitX : ProcResGroup<[KryoUnitXA, // Type X micro-ops
+ KryoUnitXB]>;
+ def KryoUnitY : ProcResGroup<[KryoUnitYA, // Type Y micro-ops
+ KryoUnitYB]>;
+ def KryoUnitXY : ProcResGroup<[KryoUnitXA, // Type XY micro-ops
+ KryoUnitXB,
+ KryoUnitYA,
+ KryoUnitYB]>;
+ def KryoUnitLSA : ProcResource<1>; // Type LS(A) micro-ops
+ def KryoUnitLSB : ProcResource<1>; // Type LS(B) micro-ops
+ def KryoUnitLS : ProcResGroup<[KryoUnitLSA, // Type LS micro-ops
+ KryoUnitLSB]>;
+}
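+
+// A ProcResGroup lets a micro-op issue to any of its member units; for
+// example, a SchedWriteRes that names KryoUnitXY below can execute on either
+// an X or a Y pipe, while one that names KryoUnitX is restricted to the two
+// X pipes.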
+
+let SchedModel = KryoModel in {
+
+//===----------------------------------------------------------------------===//
+// Map the target-defined scheduler read/write resources and latency for
+// Kryo.
+
+def : WriteRes<WriteImm, [KryoUnitXY]> { let Latency = 1; }
+def : WriteRes<WriteI, [KryoUnitXY]> { let Latency = 1; }
+def : WriteRes<WriteISReg, [KryoUnitXY, KryoUnitXY]>
+ { let Latency = 2; let NumMicroOps = 2; }
+def : WriteRes<WriteIEReg, [KryoUnitXY, KryoUnitXY]>
+ { let Latency = 2; let NumMicroOps = 2; }
+def : WriteRes<WriteExtr, [KryoUnitXY, KryoUnitX]>
+ { let Latency = 2; let NumMicroOps = 2; }
+def : WriteRes<WriteIS, [KryoUnitXY]> { let Latency = 2; }
+def : WriteRes<WriteID32, [KryoUnitXA, KryoUnitY]>
+ { let Latency = 8; let NumMicroOps = 1; } // Fragment -1
+def : WriteRes<WriteID64, [KryoUnitXA, KryoUnitY]>
+ { let Latency = 8; let NumMicroOps = 1; } // Fragment -1
+def : WriteRes<WriteIM32, [KryoUnitX]> { let Latency = 5; }
+def : WriteRes<WriteIM64, [KryoUnitX]> { let Latency = 5; }
+def : WriteRes<WriteBr, [KryoUnitXY]> { let Latency = 1; }
+def : WriteRes<WriteBrReg, [KryoUnitXY]> { let Latency = 1; }
+def : WriteRes<WriteLD, [KryoUnitLS]> { let Latency = 4; }
+def : WriteRes<WriteST, [KryoUnitLS]> { let Latency = 4; }
+def : WriteRes<WriteSTP, [KryoUnitLS]> { let Latency = 4; }
+def : WriteRes<WriteAdr, [KryoUnitXY]> { let Latency = 6; }
+def : WriteRes<WriteLDIdx, [KryoUnitLS]> { let Latency = 4; }
+def : WriteRes<WriteSTIdx, [KryoUnitLS]> { let Latency = 4; }
+def : WriteRes<WriteF, [KryoUnitXY, KryoUnitXY]>
+ { let Latency = 3; let NumMicroOps = 2; }
+def : WriteRes<WriteFCmp, [KryoUnitXY]> { let Latency = 2; }
+def : WriteRes<WriteFCvt, [KryoUnitX]> { let Latency = 4; }
+def : WriteRes<WriteFCopy, [KryoUnitXY]> { let Latency = 6; }
+def : WriteRes<WriteFImm, [KryoUnitXY]> { let Latency = 6; }
+def : WriteRes<WriteFMul, [KryoUnitX, KryoUnitX]>
+ { let Latency = 6; let NumMicroOps = 2; }
+def : WriteRes<WriteFDiv, [KryoUnitXA, KryoUnitY]>
+ { let Latency = 12; let NumMicroOps = 2; } // Fragment -1 / NoRSV +1
+def : WriteRes<WriteV, [KryoUnitXY]> { let Latency = 6; }
+def : WriteRes<WriteVLD, [KryoUnitLS]> { let Latency = 4; }
+def : WriteRes<WriteVST, [KryoUnitLS]> { let Latency = 4; }
+
+def : WriteRes<WriteSys, []> { let Latency = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+
+def : WriteRes<WriteLDHi, []> { let Latency = 4; }
+
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
+// No forwarding logic is modelled yet.
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+def : ReadAdvance<ReadIMA, 0>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD, 0>;
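+
+// If forwarding were modelled, an accumulator operand could be given a
+// ReadAdvance with a non-zero cycle count restricted to its producing writes,
+// for example (illustrative values only, not measured for Kryo):
+//   def : ReadAdvance<ReadIMA, 2, [WriteIM32, WriteIM64]>;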
+
+
+//===----------------------------------------------------------------------===//
+// Specialize the coarse model by associating instruction groups with the
+// subtarget-defined types. As the model is refined, this will override most
+// of the above SchedWriteRes and SchedAlias mappings.
+
+// Miscellaneous
+// -----------------------------------------------------------------------------
+
+def : InstRW<[WriteI], (instrs COPY)>;
+
+
+// Detailed Refinements
+// -----------------------------------------------------------------------------
+include "AArch64SchedKryoDetails.td"
+
+
+} // SchedModel = KryoModel
diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedKryoDetails.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedKryoDetails.td
new file mode 100644
index 000000000..cf4cdabb8
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedKryoDetails.td
@@ -0,0 +1,2378 @@
+//=- AArch64SchedKryoDetails.td - QC Kryo Scheduling Defs ----*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the uop and latency details for the machine model for the
+// Qualcomm Kryo subtarget.
+//
+//===----------------------------------------------------------------------===//
+
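+// The SchedWriteRes names below follow the pattern
+// KryoWrite_<latency>cyc_<units>_<row>ln, where <units> lists the consumed
+// execution pipes (X, XY, XA, Y, LS), "noRSV" appears to mark a micro-op that
+// reserves no execution resource (NumMicroOps exceeds the listed units), and
+// the trailing number appears to reference a row in the latency spreadsheet
+// mentioned in AArch64SchedKryo.td. For example, KryoWrite_3cyc_X_noRSV_138ln
+// below is a 3-cycle, 2-micro-op write that occupies a single X pipe.
+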
+def KryoWrite_3cyc_X_noRSV_138ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_X_noRSV_138ln],
+ (instregex "(S|U)R?SRA(d|(v2i32|v4i16|v8i8)_shift)")>;
+
+def KryoWrite_3cyc_X_X_139ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_X_X_139ln],
+ (instregex "(S|U)R?SRA(v2i64|v4i32|v8i16|v16i8)_shift")>;
+
+def KryoWrite_4cyc_XY_XY_noRSV_172ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 4; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_4cyc_XY_XY_noRSV_172ln],
+ (instregex "(S|U)ABA(v8i8|v4i16|v2i32)")>;
+def KryoWrite_4cyc_XY_XY_XY_XY_178ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitXY, KryoUnitXY]> {
+ let Latency = 4; let NumMicroOps = 4;
+}
+def : InstRW<[KryoWrite_4cyc_XY_XY_XY_XY_178ln],
+ (instregex "(S|U)ABA(v16i8|v8i16|v4i32)")>;
+def KryoWrite_3cyc_XY_XY_XY_XY_177ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitXY, KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 4;
+}
+def : InstRW<[KryoWrite_3cyc_XY_XY_XY_XY_177ln],
+ (instregex "(S|U)ABALv.*")>;
+def KryoWrite_3cyc_XY_XY_166ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_XY_166ln],
+ (instregex "(S|U)(ABD|QSUB|RHADD)(v16i8|v8i16|v4i32|v2i64)")>;
+def KryoWrite_3cyc_XY_noRSV_159ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_noRSV_159ln],
+ (instregex "(S|U)(ABD|RHADD)(v8i8|v4i16|v2i32)")>;
+def KryoWrite_3cyc_XY_XY_165ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_XY_165ln],
+ (instregex "(S|U)ABDLv.*")>;
+def KryoWrite_3cyc_X_noRSV_154ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_X_noRSV_154ln],
+ (instregex "(S|U)ADALP(v8i8|v4i16|v2i32)_v.*")>;
+def KryoWrite_3cyc_X_X_155ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_X_X_155ln],
+ (instregex "(S|U)ADALP(v16i8|v8i16|v4i32)_v.*")>;
+def KryoWrite_2cyc_XY_XY_151ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_XY_151ln],
+ (instregex "(S|U)(ADD|SUB)Lv.*")>;
+def KryoWrite_2cyc_XY_noRSV_148ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_noRSV_148ln],
+ (instregex "((S|U)ADDLP|ABS)(v2i32|v4i16|v8i8)(_v.*)?")>;
+def KryoWrite_2cyc_XY_XY_150ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_XY_150ln],
+ (instregex "((S|U)ADDLP|ABS)(v2i64|v4i32|v8i16|v16i8)(_v.*)?")>;
+def KryoWrite_3cyc_XY_XY_XY_noRSV_179ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 4;
+}
+def : InstRW<[KryoWrite_3cyc_XY_XY_XY_noRSV_179ln],
+ (instrs SADDLVv4i32v, UADDLVv4i32v)>;
+def KryoWrite_5cyc_XY_XY_XY_noRSV_180ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitXY]> {
+ let Latency = 5; let NumMicroOps = 4;
+}
+def : InstRW<[KryoWrite_5cyc_XY_XY_XY_noRSV_180ln],
+ (instrs SADDLVv8i16v, UADDLVv8i16v)>;
+def KryoWrite_6cyc_XY_XY_X_noRSV_181ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitX]> {
+ let Latency = 6; let NumMicroOps = 4;
+}
+def : InstRW<[KryoWrite_6cyc_XY_XY_X_noRSV_181ln],
+ (instrs SADDLVv16i8v, UADDLVv16i8v)>;
+def KryoWrite_3cyc_XY_noRSV_158ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_noRSV_158ln],
+ (instrs SADDLVv4i16v, UADDLVv4i16v, ADDVv4i16v)>;
+def KryoWrite_4cyc_X_noRSV_169ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_4cyc_X_noRSV_169ln],
+ (instrs SADDLVv8i8v, UADDLVv8i8v, ADDVv8i8v)>;
+def KryoWrite_2cyc_XY_XY_XY_XY_176ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitXY, KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 4;
+}
+def : InstRW<[KryoWrite_2cyc_XY_XY_XY_XY_176ln],
+ (instregex "(S|U)(ADDW|SUBW)v.*")>;
+def KryoWrite_4cyc_X_noRSV_40ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_4cyc_X_noRSV_40ln],
+ (instregex "(S|U)CVTFS(W|X)(D|S)ri")>;
+def KryoWrite_4cyc_X_noRSV_97ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_4cyc_X_noRSV_97ln],
+ (instregex "(S|U)CVTFU(W|X)(D|S)ri")>;
+def KryoWrite_4cyc_X_noRSV_110ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_4cyc_X_noRSV_110ln],
+ (instregex "(S|U)CVTF(v1i32|v2i32|v1i64|v2f32|d|s)(_shift)?")>;
+def KryoWrite_4cyc_X_X_114ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_4cyc_X_X_114ln],
+ (instregex "(S|U)CVTF(v2i64|v4i32|v2f64|v4f32)(_shift)?")>;
+def KryoWrite_1cyc_XA_Y_98ln :
+ SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_XA_Y_98ln],
+ (instregex "(S|U)DIV(_Int)?(W|X)r")>;
+def KryoWrite_2cyc_XY_XY_152ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_XY_152ln],
+ (instregex "(S|U)H(ADD|SUB)(v16i8|v8i16|v4i32)")>;
+def KryoWrite_2cyc_XY_noRSV_149ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_noRSV_149ln],
+ (instregex "((S|U)H(ADD|SUB)|ADDP)(v8i8|v4i16|v2i32)")>;
+def KryoWrite_4cyc_X_70ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_4cyc_X_70ln],
+ (instregex "(S|U)(MADDL|MSUBL)rrr")>;
+def KryoWrite_4cyc_X_X_191ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_4cyc_X_X_191ln],
+ (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>;
+def KryoWrite_1cyc_XY_195ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_1cyc_XY_195ln],
+ (instregex "(S|U)MOVv.*")>;
+def KryoWrite_5cyc_X_71ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 5; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_5cyc_X_71ln],
+ (instrs SMULHrr, UMULHrr)>;
+def KryoWrite_3cyc_XY_noRSV_186ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_noRSV_186ln],
+ (instregex "^(S|U)QADD(v8i8|v4i16|v2i32)")>;
+def KryoWrite_3cyc_XY_XY_187ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_XY_187ln],
+ (instregex "^(S|U)QADD(v16i8|v8i16|v4i32|v2i64)")>;
+def KryoWrite_3cyc_XY_noRSV_69ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_noRSV_69ln],
+ (instregex "(S|U|SU|US)QADD(v1i8|v1i16|v2i16|v1i32|v1i64)")>;
+def KryoWrite_3cyc_XY_noRSV_248ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_noRSV_248ln],
+ (instregex "(S|U)QSHLU?(d|s|h|b|(v8i8|v4i16|v2i32)_shift)$")>;
+def KryoWrite_3cyc_XY_XY_250ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_XY_250ln],
+ (instregex "(S|U)(QSHLU?|RSHR)(v16i8|v8i16|v4i32|v2i64)_shift$")>;
+def KryoWrite_3cyc_XY_noRSV_246ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_noRSV_246ln],
+ (instregex "(S|U)(QSHL|RSHL|QRSHL)(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32)$")>;
+def KryoWrite_3cyc_XY_XY_251ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_XY_251ln],
+ (instregex "(S|U)(QSHL|RSHL|QRSHL)(v16i8|v8i16|v4i32|v2i64)$")>;
+def KryoWrite_6cyc_XY_X_238ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitX]> {
+ let Latency = 6; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_6cyc_XY_X_238ln],
+ (instregex "((S|U)QR?SHRN|SQR?SHRUN)(v16i8|v8i16|v4i32)_shift$")>;
+def KryoWrite_3cyc_XY_noRSV_249ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_noRSV_249ln],
+ (instregex "((S|U)QR?SHRN|SQR?SHRUN)(s|h|b)?")>;
+def KryoWrite_6cyc_XY_X_noRSV_252ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitX]> {
+ let Latency = 6; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_6cyc_XY_X_noRSV_252ln],
+ (instregex "((S|U)QR?SHRN|SQR?SHRUN)(v8i8|v4i16|v2i32)_shift?")>;
+def KryoWrite_3cyc_XY_noRSV_161ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_noRSV_161ln],
+ (instregex "(S|U)QSUB(v8i8|v4i16|v2i32|v1i64|v1i32|v1i16|v1i8)")>;
+def KryoWrite_3cyc_XY_noRSV_163ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_noRSV_163ln],
+ (instregex "(S|U)QXTU?N(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)")>;
+def KryoWrite_3cyc_XY_noRSV_162ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_noRSV_162ln],
+ (instregex "(S|U)QXTU?N(v1i8|v1i16|v1i32)")>;
+def KryoWrite_3cyc_XY_noRSV_247ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_noRSV_247ln],
+ (instregex "(S|U)RSHR(d|(v8i8|v4i16|v2i32)_shift)$")>;
+def KryoWrite_2cyc_XY_noRSV_239ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_noRSV_239ln],
+ (instregex "(S|U)SHL(d|v8i8|v4i16|v2i32|v1i64)$")>;
+def KryoWrite_2cyc_XY_XY_243ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_XY_243ln],
+ (instregex "(S|U)SHL(v16i8|v8i16|v4i32|v2i64)$")>;
+def KryoWrite_2cyc_XY_XY_241ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_XY_241ln],
+ (instregex "(S|U)?SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>;
+def KryoWrite_2cyc_XY_noRSV_240ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_noRSV_240ln],
+ (instregex "((S|U)SHR|SHL)(d|(v8i8|v4i16|v2i32)_shift)$")>;
+def KryoWrite_2cyc_XY_XY_242ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_XY_242ln],
+ (instregex "((S|U)SHR|SHL)(v16i8|v8i16|v4i32|v2i64)_shift$")>;
+def KryoWrite_2cyc_XY_XY_183ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_XY_183ln],
+ (instregex "(S|U)(MAX|MIN)P?(v16i8|v8i16|v4i32)")>;
+def KryoWrite_2cyc_XY_noRSV_182ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_noRSV_182ln],
+ (instregex "(S|U)(MAX|MIN)P?(v8i8|v4i16|v2i32)")>;
+def KryoWrite_3cyc_XY_noRSV_184ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_noRSV_184ln],
+ (instregex "(S|U)(MAX|MIN)V(v4i16v|v8i8v|v4i32)")>;
+def KryoWrite_4cyc_X_noRSV_185ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_4cyc_X_noRSV_185ln],
+ (instregex "(S|U)(MAX|MIN)V(v16i8v|v8i16v)")>;
+def KryoWrite_2cyc_XY_noRSV_67ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_noRSV_67ln],
+ (instrs ABSv1i64)>;
+def KryoWrite_1cyc_XY_63ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_1cyc_XY_63ln, ReadI, ReadI],
+ (instregex "ADC.*")>;
+def KryoWrite_1cyc_XY_63_1ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_1cyc_XY_63_1ln],
+ (instregex "ADR.*")>;
+def KryoWrite_1cyc_XY_62ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_1cyc_XY_62ln, ReadI],
+ (instregex "ADDS?(W|X)ri")>;
+def KryoWrite_2cyc_XY_XY_64ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_XY_64ln, ReadI, ReadI],
+ (instregex "ADDS?(W|X)r(r|s|x)(64)?")>;
+def KryoWrite_1cyc_XY_noRSV_65ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_XY_noRSV_65ln],
+ (instrs ADDv1i64)>;
+def KryoWrite_1cyc_XY_noRSV_144ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_XY_noRSV_144ln],
+ (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>;
+def KryoWrite_1cyc_XY_XY_146ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_XY_XY_146ln],
+ (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>;
+def KryoWrite_4cyc_XY_X_noRSV_171ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_4cyc_XY_X_noRSV_171ln],
+ (instregex "(ADD|SUB)HNv.*")>;
+def KryoWrite_1cyc_XY_noRSV_66ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_XY_noRSV_66ln],
+ (instrs ADDPv2i64p)>;
+def KryoWrite_2cyc_XY_XY_153ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_XY_153ln],
+ (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>;
+def KryoWrite_3cyc_XY_XY_noRSV_170ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_3cyc_XY_XY_noRSV_170ln],
+ (instrs ADDVv4i32v)>;
+def KryoWrite_4cyc_XY_XY_noRSV_173ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 4; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_4cyc_XY_XY_noRSV_173ln],
+ (instrs ADDVv8i16v)>;
+def KryoWrite_5cyc_XY_X_noRSV_174ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitX]> {
+ let Latency = 5; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_5cyc_XY_X_noRSV_174ln],
+ (instrs ADDVv16i8v)>;
+def KryoWrite_3cyc_XY_XY_X_X_27ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitX, KryoUnitX]> {
+ let Latency = 3; let NumMicroOps = 4;
+}
+def : InstRW<[KryoWrite_3cyc_XY_XY_X_X_27ln],
+ (instrs AESDrr, AESErr)>;
+def KryoWrite_2cyc_X_X_22ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_X_X_22ln],
+ (instrs AESIMCrr, AESMCrr)>;
+def KryoWrite_1cyc_XY_noRSV_76ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_XY_noRSV_76ln],
+ (instregex "((AND|ORN|EOR|EON)S?(Wr[rsi]|v8i8|v4i16|v2i32)|(ORR|BIC)S?(Wr[rs]|v8i8|v4i16|v2i32))")>;
+def KryoWrite_1cyc_XY_XY_79ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_XY_XY_79ln],
+ (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>;
+def KryoWrite_1cyc_X_72ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 1; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_1cyc_X_72ln],
+ (instregex "(S|U)?BFM.*")>;
+def KryoWrite_1cyc_XY_noRSV_77ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_XY_noRSV_77ln],
+ (instregex "(BIC|ORR)S?Wri")>;
+def KryoWrite_1cyc_XY_XY_78ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_XY_XY_78ln],
+ (instregex "(BIC|ORR)S?Xri")>;
+def KryoWrite_1cyc_X_noRSV_74ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_X_noRSV_74ln],
+ (instrs BIFv8i8, BITv8i8, BSLv8i8)>;
+def KryoWrite_1cyc_X_X_75ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_X_X_75ln],
+ (instrs BIFv16i8, BITv16i8, BSLv16i8)>;
+def KryoWrite_0cyc_noRSV_11ln :
+ SchedWriteRes<[]> {
+ let Latency = 0; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_0cyc_noRSV_11ln],
+ (instrs BRK, DCPS1, DCPS2, DCPS3, HLT, HVC, ISB, HINT, SMC, SVC)>;
+def KryoWrite_0cyc_XY_16ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 0; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_0cyc_XY_16ln, ReadI],
+ (instregex "(CCMN|CCMP)(W|X)i")>;
+def KryoWrite_0cyc_XY_16_1ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 0; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_0cyc_XY_16_1ln, ReadI, ReadI],
+ (instregex "(CCMN|CCMP)(W|X)r")>;
+def KryoWrite_2cyc_XY_3ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_2cyc_XY_3ln, ReadI],
+ (instregex "(CLS|CLZ)(W|X)r")>;
+def KryoWrite_2cyc_XY_noRSV_7ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_noRSV_7ln],
+ (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>;
+def KryoWrite_2cyc_XY_XY_8ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_XY_8ln],
+ (instregex "(CLS|CLZ|CNT)(v2i32|v4i16|v8i8)")>;
+def KryoWrite_2cyc_XY_noRSV_80ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_noRSV_80ln],
+ (instregex "CM(EQ|GE|HS|GT|HI|TST)(v8i8|v4i16|v2i32|v1i64)$")>;
+def KryoWrite_2cyc_XY_XY_83ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_XY_83ln],
+ (instregex "CM(EQ|GE|HS|GT|HI|TST)(v16i8|v8i16|v4i32|v2i64)$")>;
+def KryoWrite_2cyc_XY_noRSV_81ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_noRSV_81ln],
+ (instregex "CM(EQ|LE|GE|GT|LT)(v8i8|v4i16|v2i32|v1i64)rz$")>;
+def KryoWrite_2cyc_XY_XY_82ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_XY_82ln],
+ (instregex "CM(EQ|LE|GE|GT|LT)(v16i8|v8i16|v4i32|v2i64)rz$")>;
+def KryoWrite_3cyc_XY_4ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_3cyc_XY_4ln, ReadI, ReadISReg],
+ (instregex "CRC32.*")>;
+def KryoWrite_1cyc_XY_20ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_1cyc_XY_20ln, ReadI, ReadI],
+ (instregex "CSEL(W|X)r")>;
+def KryoWrite_1cyc_X_17ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 1; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_1cyc_X_17ln, ReadI, ReadI],
+ (instregex "(CSINC|CSNEG)(W|X)r")>;
+def KryoWrite_1cyc_XY_18ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_1cyc_XY_18ln, ReadI, ReadI],
+ (instregex "(CSINV)(W|X)r")>;
+def KryoWrite_3cyc_LS_X_13ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitX]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_LS_X_13ln],
+ (instrs DRPS)>;
+def KryoWrite_0cyc_LS_10ln :
+ SchedWriteRes<[KryoUnitLS]> {
+ let Latency = 0; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_0cyc_LS_10ln],
+ (instrs DSB, DMB, CLREX)>;
+def KryoWrite_1cyc_X_noRSV_196ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_X_noRSV_196ln],
+ (instregex "DUP(v8i8|v4i16|v2i32)(gpr|lane)")>;
+def KryoWrite_1cyc_X_X_197ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_X_X_197ln],
+ (instregex "DUP(v16i8|v8i16|v4i32|v2i64)(gpr|lane)")>;
+def KryoWrite_3cyc_LS_LS_X_15ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitX]> {
+ let Latency = 3; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_3cyc_LS_LS_X_15ln],
+ (instrs ERET)>;
+def KryoWrite_1cyc_X_noRSV_207ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_X_noRSV_207ln],
+ (instrs EXTv8i8)>;
+def KryoWrite_1cyc_X_X_212ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_X_X_212ln],
+ (instrs EXTv16i8)>;
+def KryoWrite_2cyc_XY_X_136ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitX]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_X_136ln],
+ (instrs EXTRWrri, EXTRXrri)>;
+def KryoWrite_2cyc_XY_noRSV_35ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_noRSV_35ln],
+ (instregex "F(MAX|MIN)(NM)?P?(D|S)rr")>;
+def KryoWrite_2cyc_XY_XY_106ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_XY_106ln],
+ (instregex "(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2i64p|v2f64|v4f32)")>;
+def KryoWrite_2cyc_XY_noRSV_104ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_noRSV_104ln],
+ (instregex "(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2f32|v2i32p)")>;
+def KryoWrite_3cyc_XY_noRSV_107ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_noRSV_107ln],
+ (instregex "F(MAX|MIN)(NM)?Vv4i32v")>;
+def KryoWrite_3cyc_XY_noRSV_101ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_noRSV_101ln],
+ (instregex "FABD(32|64|v2f32)")>;
+def KryoWrite_3cyc_XY_XY_103ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_XY_103ln],
+ (instregex "(FABD|FADD|FSUB|FADDP)(v4f32|v2f64)")>;
+def KryoWrite_1cyc_XY_noRSV_48ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_XY_noRSV_48ln],
+ (instregex "F(ABS|NEG)(D|S)r")>;
+def KryoWrite_1cyc_XY_noRSV_124ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_XY_noRSV_124ln],
+ (instregex "F(ABS|NEG)v2f32")>;
+def KryoWrite_1cyc_XY_XY_125ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_XY_XY_125ln],
+ (instregex "F(ABS|NEG)(v2f64|v4f32)")>;
+def KryoWrite_2cyc_XY_noRSV_33ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_noRSV_33ln],
+ (instregex "(FAC(GE|GT)|FCM(EQ|GE|GT))(32|64)")>;
+def KryoWrite_3cyc_XY_noRSV_30ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_noRSV_30ln],
+ (instregex "(FADD|FSUB)(D|S)rr")>;
+def KryoWrite_3cyc_XY_noRSV_100ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_noRSV_100ln],
+ (instregex "(FADD|FSUB|FADDP)v2f32")>;
+def KryoWrite_3cyc_XY_noRSV_29ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_noRSV_29ln],
+ (instregex "FADDP(v2i32p|v2i64p)")>;
+def KryoWrite_0cyc_XY_31ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 0; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_0cyc_XY_31ln],
+ (instregex "FCCMPE?(D|S)rr")>;
+def KryoWrite_2cyc_XY_noRSV_34ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_noRSV_34ln],
+ (instregex "FCM(EQ|LE|GE|GT|LT)(v1i32|v1i64)rz")>;
+def KryoWrite_2cyc_XY_XY_36ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_XY_36ln],
+ (instregex "FCM(EQ|LE|GE|GT|LT)(v2i64|v4i32)rz")>;
+def KryoWrite_2cyc_XY_noRSV_105ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_noRSV_105ln],
+ (instregex "FCM(EQ|LE|GE|GT|LT)v2i32rz")>;
+def KryoWrite_0cyc_XY_32ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 0; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_0cyc_XY_32ln],
+ (instregex "FCMPE?(D|S)r(r|i)")>;
+def KryoWrite_1cyc_XY_noRSV_49ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_XY_noRSV_49ln],
+ (instrs FCSELDrrr, FCSELSrrr)>;
+def KryoWrite_4cyc_X_noRSV_41ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_4cyc_X_noRSV_41ln],
+ (instrs FCVTDHr, FCVTDSr, FCVTHDr, FCVTHSr, FCVTSDr, FCVTSHr)>;
+def KryoWrite_4cyc_X_38ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_4cyc_X_38ln],
+ (instregex "FCVT(((A|N|M|P)(S|U)(S|U)|Z(S|U)_Int(S|U))(W|X)(D|S)ri?|Z(S|U)(d|s))$")>;
+def KryoWrite_4cyc_X_noRSV_113ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_4cyc_X_noRSV_113ln],
+ (instregex "FCVT((A|N|M|P)(S|U)|Z(S|U)_Int)(v1i32|v1i64|v2f32)$")>;
+def KryoWrite_4cyc_X_X_117ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_4cyc_X_X_117ln],
+ (instregex "FCVT((A|N|M|P)(S|U)|Z(S|U)_Int)(v4f32|v2f64)$")>;
+def KryoWrite_5cyc_X_X_XY_noRSV_119ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitXY]> {
+ let Latency = 5; let NumMicroOps = 4;
+}
+def : InstRW<[KryoWrite_5cyc_X_X_XY_noRSV_119ln],
+ (instregex "FCVTX?N(v2f32|v4f32|v2i32|v4i16|v4i32|v8i16)$")>;
+def KryoWrite_4cyc_X_X_116ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_4cyc_X_X_116ln],
+ (instregex "FCVTL(v2i32|v4i16|v4i32|v8i16)$")>;
+def KryoWrite_4cyc_X_noRSV_112ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_4cyc_X_noRSV_112ln],
+ (instrs FCVTXNv1i64)>;
+def KryoWrite_4cyc_X_37ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_4cyc_X_37ln],
+ (instregex "FCVTZ(S|U)(S|U)(W|X)(D|S)ri?$")>;
+def KryoWrite_4cyc_X_noRSV_111ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_4cyc_X_noRSV_111ln],
+ (instregex "FCVTZ(S|U)(v2f32|v1i32|v1i64|v2i32(_shift)?)$")>;
+def KryoWrite_4cyc_X_X_115ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_4cyc_X_X_115ln],
+ (instregex "FCVTZ(S|U)(v2f64|v4f32|(v2i64|v4i32)(_shift)?)$")>;
+def KryoWrite_10cyc_XA_Y_noRSV_43ln :
+ SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
+ let Latency = 10; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_10cyc_XA_Y_noRSV_43ln],
+ (instrs FDIVSrr)>;
+def KryoWrite_14cyc_XA_Y_noRSV_43ln :
+ SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
+ let Latency = 14; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_14cyc_XA_Y_noRSV_43ln],
+ (instrs FDIVDrr)>;
+def KryoWrite_10cyc_XA_Y_noRSV_121ln :
+ SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
+ let Latency = 10; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_10cyc_XA_Y_noRSV_121ln],
+ (instrs FDIVv2f32)>;
+def KryoWrite_14cyc_XA_Y_XA_Y_123ln :
+ SchedWriteRes<[KryoUnitXA, KryoUnitY, KryoUnitXA, KryoUnitY]> {
+ let Latency = 14; let NumMicroOps = 4;
+}
+def : InstRW<[KryoWrite_14cyc_XA_Y_XA_Y_123ln],
+ (instrs FDIVv2f64, FDIVv4f32)>;
+def KryoWrite_5cyc_X_noRSV_55ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 5; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_5cyc_X_noRSV_55ln],
+ (instregex "FN?M(ADD|SUB)Srrr")>;
+def KryoWrite_6cyc_X_noRSV_57ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 6; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_6cyc_X_noRSV_57ln],
+ (instregex "FN?M(ADD|SUB)Drrr")>;
+def KryoWrite_5cyc_X_noRSV_51ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 5; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_5cyc_X_noRSV_51ln],
+ (instrs FMLAv2f32, FMLSv2f32, FMLAv1i32_indexed, FMLSv1i32_indexed)>;
+def KryoWrite_5cyc_X_X_56ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX]> {
+ let Latency = 5; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_5cyc_X_X_56ln],
+ (instrs FMLAv4f32, FMLSv4f32)>;
+def KryoWrite_6cyc_X_X_61ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX]> {
+ let Latency = 6; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_6cyc_X_X_61ln],
+ (instrs FMLAv2f64, FMLSv2f64)>;
+def KryoWrite_5cyc_X_noRSV_128ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 5; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_5cyc_X_noRSV_128ln],
+ (instrs FMLAv2i32_indexed, FMLSv2i32_indexed)>;
+def KryoWrite_5cyc_X_X_131ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX]> {
+ let Latency = 5; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_5cyc_X_X_131ln],
+ (instrs FMLAv4i32_indexed, FMLSv4i32_indexed)>;
+def KryoWrite_6cyc_X_X_134ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX]> {
+ let Latency = 6; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_6cyc_X_X_134ln],
+ (instrs FMLAv2i64_indexed, FMLSv2i64_indexed)>;
+def KryoWrite_6cyc_X_noRSV_60ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 6; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_6cyc_X_noRSV_60ln],
+ (instrs FMLAv1i64_indexed, FMLSv1i64_indexed, FMULv1i64_indexed, FMULXv1i64_indexed)>;
+def KryoWrite_1cyc_XY_45ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_1cyc_XY_45ln],
+ (instregex "FMOV(XDHigh|DXHigh|DX)r")>;
+def KryoWrite_1cyc_XY_noRSV_47ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_XY_noRSV_47ln],
+ (instregex "FMOV(Di|Dr|Si|Sr|SWr|WSr|XDr|v.*_ns)")>;
+def KryoWrite_5cyc_X_noRSV_53ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 5; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_5cyc_X_noRSV_53ln],
+ (instrs FMULv1i32_indexed, FMULXv1i32_indexed)>;
+def KryoWrite_5cyc_X_noRSV_127ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 5; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_5cyc_X_noRSV_127ln],
+ (instrs FMULv2f32, FMULXv2f32, FMULv2i32_indexed, FMULXv2i32_indexed)>;
+def KryoWrite_5cyc_X_X_130ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX]> {
+ let Latency = 5; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_5cyc_X_X_130ln],
+ (instrs FMULv4f32, FMULXv4f32, FMULv4i32_indexed, FMULXv4i32_indexed)>;
+def KryoWrite_6cyc_X_X_133ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX]> {
+ let Latency = 6; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_6cyc_X_X_133ln],
+ (instrs FMULv2f64, FMULXv2f64, FMULv2i64_indexed, FMULXv2i64_indexed)>;
+def KryoWrite_5cyc_X_noRSV_54ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 5; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_5cyc_X_noRSV_54ln],
+ (instrs FMULSrr, FNMULSrr, FMULX32)>;
+def KryoWrite_6cyc_X_noRSV_59ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 6; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_6cyc_X_noRSV_59ln],
+ (instrs FMULDrr, FNMULDrr, FMULX64)>;
+def KryoWrite_3cyc_XY_noRSV_28ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_noRSV_28ln],
+ (instrs FRECPEv1i32, FRECPEv1i64, FRSQRTEv1i32, FRSQRTEv1i64 )>;
+def KryoWrite_3cyc_XY_noRSV_99ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_noRSV_99ln],
+ (instrs FRECPEv2f32, FRSQRTEv2f32)>;
+def KryoWrite_3cyc_XY_XY_102ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_XY_102ln],
+ (instrs FRECPEv2f64, FRECPEv4f32, FRSQRTEv2f64, FRSQRTEv4f32)>;
+def KryoWrite_5cyc_X_noRSV_52ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 5; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_5cyc_X_noRSV_52ln],
+ (instrs FRECPS32, FRSQRTS32)>;
+def KryoWrite_6cyc_X_noRSV_58ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 6; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_6cyc_X_noRSV_58ln],
+ (instrs FRECPS64, FRSQRTS64)>;
+def KryoWrite_5cyc_X_noRSV_126ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 5; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_5cyc_X_noRSV_126ln],
+ (instrs FRECPSv2f32, FRSQRTSv2f32)>;
+def KryoWrite_5cyc_X_X_129ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX]> {
+ let Latency = 5; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_5cyc_X_X_129ln],
+ (instrs FRECPSv4f32, FRSQRTSv4f32)>;
+def KryoWrite_6cyc_X_X_132ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX]> {
+ let Latency = 6; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_6cyc_X_X_132ln],
+ (instrs FRECPSv2f64, FRSQRTSv2f64)>;
+def KryoWrite_3cyc_XY_noRSV_50ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_noRSV_50ln],
+ (instrs FRECPXv1i32, FRECPXv1i64)>;
+def KryoWrite_2cyc_XY_noRSV_39ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_noRSV_39ln],
+ (instregex "FRINT(A|I|M|N|P|X|Z)(S|D)r")>;
+def KryoWrite_2cyc_XY_noRSV_108ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_noRSV_108ln],
+ (instregex "FRINT(A|I|M|N|P|X|Z)v2f32")>;
+def KryoWrite_2cyc_XY_XY_109ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_XY_109ln],
+ (instregex "FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32)")>;
+def KryoWrite_12cyc_XA_Y_noRSV_42ln :
+ SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
+ let Latency = 12; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_12cyc_XA_Y_noRSV_42ln],
+ (instrs FSQRTSr)>;
+def KryoWrite_21cyc_XA_Y_noRSV_42ln :
+ SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
+ let Latency = 21; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_21cyc_XA_Y_noRSV_42ln],
+ (instrs FSQRTDr)>;
+def KryoWrite_12cyc_XA_Y_noRSV_120ln :
+ SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
+ let Latency = 12; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_12cyc_XA_Y_noRSV_120ln],
+ (instrs FSQRTv2f32)>;
+def KryoWrite_21cyc_XA_Y_XA_Y_122ln :
+ SchedWriteRes<[KryoUnitXA, KryoUnitY, KryoUnitXA, KryoUnitY]> {
+ let Latency = 21; let NumMicroOps = 4;
+}
+def : InstRW<[KryoWrite_21cyc_XA_Y_XA_Y_122ln],
+ (instrs FSQRTv4f32)>;
+def KryoWrite_36cyc_XA_Y_XA_Y_122ln :
+ SchedWriteRes<[KryoUnitXA, KryoUnitY, KryoUnitXA, KryoUnitY]> {
+ let Latency = 36; let NumMicroOps = 4;
+}
+def : InstRW<[KryoWrite_36cyc_XA_Y_XA_Y_122ln],
+ (instrs FSQRTv2f64)>;
+def KryoWrite_1cyc_X_201ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 1; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_1cyc_X_201ln],
+ (instregex "INSv.*")>;
+def KryoWrite_3cyc_LS_255ln :
+ SchedWriteRes<[KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_3cyc_LS_255ln],
+ (instregex "LD1(One(v16b|v8h|v4s|v2d)|i64)$")>;
+def KryoWrite_4cyc_LS_X_270ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_4cyc_LS_X_270ln],
+ (instregex "LD1(i8|i16|i32)$")>;
+def KryoWrite_3cyc_LS_noRSV_285ln :
+ SchedWriteRes<[KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_LS_noRSV_285ln],
+ (instregex "LD1One(v8b|v4h|v2s|v1d)$")>;
+def KryoWrite_3cyc_LS_XY_289ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_LS_XY_289ln, WriteAdr],
+ (instregex "LD1(One(v16b|v8h|v4s|v2d)|i64)_POST$")>;
+def KryoWrite_4cyc_LS_XY_X_298ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_4cyc_LS_XY_X_298ln, WriteAdr],
+ (instregex "LD1(i8|i16|i32)_POST$")>;
+def KryoWrite_3cyc_LS_LS_LS_308ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_3cyc_LS_LS_LS_308ln],
+ (instregex "LD1Three(v16b|v8h|v4s|v2d)$")>;
+def KryoWrite_3cyc_LS_XY_noRSV_317ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_3cyc_LS_XY_noRSV_317ln, WriteAdr],
+ (instregex "LD1One(v8b|v4h|v2s|v1d)_POST$")>;
+def KryoWrite_3cyc_LS_LS_LS_LS_328ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitLS, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 4;
+}
+def : InstRW<[KryoWrite_3cyc_LS_LS_LS_LS_328ln, WriteAdr],
+ (instregex "LD1Four(v16b|v8h|v4s|v2d)_POST$")>;
+def KryoWrite_3cyc_LS_XY_LS_LS_332ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 4;
+}
+def : InstRW<[KryoWrite_3cyc_LS_XY_LS_LS_332ln, WriteAdr],
+ (instregex "LD1Three(v16b|v8h|v4s|v2d)_POST$")>;
+def KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_348ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 5;
+}
+def : InstRW<[KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_348ln],
+ (instregex "LD1Three(v8b|v4h|v2s|v1d)$")>;
+def KryoWrite_3cyc_LS_XY_LS_LS_LS_351ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitLS, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 5;
+}
+def : InstRW<[KryoWrite_3cyc_LS_XY_LS_LS_LS_351ln],
+ (instregex "LD1Four(v16b|v8h|v4s|v2d)$")>;
+def KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_noRSV_358ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 6;
+}
+def : InstRW<[KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_noRSV_358ln],
+ (instregex "LD1Four(v8b|v4h|v2s|v1d)$")>;
+def KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_360ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 6;
+}
+def : InstRW<[KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_360ln, WriteAdr],
+ (instregex "LD1Three(v8b|v4h|v2s|v1d)_POST$")>;
+def KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_noRSV_368ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 7;
+}
+def : InstRW<[KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_noRSV_368ln, WriteAdr],
+ (instregex "LD1Four(v8b|v4h|v2s|v1d)_POST$")>;
+def KryoWrite_3cyc_LS_LS_281ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_LS_LS_281ln],
+ (instregex "LD(1|2)Two(v16b|v8h|v4s|v2d)$")>;
+def KryoWrite_3cyc_LS_noRSV_noRSV_311ln :
+ SchedWriteRes<[KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_3cyc_LS_noRSV_noRSV_311ln],
+ (instregex "LD(1|2)Two(v8b|v4h|v2s|v1d)$")>;
+def KryoWrite_3cyc_LS_XY_LS_313ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_3cyc_LS_XY_LS_313ln, WriteAdr],
+ (instregex "LD(1|2)Two(v16b|v8h|v4s|v2d)_POST$")>;
+def KryoWrite_3cyc_LS_XY_noRSV_noRSV_334ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 4;
+}
+def : InstRW<[KryoWrite_3cyc_LS_XY_noRSV_noRSV_334ln, WriteAdr],
+ (instregex "LD(1|2)Two(v8b|v4h|v2s|v1d)_POST$")>;
+def KryoWrite_3cyc_LS_256ln :
+ SchedWriteRes<[KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_3cyc_LS_256ln],
+ (instregex "LD1R(v16b|v8h|v4s|v2d)$")>;
+def KryoWrite_3cyc_LS_noRSV_286ln :
+ SchedWriteRes<[KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_LS_noRSV_286ln],
+ (instregex "LD1R(v8b|v4h|v2s|v1d)$")>;
+def KryoWrite_3cyc_LS_XY_290ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_LS_XY_290ln, WriteAdr],
+ (instregex "LD1R(v16b|v8h|v4s|v2d)_POST$")>;
+def KryoWrite_3cyc_LS_XY_noRSV_318ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_3cyc_LS_XY_noRSV_318ln, WriteAdr],
+ (instregex "LD1R(v8b|v4h|v2s|v1d)_POST$")>;
+def KryoWrite_3cyc_LS_257ln :
+ SchedWriteRes<[KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_3cyc_LS_257ln],
+ (instregex "LD2i64$")>;
+def KryoWrite_3cyc_LS_XY_291ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_LS_XY_291ln, WriteAdr],
+ (instregex "LD2i64_POST$")>;
+def KryoWrite_4cyc_LS_X_X_296ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitX, KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_4cyc_LS_X_X_296ln],
+ (instregex "LD2(i8|i16|i32)$")>;
+def KryoWrite_4cyc_LS_XY_X_X_321ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitX, KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 4;
+}
+def : InstRW<[KryoWrite_4cyc_LS_XY_X_X_321ln, WriteAdr],
+ (instregex "LD2(i8|i16|i32)_POST$")>;
+def KryoWrite_3cyc_LS_LS_282ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_LS_LS_282ln],
+ (instregex "LD2R(v16b|v8h|v4s|v2d)$")>;
+def KryoWrite_3cyc_LS_noRSV_noRSV_312ln :
+ SchedWriteRes<[KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_3cyc_LS_noRSV_noRSV_312ln],
+ (instregex "LD2R(v8b|v4h|v2s|v1d)$")>;
+def KryoWrite_3cyc_LS_XY_LS_314ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_3cyc_LS_XY_LS_314ln, WriteAdr],
+ (instregex "LD2R(v16b|v8h|v4s|v2d)_POST$")>;
+def KryoWrite_3cyc_LS_XY_noRSV_noRSV_335ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 4;
+}
+def : InstRW<[KryoWrite_3cyc_LS_XY_noRSV_noRSV_335ln, WriteAdr],
+ (instregex "LD2R(v8b|v4h|v2s|v1d)_POST$")>;
+def KryoWrite_3cyc_LS_LS_283ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_LS_LS_283ln],
+ (instregex "LD3i64$")>;
+def KryoWrite_3cyc_LS_LS_LS_309ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_3cyc_LS_LS_LS_309ln],
+ (instregex "LD3Threev2d$")>;
+def KryoWrite_3cyc_LS_XY_LS_315ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_3cyc_LS_XY_LS_315ln, WriteAdr],
+ (instregex "LD3i64_POST$")>;
+def KryoWrite_4cyc_LS_X_X_X_320ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 4;
+}
+def : InstRW<[KryoWrite_4cyc_LS_X_X_X_320ln],
+ (instregex "LD3(i8|i16|i32)$")>;
+def KryoWrite_3cyc_LS_XY_LS_LS_331ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 4;
+}
+def : InstRW<[KryoWrite_3cyc_LS_XY_LS_LS_331ln, WriteAdr],
+ (instregex "LD3Threev2d_POST$")>;
+def KryoWrite_4cyc_LS_XY_X_X_X_338ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitX, KryoUnitX, KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 5;
+}
+def : InstRW<[KryoWrite_4cyc_LS_XY_X_X_X_338ln, WriteAdr],
+ (instregex "LD3(i8|i16|i32)_POST$")>;
+def KryoWrite_4cyc_LS_LS_X_X_X_noRSV_noRSV_noRSV_373ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 8;
+}
+def : InstRW<[KryoWrite_4cyc_LS_LS_X_X_X_noRSV_noRSV_noRSV_373ln],
+ (instregex "LD3Three(v8b|v4h|v2s)$")>;
+def KryoWrite_4cyc_LS_XY_LS_X_X_X_noRSV_noRSV_noRSV_380ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitX, KryoUnitX,
+ KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 9;
+}
+def : InstRW<[KryoWrite_4cyc_LS_XY_LS_X_X_X_noRSV_noRSV_noRSV_380ln, WriteAdr],
+ (instregex "LD3Three(v8b|v4h|v2s)_POST$")>;
+def KryoWrite_4cyc_LS_LS_X_X_X_LS_LS_X_X_X_381ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX,
+ KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 10;
+}
+def : InstRW<[KryoWrite_4cyc_LS_LS_X_X_X_LS_LS_X_X_X_381ln],
+ (instregex "LD3Three(v16b|v8h|v4s)$")>;
+def KryoWrite_4cyc_LS_LS_X_X_X_LS_XY_LS_X_X_X_383ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX,
+ KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitX, KryoUnitX,
+ KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 11;
+}
+def : InstRW<[KryoWrite_4cyc_LS_LS_X_X_X_LS_XY_LS_X_X_X_383ln, WriteAdr],
+ (instregex "LD3Three(v16b|v8h|v4s)_POST$")>;
+def KryoWrite_3cyc_LS_LS_LS_310ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_3cyc_LS_LS_LS_310ln],
+ (instregex "LD3R(v16b|v8h|v4s|v2d)$")>;
+def KryoWrite_3cyc_LS_XY_LS_LS_333ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 4;
+}
+def : InstRW<[KryoWrite_3cyc_LS_XY_LS_LS_333ln, WriteAdr],
+ (instregex "LD3R(v16b|v8h|v4s|v2d)_POST$")>;
+def KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_349ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 5;
+}
+def : InstRW<[KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_349ln],
+ (instregex "LD3R(v8b|v4h|v2s|v1d)$")>;
+def KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_361ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 6;
+}
+def : InstRW<[KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_361ln, WriteAdr],
+ (instregex "LD3R(v8b|v4h|v2s|v1d)_POST$")>;
+def KryoWrite_3cyc_LS_LS_284ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_LS_LS_284ln],
+ (instregex "LD4i64$")>;
+def KryoWrite_3cyc_LS_XY_LS_316ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_3cyc_LS_XY_LS_316ln, WriteAdr],
+ (instregex "LD4i64_POST$")>;
+def KryoWrite_3cyc_LS_LS_LS_LS_329ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitLS, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 4;
+}
+def : InstRW<[KryoWrite_3cyc_LS_LS_LS_LS_329ln],
+ (instregex "LD4Four(v2d)$")>;
+def KryoWrite_4cyc_LS_X_X_X_X_337ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 5;
+}
+def : InstRW<[KryoWrite_4cyc_LS_X_X_X_X_337ln],
+ (instregex "LD4(i8|i16|i32)$")>;
+def KryoWrite_3cyc_LS_XY_LS_LS_LS_350ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitLS, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 5;
+}
+def : InstRW<[KryoWrite_3cyc_LS_XY_LS_LS_LS_350ln, WriteAdr],
+ (instregex "LD4Four(v2d)_POST$")>;
+def KryoWrite_4cyc_LS_XY_X_X_X_X_355ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitX, KryoUnitX, KryoUnitX,
+ KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 6;
+}
+def : InstRW<[KryoWrite_4cyc_LS_XY_X_X_X_X_355ln, WriteAdr],
+ (instregex "LD4(i8|i16|i32)_POST$")>;
+def KryoWrite_4cyc_LS_LS_X_X_X_X_noRSV_noRSV_noRSV_noRSV_382ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX,
+ KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 10;
+}
+def : InstRW<[KryoWrite_4cyc_LS_LS_X_X_X_X_noRSV_noRSV_noRSV_noRSV_382ln],
+ (instregex "LD4Four(v8b|v4h|v2s)$")>;
+def KryoWrite_4cyc_LS_XY_LS_X_X_X_X_noRSV_noRSV_noRSV_noRSV_384ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitX, KryoUnitX,
+ KryoUnitX, KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 11;
+}
+def : InstRW<[KryoWrite_4cyc_LS_XY_LS_X_X_X_X_noRSV_noRSV_noRSV_noRSV_384ln, WriteAdr],
+ (instregex "LD4Four(v8b|v4h|v2s)_POST$")>;
+def KryoWrite_4cyc_LS_LS_X_X_X_X_LS_LS_X_X_X_X_386ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX,
+ KryoUnitX, KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX,
+ KryoUnitX, KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 12;
+}
+def : InstRW<[KryoWrite_4cyc_LS_LS_X_X_X_X_LS_LS_X_X_X_X_386ln],
+ (instregex "LD4Four(v16b|v8h|v4s)$")>;
+def KryoWrite_4cyc_LS_LS_X_X_X_X_LS_XY_LS_X_X_X_X_389ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX,
+ KryoUnitX, KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitX,
+ KryoUnitX, KryoUnitX, KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 13;
+}
+def : InstRW<[KryoWrite_4cyc_LS_LS_X_X_X_X_LS_XY_LS_X_X_X_X_389ln, WriteAdr],
+ (instregex "LD4Four(v16b|v8h|v4s)_POST$")>;
+def KryoWrite_3cyc_LS_LS_LS_LS_330ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitLS, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 4;
+}
+def : InstRW<[KryoWrite_3cyc_LS_LS_LS_LS_330ln],
+ (instregex "LD4R(v16b|v8h|v4s|v2d)$")>;
+def KryoWrite_3cyc_LS_XY_LS_LS_LS_352ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitLS, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 5;
+}
+def : InstRW<[KryoWrite_3cyc_LS_XY_LS_LS_LS_352ln, WriteAdr],
+ (instregex "LD4R(v16b|v8h|v4s|v2d)_POST$")>;
+def KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_noRSV_359ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 6;
+}
+def : InstRW<[KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_noRSV_359ln],
+ (instregex "LD4R(v8b|v4h|v2s|v1d)$")>;
+def KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_noRSV_369ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 7;
+}
+def : InstRW<[KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_noRSV_369ln, WriteAdr],
+ (instregex "LD4R(v8b|v4h|v2s|v1d)_POST$")>;
+def KryoWrite_3cyc_LS_LS_400ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_LS_LS_400ln],
+ (instregex "LDAX?R(B|H|W|X)")>;
+def : InstRW<[KryoWrite_3cyc_LS_LS_400ln, WriteLDHi],
+ (instregex "LDAXP(W|X)")>;
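+// For load-pair and load-exclusive-pair forms, the extra WriteLDHi operand
+// models the second destination register; the KryoWrite_* entry still
+// describes the units consumed and the latency of the first result.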
+def KryoWrite_3cyc_LS_LS_401ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_LS_LS_401ln, WriteLDHi],
+ (instrs LDNPQi)>;
+def KryoWrite_3cyc_LS_noRSV_noRSV_408ln :
+ SchedWriteRes<[KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_3cyc_LS_noRSV_noRSV_408ln, WriteLDHi],
+ (instrs LDNPDi, LDNPSi)>;
+def KryoWrite_3cyc_LS_394ln :
+ SchedWriteRes<[KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_3cyc_LS_394ln, WriteLDHi],
+ (instrs LDNPWi, LDNPXi)>;
+def KryoWrite_3cyc_LS_LS_402ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_LS_LS_402ln, WriteLDHi],
+ (instrs LDPQi)>;
+def KryoWrite_3cyc_LS_noRSV_noRSV_409ln :
+ SchedWriteRes<[KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_3cyc_LS_noRSV_noRSV_409ln, WriteLDHi],
+ (instrs LDPDi, LDPSi)>;
+def KryoWrite_3cyc_LS_XY_LS_410ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_3cyc_LS_XY_LS_410ln, WriteLDHi, WriteAdr],
+ (instregex "LDPQ(post|pre)")>;
+def KryoWrite_3cyc_LS_XY_noRSV_noRSV_411ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 4;
+}
+def : InstRW<[KryoWrite_3cyc_LS_XY_noRSV_noRSV_411ln, WriteLDHi, WriteAdr],
+ (instregex "LDP(D|S)(post|pre)")>;
+def KryoWrite_3cyc_LS_393ln :
+ SchedWriteRes<[KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_3cyc_LS_393ln, WriteLDHi],
+ (instrs LDPWi, LDPXi)>;
+def KryoWrite_3cyc_LS_XY_403ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_LS_XY_403ln, WriteLDHi, WriteAdr],
+ (instregex "LDP(W|X)(post|pre)")>;
+def KryoWrite_4cyc_LS_395ln :
+ SchedWriteRes<[KryoUnitLS]> {
+ let Latency = 4; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_4cyc_LS_395ln, WriteLDHi],
+ (instrs LDPSWi)>;
+def KryoWrite_4cyc_LS_XY_405ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY]> {
+ let Latency = 4; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_4cyc_LS_XY_405ln, WriteLDHi, WriteAdr],
+ (instrs LDPSWpost, LDPSWpre)>;
+def KryoWrite_3cyc_LS_264ln :
+ SchedWriteRes<[KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_3cyc_LS_264ln],
+ (instrs LDRQui, LDRQl)>;
+def KryoWrite_4cyc_X_LS_271ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitLS]> {
+ let Latency = 4; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_4cyc_X_LS_271ln],
+ (instrs LDRQroW, LDRQroX)>;
+def KryoWrite_3cyc_LS_noRSV_287ln :
+ SchedWriteRes<[KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_LS_noRSV_287ln],
+ (instregex "LDR((D|S)l|(D|S|H|B)ui)")>;
+def KryoWrite_3cyc_LS_XY_293ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_LS_XY_293ln, WriteAdr],
+ (instrs LDRQpost, LDRQpre)>;
+def KryoWrite_4cyc_X_LS_noRSV_297ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitLS]> {
+ let Latency = 4; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_4cyc_X_LS_noRSV_297ln],
+ (instregex "LDR(D|S|H|B)ro(W|X)")>;
+def KryoWrite_3cyc_LS_XY_noRSV_319ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_3cyc_LS_XY_noRSV_319ln, WriteAdr],
+ (instregex "LDR(D|S|H|B)(post|pre)")>;
+def KryoWrite_3cyc_LS_261ln :
+ SchedWriteRes<[KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_3cyc_LS_261ln],
+ (instregex "LDR(BB|HH|W|X)ui")>;
+def KryoWrite_3cyc_LS_XY_292ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_LS_XY_292ln, WriteAdr],
+ (instregex "LDR(BB|HH|W|X)(post|pre)")>;
+def KryoWrite_4cyc_X_LS_272ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitLS]> {
+ let Latency = 4; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_4cyc_X_LS_272ln],
+ (instregex "(LDR(BB|HH|W|X)ro(W|X)|PRFMro(W|X))")>;
+def KryoWrite_3cyc_LS_262ln :
+ SchedWriteRes<[KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_3cyc_LS_262ln],
+ (instrs LDRWl, LDRXl)>;
+def KryoWrite_4cyc_LS_268ln :
+ SchedWriteRes<[KryoUnitLS]> {
+ let Latency = 4; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_4cyc_LS_268ln],
+ (instregex "LDRS(BW|BX|HW|HX|W)ui")>;
+def KryoWrite_5cyc_X_LS_273ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitLS]> {
+ let Latency = 5; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_5cyc_X_LS_273ln],
+ (instregex "LDRS(BW|BX|HW|HX|W)ro(W|X)")>;
+def KryoWrite_4cyc_LS_XY_294ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitXY]> {
+ let Latency = 4; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_4cyc_LS_XY_294ln, WriteAdr],
+ (instregex "LDRS(BW|BX|HW|HX|W)(post|pre)")>;
+def KryoWrite_4cyc_LS_269ln :
+ SchedWriteRes<[KryoUnitLS]> {
+ let Latency = 4; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_4cyc_LS_269ln],
+ (instrs LDRSWl)>;
+def KryoWrite_3cyc_LS_260ln :
+ SchedWriteRes<[KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_3cyc_LS_260ln],
+ (instregex "LDTR(B|H|W|X)i")>;
+def KryoWrite_4cyc_LS_267ln :
+ SchedWriteRes<[KryoUnitLS]> {
+ let Latency = 4; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_4cyc_LS_267ln],
+ (instregex "LDTRS(BW|BX|HW|HX|W)i")>;
+def KryoWrite_3cyc_LS_263ln :
+ SchedWriteRes<[KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_3cyc_LS_263ln],
+ (instrs LDURQi)>;
+def KryoWrite_3cyc_LS_noRSV_288ln :
+ SchedWriteRes<[KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_LS_noRSV_288ln],
+ (instregex "LDUR(D|S|H|B)i")>;
+def KryoWrite_3cyc_LS_259ln :
+ SchedWriteRes<[KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_3cyc_LS_259ln],
+ (instregex "LDUR(BB|HH|W|X)i")>;
+def KryoWrite_4cyc_LS_266ln :
+ SchedWriteRes<[KryoUnitLS]> {
+ let Latency = 4; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_4cyc_LS_266ln],
+ (instregex "LDURS(B|H)?(W|X)i")>;
+def KryoWrite_3cyc_LS_258ln :
+ SchedWriteRes<[KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_3cyc_LS_258ln, WriteLDHi],
+ (instregex "LDXP(W|X)")>;
+def KryoWrite_3cyc_LS_258_1ln :
+ SchedWriteRes<[KryoUnitLS]> {
+ let Latency = 3; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_3cyc_LS_258_1ln],
+ (instregex "LDXR(B|H|W|X)")>;
+def KryoWrite_2cyc_XY_XY_137ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_XY_137ln],
+ (instrs LSLVWr, LSLVXr)>;
+def KryoWrite_1cyc_XY_135ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_1cyc_XY_135ln],
+ (instregex "(LS|AS|RO)RV(W|X)r")>;
+def KryoWrite_4cyc_X_84ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_4cyc_X_84ln],
+ (instrs MADDWrrr, MSUBWrrr)>;
+def KryoWrite_5cyc_X_85ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 5; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_5cyc_X_85ln],
+ (instrs MADDXrrr, MSUBXrrr)>;
+def KryoWrite_4cyc_X_noRSV_188ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_4cyc_X_noRSV_188ln],
+ (instregex "(MLA|MLS|MUL)(v8i8|v4i16|v2i32)(_indexed)?")>;
+def KryoWrite_4cyc_X_X_192ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_4cyc_X_X_192ln],
+ (instregex "(MLA|MLS|MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?")>;
+def KryoWrite_1cyc_XY_noRSV_198ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_XY_noRSV_198ln],
+ (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)")>;
+def KryoWrite_1cyc_XY_XY_199ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_XY_XY_199ln],
+ (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)")>;
+def KryoWrite_1cyc_X_89ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 1; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_1cyc_X_89ln],
+ (instrs MOVKWi, MOVKXi)>;
+def KryoWrite_1cyc_XY_91ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_1cyc_XY_91ln],
+ (instrs MOVNWi, MOVNXi)>;
+def KryoWrite_1cyc_XY_90ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_1cyc_XY_90ln],
+ (instrs MOVZWi, MOVZXi)>;
+def KryoWrite_2cyc_XY_93ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_2cyc_XY_93ln],
+ (instrs MRS)>;
+def KryoWrite_0cyc_X_87ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 0; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_0cyc_X_87ln],
+ (instrs MSRpstateImm4)>;
+def : InstRW<[KryoWrite_0cyc_X_87ln],
+ (instrs MSRpstateImm1)>;
+def KryoWrite_0cyc_XY_88ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 0; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_0cyc_XY_88ln],
+ (instrs MSR)>;
+def KryoWrite_1cyc_XY_noRSV_143ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_XY_noRSV_143ln],
+ (instregex "NEG(v8i8|v4i16|v2i32|v1i64)")>;
+def KryoWrite_1cyc_XY_XY_145ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_XY_XY_145ln],
+ (instregex "NEG(v16i8|v8i16|v4i32|v2i64)")>;
+def KryoWrite_1cyc_XY_noRSV_193ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_XY_noRSV_193ln],
+ (instrs NOTv8i8)>;
+def KryoWrite_1cyc_XY_XY_194ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_XY_XY_194ln],
+ (instrs NOTv16i8)>;
+def KryoWrite_2cyc_XY_noRSV_234ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_noRSV_234ln],
+ (instrs PMULv8i8)>;
+def KryoWrite_2cyc_XY_XY_236ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_XY_236ln],
+ (instrs PMULv16i8)>;
+def KryoWrite_2cyc_XY_XY_235ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_XY_235ln],
+ (instrs PMULLv8i8, PMULLv16i8)>;
+def KryoWrite_3cyc_XY_XY_237ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_XY_237ln],
+ (instrs PMULLv1i64, PMULLv2i64)>;
+def KryoWrite_0cyc_LS_254ln :
+ SchedWriteRes<[KryoUnitLS]> {
+ let Latency = 0; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_0cyc_LS_254ln],
+ (instrs PRFMl, PRFMui)>;
+def KryoWrite_0cyc_LS_253ln :
+ SchedWriteRes<[KryoUnitLS]> {
+ let Latency = 0; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_0cyc_LS_253ln],
+ (instrs PRFUMi)>;
+def KryoWrite_6cyc_XY_X_noRSV_175ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitX]> {
+ let Latency = 6; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_6cyc_XY_X_noRSV_175ln],
+ (instregex "R(ADD|SUB)HNv.*")>;
+def KryoWrite_2cyc_XY_204ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_2cyc_XY_204ln],
+ (instrs RBITWr, RBITXr)>;
+def KryoWrite_2cyc_XY_noRSV_218ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_noRSV_218ln],
+ (instrs RBITv8i8)>;
+def KryoWrite_2cyc_XY_XY_219ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_XY_219ln],
+ (instrs RBITv16i8)>;
+def KryoWrite_1cyc_X_202ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 1; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_1cyc_X_202ln],
+ (instregex "REV(16|32)?(W|X)r")>;
+def KryoWrite_1cyc_XY_noRSV_214ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_XY_noRSV_214ln],
+ (instregex "REV(16|32|64)(v8i8|v4i16|v2i32)")>;
+def KryoWrite_1cyc_XY_XY_216ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_XY_XY_216ln],
+ (instregex "REV(16|32|64)(v16i8|v8i16|v4i32)")>;
+def KryoWrite_3cyc_X_noRSV_244ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_X_noRSV_244ln],
+ (instregex "S(L|R)I(d|(v8i8|v4i16|v2i32)_shift)")>;
+def KryoWrite_3cyc_X_X_245ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_X_X_245ln],
+ (instregex "S(L|R)I(v16i8|v8i16|v4i32|v2i64)_shift")>;
+def KryoWrite_1cyc_XY_2ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_1cyc_XY_2ln, ReadI, ReadI],
+ (instregex "SBCS?(W|X)r")>;
+def KryoWrite_2cyc_XA_XA_XA_24ln :
+ SchedWriteRes<[KryoUnitXA, KryoUnitXA, KryoUnitXA]> {
+ let Latency = 2; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_2cyc_XA_XA_XA_24ln],
+ (instrs SHA1Crrr, SHA1Mrrr, SHA1Prrr)>;
+def KryoWrite_1cyc_XY_noRSV_21ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_XY_noRSV_21ln],
+ (instrs SHA1Hrr)>;
+def KryoWrite_2cyc_X_X_23ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_X_X_23ln],
+ (instrs SHA1SU0rrr, SHA1SU1rr, SHA256SU0rr)>;
+def KryoWrite_4cyc_XA_XA_XA_25ln :
+ SchedWriteRes<[KryoUnitXA, KryoUnitXA, KryoUnitXA]> {
+ let Latency = 4; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_4cyc_XA_XA_XA_25ln],
+ (instrs SHA256Hrrr, SHA256H2rrr)>;
+def KryoWrite_3cyc_XY_XY_X_X_26ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitX, KryoUnitX]> {
+ let Latency = 3; let NumMicroOps = 4;
+}
+def : InstRW<[KryoWrite_3cyc_XY_XY_X_X_26ln],
+ (instrs SHA256SU1rrr)>;
+def KryoWrite_4cyc_X_noRSV_189ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_4cyc_X_noRSV_189ln],
+ (instregex "SQR?DMULH(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?")>;
+def KryoWrite_3cyc_XY_noRSV_68ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_noRSV_68ln],
+ (instregex "SQ(ABS|NEG)(v1i8|v1i16|v1i32|v1i64)")>;
+def KryoWrite_3cyc_XY_noRSV_157ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_noRSV_157ln],
+ (instregex "SQ(ABS|NEG)(v8i8|v4i16|v2i32)")>;
+def KryoWrite_3cyc_XY_XY_164ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_XY_164ln],
+ (instregex "SQ(ABS|NEG)(v16i8|v8i16|v4i32|v2i64)")>;
+def KryoWrite_4cyc_X_noRSV_190ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_4cyc_X_noRSV_190ln],
+ (instregex "SQD(MLAL|MLSL|MULL)(i16|i32)")>;
+def KryoWrite_0cyc_LS_Y_274ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY]> {
+ let Latency = 0; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_0cyc_LS_Y_274ln],
+ (instregex "ST1(One(v8b|v4h|v2s|v1d|v16b|v8h|v4s|v2d)|(i8|i16|i32|i64)|Two(v8b|v4h|v2s|v1d))$")>;
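+// Stores pair each LS micro-op with a companion KryoUnitY micro-op at zero
+// result latency; the pre/post-indexed forms below add an X or XY micro-op
+// (plus WriteAdr) for the base-register update.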
+def KryoWrite_1cyc_LS_Y_X_301ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitX]> {
+ let Latency = 1; let NumMicroOps = 3;
+}
+def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_X_301ln],
+ (instregex "ST1(One(v8b|v4h|v2s|v1d|v16b|v8h|v4s|v2d)|(i8|i16|i32|i64)|Two(v8b|v4h|v2s|v1d))_POST$")>;
+def KryoWrite_1cyc_LS_Y_XY_305ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 3;
+}
+def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_XY_305ln],
+ (instregex "ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>;
+def KryoWrite_0cyc_LS_Y_LS_Y_323ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> {
+ let Latency = 0; let NumMicroOps = 4;
+}
+def : InstRW<[WriteAdr, KryoWrite_0cyc_LS_Y_LS_Y_323ln],
+ (instregex "ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))_POST$")>;
+def KryoWrite_1cyc_LS_Y_XY_LS_Y_345ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitLS, KryoUnitY]> {
+ let Latency = 1; let NumMicroOps = 5;
+}
+def : InstRW<[KryoWrite_1cyc_LS_Y_XY_LS_Y_345ln],
+ (instregex "ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>;
+def KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_356ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY, KryoUnitLS,
+ KryoUnitY]> {
+ let Latency = 0; let NumMicroOps = 6;
+}
+def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_356ln],
+ (instregex "ST1Three(v16b|v8h|v4s|v2d)$")>;
+def KryoWrite_1cyc_LS_Y_XY_LS_Y_LS_Y_366ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitLS, KryoUnitY,
+ KryoUnitLS, KryoUnitY]> {
+ let Latency = 1; let NumMicroOps = 7;
+}
+def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_XY_LS_Y_LS_Y_366ln],
+ (instregex "ST1Three(v16b|v8h|v4s|v2d)_POST$")>;
+def KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_LS_Y_371ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY, KryoUnitLS,
+ KryoUnitY, KryoUnitLS, KryoUnitY]> {
+ let Latency = 0; let NumMicroOps = 8;
+}
+def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_LS_Y_371ln],
+ (instregex "ST1Four(v16b|v8h|v4s|v2d)$")>;
+def KryoWrite_0cyc_LS_Y_LS_Y_XY_LS_Y_LS_Y_377ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY, KryoUnitXY,
+ KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> {
+ let Latency = 0; let NumMicroOps = 9;
+}
+def : InstRW<[WriteAdr, KryoWrite_0cyc_LS_Y_LS_Y_XY_LS_Y_LS_Y_377ln],
+ (instregex "ST1Four(v16b|v8h|v4s|v2d)_POST$")>;
+def KryoWrite_0cyc_LS_Y_275ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY]> {
+ let Latency = 0; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_0cyc_LS_Y_275ln],
+ (instregex "ST2(Two(v8b|v4h|v2s|v1d|v16b|v8h|v4s|v2d)|(i8|i16|i32|i64))$")>;
+def KryoWrite_1cyc_LS_Y_XY_306ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 3;
+}
+def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_XY_306ln],
+ (instregex "ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))_POST$")>;
+def KryoWrite_0cyc_LS_Y_LS_Y_322ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> {
+ let Latency = 0; let NumMicroOps = 4;
+}
+def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_322ln],
+ (instregex "ST2Two(v16b|v8h|v4s|v2d)$")>;
+def KryoWrite_1cyc_LS_Y_XY_LS_Y_344ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitLS, KryoUnitY]> {
+ let Latency = 1; let NumMicroOps = 5;
+}
+def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_XY_LS_Y_344ln],
+ (instregex "ST2Two(v16b|v8h|v4s|v2d)_POST$")>;
+def KryoWrite_0cyc_LS_Y_LS_Y_324ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> {
+ let Latency = 0; let NumMicroOps = 4;
+}
+def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_324ln],
+ (instregex "ST3(Threev1d|(i8|i16|i32|i64))$")>;
+def KryoWrite_1cyc_LS_Y_XY_LS_Y_346ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitLS, KryoUnitY]> {
+ let Latency = 1; let NumMicroOps = 5;
+}
+def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_XY_LS_Y_346ln],
+ (instregex "ST3(Threev1d|(i8|i16|i32|i64))_POST$")>;
+def KryoWrite_1cyc_X_X_LS_Y_LS_Y_353ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitLS,
+ KryoUnitY]> {
+ let Latency = 1; let NumMicroOps = 6;
+}
+def : InstRW<[KryoWrite_1cyc_X_X_LS_Y_LS_Y_353ln],
+ (instregex "ST3Three(v8b|v4h|v2s)$")>;
+def KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_357ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY, KryoUnitLS,
+ KryoUnitY]> {
+ let Latency = 0; let NumMicroOps = 6;
+}
+def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_357ln],
+ (instregex "ST3Threev2d$")>;
+def KryoWrite_1cyc_X_X_LS_Y_XY_LS_Y_363ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitXY,
+ KryoUnitLS, KryoUnitY]> {
+ let Latency = 1; let NumMicroOps = 7;
+}
+def : InstRW<[WriteAdr, KryoWrite_1cyc_X_X_LS_Y_XY_LS_Y_363ln],
+ (instregex "ST3Three(v8b|v4h|v2s)_POST$")>;
+def KryoWrite_1cyc_LS_Y_XY_LS_Y_LS_Y_367ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitLS, KryoUnitY,
+ KryoUnitLS, KryoUnitY]> {
+ let Latency = 1; let NumMicroOps = 7;
+}
+def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_XY_LS_Y_LS_Y_367ln],
+ (instregex "ST3Threev2d_POST$")>;
+def KryoWrite_1cyc_X_X_LS_Y_LS_Y_X_X_LS_Y_LS_Y_385ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitLS,
+ KryoUnitY, KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY,
+ KryoUnitLS, KryoUnitY]> {
+ let Latency = 1; let NumMicroOps = 12;
+}
+def : InstRW<[KryoWrite_1cyc_X_X_LS_Y_LS_Y_X_X_LS_Y_LS_Y_385ln],
+ (instregex "ST3Three(v16b|v8h|v4s)$")>;
+def KryoWrite_1cyc_X_X_LS_Y_LS_Y_X_X_LS_Y_XY_LS_Y_388ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitLS,
+ KryoUnitY, KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY,
+ KryoUnitXY, KryoUnitLS, KryoUnitY]> {
+ let Latency = 1; let NumMicroOps = 13;
+}
+def : InstRW<[WriteAdr, KryoWrite_1cyc_X_X_LS_Y_LS_Y_X_X_LS_Y_XY_LS_Y_388ln],
+ (instregex "ST3Three(v16b|v8h|v4s)_POST$")>;
+def KryoWrite_0cyc_LS_Y_LS_Y_325ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> {
+ let Latency = 0; let NumMicroOps = 4;
+}
+def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_325ln],
+ (instregex "ST4(Fourv1d|(i8|i16|i32|i64))$")>;
+def KryoWrite_1cyc_LS_Y_XY_LS_Y_347ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitLS, KryoUnitY]> {
+ let Latency = 1; let NumMicroOps = 5;
+}
+def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_XY_LS_Y_347ln],
+ (instregex "ST4(Fourv1d|(i8|i16|i32|i64))_POST$")>;
+def KryoWrite_1cyc_X_X_LS_Y_X_X_LS_Y_370ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitX,
+ KryoUnitX, KryoUnitLS, KryoUnitY]> {
+ let Latency = 1; let NumMicroOps = 8;
+}
+def : InstRW<[KryoWrite_1cyc_X_X_LS_Y_X_X_LS_Y_370ln],
+ (instregex "ST4Four(v8b|v4h|v2s)$")>;
+def KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_LS_Y_372ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY, KryoUnitLS,
+ KryoUnitY, KryoUnitLS, KryoUnitY]> {
+ let Latency = 0; let NumMicroOps = 8;
+}
+def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_LS_Y_372ln],
+ (instregex "ST4Fourv2d$")>;
+def KryoWrite_1cyc_X_X_LS_Y_XY_X_X_LS_Y_375ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitXY,
+ KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY]> {
+ let Latency = 1; let NumMicroOps = 9;
+}
+def : InstRW<[WriteAdr, KryoWrite_1cyc_X_X_LS_Y_XY_X_X_LS_Y_375ln],
+ (instregex "ST4Four(v8b|v4h|v2s)_POST$")>;
+def KryoWrite_0cyc_LS_Y_LS_Y_XY_LS_Y_LS_Y_379ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY, KryoUnitXY,
+ KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> {
+ let Latency = 0; let NumMicroOps = 9;
+}
+def : InstRW<[WriteAdr, KryoWrite_0cyc_LS_Y_LS_Y_XY_LS_Y_LS_Y_379ln],
+ (instregex "ST4Fourv2d_POST$")>;
+def KryoWrite_1cyc_X_X_LS_Y_X_X_LS_Y_X_X_LS_Y_X_X_LS_Y_390ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitX,
+ KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitX, KryoUnitX,
+ KryoUnitLS, KryoUnitY, KryoUnitX, KryoUnitX, KryoUnitLS,
+ KryoUnitY]> {
+ let Latency = 1; let NumMicroOps = 16;
+}
+def : InstRW<[KryoWrite_1cyc_X_X_LS_Y_X_X_LS_Y_X_X_LS_Y_X_X_LS_Y_390ln],
+ (instregex "ST4Four(v16b|v8h|v4s)$")>;
+def KryoWrite_1cyc_X_X_LS_Y_X_X_LS_Y_X_X_LS_Y_XY_X_X_LS_Y_392ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitX,
+ KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitX, KryoUnitX,
+ KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitX, KryoUnitX,
+ KryoUnitLS, KryoUnitY]> {
+ let Latency = 1; let NumMicroOps = 17;
+}
+def : InstRW<[WriteAdr, KryoWrite_1cyc_X_X_LS_Y_X_X_LS_Y_X_X_LS_Y_XY_X_X_LS_Y_392ln],
+ (instregex "ST4Four(v16b|v8h|v4s)_POST$")>;
+def KryoWrite_0cyc_LS_LS_Y_299ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitY]> {
+ let Latency = 0; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_0cyc_LS_LS_Y_299ln],
+ (instregex "STLR(B|H|W|X)")>;
+def KryoWrite_3cyc_LS_LS_Y_307ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitY]> {
+ let Latency = 3; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_3cyc_LS_LS_Y_307ln],
+ (instregex "STLX(P(W|X)|R(B|H|W|X))")>;
+def KryoWrite_0cyc_LS_Y_276ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY]> {
+ let Latency = 0; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_0cyc_LS_Y_276ln],
+ (instrs STNPDi, STNPSi)>;
+def KryoWrite_0cyc_LS_Y_LS_Y_326ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> {
+ let Latency = 0; let NumMicroOps = 4;
+}
+def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_326ln],
+ (instrs STNPQi)>;
+def KryoWrite_0cyc_LS_Y_280ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY]> {
+ let Latency = 0; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_0cyc_LS_Y_280ln],
+ (instrs STNPWi, STNPXi)>;
+def KryoWrite_0cyc_LS_Y_277ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY]> {
+ let Latency = 0; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_0cyc_LS_Y_277ln],
+ (instregex "STP(D|S)i")>;
+def KryoWrite_1cyc_LS_Y_X_303ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitX]> {
+ let Latency = 1; let NumMicroOps = 3;
+}
+def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_X_303ln],
+ (instregex "STP(D|S)(post|pre)")>;
+def KryoWrite_0cyc_LS_Y_LS_Y_327ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> {
+ let Latency = 0; let NumMicroOps = 4;
+}
+def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_327ln],
+ (instrs STPQi)>;
+def KryoWrite_1cyc_LS_Y_X_LS_Y_343ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitX, KryoUnitLS, KryoUnitY]> {
+ let Latency = 1; let NumMicroOps = 5;
+}
+def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_X_LS_Y_343ln],
+ (instrs STPQpost, STPQpre)>;
+def KryoWrite_0cyc_LS_Y_279ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY]> {
+ let Latency = 0; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_0cyc_LS_Y_279ln],
+ (instregex "STP(W|X)i")>;
+def KryoWrite_1cyc_LS_X_Y_300ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitX, KryoUnitY]> {
+ let Latency = 1; let NumMicroOps = 3;
+}
+def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_X_Y_300ln],
+ (instregex "STP(W|X)(post|pre)")>;
+def KryoWrite_0cyc_LS_Y_278ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY]> {
+ let Latency = 0; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_0cyc_LS_Y_278ln],
+ (instregex "STR(Q|D|S|H|B)ui")>;
+def KryoWrite_1cyc_X_LS_Y_295ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitLS, KryoUnitY]> {
+ let Latency = 1; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_1cyc_X_LS_Y_295ln],
+ (instregex "STR(D|S|H|B)ro(W|X)")>;
+def KryoWrite_1cyc_LS_Y_X_304ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitX]> {
+ let Latency = 1; let NumMicroOps = 3;
+}
+def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_X_304ln],
+ (instregex "STR(Q|D|S|H|B)(post|pre)")>;
+def KryoWrite_2cyc_X_LS_Y_XY_LS_Y_354ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitLS,
+ KryoUnitY]> {
+ let Latency = 2; let NumMicroOps = 6;
+}
+def : InstRW<[KryoWrite_2cyc_X_LS_Y_XY_LS_Y_354ln],
+ (instregex "STRQro(W|X)")>;
+def KryoWrite_0cyc_LS_Y_399ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY]> {
+ let Latency = 0; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_0cyc_LS_Y_399ln],
+ (instregex "STR(BB|HH|W|X)ui")>;
+def KryoWrite_1cyc_X_LS_Y_406ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitLS, KryoUnitY]> {
+ let Latency = 1; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_1cyc_X_LS_Y_406ln],
+ (instregex "STR(BB|HH|W|X)ro(W|X)")>;
+def KryoWrite_1cyc_LS_X_Y_407ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitX, KryoUnitY]> {
+ let Latency = 1; let NumMicroOps = 3;
+}
+def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_X_Y_407ln],
+ (instregex "STR(BB|HH|W|X)(post|pre)")>;
+def KryoWrite_0cyc_LS_Y_398ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY]> {
+ let Latency = 0; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_0cyc_LS_Y_398ln],
+ (instregex "STTR(B|H|W|X)i")>;
+def KryoWrite_0cyc_LS_Y_396ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY]> {
+ let Latency = 0; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_0cyc_LS_Y_396ln],
+ (instregex "STUR(Q|D|S|H|B)i")>;
+def KryoWrite_0cyc_LS_Y_397ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY]> {
+ let Latency = 0; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_0cyc_LS_Y_397ln],
+ (instregex "STUR(BB|HH|W|X)i")>;
+def KryoWrite_3cyc_LS_Y_404ln :
+ SchedWriteRes<[KryoUnitLS, KryoUnitY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_LS_Y_404ln],
+ (instregex "STX(P(W|X)|R(B|H|W|X))")>;
+def KryoWrite_3cyc_XY_noRSV_160ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_noRSV_160ln],
+ (instregex "^(SU|US)QADD(v8i8|v4i16|v2i32)")>;
+def KryoWrite_3cyc_XY_XY_167ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_XY_167ln],
+ (instregex "^(SU|US)QADD(v16i8|v8i16|v4i32|v2i64)")>;
+def KryoWrite_1cyc_XY_1ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_1cyc_XY_1ln, ReadI],
+ (instregex "SUBS?(W|X)ri")>;
+def KryoWrite_2cyc_XY_XY_5ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_XY_5ln, ReadI, ReadIEReg],
+ (instregex "SUBS?(W|X)rx")>;
+def KryoWrite_2cyc_XY_XY_5_1ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 2; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_2cyc_XY_XY_5_1ln, ReadI, ReadISReg],
+ (instregex "SUBS?(W|X)rs")>;
+def KryoWrite_1cyc_XY_noRSV_6ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_XY_noRSV_6ln, ReadI, ReadI],
+ (instregex "SUBS?(W|X)rr")>;
+def KryoWrite_0cyc_LS_9ln :
+ SchedWriteRes<[KryoUnitLS]> {
+ let Latency = 0; let NumMicroOps = 1;
+}
+def : InstRW<[KryoWrite_0cyc_LS_9ln],
+ (instregex "SYSL?xt")>;
+def KryoWrite_1cyc_X_noRSV_205ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_X_noRSV_205ln],
+ (instrs TBLv8i8One)>;
+def KryoWrite_1cyc_X_X_208ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_X_X_208ln],
+ (instrs TBLv16i8One)>;
+def KryoWrite_2cyc_X_X_X_noRSV_222ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX]> {
+ let Latency = 2; let NumMicroOps = 4;
+}
+def : InstRW<[KryoWrite_2cyc_X_X_X_noRSV_222ln],
+ (instrs TBLv8i8Two)>;
+def KryoWrite_2cyc_X_X_X_X_X_X_224ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX,
+ KryoUnitX]> {
+ let Latency = 2; let NumMicroOps = 6;
+}
+def : InstRW<[KryoWrite_2cyc_X_X_X_X_X_X_224ln],
+ (instrs TBLv16i8Two)>;
+def KryoWrite_3cyc_X_X_X_X_X_noRSV_225ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX]> {
+ let Latency = 3; let NumMicroOps = 6;
+}
+def : InstRW<[KryoWrite_3cyc_X_X_X_X_X_noRSV_225ln],
+ (instrs TBLv8i8Three)>;
+def KryoWrite_3cyc_X_X_X_X_X_X_X_noRSV_228ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX,
+ KryoUnitX, KryoUnitX]> {
+ let Latency = 3; let NumMicroOps = 8;
+}
+def : InstRW<[KryoWrite_3cyc_X_X_X_X_X_X_X_noRSV_228ln],
+ (instrs TBLv8i8Four)>;
+def KryoWrite_4cyc_X_X_X_X_X_X_X_X_XY_X_X_230ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX,
+ KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitXY, KryoUnitX,
+ KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 11;
+}
+def : InstRW<[KryoWrite_4cyc_X_X_X_X_X_X_X_X_XY_X_X_230ln],
+ (instrs TBLv16i8Three)>;
+def KryoWrite_4cyc_X_X_X_X_X_X_X_X_X_X_XY_X_X_X_X_232ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX,
+ KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX,
+ KryoUnitXY, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 15;
+}
+def : InstRW<[KryoWrite_4cyc_X_X_X_X_X_X_X_X_X_X_XY_X_X_X_X_232ln],
+ (instrs TBLv16i8Four)>;
+def KryoWrite_2cyc_X_X_noRSV_220ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX]> {
+ let Latency = 2; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_2cyc_X_X_noRSV_220ln],
+ (instrs TBXv8i8One)>;
+def KryoWrite_2cyc_X_X_X_X_221ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX]> {
+ let Latency = 2; let NumMicroOps = 4;
+}
+def : InstRW<[KryoWrite_2cyc_X_X_X_X_221ln],
+ (instrs TBXv16i8One)>;
+def KryoWrite_3cyc_X_X_X_X_noRSV_223ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX]> {
+ let Latency = 3; let NumMicroOps = 5;
+}
+def : InstRW<[KryoWrite_3cyc_X_X_X_X_noRSV_223ln],
+ (instrs TBXv8i8Two)>;
+def KryoWrite_4cyc_X_X_X_X_X_X_noRSV_226ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX,
+ KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 7;
+}
+def : InstRW<[KryoWrite_4cyc_X_X_X_X_X_X_noRSV_226ln],
+ (instrs TBXv8i8Three)>;
+def KryoWrite_3cyc_X_X_X_X_X_X_X_X_227ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX,
+ KryoUnitX, KryoUnitX, KryoUnitX]> {
+ let Latency = 3; let NumMicroOps = 8;
+}
+def : InstRW<[KryoWrite_3cyc_X_X_X_X_X_X_X_X_227ln],
+ (instrs TBXv16i8Two)>;
+def KryoWrite_4cyc_X_X_X_X_X_X_X_X_noRSV_229ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX,
+ KryoUnitX, KryoUnitX, KryoUnitX]> {
+ let Latency = 4; let NumMicroOps = 9;
+}
+def : InstRW<[KryoWrite_4cyc_X_X_X_X_X_X_X_X_noRSV_229ln],
+ (instrs TBXv8i8Four)>;
+def KryoWrite_5cyc_X_X_X_X_X_X_X_X_X_XY_X_X_X_231ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX,
+ KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitXY,
+ KryoUnitX, KryoUnitX, KryoUnitX]> {
+ let Latency = 5; let NumMicroOps = 13;
+}
+def : InstRW<[KryoWrite_5cyc_X_X_X_X_X_X_X_X_X_XY_X_X_X_231ln],
+ (instrs TBXv16i8Three)>;
+def KryoWrite_5cyc_X_X_X_X_X_X_X_X_X_X_X_XY_X_X_X_X_X_233ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX,
+ KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX,
+ KryoUnitX, KryoUnitXY, KryoUnitX, KryoUnitX, KryoUnitX,
+ KryoUnitX, KryoUnitX]> {
+ let Latency = 5; let NumMicroOps = 17;
+}
+def : InstRW<[KryoWrite_5cyc_X_X_X_X_X_X_X_X_X_X_X_XY_X_X_X_X_X_233ln],
+ (instrs TBXv16i8Four)>;
+def KryoWrite_1cyc_XY_XY_217ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_XY_XY_217ln],
+ (instregex "((TRN1|TRN2|ZIP1|UZP1|UZP2)v2i64|ZIP2(v2i64|v4i32|v8i16|v16i8))")>;
+def KryoWrite_1cyc_X_X_211ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_X_X_211ln],
+ (instregex "(TRN1|TRN2)(v4i32|v8i16|v16i8)")>;
+def KryoWrite_1cyc_X_XY_213ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_X_XY_213ln],
+ (instregex "(TRN1|TRN2)(v2i32|v4i16|v8i8)")>;
+def KryoWrite_3cyc_XY_noRSV_156ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_noRSV_156ln],
+ (instrs URECPEv2i32, URSQRTEv2i32)>;
+def KryoWrite_3cyc_XY_XY_168ln :
+ SchedWriteRes<[KryoUnitXY, KryoUnitXY]> {
+ let Latency = 3; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_3cyc_XY_XY_168ln],
+ (instrs URECPEv4i32, URSQRTEv4i32)>;
+def KryoWrite_1cyc_X_X_210ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_X_X_210ln],
+ (instregex "(UZP1|UZP2)(v4i32|v8i16|v16i8)")>;
+def KryoWrite_1cyc_X_noRSV_206ln :
+ SchedWriteRes<[KryoUnitX]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_X_noRSV_206ln],
+ (instregex "(UZP1|UZP2|ZIP1|ZIP2)(v2i32|v4i16|v8i8)")>;
+def KryoWrite_1cyc_XY_noRSV_215ln :
+ SchedWriteRes<[KryoUnitXY]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_XY_noRSV_215ln],
+ (instregex "XTNv.*")>;
+def KryoWrite_1cyc_X_X_209ln :
+ SchedWriteRes<[KryoUnitX, KryoUnitX]> {
+ let Latency = 1; let NumMicroOps = 2;
+}
+def : InstRW<[KryoWrite_1cyc_X_X_209ln],
+ (instregex "ZIP1(v4i32|v8i16|v16i8)")>;
diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedThunderX.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedThunderX.td
new file mode 100644
index 000000000..fbbd3850d
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedThunderX.td
@@ -0,0 +1,357 @@
+//==- AArch64SchedThunderX.td - Cavium ThunderX T8X Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the ARM ThunderX T8X
+// (T88, T81, T83) processors.
+// Loosely based on Cortex-A53 which is somewhat similar.
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the simpler per-operand machine model.
+// This works with MachineScheduler. See llvm/MC/MCSchedule.h for details.
+
+// Cavium ThunderX T8X scheduling machine model.
+def ThunderXT8XModel : SchedMachineModel {
+ let IssueWidth = 2; // 2 micro-ops dispatched per cycle.
+ let MicroOpBufferSize = 0; // ThunderX T88/T81/T83 are in-order.
+ let LoadLatency = 3; // Optimistic load latency.
+ let MispredictPenalty = 8; // Branch mispredict penalty.
+ let PostRAScheduler = 1; // Use PostRA scheduler.
+ let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = [HasSVE];
+
+ // FIXME: Remove when all errors have been fixed.
+ let FullInstRWOverlapCheck = 0;
+}
+
+// Modeling each pipeline with BufferSize == 0 since T8X is in-order.
+def THXT8XUnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU
+def THXT8XUnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC
+def THXT8XUnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division
+def THXT8XUnitLdSt : ProcResource<1> { let BufferSize = 0; } // Load/Store
+def THXT8XUnitBr : ProcResource<1> { let BufferSize = 0; } // Branch
+def THXT8XUnitFPALU : ProcResource<1> { let BufferSize = 0; } // FP ALU
+def THXT8XUnitFPMDS : ProcResource<1> { let BufferSize = 0; } // FP Mul/Div/Sqrt
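+// With BufferSize == 0 these resources are unbuffered: a micro-op stalls at
+// dispatch until its unit is free, which is how the in-order T8X pipelines
+// are modeled.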
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedWrite types mapping the ProcResources and
+// latencies.
+
+let SchedModel = ThunderXT8XModel in {
+
+// ALU
+def : WriteRes<WriteImm, [THXT8XUnitALU]> { let Latency = 1; }
+def : WriteRes<WriteI, [THXT8XUnitALU]> { let Latency = 1; }
+def : WriteRes<WriteISReg, [THXT8XUnitALU]> { let Latency = 2; }
+def : WriteRes<WriteIEReg, [THXT8XUnitALU]> { let Latency = 2; }
+def : WriteRes<WriteIS, [THXT8XUnitALU]> { let Latency = 2; }
+def : WriteRes<WriteExtr, [THXT8XUnitALU]> { let Latency = 2; }
+
+// MAC
+def : WriteRes<WriteIM32, [THXT8XUnitMAC]> {
+ let Latency = 4;
+ let ResourceCycles = [1];
+}
+
+def : WriteRes<WriteIM64, [THXT8XUnitMAC]> {
+ let Latency = 4;
+ let ResourceCycles = [1];
+}
+
+// Div
+def : WriteRes<WriteID32, [THXT8XUnitDiv]> {
+ let Latency = 12;
+ let ResourceCycles = [6];
+}
+
+def : WriteRes<WriteID64, [THXT8XUnitDiv]> {
+ let Latency = 14;
+ let ResourceCycles = [8];
+}
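+// ResourceCycles keeps the single divider busy for 6 (WriteID32) or 8
+// (WriteID64) cycles, so back-to-back divides serialize even though the
+// result latencies are 12 and 14 cycles.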
+
+// Load
+def : WriteRes<WriteLD, [THXT8XUnitLdSt]> { let Latency = 3; }
+def : WriteRes<WriteLDIdx, [THXT8XUnitLdSt]> { let Latency = 3; }
+def : WriteRes<WriteLDHi, [THXT8XUnitLdSt]> { let Latency = 3; }
+
+// Vector Load
+def : WriteRes<WriteVLD, [THXT8XUnitLdSt]> {
+ let Latency = 8;
+ let ResourceCycles = [3];
+}
+
+def THXT8XWriteVLD1 : SchedWriteRes<[THXT8XUnitLdSt]> {
+ let Latency = 6;
+ let ResourceCycles = [1];
+}
+
+def THXT8XWriteVLD2 : SchedWriteRes<[THXT8XUnitLdSt]> {
+ let Latency = 11;
+ let ResourceCycles = [7];
+}
+
+def THXT8XWriteVLD3 : SchedWriteRes<[THXT8XUnitLdSt]> {
+ let Latency = 12;
+ let ResourceCycles = [8];
+}
+
+def THXT8XWriteVLD4 : SchedWriteRes<[THXT8XUnitLdSt]> {
+ let Latency = 13;
+ let ResourceCycles = [9];
+}
+
+def THXT8XWriteVLD5 : SchedWriteRes<[THXT8XUnitLdSt]> {
+ let Latency = 13;
+ let ResourceCycles = [9];
+}
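+// THXT8XWriteVLD1-VLD5 scale latency and LdSt occupancy roughly with the
+// number of registers transferred; the InstRW entries further down map the
+// LD1-LD4 instruction forms onto them.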
+
+// Pre/Post Indexing
+def : WriteRes<WriteAdr, []> { let Latency = 0; }
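+// WriteAdr models the base-register writeback of pre/post-indexed addressing
+// modes; with no unit and zero latency it is treated as free here.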
+
+// Store
+def : WriteRes<WriteST, [THXT8XUnitLdSt]> { let Latency = 1; }
+def : WriteRes<WriteSTP, [THXT8XUnitLdSt]> { let Latency = 1; }
+def : WriteRes<WriteSTIdx, [THXT8XUnitLdSt]> { let Latency = 1; }
+def : WriteRes<WriteSTX, [THXT8XUnitLdSt]> { let Latency = 1; }
+
+// Vector Store
+def : WriteRes<WriteVST, [THXT8XUnitLdSt]>;
+def THXT8XWriteVST1 : SchedWriteRes<[THXT8XUnitLdSt]>;
+
+def THXT8XWriteVST2 : SchedWriteRes<[THXT8XUnitLdSt]> {
+ let Latency = 10;
+ let ResourceCycles = [9];
+}
+
+def THXT8XWriteVST3 : SchedWriteRes<[THXT8XUnitLdSt]> {
+ let Latency = 11;
+ let ResourceCycles = [10];
+}
+
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
+// Branch
+def : WriteRes<WriteBr, [THXT8XUnitBr]>;
+def THXT8XWriteBR : SchedWriteRes<[THXT8XUnitBr]>;
+def : WriteRes<WriteBrReg, [THXT8XUnitBr]>;
+def THXT8XWriteBRR : SchedWriteRes<[THXT8XUnitBr]>;
+def THXT8XWriteRET : SchedWriteRes<[THXT8XUnitALU]>;
+def : WriteRes<WriteSys, [THXT8XUnitBr]>;
+def : WriteRes<WriteBarrier, [THXT8XUnitBr]>;
+def : WriteRes<WriteHint, [THXT8XUnitBr]>;
+
+// FP ALU
+def : WriteRes<WriteF, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteFCmp, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteFCvt, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteFCopy, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteFImm, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteV, [THXT8XUnitFPALU]> { let Latency = 6; }
+
+// FP Mul, Div, Sqrt
+def : WriteRes<WriteFMul, [THXT8XUnitFPMDS]> { let Latency = 6; }
+def : WriteRes<WriteFDiv, [THXT8XUnitFPMDS]> {
+ let Latency = 22;
+ let ResourceCycles = [19];
+}
+
+def THXT8XWriteFMAC : SchedWriteRes<[THXT8XUnitFPMDS]> { let Latency = 10; }
+
+def THXT8XWriteFDivSP : SchedWriteRes<[THXT8XUnitFPMDS]> {
+ let Latency = 12;
+ let ResourceCycles = [9];
+}
+
+def THXT8XWriteFDivDP : SchedWriteRes<[THXT8XUnitFPMDS]> {
+ let Latency = 22;
+ let ResourceCycles = [19];
+}
+
+def THXT8XWriteFSqrtSP : SchedWriteRes<[THXT8XUnitFPMDS]> {
+ let Latency = 17;
+ let ResourceCycles = [14];
+}
+
+def THXT8XWriteFSqrtDP : SchedWriteRes<[THXT8XUnitFPMDS]> {
+ let Latency = 31;
+ let ResourceCycles = [28];
+}
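+// FP divide and square root are effectively unpipelined: the FPMDS unit is
+// held for most of the instruction's latency (e.g. 19 of 22 cycles for a
+// double-precision divide), so overlapping operations stall.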
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedRead types.
+
+// These operands are read 1-2 cycles after issue, trimming the effective
+// latency of their producers.
+def : ReadAdvance<ReadExtrHi, 1>;
+def : ReadAdvance<ReadAdrBase, 2>;
+def : ReadAdvance<ReadVLD, 2>;
+
+// FIXME: This needs more targeted benchmarking.
+// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable
+// operands are needed one cycle later if and only if they are to be
+// shifted. Otherwise, they too are needed two cycles later. This same
+// ReadAdvance applies to Extended registers as well, even though there is
+// a separate SchedPredicate for them.
+def : ReadAdvance<ReadI, 2, [WriteImm, WriteI,
+ WriteISReg, WriteIEReg, WriteIS,
+ WriteID32, WriteID64,
+ WriteIM32, WriteIM64]>;
+def THXT8XReadShifted : SchedReadAdvance<1, [WriteImm, WriteI,
+ WriteISReg, WriteIEReg, WriteIS,
+ WriteID32, WriteID64,
+ WriteIM32, WriteIM64]>;
+def THXT8XReadNotShifted : SchedReadAdvance<2, [WriteImm, WriteI,
+ WriteISReg, WriteIEReg, WriteIS,
+ WriteID32, WriteID64,
+ WriteIM32, WriteIM64]>;
+def THXT8XReadISReg : SchedReadVariant<[
+ SchedVar<RegShiftedPred, [THXT8XReadShifted]>,
+ SchedVar<NoSchedPred, [THXT8XReadNotShifted]>]>;
+def : SchedAlias<ReadISReg, THXT8XReadISReg>;
+
+def THXT8XReadIEReg : SchedReadVariant<[
+ SchedVar<RegExtendedPred, [THXT8XReadShifted]>,
+ SchedVar<NoSchedPred, [THXT8XReadNotShifted]>]>;
+def : SchedAlias<ReadIEReg, THXT8XReadIEReg>;
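+// The two SchedReadVariants above give shifted (RegShiftedPred) or extended
+// (RegExtendedPred) source registers a 1-cycle read advance and all other
+// sources a 2-cycle advance, matching the ALU note above.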
+
+// MAC - Operands are generally needed one cycle later in the MAC pipe.
+// Accumulator operands are needed two cycles later.
+def : ReadAdvance<ReadIM, 1, [WriteImm,WriteI,
+ WriteISReg, WriteIEReg, WriteIS,
+ WriteID32, WriteID64,
+ WriteIM32, WriteIM64]>;
+def : ReadAdvance<ReadIMA, 2, [WriteImm, WriteI,
+ WriteISReg, WriteIEReg, WriteIS,
+ WriteID32, WriteID64,
+ WriteIM32, WriteIM64]>;
+
+// Div
+def : ReadAdvance<ReadID, 1, [WriteImm, WriteI,
+ WriteISReg, WriteIEReg, WriteIS,
+ WriteID32, WriteID64,
+ WriteIM32, WriteIM64]>;
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific InstRW.
+
+//---
+// Branch
+//---
+def : InstRW<[THXT8XWriteBR], (instregex "^B$")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^BL$")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^B..$")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^CBNZ")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^CBZ")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^TBNZ")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^TBZ")>;
+def : InstRW<[THXT8XWriteBRR], (instregex "^BR$")>;
+def : InstRW<[THXT8XWriteBRR], (instregex "^BLR$")>;
+
+//---
+// Ret
+//---
+def : InstRW<[THXT8XWriteRET], (instregex "^RET$")>;
+
+//---
+// Miscellaneous
+//---
+def : InstRW<[WriteI], (instrs COPY)>;
+
+//---
+// Vector Loads
+//---
+def : InstRW<[THXT8XWriteVLD1], (instregex "LD1i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[THXT8XWriteVLD1], (instregex "LD2i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[THXT8XWriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD2i(8|16|32|64)(_POST)?$")>;
+def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
+def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>;
+def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>;
+
+def : InstRW<[THXT8XWriteVLD2], (instregex "LD3i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD4], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
+def : InstRW<[THXT8XWriteVLD3], (instregex "LD3Threev(2d)$")>;
+def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[THXT8XWriteVLD3, WriteAdr], (instregex "LD3Threev(2d)_POST$")>;
+
+def : InstRW<[THXT8XWriteVLD2], (instregex "LD4i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD5], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
+def : InstRW<[THXT8XWriteVLD4], (instregex "LD4Fourv(2d)$")>;
+def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVLD5, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>;
+
+//---
+// Vector Stores
+//---
+def : InstRW<[THXT8XWriteVST1], (instregex "ST1i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVST2], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[THXT8XWriteVST1], (instregex "ST2i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVST1], (instregex "ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[THXT8XWriteVST2], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[THXT8XWriteVST2], (instregex "ST3i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVST3], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
+def : InstRW<[THXT8XWriteVST2], (instregex "ST3Threev(2d)$")>;
+def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVST3, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST3Threev(2d)_POST$")>;
+
+def : InstRW<[THXT8XWriteVST2], (instregex "ST4i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVST3], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
+def : InstRW<[THXT8XWriteVST2], (instregex "ST4Fourv(2d)$")>;
+def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVST3, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
+
+//---
+// Floating Point MAC, DIV, SQRT
+//---
+def : InstRW<[THXT8XWriteFMAC], (instregex "^FN?M(ADD|SUB).*")>;
+def : InstRW<[THXT8XWriteFMAC], (instregex "^FML(A|S).*")>;
+def : InstRW<[THXT8XWriteFDivSP], (instrs FDIVSrr)>;
+def : InstRW<[THXT8XWriteFDivDP], (instrs FDIVDrr)>;
+def : InstRW<[THXT8XWriteFDivSP], (instregex "^FDIVv.*32$")>;
+def : InstRW<[THXT8XWriteFDivDP], (instregex "^FDIVv.*64$")>;
+def : InstRW<[THXT8XWriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
+def : InstRW<[THXT8XWriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
+
+}
diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedThunderX2T99.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedThunderX2T99.td
new file mode 100644
index 000000000..bee3392b6
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedThunderX2T99.td
@@ -0,0 +1,1880 @@
+//=- AArch64SchedThunderX2T99.td - Cavium ThunderX2 T99 --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the scheduling model for Cavium ThunderX2T99
+// processors.
+// Based on Broadcom Vulcan.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// 2. Pipeline Description.
+
+def ThunderX2T99Model : SchedMachineModel {
+ let IssueWidth = 4; // 4 micro-ops dispatched at a time.
+ let MicroOpBufferSize = 180; // 180 entries in micro-op re-order buffer.
+ let LoadLatency = 4; // Optimistic load latency.
+ let MispredictPenalty = 12; // Extra cycles for mispredicted branch.
+ // Determined via a mix of micro-arch details and experimentation.
+ let LoopMicroOpBufferSize = 128;
+ let PostRAScheduler = 1; // Using PostRA sched.
+ let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = [HasSVE];
+
+ // FIXME: Remove when all errors have been fixed.
+ let FullInstRWOverlapCheck = 0;
+}
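+
+// CompleteModel above requires scheduling information for every instruction not
+// excluded through UnsupportedFeatures; SVE opcodes are carved out, so the
+// completeness claim is presumed to hold only for the remaining ISA.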
+
+let SchedModel = ThunderX2T99Model in {
+
+// Define the issue ports.
+
+// Port 0: ALU, FP/SIMD.
+def THX2T99P0 : ProcResource<1>;
+
+// Port 1: ALU, FP/SIMD, integer mul/div.
+def THX2T99P1 : ProcResource<1>;
+
+// Port 2: ALU, Branch.
+def THX2T99P2 : ProcResource<1>;
+
+// Port 3: Store data.
+def THX2T99P3 : ProcResource<1>;
+
+// Port 4: Load/store.
+def THX2T99P4 : ProcResource<1>;
+
+// Port 5: Load/store.
+def THX2T99P5 : ProcResource<1>;
+
+// Define groups for the functional units on each issue port. Each group
+// created will be used by a WriteRes later on.
+//
+// NOTE: Some groups only contain one member. This is a way to create names for
+// the various functional units that share a single issue port. For example,
+// "THX2T99I1" for ALU ops on port 1 and "THX2T99F1" for FP ops on port 1.
+
+// Integer divide and multiply micro-ops only on port 1.
+def THX2T99I1 : ProcResGroup<[THX2T99P1]>;
+
+// Branch micro-ops only on port 2.
+def THX2T99I2 : ProcResGroup<[THX2T99P2]>;
+
+// ALU micro-ops on ports 0, 1, and 2.
+def THX2T99I012 : ProcResGroup<[THX2T99P0, THX2T99P1, THX2T99P2]>;
+
+// Crypto FP/SIMD micro-ops only on port 1.
+def THX2T99F1 : ProcResGroup<[THX2T99P1]>;
+
+// FP/SIMD micro-ops on ports 0 and 1.
+def THX2T99F01 : ProcResGroup<[THX2T99P0, THX2T99P1]>;
+
+// Store data micro-ops only on port 3.
+def THX2T99SD : ProcResGroup<[THX2T99P3]>;
+
+// Load/store micro-ops on ports 4 and 5.
+def THX2T99LS01 : ProcResGroup<[THX2T99P4, THX2T99P5]>;
+
+// 60-entry unified scheduler.
+def THX2T99Any : ProcResGroup<[THX2T99P0, THX2T99P1, THX2T99P2,
+ THX2T99P3, THX2T99P4, THX2T99P5]> {
+ let BufferSize = 60;
+}
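+
+// NOTE: BufferSize on a ProcResGroup bounds how many micro-ops may sit queued
+// on the grouped ports, so the value above is read here as the depth of the
+// unified scheduler rather than of any per-port queue.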
+
+// Define commonly used write types for InstRW specializations.
+// All definitions follow the format: THX2T99Write_<NumCycles>Cyc_<Resources>.
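+//
+// These write types take effect only once the instruction tables below attach
+// them with InstRW; for instance, THX2T99Write_4Cyc_I1 is bound to SMULHrr and
+// UMULHrr in the multiply-high entries further down.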
+
+// 3 cycles on I1.
+def THX2T99Write_3Cyc_I1 : SchedWriteRes<[THX2T99I1]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+// 1 cycle on I2.
+def THX2T99Write_1Cyc_I2 : SchedWriteRes<[THX2T99I2]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// 4 cycles on I1.
+def THX2T99Write_4Cyc_I1 : SchedWriteRes<[THX2T99I1]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+// 23 cycles on I1.
+def THX2T99Write_23Cyc_I1 : SchedWriteRes<[THX2T99I1]> {
+ let Latency = 23;
+ let ResourceCycles = [13, 23];
+ let NumMicroOps = 4;
+}
+
+// 39 cycles on I1.
+def THX2T99Write_39Cyc_I1 : SchedWriteRes<[THX2T99I1]> {
+ let Latency = 39;
+ let ResourceCycles = [13, 39];
+ let NumMicroOps = 4;
+}
+
+// 1 cycle on I0, I1, or I2.
+def THX2T99Write_1Cyc_I012 : SchedWriteRes<[THX2T99I012]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// 2 cycles on I0, I1, or I2.
+def THX2T99Write_2Cyc_I012 : SchedWriteRes<[THX2T99I012]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+// 4 cycles on I0, I1, or I2.
+def THX2T99Write_4Cyc_I012 : SchedWriteRes<[THX2T99I012]> {
+  let Latency = 4;
+  let NumMicroOps = 3;
+}
+
+// 5 cycles on I0, I1, or I2.
+def THX2T99Write_5Cyc_I012 : SchedWriteRes<[THX2T99I012]> {
+  let Latency = 5;
+  let NumMicroOps = 3;
+}
+
+// 5 cycles on F1.
+def THX2T99Write_5Cyc_F1 : SchedWriteRes<[THX2T99F1]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+// 7 cycles on F1.
+def THX2T99Write_7Cyc_F1 : SchedWriteRes<[THX2T99F1]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+
+// 4 cycles on F0 or F1.
+def THX2T99Write_4Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+// 5 cycles on F0 or F1.
+def THX2T99Write_5Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+// 6 cycles on F0 or F1.
+def THX2T99Write_6Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+// 7 cycles on F0 or F1.
+def THX2T99Write_7Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+
+// 8 cycles on F0 or F1.
+def THX2T99Write_8Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+
+// 10 cycles on F0 or F1.
+def THX2T99Write_10Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+}
+
+// 16 cycles on F0 or F1.
+def THX2T99Write_16Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 16;
+ let NumMicroOps = 3;
+ let ResourceCycles = [8];
+}
+
+// 23 cycles on F0 or F1.
+def THX2T99Write_23Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 23;
+ let NumMicroOps = 3;
+ let ResourceCycles = [11];
+}
+
+// 1 cycle on LS0 or LS1.
+def THX2T99Write_1Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> {
+ let Latency = 0;
+}
+
+// 1 cycle on LS0 or LS1 and I0, I1, or I2.
+def THX2T99Write_1Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
+ let Latency = 0;
+ let NumMicroOps = 2;
+}
+
+// 1 cycle on LS0 or LS1 and 2 of I0, I1, or I2.
+def THX2T99Write_1Cyc_LS01_I012_I012 :
+ SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> {
+ let Latency = 0;
+ let NumMicroOps = 3;
+}
+
+// 2 cycles on LS0 or LS1.
+def THX2T99Write_2Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// 4 cycles on LS0 or LS1.
+def THX2T99Write_4Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+
+// 5 cycles on LS0 or LS1.
+def THX2T99Write_5Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
+// 6 cycles on LS0 or LS1.
+def THX2T99Write_6Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+// 4 cycles on LS0 or LS1 and I0, I1, or I2.
+def THX2T99Write_4Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+
+// 4 cycles on LS0 or LS1 and 2 of I0, I1, or I2.
+def THX2T99Write_4Cyc_LS01_I012_I012 :
+ SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+
+// 5 cycles on LS0 or LS1 and I0, I1, or I2.
+def THX2T99Write_5Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
+// 5 cycles on LS0 or LS1 and 2 of I0, I1, or I2.
+def THX2T99Write_5Cyc_LS01_I012_I012 :
+ SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
+// 6 cycles on LS0 or LS1 and I0, I1, or I2.
+def THX2T99Write_6Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+
+// 6 cycles on LS0 or LS1 and 2 of I0, I1, or I2.
+def THX2T99Write_6Cyc_LS01_I012_I012 :
+ SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+// 1 cycle on LS0 or LS1 and F0 or F1.
+def THX2T99Write_1Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// 5 cycles on LS0 or LS1 and F0 or F1.
+def THX2T99Write_5Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
+// 6 cycles on LS0 or LS1 and F0 or F1.
+def THX2T99Write_6Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+// 7 cycles on LS0 or LS1 and F0 or F1.
+def THX2T99Write_7Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+
+// 8 cycles on LS0 or LS1 and F0 or F1.
+def THX2T99Write_8Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+
+// 8 cycles on LS0 or LS1 and I0, I1, or I2.
+def THX2T99Write_8Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+
+// 12 cycles on LS0 or LS1 and I0, I1, or I2.
+def THX2T99Write_12Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
+ let Latency = 12;
+ let NumMicroOps = 6;
+}
+
+// 16 cycles on LS0 or LS1 and I0, I1, or I2.
+def THX2T99Write_16Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
+ let Latency = 16;
+ let NumMicroOps = 8;
+}
+
+// 24 cycles on LS0 or LS1 and I0, I1, or I2.
+def THX2T99Write_24Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
+ let Latency = 24;
+ let NumMicroOps = 12;
+}
+
+// 32 cycles on LS0 or LS1 and I0, I1, or I2.
+def THX2T99Write_32Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
+ let Latency = 32;
+ let NumMicroOps = 16;
+}
+
+// Define commonly used read types.
+
+// No forwarding is provided for these types.
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+def : ReadAdvance<ReadIMA, 0>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD, 0>;
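+
+// A ReadAdvance of 0 grants the consuming operand no forwarding credit; a value
+// of N would let the consumer see the producing result N cycles early. None of
+// the read types above declare such a bypass.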
+
+//===----------------------------------------------------------------------===//
+// 3. Instruction Tables.
+
+//---
+// 3.1 Branch Instructions
+//---
+
+// Branch, immed
+// Branch and link, immed
+// Compare and branch
+def : WriteRes<WriteBr, [THX2T99I2]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// Branch, register
+// Branch and link, register != LR
+// Branch and link, register = LR
+def : WriteRes<WriteBrReg, [THX2T99I2]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+def : WriteRes<WriteSys, []> { let Latency = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+
+def : WriteRes<WriteAtomic, []> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+//---
+// Branch
+//---
+def : InstRW<[THX2T99Write_1Cyc_I2], (instrs B, BL, BR, BLR)>;
+def : InstRW<[THX2T99Write_1Cyc_I2], (instrs RET)>;
+def : InstRW<[THX2T99Write_1Cyc_I2], (instregex "^B..$")>;
+def : InstRW<[THX2T99Write_1Cyc_I2],
+ (instregex "^CBZ", "^CBNZ", "^TBZ", "^TBNZ")>;
+
+//---
+// 3.2 Arithmetic and Logical Instructions
+// 3.3 Move and Shift Instructions
+//---
+
+
+// ALU, basic
+// Conditional compare
+// Conditional select
+// Address generation
+def : WriteRes<WriteI, [THX2T99I012]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+ let NumMicroOps = 2;
+}
+
+def : InstRW<[WriteI],
+ (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
+ "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
+ "ADC(W|X)r",
+ "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
+ "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
+ "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
+ "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r",
+ "SBCS(W|X)r", "CCMN(W|X)(i|r)",
+ "CCMP(W|X)(i|r)", "CSEL(W|X)r",
+ "CSINC(W|X)r", "CSINV(W|X)r",
+ "CSNEG(W|X)r")>;
+
+def : InstRW<[WriteI], (instrs COPY)>;
+
+// ALU, extend and/or shift
+def : WriteRes<WriteISReg, [THX2T99I012]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2;
+}
+
+def : InstRW<[WriteISReg],
+ (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
+ "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
+ "ADC(W|X)r",
+ "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
+ "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
+ "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
+ "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r",
+ "SBCS(W|X)r", "CCMN(W|X)(i|r)",
+ "CCMP(W|X)(i|r)", "CSEL(W|X)r",
+ "CSINC(W|X)r", "CSINV(W|X)r",
+ "CSNEG(W|X)r")>;
+
+def : WriteRes<WriteIEReg, [THX2T99I012]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+ let NumMicroOps = 2;
+}
+
+def : InstRW<[WriteIEReg],
+ (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
+ "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
+ "ADC(W|X)r",
+ "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
+ "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
+ "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
+ "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r",
+ "SBCS(W|X)r", "CCMN(W|X)(i|r)",
+ "CCMP(W|X)(i|r)", "CSEL(W|X)r",
+ "CSINC(W|X)r", "CSINV(W|X)r",
+ "CSNEG(W|X)r")>;
+
+// Move immed
+def : WriteRes<WriteImm, [THX2T99I012]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+def : InstRW<[THX2T99Write_1Cyc_I012],
+ (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>;
+
+def : InstRW<[THX2T99Write_1Cyc_I012],
+ (instrs ASRVWr, ASRVXr, LSLVWr, LSLVXr, RORVWr, RORVXr)>;
+
+// Variable shift
+def : WriteRes<WriteIS, [THX2T99I012]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+//---
+// 3.4 Divide and Multiply Instructions
+//---
+
+// Divide, W-form
+// Latency range of 13-23 (W-form) / 13-39 (X-form); the worst case is modeled.
+def : WriteRes<WriteID32, [THX2T99I1]> {
+  let Latency = 23;
+  let ResourceCycles = [23];
+  let NumMicroOps = 4;
+}
+
+// Divide, X-form
+def : WriteRes<WriteID64, [THX2T99I1]> {
+  let Latency = 39;
+  let ResourceCycles = [39];
+  let NumMicroOps = 4;
+}
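+
+// With a non-pipelined divider, the ResourceCycles values above hold I1 for the
+// full operation, so back-to-back divides (and any other work needing I1) are
+// assumed to stall behind the one in flight.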
+
+// Multiply accumulate, W-form
+def : WriteRes<WriteIM32, [THX2T99I012]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
+// Multiply accumulate, X-form
+def : WriteRes<WriteIM64, [THX2T99I012]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
+//def : InstRW<[WriteIM32, ReadIM, ReadIM, ReadIMA, THX2T99Write_5Cyc_I012],
+// (instrs MADDWrrr, MSUBWrrr)>;
+def : InstRW<[WriteIM32], (instrs MADDWrrr, MSUBWrrr)>;
+def : InstRW<[WriteIM32], (instrs MADDXrrr, MSUBXrrr)>;
+def : InstRW<[THX2T99Write_5Cyc_I012],
+ (instregex "(S|U)(MADDL|MSUBL)rrr")>;
+
+def : InstRW<[WriteID32], (instrs SDIVWr, UDIVWr)>;
+def : InstRW<[WriteID64], (instrs SDIVXr, UDIVXr)>;
+
+// Bitfield extract, two reg
+def : WriteRes<WriteExtr, [THX2T99I012]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// Multiply high
+def : InstRW<[THX2T99Write_4Cyc_I1], (instrs SMULHrr, UMULHrr)>;
+
+// Miscellaneous Data-Processing Instructions
+// Bitfield extract
+def : InstRW<[THX2T99Write_1Cyc_I012], (instrs EXTRWrri, EXTRXrri)>;
+
+// Bitfield move, basic
+def : InstRW<[THX2T99Write_1Cyc_I012],
+ (instrs SBFMWri, SBFMXri, UBFMWri, UBFMXri)>;
+
+// Bitfield move, insert
+def : InstRW<[THX2T99Write_1Cyc_I012], (instregex "^BFM")>;
+def : InstRW<[THX2T99Write_1Cyc_I012], (instregex "(S|U)?BFM.*")>;
+
+// Count leading
+def : InstRW<[THX2T99Write_3Cyc_I1], (instregex "^CLS(W|X)r$",
+ "^CLZ(W|X)r$")>;
+
+// Reverse bits
+def : InstRW<[THX2T99Write_1Cyc_I012], (instrs RBITWr, RBITXr)>;
+
+// Cryptography Extensions
+def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AES[DE]")>;
+def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AESI?MC")>;
+def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^PMULL")>;
+def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA1SU0")>;
+def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA1(H|SU1)")>;
+def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA1[CMP]")>;
+def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA256SU0")>;
+def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA256(H|H2|SU1)")>;
+
+// CRC Instructions
+// def : InstRW<[THX2T99Write_4Cyc_I1], (instregex "^CRC32", "^CRC32C")>;
+def : InstRW<[THX2T99Write_4Cyc_I1],
+ (instrs CRC32Brr, CRC32Hrr, CRC32Wrr, CRC32Xrr)>;
+
+def : InstRW<[THX2T99Write_4Cyc_I1],
+ (instrs CRC32CBrr, CRC32CHrr, CRC32CWrr, CRC32CXrr)>;
+
+// Reverse bits/bytes
+// NOTE: Handled by WriteI.
+
+//---
+// 3.6 Load Instructions
+// 3.10 FP Load Instructions
+//---
+
+// Load register, literal
+// Load register, unscaled immed
+// Load register, immed unprivileged
+// Load register, unsigned immed
+def : WriteRes<WriteLD, [THX2T99LS01]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+
+// Load register, immed post-index
+// NOTE: Handled by WriteLD, WriteI.
+// Load register, immed pre-index
+// NOTE: Handled by WriteLD, WriteAdr.
+def : WriteRes<WriteAdr, [THX2T99I012]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// Load pair, immed offset, normal
+// Load pair, immed offset, signed words, base != SP
+// Load pair, immed offset signed words, base = SP
+// LDP only breaks into *one* LS micro-op. Thus
+// the resources are handled by WriteLD.
+def : WriteRes<WriteLDHi, []> {
+ let Latency = 5;
+ let NumMicroOps = 5;
+}
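+
+// For example, LDPXi below is mapped to [THX2T99Write_5Cyc_LS01_I012_I012,
+// WriteLDHi]: a single LS micro-op produces both destinations, with the second
+// result carrying the WriteLDHi latency defined above.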
+
+// Load register offset, basic
+// Load register, register offset, scale by 4/8
+// Load register, register offset, scale by 2
+// Load register offset, extend
+// Load register, register offset, extend, scale by 4/8
+// Load register, register offset, extend, scale by 2
+def THX2T99WriteLDIdx : SchedWriteVariant<[
+ SchedVar<ScaledIdxPred, [THX2T99Write_6Cyc_LS01_I012_I012]>,
+ SchedVar<NoSchedPred, [THX2T99Write_5Cyc_LS01_I012]>]>;
+def : SchedAlias<WriteLDIdx, THX2T99WriteLDIdx>;
+
+def THX2T99ReadAdrBase : SchedReadVariant<[
+ SchedVar<ScaledIdxPred, [ReadDefault]>,
+ SchedVar<NoSchedPred, [ReadDefault]>]>;
+def : SchedAlias<ReadAdrBase, THX2T99ReadAdrBase>;
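+
+// Under these variants, register-offset loads with a scaled index resolve to the
+// 6-cycle LS01 + 2xI012 write and every other addressing form falls back to the
+// 5-cycle LS01 + I012 write, while the base-register read keeps its default
+// timing in both cases.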
+
+// Load pair, immed pre-index, normal
+// Load pair, immed pre-index, signed words
+// Load pair, immed post-index, normal
+// Load pair, immed post-index, signed words
+// NOTE: Handled by WriteLD, WriteLDHi, WriteAdr.
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPDi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPQi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPSi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPWi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPXi)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPDi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPQi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPSi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPSWi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPWi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPXi)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRBui)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRDui)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRHui)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01], (instrs LDRQui)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01], (instrs LDRSui)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRDl)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRQl)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRWl)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRXl)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRBi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRHi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRWi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRXi)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSBWi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSBXi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSHWi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSHXi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSWi)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPDpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPQpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPSpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPWpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+             (instrs LDPXpre)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRBpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRDpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRHpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRQpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRSpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRWpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRXpre)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBWpre)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBXpre)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBWpost)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBXpost)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHWpre)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHXpre)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHWpost)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHXpost)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRBBpre)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRBBpost)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRHHpre)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRHHpost)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPDpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPQpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPSpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPWpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPXpost)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRBpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRDpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRHpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRQpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRSpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRWpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRXpost)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPDpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPQpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPSpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPWpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPXpre)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRBpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRDpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRHpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRQpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRSpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRWpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRXpre)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPDpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPQpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPSpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPWpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPXpost)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRBpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRDpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRHpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRQpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRSpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRWpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRXpost)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRBroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRDroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHHroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRQroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHWroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHXroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRWroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRXroW)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRBroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRDroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHHroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRQroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHWroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHXroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRWroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRXroX)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRBroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRBroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRDroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRHroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRHHroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRQroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRSroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRSHWroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRSHXroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRWroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRXroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRBroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRDroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRHroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRHHroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRQroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRSroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRSHWroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRSHXroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRWroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRXroX)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURBi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURBBi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURDi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURHi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURHHi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURQi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURXi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSBWi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSBXi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSHWi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSHXi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSWi)>;
+
+//---
+// Prefetch
+//---
+def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMl)>;
+def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFUMi)>;
+def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMui)>;
+def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMroW)>;
+def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMroX)>;
+
+//--
+// 3.7 Store Instructions
+// 3.11 FP Store Instructions
+//--
+
+// Store register, unscaled immed
+// Store register, immed unprivileged
+// Store register, unsigned immed
+def : WriteRes<WriteST, [THX2T99LS01, THX2T99SD]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// Store register, immed post-index
+// NOTE: Handled by WriteAdr, WriteST, ReadAdrBase
+
+// Store register, immed pre-index
+// NOTE: Handled by WriteAdr, WriteST
+
+// Store register, register offset, basic
+// Store register, register offset, scaled by 4/8
+// Store register, register offset, scaled by 2
+// Store register, register offset, extend
+// Store register, register offset, extend, scale by 4/8
+// Store register, register offset, extend, scale by 1
+def : WriteRes<WriteSTIdx, [THX2T99LS01, THX2T99SD, THX2T99I012]> {
+ let Latency = 1;
+ let NumMicroOps = 3;
+}
+
+// Store pair, immed offset, W-form
+// Store pair, immed offset, X-form
+def : WriteRes<WriteSTP, [THX2T99LS01, THX2T99SD]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// Store pair, immed post-index, W-form
+// Store pair, immed post-index, X-form
+// Store pair, immed pre-index, W-form
+// Store pair, immed pre-index, X-form
+// NOTE: Handled by WriteAdr, WriteSTP.
+
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURBi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURBBi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURDi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURHi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURHHi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURQi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURSi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURWi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURXi)>;
+
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRBi)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRHi)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRWi)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRXi)>;
+
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPDi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPQi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPXi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPWi)>;
+
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPDi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPQi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPXi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPWi)>;
+
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRBui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRBui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRDui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRDui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRHui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRHui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRQui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRQui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRXui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRXui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRWui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRWui)>;
+
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STPDpre, STPDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STPDpre, STPDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STPDpre, STPDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STPDpre, STPDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STPQpre, STPQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STPQpre, STPQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STPQpre, STPQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STPQpre, STPQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STPSpre, STPSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STPSpre, STPSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STPSpre, STPSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STPSpre, STPSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STPWpre, STPWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STPWpre, STPWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STPWpre, STPWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STPWpre, STPWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STPXpre, STPXpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STPXpre, STPXpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STPXpre, STPXpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STPXpre, STPXpost)>;
+
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRBpre, STRBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRBpre, STRBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRBpre, STRBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRBpre, STRBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRBBpre, STRBBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRBBpre, STRBBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRBBpre, STRBBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRBBpre, STRBBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRDpre, STRDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRDpre, STRDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRDpre, STRDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRDpre, STRDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRHpre, STRHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRHpre, STRHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRHpre, STRHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRHpre, STRHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRHHpre, STRHHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRHHpre, STRHHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRHHpre, STRHHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRHHpre, STRHHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRQpre, STRQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRQpre, STRQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRQpre, STRQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRQpre, STRQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRSpre, STRSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRSpre, STRSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRSpre, STRSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRSpre, STRSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRWpre, STRWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRWpre, STRWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRWpre, STRWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRWpre, STRWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRXpre, STRXpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRXpre, STRXpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRXpre, STRXpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRXpre, STRXpost)>;
+
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRBroW, STRBroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRBroW, STRBroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRBBroW, STRBBroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRBBroW, STRBBroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRDroW, STRDroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRDroW, STRDroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRHroW, STRHroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRHroW, STRHroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRHHroW, STRHHroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRHHroW, STRHHroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRQroW, STRQroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRQroW, STRQroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRSroW, STRSroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRSroW, STRSroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRWroW, STRWroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRWroW, STRWroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRXroW, STRXroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRXroW, STRXroX)>;
+
+//---
+// 3.8 FP Data Processing Instructions
+//---
+
+// FP absolute value
+// FP min/max
+// FP negate
+def : WriteRes<WriteF, [THX2T99F01]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+// FP arithmetic
+def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADD", "^FSUB")>;
+
+// FP compare
+def : WriteRes<WriteFCmp, [THX2T99F01]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+// FP Mul, Div, Sqrt
+def : WriteRes<WriteFDiv, [THX2T99F01]> {
+ let Latency = 22;
+ let ResourceCycles = [19];
+}
+
+def THX2T99XWriteFDiv : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 16;
+ let ResourceCycles = [8];
+ let NumMicroOps = 4;
+}
+
+def THX2T99XWriteFDivSP : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 16;
+ let ResourceCycles = [8];
+ let NumMicroOps = 4;
+}
+
+def THX2T99XWriteFDivDP : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 23;
+ let ResourceCycles = [12];
+ let NumMicroOps = 4;
+}
+
+def THX2T99XWriteFSqrtSP : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 16;
+ let ResourceCycles = [8];
+ let NumMicroOps = 4;
+}
+
+def THX2T99XWriteFSqrtDP : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 23;
+ let ResourceCycles = [12];
+ let NumMicroOps = 4;
+}
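+
+// The ResourceCycles above are taken to model a partially pipelined FP
+// divide/sqrt unit: results return after 16 (SP) or 23 (DP) cycles, but F0/F1
+// can only start a new divide or square root every 8 or 12 cycles respectively.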
+
+// FP divide, S-form
+// FP square root, S-form
+def : InstRW<[THX2T99XWriteFDivSP], (instrs FDIVSrr)>;
+def : InstRW<[THX2T99XWriteFSqrtSP], (instrs FSQRTSr)>;
+def : InstRW<[THX2T99XWriteFDivSP], (instregex "^FDIVv.*32$")>;
+def : InstRW<[THX2T99XWriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
+def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "^FDIVSrr", "^FSQRTSr")>;
+
+// FP divide, D-form
+// FP square root, D-form
+def : InstRW<[THX2T99XWriteFDivDP], (instrs FDIVDrr)>;
+def : InstRW<[THX2T99XWriteFSqrtDP], (instrs FSQRTDr)>;
+def : InstRW<[THX2T99XWriteFDivDP], (instregex "^FDIVv.*64$")>;
+def : InstRW<[THX2T99XWriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
+def : InstRW<[THX2T99Write_23Cyc_F01], (instregex "^FDIVDrr", "^FSQRTDr")>;
+
+// FP multiply
+// FP multiply accumulate
+def : WriteRes<WriteFMul, [THX2T99F01]> {
+ let Latency = 6;
+ let ResourceCycles = [2];
+ let NumMicroOps = 3;
+}
+
+def THX2T99XWriteFMul : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 6;
+ let ResourceCycles = [2];
+ let NumMicroOps = 3;
+}
+
+def THX2T99XWriteFMulAcc : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 6;
+ let ResourceCycles = [2];
+ let NumMicroOps = 3;
+}
+
+def : InstRW<[THX2T99XWriteFMul], (instregex "^FMUL", "^FNMUL")>;
+def : InstRW<[THX2T99XWriteFMulAcc],
+ (instregex "^FMADD", "^FMSUB", "^FNMADD", "^FNMSUB")>;
+
+// FP round to integral
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>;
+
+// FP select
+def : InstRW<[THX2T99Write_4Cyc_F01], (instregex "^FCSEL")>;
+
+//---
+// 3.9 FP Miscellaneous Instructions
+//---
+
+// FP convert, from vec to vec reg
+// FP convert, from gen to vec reg
+// FP convert, from vec to gen reg
+def : WriteRes<WriteFCvt, [THX2T99F01]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+
+// FP move, immed
+// FP move, register
+def : WriteRes<WriteFImm, [THX2T99F01]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+// FP transfer, from gen to vec reg
+// FP transfer, from vec to gen reg
+def : WriteRes<WriteFCopy, [THX2T99F01]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
+
+//---
+// 3.12 ASIMD Integer Instructions
+//---
+
+// ASIMD absolute diff, D-form
+// ASIMD absolute diff, Q-form
+// ASIMD absolute diff accum, D-form
+// ASIMD absolute diff accum, Q-form
+// ASIMD absolute diff accum long
+// ASIMD absolute diff long
+// ASIMD arith, basic
+// ASIMD arith, complex
+// ASIMD compare
+// ASIMD logical (AND, BIC, EOR)
+// ASIMD max/min, basic
+// ASIMD max/min, reduce, 4H/4S
+// ASIMD max/min, reduce, 8B/8H
+// ASIMD max/min, reduce, 16B
+// ASIMD multiply, D-form
+// ASIMD multiply, Q-form
+// ASIMD multiply accumulate long
+// ASIMD multiply accumulate saturating long
+// ASIMD multiply long
+// ASIMD pairwise add and accumulate
+// ASIMD shift accumulate
+// ASIMD shift by immed, basic
+// ASIMD shift by immed and insert, basic, D-form
+// ASIMD shift by immed and insert, basic, Q-form
+// ASIMD shift by immed, complex
+// ASIMD shift by register, basic, D-form
+// ASIMD shift by register, basic, Q-form
+// ASIMD shift by register, complex, D-form
+// ASIMD shift by register, complex, Q-form
+def : WriteRes<WriteV, [THX2T99F01]> {
+ let Latency = 7;
+ let NumMicroOps = 4;
+ let ResourceCycles = [4];
+}
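+
+// WriteV is the common default write for ASIMD instructions, so the 7-cycle,
+// 4-uop cost above covers every category listed unless one of the InstRW
+// entries below overrides it with something more specific.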
+
+// ASIMD arith, reduce, 4H/4S
+// ASIMD arith, reduce, 8B/8H
+// ASIMD arith, reduce, 16B
+
+// ASIMD logical (MVN (alias for NOT), ORN, ORR)
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>;
+
+// ASIMD arith, reduce
+def : InstRW<[THX2T99Write_10Cyc_F01],
+ (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>;
+
+// ASIMD polynomial (8x8) multiply long
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^(S|U|SQD)MULL")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>;
+def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^PMULL(v8i8|v16i8)")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^PMULL(v1i64|v2i64)")>;
+
+// ASIMD absolute diff accum, D-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>;
+// ASIMD absolute diff accum, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>;
+// ASIMD absolute diff accum long
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[SU]ABAL")>;
+// ASIMD arith, reduce, 4H/4S
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>;
+// ASIMD arith, reduce, 8B
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>;
+// ASIMD arith, reduce, 16B/16H
+def : InstRW<[THX2T99Write_10Cyc_F01],
+ (instregex "^[SU]?ADDL?Vv16i8v$")>;
+// ASIMD max/min, reduce, 4H/4S
+def : InstRW<[THX2T99Write_10Cyc_F01],
+ (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>;
+// ASIMD max/min, reduce, 8B/8H
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>;
+// ASIMD max/min, reduce, 16B/16H
+def : InstRW<[THX2T99Write_10Cyc_F01],
+ (instregex "^[SU](MIN|MAX)Vv16i8v$")>;
+// ASIMD multiply, D-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^(P?MUL|SQR?DMULH)" #
+ "(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)" #
+ "(_indexed)?$")>;
+// ASIMD multiply, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^(P?MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
+// ASIMD multiply accumulate, D-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>;
+// ASIMD multiply accumulate, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;
+// ASIMD shift accumulate
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "SRSRAv","SSRAv","URSRAv","USRAv")>;
+
+// ASIMD shift by immed, basic
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "RSHRNv","SHRNv", "SQRSHRNv","SQRSHRUNv",
+ "SQSHRNv","SQSHRUNv", "UQRSHRNv",
+ "UQSHRNv","SQXTNv","SQXTUNv","UQXTNv")>;
+// ASIMD shift by immed, complex
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^[SU]?(Q|R){1,2}SHR")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SQSHLU")>;
+// ASIMD shift by register, basic, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
+// ASIMD shift by register, complex, D-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[SU][QR]{1,2}SHL" #
+ "(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>;
+// ASIMD shift by register, complex, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>;
+
+// ASIMD Arithmetic
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "(ADD|SUB)HNv.*")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "(RADD|RSUB)HNv.*")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^SQADD", "^SQNEG", "^SQSUB", "^SRHADD",
+ "^SUQADD", "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>;
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|" #
+ "(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>;
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SADALP","^UADALP")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SADDLPv","^UADDLPv")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SADDLV","^UADDLV")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^ADDVv","^SMAXVv","^UMAXVv","^SMINVv","^UMINVv")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^SABAv","^UABAv","^SABALv","^UABALv")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^SQADDv","^SQSUBv","^UQADDv","^UQSUBv")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SUQADDv","^USQADDv")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^ADDHNv","^RADDHNv", "^RSUBHNv",
+ "^SQABS", "^SQADD", "^SQNEG", "^SQSUB",
+ "^SRHADD", "^SUBHNv", "^SUQADD",
+ "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^CMEQv","^CMGEv","^CMGTv",
+ "^CMLEv","^CMLTv", "^CMHIv","^CMHSv")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^SMAXv","^SMINv","^UMAXv","^UMINv",
+ "^SMAXPv","^SMINPv","^UMAXPv","^UMINPv")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^SABDv","^UABDv", "^SABDLv","^UABDLv")>;
+
+//---
+// 3.13 ASIMD Floating-point Instructions
+//---
+
+// ASIMD FP absolute value
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FABSv")>;
+
+// ASIMD FP arith, normal, D-form
+// ASIMD FP arith, normal, Q-form
+def : InstRW<[THX2T99Write_6Cyc_F01],
+ (instregex "^FABDv", "^FADDv", "^FSUBv")>;
+
+// ASIMD FP arith,pairwise, D-form
+// ASIMD FP arith, pairwise, Q-form
+def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADDPv")>;
+
+// ASIMD FP compare, D-form
+// ASIMD FP compare, Q-form
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FACGEv", "^FACGTv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv",
+ "^FCMGTv", "^FCMLEv",
+ "^FCMLTv")>;
+
+// ASIMD FP round, D-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^FRINT[AIMNPXZ](v2f32)")>;
+// ASIMD FP round, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>;
+
+// ASIMD FP convert, long
+// ASIMD FP convert, narrow
+// ASIMD FP convert, other, D-form
+// ASIMD FP convert, other, Q-form
+// NOTE: Handled by WriteV.
+
+// ASIMD FP convert, long and narrow
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^FCVT(L|N|XN)v")>;
+// ASIMD FP convert, other, D-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>;
+// ASIMD FP convert, other, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>;
+
+// ASIMD FP divide, D-form, F32
+def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv2f32)>;
+def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "FDIVv2f32")>;
+
+// ASIMD FP divide, Q-form, F32
+def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv4f32)>;
+def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "FDIVv4f32")>;
+
+// ASIMD FP divide, Q-form, F64
+def : InstRW<[THX2T99Write_23Cyc_F01], (instrs FDIVv2f64)>;
+def : InstRW<[THX2T99Write_23Cyc_F01], (instregex "FDIVv2f64")>;
+
+// ASIMD FP max/min, normal, D-form
+// ASIMD FP max/min, normal, Q-form
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXv", "^FMAXNMv",
+ "^FMINv", "^FMINNMv")>;
+
+// ASIMD FP max/min, pairwise, D-form
+// ASIMD FP max/min, pairwise, Q-form
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXPv", "^FMAXNMPv",
+ "^FMINPv", "^FMINNMPv")>;
+
+// ASIMD FP max/min, reduce
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXVv", "^FMAXNMVv",
+ "^FMINVv", "^FMINNMVv")>;
+
+// ASIMD FP multiply, D-form, FZ
+// ASIMD FP multiply, D-form, no FZ
+// ASIMD FP multiply, Q-form, FZ
+// ASIMD FP multiply, Q-form, no FZ
+def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMULv", "^FMULXv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01],
+ (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>;
+def : InstRW<[THX2T99Write_6Cyc_F01],
+ (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;
+
+// ASIMD FP multiply accumulate, D-form, FZ
+// ASIMD FP multiply accumulate, D-form, no FZ
+// ASIMD FP multiply accumulate, Q-form, FZ
+// ASIMD FP multiply accumulate, Q-form, no FZ
+def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMLAv", "^FMLSv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01],
+ (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>;
+def : InstRW<[THX2T99Write_6Cyc_F01],
+ (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;
+
+// ASIMD FP negate
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FNEGv")>;
+
+//--
+// 3.14 ASIMD Miscellaneous Instructions
+//--
+
+// ASIMD bit reverse
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^RBITv")>;
+
+// ASIMD bitwise insert, D-form
+// ASIMD bitwise insert, Q-form
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "^BIFv", "^BITv", "^BSLv")>;
+
+// ASIMD count, D-form
+// ASIMD count, Q-form
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "^CLSv", "^CLZv", "^CNTv")>;
+
+// ASIMD duplicate, gen reg
+// ASIMD duplicate, element
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^CPY")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv.+gpr")>;
+
+// ASIMD extract
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^EXTv")>;
+
+// ASIMD extract narrow
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^XTNv")>;
+
+// ASIMD extract narrow, saturating
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^SQXTNv", "^SQXTUNv", "^UQXTNv")>;
+
+// ASIMD insert, element to element
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>;
+
+// ASIMD transfer, element to gen reg
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^[SU]MOVv")>;
+
+// ASIMD move, integer immed
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^MOVIv")>;
+
+// ASIMD move, FP immed
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMOVv")>;
+
+// ASIMD table lookup, D-form
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8One")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Two")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Three")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Four")>;
+
+// ASIMD table lookup, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8One")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Two")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Three")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Four")>;
+
+// ASIMD transpose
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^TRN1", "^TRN2")>;
+
+// ASIMD unzip/zip
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>;
+
+// ASIMD reciprocal estimate, D-form
+// ASIMD reciprocal estimate, Q-form
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "^FRECPEv", "^FRECPXv", "^URECPEv",
+ "^FRSQRTEv", "^URSQRTEv")>;
+
+// ASIMD reciprocal step, D-form, FZ
+// ASIMD reciprocal step, D-form, no FZ
+// ASIMD reciprocal step, Q-form, FZ
+// ASIMD reciprocal step, Q-form, no FZ
+def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FRECPSv", "^FRSQRTSv")>;
+
+// ASIMD reverse
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "^REV16v", "^REV32v", "^REV64v")>;
+
+// ASIMD table lookup, D-form
+// ASIMD table lookup, Q-form
+def : InstRW<[THX2T99Write_8Cyc_F01], (instregex "^TBLv", "^TBXv")>;
+
+// ASIMD transfer, element to word or dword
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^[SU]MOVv")>;
+
+// ASIMD transfer, element to gen reg
+def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "(S|U)MOVv.*")>;
+
+// ASIMD transfer gen reg to element
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>;
+
+// ASIMD transpose
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^TRN1v", "^TRN2v",
+ "^UZP1v", "^UZP2v")>;
+
+// ASIMD unzip/zip
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^ZIP1v", "^ZIP2v")>;
+
+//--
+// 3.15 ASIMD Load Instructions
+//--
+
+// ASIMD load, 1 element, multiple, 1 reg, D-form
+// ASIMD load, 1 element, multiple, 1 reg, Q-form
+def : InstRW<[THX2T99Write_4Cyc_LS01],
+ (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr],
+ (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 2 reg, D-form
+// ASIMD load, 1 element, multiple, 2 reg, Q-form
+def : InstRW<[THX2T99Write_4Cyc_LS01],
+ (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr],
+ (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 3 reg, D-form
+// ASIMD load, 1 element, multiple, 3 reg, Q-form
+def : InstRW<[THX2T99Write_5Cyc_LS01],
+ (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THX2T99Write_5Cyc_LS01, WriteAdr],
+ (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 4 reg, D-form
+// ASIMD load, 1 element, multiple, 4 reg, Q-form
+def : InstRW<[THX2T99Write_6Cyc_LS01],
+ (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THX2T99Write_6Cyc_LS01, WriteAdr],
+ (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, one lane, B/H/S
+// ASIMD load, 1 element, one lane, D
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD1i(8|16|32|64)$")>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
+ (instregex "^LD1i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 1 element, all lanes, D-form, B/H/S
+// ASIMD load, 1 element, all lanes, D-form, D
+// ASIMD load, 1 element, all lanes, Q-form
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01],
+ (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
+ (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 2 element, multiple, D-form, B/H/S
+// ASIMD load, 2 element, multiple, Q-form, D
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01],
+ (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
+ (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 2 element, one lane, B/H
+// ASIMD load, 2 element, one lane, S
+// ASIMD load, 2 element, one lane, D
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD2i(8|16|32|64)$")>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
+ (instregex "^LD2i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 2 element, all lanes, D-form, B/H/S
+// ASIMD load, 2 element, all lanes, D-form, D
+// ASIMD load, 2 element, all lanes, Q-form
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01],
+ (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
+ (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 3 element, multiple, D-form, B/H/S
+// ASIMD load, 3 element, multiple, Q-form, B/H/S
+// ASIMD load, 3 element, multiple, Q-form, D
+def : InstRW<[THX2T99Write_8Cyc_LS01_F01],
+ (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
+def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr],
+ (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 3 element, one lane, B/H
+// ASIMD load, 3 element, one lane, S
+// ASIMD load, 3 element, one lane, D
+def : InstRW<[THX2T99Write_7Cyc_LS01_F01], (instregex "^LD3i(8|16|32|64)$")>;
+def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr],
+ (instregex "^LD3i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 3 element, all lanes, D-form, B/H/S
+// ASIMD load, 3 element, all lanes, D-form, D
+// ASIMD load, 3 element, all lanes, Q-form, B/H/S
+// ASIMD load, 3 element, all lanes, Q-form, D
+def : InstRW<[THX2T99Write_7Cyc_LS01_F01],
+ (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr],
+ (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 4 element, multiple, D-form, B/H/S
+// ASIMD load, 4 element, multiple, Q-form, B/H/S
+// ASIMD load, 4 element, multiple, Q-form, D
+def : InstRW<[THX2T99Write_8Cyc_LS01_F01],
+ (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
+def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr],
+ (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 4 element, one lane, B/H
+// ASIMD load, 4 element, one lane, S
+// ASIMD load, 4 element, one lane, D
+def : InstRW<[THX2T99Write_6Cyc_LS01_F01], (instregex "^LD4i(8|16|32|64)$")>;
+def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr],
+ (instregex "^LD4i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 4 element, all lanes, D-form, B/H/S
+// ASIMD load, 4 element, all lanes, D-form, D
+// ASIMD load, 4 element, all lanes, Q-form, B/H/S
+// ASIMD load, 4 element, all lanes, Q-form, D
+def : InstRW<[THX2T99Write_6Cyc_LS01_F01],
+ (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr],
+ (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+//--
+// 3.16 ASIMD Store Instructions
+//--
+
+// ASIMD store, 1 element, multiple, 1 reg, D-form
+// ASIMD store, 1 element, multiple, 1 reg, Q-form
+def : InstRW<[THX2T99Write_1Cyc_LS01],
+ (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
+ (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 2 reg, D-form
+// ASIMD store, 1 element, multiple, 2 reg, Q-form
+def : InstRW<[THX2T99Write_1Cyc_LS01],
+ (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
+ (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 3 reg, D-form
+// ASIMD store, 1 element, multiple, 3 reg, Q-form
+def : InstRW<[THX2T99Write_1Cyc_LS01],
+ (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
+ (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 4 reg, D-form
+// ASIMD store, 1 element, multiple, 4 reg, Q-form
+def : InstRW<[THX2T99Write_1Cyc_LS01],
+ (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
+ (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, one lane, B/H/S
+// ASIMD store, 1 element, one lane, D
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
+ (instregex "^ST1i(8|16|32|64)$")>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
+ (instregex "^ST1i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 2 element, multiple, D-form, B/H/S
+// ASIMD store, 2 element, multiple, Q-form, B/H/S
+// ASIMD store, 2 element, multiple, Q-form, D
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
+ (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
+ (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 2 element, one lane, B/H/S
+// ASIMD store, 2 element, one lane, D
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
+ (instregex "^ST2i(8|16|32|64)$")>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
+ (instregex "^ST2i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 3 element, multiple, D-form, B/H/S
+// ASIMD store, 3 element, multiple, Q-form, B/H/S
+// ASIMD store, 3 element, multiple, Q-form, D
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
+ (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
+ (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 3 element, one lane, B/H
+// ASIMD store, 3 element, one lane, S
+// ASIMD store, 3 element, one lane, D
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST3i(8|16|32|64)$")>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
+ (instregex "^ST3i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 4 element, multiple, D-form, B/H/S
+// ASIMD store, 4 element, multiple, Q-form, B/H/S
+// ASIMD store, 4 element, multiple, Q-form, D
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
+ (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
+ (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 4 element, one lane, B/H
+// ASIMD store, 4 element, one lane, S
+// ASIMD store, 4 element, one lane, D
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST4i(8|16|32|64)$")>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
+ (instregex "^ST4i(8|16|32|64)_POST$")>;
+
+// V8.1a Atomics (LSE)
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+ (instrs CASB, CASH, CASW, CASX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+ (instrs CASAB, CASAH, CASAW, CASAX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+ (instrs CASLB, CASLH, CASLW, CASLX)>;
+
+def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic],
+ (instrs CASALB, CASALH, CASALW, CASALX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+ (instrs LDLARB, LDLARH, LDLARW, LDLARX)>;
+
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+ (instrs LDADDB, LDADDH, LDADDW, LDADDX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+ (instrs LDADDAB, LDADDAH, LDADDAW, LDADDAX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+ (instrs LDADDLB, LDADDLH, LDADDLW, LDADDLX)>;
+
+def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic],
+ (instrs LDADDALB, LDADDALH, LDADDALW, LDADDALX)>;
+
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+ (instrs LDCLRB, LDCLRH, LDCLRW, LDCLRX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+ (instrs LDCLRAB, LDCLRAH, LDCLRAW, LDCLRAX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+ (instrs LDCLRLB, LDCLRLH, LDCLRLW, LDCLRLX)>;
+
+def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic],
+ (instrs LDCLRALB, LDCLRALH, LDCLRALW, LDCLRALX)>;
+
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+ (instrs LDEORB, LDEORH, LDEORW, LDEORX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+ (instrs LDEORAB, LDEORAH, LDEORAW, LDEORAX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+ (instrs LDEORLB, LDEORLH, LDEORLW, LDEORLX)>;
+
+def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic],
+ (instrs LDEORALB, LDEORALH, LDEORALW, LDEORALX)>;
+
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+ (instrs LDSETB, LDSETH, LDSETW, LDSETX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+ (instrs LDSETAB, LDSETAH, LDSETAW, LDSETAX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+ (instrs LDSETLB, LDSETLH, LDSETLW, LDSETLX)>;
+
+def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic],
+ (instrs LDSETALB, LDSETALH, LDSETALW, LDSETALX)>;
+
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+ (instrs LDSMAXB, LDSMAXH, LDSMAXW, LDSMAXX,
+ LDSMAXAB, LDSMAXAH, LDSMAXAW, LDSMAXAX,
+ LDSMAXLB, LDSMAXLH, LDSMAXLW, LDSMAXLX,
+ LDSMAXALB, LDSMAXALH, LDSMAXALW, LDSMAXALX)>;
+
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+ (instrs LDSMINB, LDSMINH, LDSMINW, LDSMINX,
+ LDSMINAB, LDSMINAH, LDSMINAW, LDSMINAX,
+ LDSMINLB, LDSMINLH, LDSMINLW, LDSMINLX,
+ LDSMINALB, LDSMINALH, LDSMINALW, LDSMINALX)>;
+
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+ (instrs LDUMAXB, LDUMAXH, LDUMAXW, LDUMAXX,
+ LDUMAXAB, LDUMAXAH, LDUMAXAW, LDUMAXAX,
+ LDUMAXLB, LDUMAXLH, LDUMAXLW, LDUMAXLX,
+ LDUMAXALB, LDUMAXALH, LDUMAXALW, LDUMAXALX)>;
+
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+ (instrs LDUMINB, LDUMINH, LDUMINW, LDUMINX,
+ LDUMINAB, LDUMINAH, LDUMINAW, LDUMINAX,
+ LDUMINLB, LDUMINLH, LDUMINLW, LDUMINLX,
+ LDUMINALB, LDUMINALH, LDUMINALW, LDUMINALX)>;
+
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+ (instrs SWPB, SWPH, SWPW, SWPX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+ (instrs SWPAB, SWPAH, SWPAW, SWPAX)>;
+
+def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic],
+ (instrs SWPLB, SWPLH, SWPLW, SWPLX)>;
+
+def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic],
+ (instrs SWPALB, SWPALH, SWPALW, SWPALX)>;
+
+def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic],
+ (instrs STLLRB, STLLRH, STLLRW, STLLRX)>;
+
+} // SchedModel = ThunderX2T99Model
+
diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64Schedule.td b/capstone/suite/synctools/tablegen/AArch64/AArch64Schedule.td
new file mode 100644
index 000000000..ce81f48ac
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/AArch64/AArch64Schedule.td
@@ -0,0 +1,106 @@
+//==-- AArch64Schedule.td - AArch64 Scheduling Definitions -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Define TII for use in SchedVariant Predicates.
+// const MachineInstr *MI and const TargetSchedModel *SchedModel
+// are defined by default.
+def : PredicateProlog<[{
+ const AArch64InstrInfo *TII =
+ static_cast<const AArch64InstrInfo*>(SchedModel->getInstrInfo());
+ (void)TII;
+}]>;
+
+// AArch64 Scheduler Definitions
+
+def WriteImm : SchedWrite; // MOVN, MOVZ
+// TODO: Provide variants for MOV32/64imm Pseudos that dynamically
+// select the correct sequence of WriteImms.
+
+def WriteI : SchedWrite; // ALU
+def WriteISReg : SchedWrite; // ALU of Shifted-Reg
+def WriteIEReg : SchedWrite; // ALU of Extended-Reg
+def ReadI : SchedRead; // ALU
+def ReadISReg : SchedRead; // ALU of Shifted-Reg
+def ReadIEReg : SchedRead; // ALU of Extended-Reg
+def WriteExtr : SchedWrite; // EXTR shifts a reg pair
+def ReadExtrHi : SchedRead; // Read the high reg of the EXTR pair
+def WriteIS : SchedWrite; // Shift/Scale
+def WriteID32 : SchedWrite; // 32-bit Divide
+def WriteID64 : SchedWrite; // 64-bit Divide
+def ReadID : SchedRead; // 32/64-bit Divide
+def WriteIM32 : SchedWrite; // 32-bit Multiply
+def WriteIM64 : SchedWrite; // 64-bit Multiply
+def ReadIM : SchedRead; // 32/64-bit Multiply
+def ReadIMA : SchedRead; // 32/64-bit Multiply Accumulate
+def WriteBr : SchedWrite; // Branch
+def WriteBrReg : SchedWrite; // Indirect Branch
+
+def WriteLD : SchedWrite; // Load from base addr plus immediate offset
+def WriteST : SchedWrite; // Store to base addr plus immediate offset
+def WriteSTP : SchedWrite; // Store a register pair.
+def WriteAdr : SchedWrite; // Address pre/post increment.
+
+def WriteLDIdx : SchedWrite; // Load from a register index (maybe scaled).
+def WriteSTIdx : SchedWrite; // Store to a register index (maybe scaled).
+def ReadAdrBase : SchedRead; // Read the base register of a reg-offset LD/ST.
+
+// Predicate for determining when a shiftable register is shifted.
+def RegShiftedPred : SchedPredicate<[{TII->hasShiftedReg(*MI)}]>;
+
+// Predicate for determining when an extendable register is extended.
+def RegExtendedPred : SchedPredicate<[{TII->hasExtendedReg(*MI)}]>;
+
+// ScaledIdxPred is true if a WriteLDIdx operand will be
+// scaled. Subtargets can use this to dynamically select resources and
+// latency for WriteLDIdx and ReadAdrBase.
+def ScaledIdxPred : SchedPredicate<[{TII->isScaledAddr(*MI)}]>;
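+
+// As an illustrative sketch: a subtarget scheduling model would typically
+// consume these predicates through a SchedWriteVariant, picking different
+// write resources depending on whether the operand is actually
+// shifted/extended/scaled. The resource names MyWriteSlow and MyWriteFast
+// below are hypothetical placeholders, not definitions from this file:
+//
+//   def MyWriteISReg : SchedWriteVariant<[
+//     SchedVar<RegShiftedPred, [MyWriteSlow]>,
+//     SchedVar<NoSchedPred,    [MyWriteFast]>]>;
+//   def : SchedAlias<WriteISReg, MyWriteISReg>;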
+
+// Serialized two-level address load.
+// EXAMPLE: LOADGot
+def WriteLDAdr : WriteSequence<[WriteAdr, WriteLD]>;
+
+// Serialized two-level address lookup.
+// EXAMPLE: MOVaddr...
+def WriteAdrAdr : WriteSequence<[WriteAdr, WriteAdr]>;
+
+// The second register of a load-pair.
+// LDP,LDPSW,LDNP,LDXP,LDAXP
+def WriteLDHi : SchedWrite;
+
+// Store-exclusive is a store followed by a dependent load.
+def WriteSTX : WriteSequence<[WriteST, WriteLD]>;
+
+def WriteSys : SchedWrite; // Long, variable latency system ops.
+def WriteBarrier : SchedWrite; // Memory barrier.
+def WriteHint : SchedWrite; // Hint instruction.
+
+def WriteF : SchedWrite; // General floating-point ops.
+def WriteFCmp : SchedWrite; // Floating-point compare.
+def WriteFCvt : SchedWrite; // Float conversion.
+def WriteFCopy : SchedWrite; // Float-int register copy.
+def WriteFImm : SchedWrite; // Floating-point immediate.
+def WriteFMul : SchedWrite; // Floating-point multiply.
+def WriteFDiv : SchedWrite; // Floating-point division.
+
+def WriteV : SchedWrite; // Vector ops.
+def WriteVLD : SchedWrite; // Vector loads.
+def WriteVST : SchedWrite; // Vector stores.
+
+def WriteAtomic : SchedWrite; // Atomic memory operations (CAS, Swap, LDOP)
+
+// Read the unwritten lanes of the VLD's destination registers.
+def ReadVLD : SchedRead;
+
+// Sequential vector load and shuffle.
+def WriteVLDShuffle : WriteSequence<[WriteVLD, WriteV]>;
+def WriteVLDPairShuffle : WriteSequence<[WriteVLD, WriteV, WriteV]>;
+
+// Store a shuffled vector.
+def WriteVSTShuffle : WriteSequence<[WriteV, WriteVST]>;
+def WriteVSTPairShuffle : WriteSequence<[WriteV, WriteV, WriteVST]>;
diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SystemOperands.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SystemOperands.td
new file mode 100644
index 000000000..dbc4deaf3
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SystemOperands.td
@@ -0,0 +1,1332 @@
+//===- AArch64SystemOperands.td ----------------------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the symbolic operands permitted for various kinds of
+// AArch64 system instruction.
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/TableGen/SearchableTable.td"
+
+//===----------------------------------------------------------------------===//
+// AT (address translate) instruction options.
+//===----------------------------------------------------------------------===//
+
+class AT<string name, bits<3> op1, bits<4> crn, bits<4> crm,
+ bits<3> op2> : SearchableTable {
+ let SearchableFields = ["Name", "Encoding"];
+ let EnumValueField = "Encoding";
+
+ string Name = name;
+ bits<14> Encoding;
+ let Encoding{13-11} = op1;
+ let Encoding{10-7} = crn;
+ let Encoding{6-3} = crm;
+ let Encoding{2-0} = op2;
+ code Requires = [{ {} }];
+}
+
+def : AT<"S1E1R", 0b000, 0b0111, 0b1000, 0b000>;
+def : AT<"S1E2R", 0b100, 0b0111, 0b1000, 0b000>;
+def : AT<"S1E3R", 0b110, 0b0111, 0b1000, 0b000>;
+def : AT<"S1E1W", 0b000, 0b0111, 0b1000, 0b001>;
+def : AT<"S1E2W", 0b100, 0b0111, 0b1000, 0b001>;
+def : AT<"S1E3W", 0b110, 0b0111, 0b1000, 0b001>;
+def : AT<"S1E0R", 0b000, 0b0111, 0b1000, 0b010>;
+def : AT<"S1E0W", 0b000, 0b0111, 0b1000, 0b011>;
+def : AT<"S12E1R", 0b100, 0b0111, 0b1000, 0b100>;
+def : AT<"S12E1W", 0b100, 0b0111, 0b1000, 0b101>;
+def : AT<"S12E0R", 0b100, 0b0111, 0b1000, 0b110>;
+def : AT<"S12E0W", 0b100, 0b0111, 0b1000, 0b111>;
+
+let Requires = [{ {AArch64::HasV8_2aOps} }] in {
+def : AT<"S1E1RP", 0b000, 0b0111, 0b1001, 0b000>;
+def : AT<"S1E1WP", 0b000, 0b0111, 0b1001, 0b001>;
+}
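+
+// Worked example of the packing above: for S1E1R (op1 = 0b000, CRn = 0b0111,
+// CRm = 0b1000, op2 = 0b000) the 14-bit Encoding is
+//   (0b000 << 11) | (0b0111 << 7) | (0b1000 << 3) | 0b000 = 0x3C0.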
+
+//===----------------------------------------------------------------------===//
+// DMB/DSB (data barrier) instruction options.
+//===----------------------------------------------------------------------===//
+
+class DB<string name, bits<4> encoding> : SearchableTable {
+ let SearchableFields = ["Name", "Encoding"];
+ let EnumValueField = "Encoding";
+
+ string Name = name;
+ bits<4> Encoding = encoding;
+}
+
+def : DB<"oshld", 0x1>;
+def : DB<"oshst", 0x2>;
+def : DB<"osh", 0x3>;
+def : DB<"nshld", 0x5>;
+def : DB<"nshst", 0x6>;
+def : DB<"nsh", 0x7>;
+def : DB<"ishld", 0x9>;
+def : DB<"ishst", 0xa>;
+def : DB<"ish", 0xb>;
+def : DB<"ld", 0xd>;
+def : DB<"st", 0xe>;
+def : DB<"sy", 0xf>;
+
+//===----------------------------------------------------------------------===//
+// DC (data cache maintenance) instruction options.
+//===----------------------------------------------------------------------===//
+
+class DC<string name, bits<3> op1, bits<4> crn, bits<4> crm,
+ bits<3> op2> : SearchableTable {
+ let SearchableFields = ["Name", "Encoding"];
+ let EnumValueField = "Encoding";
+
+ string Name = name;
+ bits<14> Encoding;
+ let Encoding{13-11} = op1;
+ let Encoding{10-7} = crn;
+ let Encoding{6-3} = crm;
+ let Encoding{2-0} = op2;
+ code Requires = [{ {} }];
+}
+
+def : DC<"ZVA", 0b011, 0b0111, 0b0100, 0b001>;
+def : DC<"IVAC", 0b000, 0b0111, 0b0110, 0b001>;
+def : DC<"ISW", 0b000, 0b0111, 0b0110, 0b010>;
+def : DC<"CVAC", 0b011, 0b0111, 0b1010, 0b001>;
+def : DC<"CSW", 0b000, 0b0111, 0b1010, 0b010>;
+def : DC<"CVAU", 0b011, 0b0111, 0b1011, 0b001>;
+def : DC<"CIVAC", 0b011, 0b0111, 0b1110, 0b001>;
+def : DC<"CISW", 0b000, 0b0111, 0b1110, 0b010>;
+
+let Requires = [{ {AArch64::HasV8_2aOps} }] in
+def : DC<"CVAP", 0b011, 0b0111, 0b1100, 0b001>;
+
+//===----------------------------------------------------------------------===//
+// IC (instruction cache maintenance) instruction options.
+//===----------------------------------------------------------------------===//
+
+class IC<string name, bits<3> op1, bits<4> crn, bits<4> crm, bits<3> op2,
+ bit needsreg> : SearchableTable {
+ let SearchableFields = ["Name", "Encoding"];
+ let EnumValueField = "Encoding";
+
+ string Name = name;
+ bits<14> Encoding;
+ let Encoding{13-11} = op1;
+ let Encoding{10-7} = crn;
+ let Encoding{6-3} = crm;
+ let Encoding{2-0} = op2;
+ bit NeedsReg = needsreg;
+}
+
+def : IC<"IALLUIS", 0b000, 0b0111, 0b0001, 0b000, 0>;
+def : IC<"IALLU", 0b000, 0b0111, 0b0101, 0b000, 0>;
+def : IC<"IVAU", 0b011, 0b0111, 0b0101, 0b001, 1>;
+
+//===----------------------------------------------------------------------===//
+// ISB (instruction-fetch barrier) instruction options.
+//===----------------------------------------------------------------------===//
+
+class ISB<string name, bits<4> encoding> : SearchableTable {
+ let SearchableFields = ["Name", "Encoding"];
+ let EnumValueField = "Encoding";
+
+ string Name = name;
+ bits<4> Encoding;
+ let Encoding = encoding;
+}
+
+def : ISB<"sy", 0xf>;
+
+//===----------------------------------------------------------------------===//
+// TSB (Trace synchronization barrier) instruction options.
+//===----------------------------------------------------------------------===//
+
+class TSB<string name, bits<4> encoding> : SearchableTable {
+ let SearchableFields = ["Name", "Encoding"];
+ let EnumValueField = "Encoding";
+
+ string Name = name;
+ bits<4> Encoding;
+ let Encoding = encoding;
+
+ code Requires = [{ {AArch64::HasV8_4aOps} }];
+}
+
+def : TSB<"csync", 0>;
+
+//===----------------------------------------------------------------------===//
+// PRFM (prefetch) instruction options.
+//===----------------------------------------------------------------------===//
+
+class PRFM<string name, bits<5> encoding> : SearchableTable {
+ let SearchableFields = ["Name", "Encoding"];
+ let EnumValueField = "Encoding";
+
+ string Name = name;
+ bits<5> Encoding;
+ let Encoding = encoding;
+}
+
+def : PRFM<"pldl1keep", 0x00>;
+def : PRFM<"pldl1strm", 0x01>;
+def : PRFM<"pldl2keep", 0x02>;
+def : PRFM<"pldl2strm", 0x03>;
+def : PRFM<"pldl3keep", 0x04>;
+def : PRFM<"pldl3strm", 0x05>;
+def : PRFM<"plil1keep", 0x08>;
+def : PRFM<"plil1strm", 0x09>;
+def : PRFM<"plil2keep", 0x0a>;
+def : PRFM<"plil2strm", 0x0b>;
+def : PRFM<"plil3keep", 0x0c>;
+def : PRFM<"plil3strm", 0x0d>;
+def : PRFM<"pstl1keep", 0x10>;
+def : PRFM<"pstl1strm", 0x11>;
+def : PRFM<"pstl2keep", 0x12>;
+def : PRFM<"pstl2strm", 0x13>;
+def : PRFM<"pstl3keep", 0x14>;
+def : PRFM<"pstl3strm", 0x15>;
+
+//===----------------------------------------------------------------------===//
+// SVE Prefetch instruction options.
+//===----------------------------------------------------------------------===//
+
+class SVEPRFM<string name, bits<4> encoding> : SearchableTable {
+ let SearchableFields = ["Name", "Encoding"];
+ let EnumValueField = "Encoding";
+
+ string Name = name;
+ bits<4> Encoding;
+ let Encoding = encoding;
+ code Requires = [{ {} }];
+}
+
+let Requires = [{ {AArch64::FeatureSVE} }] in {
+def : SVEPRFM<"pldl1keep", 0x00>;
+def : SVEPRFM<"pldl1strm", 0x01>;
+def : SVEPRFM<"pldl2keep", 0x02>;
+def : SVEPRFM<"pldl2strm", 0x03>;
+def : SVEPRFM<"pldl3keep", 0x04>;
+def : SVEPRFM<"pldl3strm", 0x05>;
+def : SVEPRFM<"pstl1keep", 0x08>;
+def : SVEPRFM<"pstl1strm", 0x09>;
+def : SVEPRFM<"pstl2keep", 0x0a>;
+def : SVEPRFM<"pstl2strm", 0x0b>;
+def : SVEPRFM<"pstl3keep", 0x0c>;
+def : SVEPRFM<"pstl3strm", 0x0d>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Predicate patterns
+//===----------------------------------------------------------------------===//
+
+class SVEPREDPAT<string name, bits<5> encoding> : SearchableTable {
+ let SearchableFields = ["Name", "Encoding"];
+ let EnumValueField = "Encoding";
+
+ string Name = name;
+ bits<5> Encoding;
+ let Encoding = encoding;
+}
+
+def : SVEPREDPAT<"pow2", 0x00>;
+def : SVEPREDPAT<"vl1", 0x01>;
+def : SVEPREDPAT<"vl2", 0x02>;
+def : SVEPREDPAT<"vl3", 0x03>;
+def : SVEPREDPAT<"vl4", 0x04>;
+def : SVEPREDPAT<"vl5", 0x05>;
+def : SVEPREDPAT<"vl6", 0x06>;
+def : SVEPREDPAT<"vl7", 0x07>;
+def : SVEPREDPAT<"vl8", 0x08>;
+def : SVEPREDPAT<"vl16", 0x09>;
+def : SVEPREDPAT<"vl32", 0x0a>;
+def : SVEPREDPAT<"vl64", 0x0b>;
+def : SVEPREDPAT<"vl128", 0x0c>;
+def : SVEPREDPAT<"vl256", 0x0d>;
+def : SVEPREDPAT<"mul4", 0x1d>;
+def : SVEPREDPAT<"mul3", 0x1e>;
+def : SVEPREDPAT<"all", 0x1f>;
+
+//===----------------------------------------------------------------------===//
+// Exact FP Immediates.
+//
+// These definitions are used to create a lookup table with FP Immediates that
+// is used for a few instructions that only accept a limited set of exact FP
+// immediate values.
+//===----------------------------------------------------------------------===//
+class ExactFPImm<string name, string repr, bits<4> enum> : SearchableTable {
+ let SearchableFields = ["Enum", "Repr"];
+ let EnumValueField = "Enum";
+
+ string Name = name;
+ bits<4> Enum = enum;
+ string Repr = repr;
+}
+
+def : ExactFPImm<"zero", "0.0", 0x0>;
+def : ExactFPImm<"half", "0.5", 0x1>;
+def : ExactFPImm<"one", "1.0", 0x2>;
+def : ExactFPImm<"two", "2.0", 0x3>;
+
+//===----------------------------------------------------------------------===//
+// PState instruction options.
+//===----------------------------------------------------------------------===//
+
+class PState<string name, bits<5> encoding> : SearchableTable {
+ let SearchableFields = ["Name", "Encoding"];
+ let EnumValueField = "Encoding";
+
+ string Name = name;
+ bits<5> Encoding;
+ let Encoding = encoding;
+ code Requires = [{ {} }];
+}
+
+def : PState<"SPSel", 0b00101>;
+def : PState<"DAIFSet", 0b11110>;
+def : PState<"DAIFClr", 0b11111>;
+// v8.1a "Privileged Access Never" extension-specific PStates
+let Requires = [{ {AArch64::HasV8_1aOps} }] in
+def : PState<"PAN", 0b00100>;
+// v8.2a "User Access Override" extension-specific PStates
+let Requires = [{ {AArch64::HasV8_2aOps} }] in
+def : PState<"UAO", 0b00011>;
+// v8.4a timing insensitivity of data processing instructions
+let Requires = [{ {AArch64::HasV8_4aOps} }] in
+def : PState<"DIT", 0b11010>;
+
+//===----------------------------------------------------------------------===//
+// PSB instruction options.
+//===----------------------------------------------------------------------===//
+
+class PSB<string name, bits<5> encoding> : SearchableTable {
+ let SearchableFields = ["Name", "Encoding"];
+ let EnumValueField = "Encoding";
+
+ string Name = name;
+ bits<5> Encoding;
+ let Encoding = encoding;
+}
+
+def : PSB<"csync", 0x11>;
+
+//===----------------------------------------------------------------------===//
+// TLBI (translation lookaside buffer invalidate) instruction options.
+//===----------------------------------------------------------------------===//
+
+class TLBI<string name, bits<3> op1, bits<4> crn, bits<4> crm,
+ bits<3> op2, bit needsreg = 1> : SearchableTable {
+ let SearchableFields = ["Name", "Encoding"];
+ let EnumValueField = "Encoding";
+
+ string Name = name;
+ bits<14> Encoding;
+ let Encoding{13-11} = op1;
+ let Encoding{10-7} = crn;
+ let Encoding{6-3} = crm;
+ let Encoding{2-0} = op2;
+ bit NeedsReg = needsreg;
+ code Requires = [{ {} }];
+}
+
+def : TLBI<"IPAS2E1IS", 0b100, 0b1000, 0b0000, 0b001>;
+def : TLBI<"IPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b101>;
+def : TLBI<"VMALLE1IS", 0b000, 0b1000, 0b0011, 0b000, 0>;
+def : TLBI<"ALLE2IS", 0b100, 0b1000, 0b0011, 0b000, 0>;
+def : TLBI<"ALLE3IS", 0b110, 0b1000, 0b0011, 0b000, 0>;
+def : TLBI<"VAE1IS", 0b000, 0b1000, 0b0011, 0b001>;
+def : TLBI<"VAE2IS", 0b100, 0b1000, 0b0011, 0b001>;
+def : TLBI<"VAE3IS", 0b110, 0b1000, 0b0011, 0b001>;
+def : TLBI<"ASIDE1IS", 0b000, 0b1000, 0b0011, 0b010>;
+def : TLBI<"VAAE1IS", 0b000, 0b1000, 0b0011, 0b011>;
+def : TLBI<"ALLE1IS", 0b100, 0b1000, 0b0011, 0b100, 0>;
+def : TLBI<"VALE1IS", 0b000, 0b1000, 0b0011, 0b101>;
+def : TLBI<"VALE2IS", 0b100, 0b1000, 0b0011, 0b101>;
+def : TLBI<"VALE3IS", 0b110, 0b1000, 0b0011, 0b101>;
+def : TLBI<"VMALLS12E1IS", 0b100, 0b1000, 0b0011, 0b110, 0>;
+def : TLBI<"VAALE1IS", 0b000, 0b1000, 0b0011, 0b111>;
+def : TLBI<"IPAS2E1", 0b100, 0b1000, 0b0100, 0b001>;
+def : TLBI<"IPAS2LE1", 0b100, 0b1000, 0b0100, 0b101>;
+def : TLBI<"VMALLE1", 0b000, 0b1000, 0b0111, 0b000, 0>;
+def : TLBI<"ALLE2", 0b100, 0b1000, 0b0111, 0b000, 0>;
+def : TLBI<"ALLE3", 0b110, 0b1000, 0b0111, 0b000, 0>;
+def : TLBI<"VAE1", 0b000, 0b1000, 0b0111, 0b001>;
+def : TLBI<"VAE2", 0b100, 0b1000, 0b0111, 0b001>;
+def : TLBI<"VAE3", 0b110, 0b1000, 0b0111, 0b001>;
+def : TLBI<"ASIDE1", 0b000, 0b1000, 0b0111, 0b010>;
+def : TLBI<"VAAE1", 0b000, 0b1000, 0b0111, 0b011>;
+def : TLBI<"ALLE1", 0b100, 0b1000, 0b0111, 0b100, 0>;
+def : TLBI<"VALE1", 0b000, 0b1000, 0b0111, 0b101>;
+def : TLBI<"VALE2", 0b100, 0b1000, 0b0111, 0b101>;
+def : TLBI<"VALE3", 0b110, 0b1000, 0b0111, 0b101>;
+def : TLBI<"VMALLS12E1", 0b100, 0b1000, 0b0111, 0b110, 0>;
+def : TLBI<"VAALE1", 0b000, 0b1000, 0b0111, 0b111>;
+
+// Armv8.4-A Outer Shareable TLB Maintenance instructions:
+let Requires = [{ {AArch64::HasV8_4aOps} }] in {
+// op1 CRn CRm op2
+def : TLBI<"VMALLE1OS", 0b000, 0b1000, 0b0001, 0b000, 0>;
+def : TLBI<"VAE1OS", 0b000, 0b1000, 0b0001, 0b001>;
+def : TLBI<"ASIDE1OS", 0b000, 0b1000, 0b0001, 0b010>;
+def : TLBI<"VAAE1OS", 0b000, 0b1000, 0b0001, 0b011>;
+def : TLBI<"VALE1OS", 0b000, 0b1000, 0b0001, 0b101>;
+def : TLBI<"VAALE1OS", 0b000, 0b1000, 0b0001, 0b111>;
+def : TLBI<"IPAS2E1OS", 0b100, 0b1000, 0b0100, 0b000>;
+def : TLBI<"IPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b100>;
+def : TLBI<"VAE2OS", 0b100, 0b1000, 0b0001, 0b001>;
+def : TLBI<"VALE2OS", 0b100, 0b1000, 0b0001, 0b101>;
+def : TLBI<"VMALLS12E1OS", 0b100, 0b1000, 0b0001, 0b110, 0>;
+def : TLBI<"VAE3OS", 0b110, 0b1000, 0b0001, 0b001>;
+def : TLBI<"VALE3OS", 0b110, 0b1000, 0b0001, 0b101>;
+def : TLBI<"ALLE2OS", 0b100, 0b1000, 0b0001, 0b000, 0>;
+def : TLBI<"ALLE1OS", 0b100, 0b1000, 0b0001, 0b100, 0>;
+def : TLBI<"ALLE3OS", 0b110, 0b1000, 0b0001, 0b000, 0>;
+
+// Armv8.4-A TLB Range Maintenance instructions:
+// op1 CRn CRm op2
+def : TLBI<"RVAE1", 0b000, 0b1000, 0b0110, 0b001>;
+def : TLBI<"RVAAE1", 0b000, 0b1000, 0b0110, 0b011>;
+def : TLBI<"RVALE1", 0b000, 0b1000, 0b0110, 0b101>;
+def : TLBI<"RVAALE1", 0b000, 0b1000, 0b0110, 0b111>;
+def : TLBI<"RVAE1IS", 0b000, 0b1000, 0b0010, 0b001>;
+def : TLBI<"RVAAE1IS", 0b000, 0b1000, 0b0010, 0b011>;
+def : TLBI<"RVALE1IS", 0b000, 0b1000, 0b0010, 0b101>;
+def : TLBI<"RVAALE1IS", 0b000, 0b1000, 0b0010, 0b111>;
+def : TLBI<"RVAE1OS", 0b000, 0b1000, 0b0101, 0b001>;
+def : TLBI<"RVAAE1OS", 0b000, 0b1000, 0b0101, 0b011>;
+def : TLBI<"RVALE1OS", 0b000, 0b1000, 0b0101, 0b101>;
+def : TLBI<"RVAALE1OS", 0b000, 0b1000, 0b0101, 0b111>;
+def : TLBI<"RIPAS2E1IS", 0b100, 0b1000, 0b0000, 0b010>;
+def : TLBI<"RIPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b110>;
+def : TLBI<"RIPAS2E1", 0b100, 0b1000, 0b0100, 0b010>;
+def : TLBI<"RIPAS2LE1", 0b100, 0b1000, 0b0100, 0b110>;
+def : TLBI<"RIPAS2E1OS", 0b100, 0b1000, 0b0100, 0b011>;
+def : TLBI<"RIPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b111>;
+def : TLBI<"RVAE2", 0b100, 0b1000, 0b0110, 0b001>;
+def : TLBI<"RVALE2", 0b100, 0b1000, 0b0110, 0b101>;
+def : TLBI<"RVAE2IS", 0b100, 0b1000, 0b0010, 0b001>;
+def : TLBI<"RVALE2IS", 0b100, 0b1000, 0b0010, 0b101>;
+def : TLBI<"RVAE2OS", 0b100, 0b1000, 0b0101, 0b001>;
+def : TLBI<"RVALE2OS", 0b100, 0b1000, 0b0101, 0b101>;
+def : TLBI<"RVAE3", 0b110, 0b1000, 0b0110, 0b001>;
+def : TLBI<"RVALE3", 0b110, 0b1000, 0b0110, 0b101>;
+def : TLBI<"RVAE3IS", 0b110, 0b1000, 0b0010, 0b001>;
+def : TLBI<"RVALE3IS", 0b110, 0b1000, 0b0010, 0b101>;
+def : TLBI<"RVAE3OS", 0b110, 0b1000, 0b0101, 0b001>;
+def : TLBI<"RVALE3OS", 0b110, 0b1000, 0b0101, 0b101>;
+}
+
+//===----------------------------------------------------------------------===//
+// MRS/MSR (system register read/write) instruction options.
+//===----------------------------------------------------------------------===//
+
+class SysReg<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm,
+ bits<3> op2> : SearchableTable {
+ let SearchableFields = ["Name", "Encoding"];
+ let EnumValueField = "Encoding";
+
+ string Name = name;
+ bits<16> Encoding;
+ let Encoding{15-14} = op0;
+ let Encoding{13-11} = op1;
+ let Encoding{10-7} = crn;
+ let Encoding{6-3} = crm;
+ let Encoding{2-0} = op2;
+ bit Readable = ?;
+ bit Writeable = ?;
+ code Requires = [{ {} }];
+}
+
+class RWSysReg<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm,
+ bits<3> op2>
+ : SysReg<name, op0, op1, crn, crm, op2> {
+ let Readable = 1;
+ let Writeable = 1;
+}
+
+class ROSysReg<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm,
+ bits<3> op2>
+ : SysReg<name, op0, op1, crn, crm, op2> {
+ let Readable = 1;
+ let Writeable = 0;
+}
+
+class WOSysReg<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm,
+ bits<3> op2>
+ : SysReg<name, op0, op1, crn, crm, op2> {
+ let Readable = 0;
+ let Writeable = 1;
+}
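+
+// Worked example of the SysReg packing: MDCCSR_EL0 below has op0 = 0b10,
+// op1 = 0b011, CRn = 0b0000, CRm = 0b0001, op2 = 0b000, so its Encoding is
+//   (0b10 << 14) | (0b011 << 11) | (0b0000 << 7) | (0b0001 << 3) | 0b000 = 0x9808,
+// which corresponds to the generic MRS/MSR name S2_3_C0_C1_0.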
+
+//===----------------------
+// Read-only regs
+//===----------------------
+
+// Op0 Op1 CRn CRm Op2
+def : ROSysReg<"MDCCSR_EL0", 0b10, 0b011, 0b0000, 0b0001, 0b000>;
+def : ROSysReg<"DBGDTRRX_EL0", 0b10, 0b011, 0b0000, 0b0101, 0b000>;
+def : ROSysReg<"MDRAR_EL1", 0b10, 0b000, 0b0001, 0b0000, 0b000>;
+def : ROSysReg<"OSLSR_EL1", 0b10, 0b000, 0b0001, 0b0001, 0b100>;
+def : ROSysReg<"DBGAUTHSTATUS_EL1", 0b10, 0b000, 0b0111, 0b1110, 0b110>;
+def : ROSysReg<"PMCEID0_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b110>;
+def : ROSysReg<"PMCEID1_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b111>;
+def : ROSysReg<"MIDR_EL1", 0b11, 0b000, 0b0000, 0b0000, 0b000>;
+def : ROSysReg<"CCSIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b000>;
+def : ROSysReg<"CCSIDR2_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b010> {
+ let Requires = [{ {AArch64::HasV8_3aOps} }];
+}
+def : ROSysReg<"CLIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b001>;
+def : ROSysReg<"CTR_EL0", 0b11, 0b011, 0b0000, 0b0000, 0b001>;
+def : ROSysReg<"MPIDR_EL1", 0b11, 0b000, 0b0000, 0b0000, 0b101>;
+def : ROSysReg<"REVIDR_EL1", 0b11, 0b000, 0b0000, 0b0000, 0b110>;
+def : ROSysReg<"AIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b111>;
+def : ROSysReg<"DCZID_EL0", 0b11, 0b011, 0b0000, 0b0000, 0b111>;
+def : ROSysReg<"ID_PFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b000>;
+def : ROSysReg<"ID_PFR1_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b001>;
+def : ROSysReg<"ID_DFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b010>;
+def : ROSysReg<"ID_AFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b011>;
+def : ROSysReg<"ID_MMFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b100>;
+def : ROSysReg<"ID_MMFR1_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b101>;
+def : ROSysReg<"ID_MMFR2_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b110>;
+def : ROSysReg<"ID_MMFR3_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b111>;
+def : ROSysReg<"ID_ISAR0_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b000>;
+def : ROSysReg<"ID_ISAR1_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b001>;
+def : ROSysReg<"ID_ISAR2_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b010>;
+def : ROSysReg<"ID_ISAR3_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b011>;
+def : ROSysReg<"ID_ISAR4_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b100>;
+def : ROSysReg<"ID_ISAR5_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b101>;
+def : ROSysReg<"ID_ISAR6_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b111> {
+ let Requires = [{ {AArch64::HasV8_2aOps} }];
+}
+def : ROSysReg<"ID_AA64PFR0_EL1", 0b11, 0b000, 0b0000, 0b0100, 0b000>;
+def : ROSysReg<"ID_AA64PFR1_EL1", 0b11, 0b000, 0b0000, 0b0100, 0b001>;
+def : ROSysReg<"ID_AA64DFR0_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b000>;
+def : ROSysReg<"ID_AA64DFR1_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b001>;
+def : ROSysReg<"ID_AA64AFR0_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b100>;
+def : ROSysReg<"ID_AA64AFR1_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b101>;
+def : ROSysReg<"ID_AA64ISAR0_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b000>;
+def : ROSysReg<"ID_AA64ISAR1_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b001>;
+def : ROSysReg<"ID_AA64MMFR0_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b000>;
+def : ROSysReg<"ID_AA64MMFR1_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b001>;
+def : ROSysReg<"ID_AA64MMFR2_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b010> {
+ let Requires = [{ {AArch64::HasV8_2aOps} }];
+}
+def : ROSysReg<"MVFR0_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b000>;
+def : ROSysReg<"MVFR1_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b001>;
+def : ROSysReg<"MVFR2_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b010>;
+def : ROSysReg<"RVBAR_EL1", 0b11, 0b000, 0b1100, 0b0000, 0b001>;
+def : ROSysReg<"RVBAR_EL2", 0b11, 0b100, 0b1100, 0b0000, 0b001>;
+def : ROSysReg<"RVBAR_EL3", 0b11, 0b110, 0b1100, 0b0000, 0b001>;
+def : ROSysReg<"ISR_EL1", 0b11, 0b000, 0b1100, 0b0001, 0b000>;
+def : ROSysReg<"CNTPCT_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b001>;
+def : ROSysReg<"CNTVCT_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b010>;
+def : ROSysReg<"ID_MMFR4_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b110>;
+
+// Trace registers
+// Op0 Op1 CRn CRm Op2
+def : ROSysReg<"TRCSTATR", 0b10, 0b001, 0b0000, 0b0011, 0b000>;
+def : ROSysReg<"TRCIDR8", 0b10, 0b001, 0b0000, 0b0000, 0b110>;
+def : ROSysReg<"TRCIDR9", 0b10, 0b001, 0b0000, 0b0001, 0b110>;
+def : ROSysReg<"TRCIDR10", 0b10, 0b001, 0b0000, 0b0010, 0b110>;
+def : ROSysReg<"TRCIDR11", 0b10, 0b001, 0b0000, 0b0011, 0b110>;
+def : ROSysReg<"TRCIDR12", 0b10, 0b001, 0b0000, 0b0100, 0b110>;
+def : ROSysReg<"TRCIDR13", 0b10, 0b001, 0b0000, 0b0101, 0b110>;
+def : ROSysReg<"TRCIDR0", 0b10, 0b001, 0b0000, 0b1000, 0b111>;
+def : ROSysReg<"TRCIDR1", 0b10, 0b001, 0b0000, 0b1001, 0b111>;
+def : ROSysReg<"TRCIDR2", 0b10, 0b001, 0b0000, 0b1010, 0b111>;
+def : ROSysReg<"TRCIDR3", 0b10, 0b001, 0b0000, 0b1011, 0b111>;
+def : ROSysReg<"TRCIDR4", 0b10, 0b001, 0b0000, 0b1100, 0b111>;
+def : ROSysReg<"TRCIDR5", 0b10, 0b001, 0b0000, 0b1101, 0b111>;
+def : ROSysReg<"TRCIDR6", 0b10, 0b001, 0b0000, 0b1110, 0b111>;
+def : ROSysReg<"TRCIDR7", 0b10, 0b001, 0b0000, 0b1111, 0b111>;
+def : ROSysReg<"TRCOSLSR", 0b10, 0b001, 0b0001, 0b0001, 0b100>;
+def : ROSysReg<"TRCPDSR", 0b10, 0b001, 0b0001, 0b0101, 0b100>;
+def : ROSysReg<"TRCDEVAFF0", 0b10, 0b001, 0b0111, 0b1010, 0b110>;
+def : ROSysReg<"TRCDEVAFF1", 0b10, 0b001, 0b0111, 0b1011, 0b110>;
+def : ROSysReg<"TRCLSR", 0b10, 0b001, 0b0111, 0b1101, 0b110>;
+def : ROSysReg<"TRCAUTHSTATUS", 0b10, 0b001, 0b0111, 0b1110, 0b110>;
+def : ROSysReg<"TRCDEVARCH", 0b10, 0b001, 0b0111, 0b1111, 0b110>;
+def : ROSysReg<"TRCDEVID", 0b10, 0b001, 0b0111, 0b0010, 0b111>;
+def : ROSysReg<"TRCDEVTYPE", 0b10, 0b001, 0b0111, 0b0011, 0b111>;
+def : ROSysReg<"TRCPIDR4", 0b10, 0b001, 0b0111, 0b0100, 0b111>;
+def : ROSysReg<"TRCPIDR5", 0b10, 0b001, 0b0111, 0b0101, 0b111>;
+def : ROSysReg<"TRCPIDR6", 0b10, 0b001, 0b0111, 0b0110, 0b111>;
+def : ROSysReg<"TRCPIDR7", 0b10, 0b001, 0b0111, 0b0111, 0b111>;
+def : ROSysReg<"TRCPIDR0", 0b10, 0b001, 0b0111, 0b1000, 0b111>;
+def : ROSysReg<"TRCPIDR1", 0b10, 0b001, 0b0111, 0b1001, 0b111>;
+def : ROSysReg<"TRCPIDR2", 0b10, 0b001, 0b0111, 0b1010, 0b111>;
+def : ROSysReg<"TRCPIDR3", 0b10, 0b001, 0b0111, 0b1011, 0b111>;
+def : ROSysReg<"TRCCIDR0", 0b10, 0b001, 0b0111, 0b1100, 0b111>;
+def : ROSysReg<"TRCCIDR1", 0b10, 0b001, 0b0111, 0b1101, 0b111>;
+def : ROSysReg<"TRCCIDR2", 0b10, 0b001, 0b0111, 0b1110, 0b111>;
+def : ROSysReg<"TRCCIDR3", 0b10, 0b001, 0b0111, 0b1111, 0b111>;
+
+// GICv3 registers
+// Op0 Op1 CRn CRm Op2
+def : ROSysReg<"ICC_IAR1_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b000>;
+def : ROSysReg<"ICC_IAR0_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b000>;
+def : ROSysReg<"ICC_HPPIR1_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b010>;
+def : ROSysReg<"ICC_HPPIR0_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b010>;
+def : ROSysReg<"ICC_RPR_EL1", 0b11, 0b000, 0b1100, 0b1011, 0b011>;
+def : ROSysReg<"ICH_VTR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b001>;
+def : ROSysReg<"ICH_EISR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b011>;
+def : ROSysReg<"ICH_ELRSR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b101>;
+
+// SVE control registers
+// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureSVE} }] in {
+def : ROSysReg<"ID_AA64ZFR0_EL1", 0b11, 0b000, 0b0000, 0b0100, 0b100>;
+}
+
+// v8.1a "Limited Ordering Regions" extension-specific system register
+// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::HasV8_1aOps} }] in
+def : ROSysReg<"LORID_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b111>;
+
+// v8.2a "RAS extension" registers
+// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureRAS} }] in {
+def : ROSysReg<"ERRIDR_EL1", 0b11, 0b000, 0b0101, 0b0011, 0b000>;
+def : ROSysReg<"ERXFR_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b000>;
+}
+
+//===----------------------
+// Write-only regs
+//===----------------------
+
+// Op0 Op1 CRn CRm Op2
+def : WOSysReg<"DBGDTRTX_EL0", 0b10, 0b011, 0b0000, 0b0101, 0b000>;
+def : WOSysReg<"OSLAR_EL1", 0b10, 0b000, 0b0001, 0b0000, 0b100>;
+def : WOSysReg<"PMSWINC_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b100>;
+
+// Trace Registers
+// Op0 Op1 CRn CRm Op2
+def : WOSysReg<"TRCOSLAR", 0b10, 0b001, 0b0001, 0b0000, 0b100>;
+def : WOSysReg<"TRCLAR", 0b10, 0b001, 0b0111, 0b1100, 0b110>;
+
+// GICv3 registers
+// Op0 Op1 CRn CRm Op2
+def : WOSysReg<"ICC_EOIR1_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b001>;
+def : WOSysReg<"ICC_EOIR0_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b001>;
+def : WOSysReg<"ICC_DIR_EL1", 0b11, 0b000, 0b1100, 0b1011, 0b001>;
+def : WOSysReg<"ICC_SGI1R_EL1", 0b11, 0b000, 0b1100, 0b1011, 0b101>;
+def : WOSysReg<"ICC_ASGI1R_EL1", 0b11, 0b000, 0b1100, 0b1011, 0b110>;
+def : WOSysReg<"ICC_SGI0R_EL1", 0b11, 0b000, 0b1100, 0b1011, 0b111>;
+
+//===----------------------
+// Read-write regs
+//===----------------------
+
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"OSDTRRX_EL1", 0b10, 0b000, 0b0000, 0b0000, 0b010>;
+def : RWSysReg<"OSDTRTX_EL1", 0b10, 0b000, 0b0000, 0b0011, 0b010>;
+def : RWSysReg<"TEECR32_EL1", 0b10, 0b010, 0b0000, 0b0000, 0b000>;
+def : RWSysReg<"MDCCINT_EL1", 0b10, 0b000, 0b0000, 0b0010, 0b000>;
+def : RWSysReg<"MDSCR_EL1", 0b10, 0b000, 0b0000, 0b0010, 0b010>;
+def : RWSysReg<"DBGDTR_EL0", 0b10, 0b011, 0b0000, 0b0100, 0b000>;
+def : RWSysReg<"OSECCR_EL1", 0b10, 0b000, 0b0000, 0b0110, 0b010>;
+def : RWSysReg<"DBGVCR32_EL2", 0b10, 0b100, 0b0000, 0b0111, 0b000>;
+def : RWSysReg<"DBGBVR0_EL1", 0b10, 0b000, 0b0000, 0b0000, 0b100>;
+def : RWSysReg<"DBGBVR1_EL1", 0b10, 0b000, 0b0000, 0b0001, 0b100>;
+def : RWSysReg<"DBGBVR2_EL1", 0b10, 0b000, 0b0000, 0b0010, 0b100>;
+def : RWSysReg<"DBGBVR3_EL1", 0b10, 0b000, 0b0000, 0b0011, 0b100>;
+def : RWSysReg<"DBGBVR4_EL1", 0b10, 0b000, 0b0000, 0b0100, 0b100>;
+def : RWSysReg<"DBGBVR5_EL1", 0b10, 0b000, 0b0000, 0b0101, 0b100>;
+def : RWSysReg<"DBGBVR6_EL1", 0b10, 0b000, 0b0000, 0b0110, 0b100>;
+def : RWSysReg<"DBGBVR7_EL1", 0b10, 0b000, 0b0000, 0b0111, 0b100>;
+def : RWSysReg<"DBGBVR8_EL1", 0b10, 0b000, 0b0000, 0b1000, 0b100>;
+def : RWSysReg<"DBGBVR9_EL1", 0b10, 0b000, 0b0000, 0b1001, 0b100>;
+def : RWSysReg<"DBGBVR10_EL1", 0b10, 0b000, 0b0000, 0b1010, 0b100>;
+def : RWSysReg<"DBGBVR11_EL1", 0b10, 0b000, 0b0000, 0b1011, 0b100>;
+def : RWSysReg<"DBGBVR12_EL1", 0b10, 0b000, 0b0000, 0b1100, 0b100>;
+def : RWSysReg<"DBGBVR13_EL1", 0b10, 0b000, 0b0000, 0b1101, 0b100>;
+def : RWSysReg<"DBGBVR14_EL1", 0b10, 0b000, 0b0000, 0b1110, 0b100>;
+def : RWSysReg<"DBGBVR15_EL1", 0b10, 0b000, 0b0000, 0b1111, 0b100>;
+def : RWSysReg<"DBGBCR0_EL1", 0b10, 0b000, 0b0000, 0b0000, 0b101>;
+def : RWSysReg<"DBGBCR1_EL1", 0b10, 0b000, 0b0000, 0b0001, 0b101>;
+def : RWSysReg<"DBGBCR2_EL1", 0b10, 0b000, 0b0000, 0b0010, 0b101>;
+def : RWSysReg<"DBGBCR3_EL1", 0b10, 0b000, 0b0000, 0b0011, 0b101>;
+def : RWSysReg<"DBGBCR4_EL1", 0b10, 0b000, 0b0000, 0b0100, 0b101>;
+def : RWSysReg<"DBGBCR5_EL1", 0b10, 0b000, 0b0000, 0b0101, 0b101>;
+def : RWSysReg<"DBGBCR6_EL1", 0b10, 0b000, 0b0000, 0b0110, 0b101>;
+def : RWSysReg<"DBGBCR7_EL1", 0b10, 0b000, 0b0000, 0b0111, 0b101>;
+def : RWSysReg<"DBGBCR8_EL1", 0b10, 0b000, 0b0000, 0b1000, 0b101>;
+def : RWSysReg<"DBGBCR9_EL1", 0b10, 0b000, 0b0000, 0b1001, 0b101>;
+def : RWSysReg<"DBGBCR10_EL1", 0b10, 0b000, 0b0000, 0b1010, 0b101>;
+def : RWSysReg<"DBGBCR11_EL1", 0b10, 0b000, 0b0000, 0b1011, 0b101>;
+def : RWSysReg<"DBGBCR12_EL1", 0b10, 0b000, 0b0000, 0b1100, 0b101>;
+def : RWSysReg<"DBGBCR13_EL1", 0b10, 0b000, 0b0000, 0b1101, 0b101>;
+def : RWSysReg<"DBGBCR14_EL1", 0b10, 0b000, 0b0000, 0b1110, 0b101>;
+def : RWSysReg<"DBGBCR15_EL1", 0b10, 0b000, 0b0000, 0b1111, 0b101>;
+def : RWSysReg<"DBGWVR0_EL1", 0b10, 0b000, 0b0000, 0b0000, 0b110>;
+def : RWSysReg<"DBGWVR1_EL1", 0b10, 0b000, 0b0000, 0b0001, 0b110>;
+def : RWSysReg<"DBGWVR2_EL1", 0b10, 0b000, 0b0000, 0b0010, 0b110>;
+def : RWSysReg<"DBGWVR3_EL1", 0b10, 0b000, 0b0000, 0b0011, 0b110>;
+def : RWSysReg<"DBGWVR4_EL1", 0b10, 0b000, 0b0000, 0b0100, 0b110>;
+def : RWSysReg<"DBGWVR5_EL1", 0b10, 0b000, 0b0000, 0b0101, 0b110>;
+def : RWSysReg<"DBGWVR6_EL1", 0b10, 0b000, 0b0000, 0b0110, 0b110>;
+def : RWSysReg<"DBGWVR7_EL1", 0b10, 0b000, 0b0000, 0b0111, 0b110>;
+def : RWSysReg<"DBGWVR8_EL1", 0b10, 0b000, 0b0000, 0b1000, 0b110>;
+def : RWSysReg<"DBGWVR9_EL1", 0b10, 0b000, 0b0000, 0b1001, 0b110>;
+def : RWSysReg<"DBGWVR10_EL1", 0b10, 0b000, 0b0000, 0b1010, 0b110>;
+def : RWSysReg<"DBGWVR11_EL1", 0b10, 0b000, 0b0000, 0b1011, 0b110>;
+def : RWSysReg<"DBGWVR12_EL1", 0b10, 0b000, 0b0000, 0b1100, 0b110>;
+def : RWSysReg<"DBGWVR13_EL1", 0b10, 0b000, 0b0000, 0b1101, 0b110>;
+def : RWSysReg<"DBGWVR14_EL1", 0b10, 0b000, 0b0000, 0b1110, 0b110>;
+def : RWSysReg<"DBGWVR15_EL1", 0b10, 0b000, 0b0000, 0b1111, 0b110>;
+def : RWSysReg<"DBGWCR0_EL1", 0b10, 0b000, 0b0000, 0b0000, 0b111>;
+def : RWSysReg<"DBGWCR1_EL1", 0b10, 0b000, 0b0000, 0b0001, 0b111>;
+def : RWSysReg<"DBGWCR2_EL1", 0b10, 0b000, 0b0000, 0b0010, 0b111>;
+def : RWSysReg<"DBGWCR3_EL1", 0b10, 0b000, 0b0000, 0b0011, 0b111>;
+def : RWSysReg<"DBGWCR4_EL1", 0b10, 0b000, 0b0000, 0b0100, 0b111>;
+def : RWSysReg<"DBGWCR5_EL1", 0b10, 0b000, 0b0000, 0b0101, 0b111>;
+def : RWSysReg<"DBGWCR6_EL1", 0b10, 0b000, 0b0000, 0b0110, 0b111>;
+def : RWSysReg<"DBGWCR7_EL1", 0b10, 0b000, 0b0000, 0b0111, 0b111>;
+def : RWSysReg<"DBGWCR8_EL1", 0b10, 0b000, 0b0000, 0b1000, 0b111>;
+def : RWSysReg<"DBGWCR9_EL1", 0b10, 0b000, 0b0000, 0b1001, 0b111>;
+def : RWSysReg<"DBGWCR10_EL1", 0b10, 0b000, 0b0000, 0b1010, 0b111>;
+def : RWSysReg<"DBGWCR11_EL1", 0b10, 0b000, 0b0000, 0b1011, 0b111>;
+def : RWSysReg<"DBGWCR12_EL1", 0b10, 0b000, 0b0000, 0b1100, 0b111>;
+def : RWSysReg<"DBGWCR13_EL1", 0b10, 0b000, 0b0000, 0b1101, 0b111>;
+def : RWSysReg<"DBGWCR14_EL1", 0b10, 0b000, 0b0000, 0b1110, 0b111>;
+def : RWSysReg<"DBGWCR15_EL1", 0b10, 0b000, 0b0000, 0b1111, 0b111>;
+def : RWSysReg<"TEEHBR32_EL1", 0b10, 0b010, 0b0001, 0b0000, 0b000>;
+def : RWSysReg<"OSDLR_EL1", 0b10, 0b000, 0b0001, 0b0011, 0b100>;
+def : RWSysReg<"DBGPRCR_EL1", 0b10, 0b000, 0b0001, 0b0100, 0b100>;
+def : RWSysReg<"DBGCLAIMSET_EL1", 0b10, 0b000, 0b0111, 0b1000, 0b110>;
+def : RWSysReg<"DBGCLAIMCLR_EL1", 0b10, 0b000, 0b0111, 0b1001, 0b110>;
+def : RWSysReg<"CSSELR_EL1", 0b11, 0b010, 0b0000, 0b0000, 0b000>;
+def : RWSysReg<"VPIDR_EL2", 0b11, 0b100, 0b0000, 0b0000, 0b000>;
+def : RWSysReg<"VMPIDR_EL2", 0b11, 0b100, 0b0000, 0b0000, 0b101>;
+def : RWSysReg<"CPACR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b010>;
+def : RWSysReg<"SCTLR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b000>;
+def : RWSysReg<"SCTLR_EL2", 0b11, 0b100, 0b0001, 0b0000, 0b000>;
+def : RWSysReg<"SCTLR_EL3", 0b11, 0b110, 0b0001, 0b0000, 0b000>;
+def : RWSysReg<"ACTLR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b001>;
+def : RWSysReg<"ACTLR_EL2", 0b11, 0b100, 0b0001, 0b0000, 0b001>;
+def : RWSysReg<"ACTLR_EL3", 0b11, 0b110, 0b0001, 0b0000, 0b001>;
+def : RWSysReg<"HCR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b000>;
+def : RWSysReg<"SCR_EL3", 0b11, 0b110, 0b0001, 0b0001, 0b000>;
+def : RWSysReg<"MDCR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b001>;
+def : RWSysReg<"SDER32_EL3", 0b11, 0b110, 0b0001, 0b0001, 0b001>;
+def : RWSysReg<"CPTR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b010>;
+def : RWSysReg<"CPTR_EL3", 0b11, 0b110, 0b0001, 0b0001, 0b010>;
+def : RWSysReg<"HSTR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b011>;
+def : RWSysReg<"HACR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b111>;
+def : RWSysReg<"MDCR_EL3", 0b11, 0b110, 0b0001, 0b0011, 0b001>;
+def : RWSysReg<"TTBR0_EL1", 0b11, 0b000, 0b0010, 0b0000, 0b000>;
+def : RWSysReg<"TTBR0_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b000>;
+def : RWSysReg<"TTBR0_EL3", 0b11, 0b110, 0b0010, 0b0000, 0b000>;
+def : RWSysReg<"TTBR1_EL1", 0b11, 0b000, 0b0010, 0b0000, 0b001>;
+def : RWSysReg<"TCR_EL1", 0b11, 0b000, 0b0010, 0b0000, 0b010>;
+def : RWSysReg<"TCR_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b010>;
+def : RWSysReg<"TCR_EL3", 0b11, 0b110, 0b0010, 0b0000, 0b010>;
+def : RWSysReg<"VTTBR_EL2", 0b11, 0b100, 0b0010, 0b0001, 0b000>;
+def : RWSysReg<"VTCR_EL2", 0b11, 0b100, 0b0010, 0b0001, 0b010>;
+def : RWSysReg<"DACR32_EL2", 0b11, 0b100, 0b0011, 0b0000, 0b000>;
+def : RWSysReg<"SPSR_EL1", 0b11, 0b000, 0b0100, 0b0000, 0b000>;
+def : RWSysReg<"SPSR_EL2", 0b11, 0b100, 0b0100, 0b0000, 0b000>;
+def : RWSysReg<"SPSR_EL3", 0b11, 0b110, 0b0100, 0b0000, 0b000>;
+def : RWSysReg<"ELR_EL1", 0b11, 0b000, 0b0100, 0b0000, 0b001>;
+def : RWSysReg<"ELR_EL2", 0b11, 0b100, 0b0100, 0b0000, 0b001>;
+def : RWSysReg<"ELR_EL3", 0b11, 0b110, 0b0100, 0b0000, 0b001>;
+def : RWSysReg<"SP_EL0", 0b11, 0b000, 0b0100, 0b0001, 0b000>;
+def : RWSysReg<"SP_EL1", 0b11, 0b100, 0b0100, 0b0001, 0b000>;
+def : RWSysReg<"SP_EL2", 0b11, 0b110, 0b0100, 0b0001, 0b000>;
+def : RWSysReg<"SPSel", 0b11, 0b000, 0b0100, 0b0010, 0b000>;
+def : RWSysReg<"NZCV", 0b11, 0b011, 0b0100, 0b0010, 0b000>;
+def : RWSysReg<"DAIF", 0b11, 0b011, 0b0100, 0b0010, 0b001>;
+def : RWSysReg<"CurrentEL", 0b11, 0b000, 0b0100, 0b0010, 0b010>;
+def : RWSysReg<"SPSR_irq", 0b11, 0b100, 0b0100, 0b0011, 0b000>;
+def : RWSysReg<"SPSR_abt", 0b11, 0b100, 0b0100, 0b0011, 0b001>;
+def : RWSysReg<"SPSR_und", 0b11, 0b100, 0b0100, 0b0011, 0b010>;
+def : RWSysReg<"SPSR_fiq", 0b11, 0b100, 0b0100, 0b0011, 0b011>;
+def : RWSysReg<"FPCR", 0b11, 0b011, 0b0100, 0b0100, 0b000>;
+def : RWSysReg<"FPSR", 0b11, 0b011, 0b0100, 0b0100, 0b001>;
+def : RWSysReg<"DSPSR_EL0", 0b11, 0b011, 0b0100, 0b0101, 0b000>;
+def : RWSysReg<"DLR_EL0", 0b11, 0b011, 0b0100, 0b0101, 0b001>;
+def : RWSysReg<"IFSR32_EL2", 0b11, 0b100, 0b0101, 0b0000, 0b001>;
+def : RWSysReg<"AFSR0_EL1", 0b11, 0b000, 0b0101, 0b0001, 0b000>;
+def : RWSysReg<"AFSR0_EL2", 0b11, 0b100, 0b0101, 0b0001, 0b000>;
+def : RWSysReg<"AFSR0_EL3", 0b11, 0b110, 0b0101, 0b0001, 0b000>;
+def : RWSysReg<"AFSR1_EL1", 0b11, 0b000, 0b0101, 0b0001, 0b001>;
+def : RWSysReg<"AFSR1_EL2", 0b11, 0b100, 0b0101, 0b0001, 0b001>;
+def : RWSysReg<"AFSR1_EL3", 0b11, 0b110, 0b0101, 0b0001, 0b001>;
+def : RWSysReg<"ESR_EL1", 0b11, 0b000, 0b0101, 0b0010, 0b000>;
+def : RWSysReg<"ESR_EL2", 0b11, 0b100, 0b0101, 0b0010, 0b000>;
+def : RWSysReg<"ESR_EL3", 0b11, 0b110, 0b0101, 0b0010, 0b000>;
+def : RWSysReg<"FPEXC32_EL2", 0b11, 0b100, 0b0101, 0b0011, 0b000>;
+def : RWSysReg<"FAR_EL1", 0b11, 0b000, 0b0110, 0b0000, 0b000>;
+def : RWSysReg<"FAR_EL2", 0b11, 0b100, 0b0110, 0b0000, 0b000>;
+def : RWSysReg<"FAR_EL3", 0b11, 0b110, 0b0110, 0b0000, 0b000>;
+def : RWSysReg<"HPFAR_EL2", 0b11, 0b100, 0b0110, 0b0000, 0b100>;
+def : RWSysReg<"PAR_EL1", 0b11, 0b000, 0b0111, 0b0100, 0b000>;
+def : RWSysReg<"PMCR_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b000>;
+def : RWSysReg<"PMCNTENSET_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b001>;
+def : RWSysReg<"PMCNTENCLR_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b010>;
+def : RWSysReg<"PMOVSCLR_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b011>;
+def : RWSysReg<"PMSELR_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b101>;
+def : RWSysReg<"PMCCNTR_EL0", 0b11, 0b011, 0b1001, 0b1101, 0b000>;
+def : RWSysReg<"PMXEVTYPER_EL0", 0b11, 0b011, 0b1001, 0b1101, 0b001>;
+def : RWSysReg<"PMXEVCNTR_EL0", 0b11, 0b011, 0b1001, 0b1101, 0b010>;
+def : RWSysReg<"PMUSERENR_EL0", 0b11, 0b011, 0b1001, 0b1110, 0b000>;
+def : RWSysReg<"PMINTENSET_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b001>;
+def : RWSysReg<"PMINTENCLR_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b010>;
+def : RWSysReg<"PMOVSSET_EL0", 0b11, 0b011, 0b1001, 0b1110, 0b011>;
+def : RWSysReg<"MAIR_EL1", 0b11, 0b000, 0b1010, 0b0010, 0b000>;
+def : RWSysReg<"MAIR_EL2", 0b11, 0b100, 0b1010, 0b0010, 0b000>;
+def : RWSysReg<"MAIR_EL3", 0b11, 0b110, 0b1010, 0b0010, 0b000>;
+def : RWSysReg<"AMAIR_EL1", 0b11, 0b000, 0b1010, 0b0011, 0b000>;
+def : RWSysReg<"AMAIR_EL2", 0b11, 0b100, 0b1010, 0b0011, 0b000>;
+def : RWSysReg<"AMAIR_EL3", 0b11, 0b110, 0b1010, 0b0011, 0b000>;
+def : RWSysReg<"VBAR_EL1", 0b11, 0b000, 0b1100, 0b0000, 0b000>;
+def : RWSysReg<"VBAR_EL2", 0b11, 0b100, 0b1100, 0b0000, 0b000>;
+def : RWSysReg<"VBAR_EL3", 0b11, 0b110, 0b1100, 0b0000, 0b000>;
+def : RWSysReg<"RMR_EL1", 0b11, 0b000, 0b1100, 0b0000, 0b010>;
+def : RWSysReg<"RMR_EL2", 0b11, 0b100, 0b1100, 0b0000, 0b010>;
+def : RWSysReg<"RMR_EL3", 0b11, 0b110, 0b1100, 0b0000, 0b010>;
+def : RWSysReg<"CONTEXTIDR_EL1", 0b11, 0b000, 0b1101, 0b0000, 0b001>;
+def : RWSysReg<"TPIDR_EL0", 0b11, 0b011, 0b1101, 0b0000, 0b010>;
+def : RWSysReg<"TPIDR_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b010>;
+def : RWSysReg<"TPIDR_EL3", 0b11, 0b110, 0b1101, 0b0000, 0b010>;
+def : RWSysReg<"TPIDRRO_EL0", 0b11, 0b011, 0b1101, 0b0000, 0b011>;
+def : RWSysReg<"TPIDR_EL1", 0b11, 0b000, 0b1101, 0b0000, 0b100>;
+def : RWSysReg<"CNTFRQ_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b000>;
+def : RWSysReg<"CNTVOFF_EL2", 0b11, 0b100, 0b1110, 0b0000, 0b011>;
+def : RWSysReg<"CNTKCTL_EL1", 0b11, 0b000, 0b1110, 0b0001, 0b000>;
+def : RWSysReg<"CNTHCTL_EL2", 0b11, 0b100, 0b1110, 0b0001, 0b000>;
+def : RWSysReg<"CNTP_TVAL_EL0", 0b11, 0b011, 0b1110, 0b0010, 0b000>;
+def : RWSysReg<"CNTHP_TVAL_EL2", 0b11, 0b100, 0b1110, 0b0010, 0b000>;
+def : RWSysReg<"CNTPS_TVAL_EL1", 0b11, 0b111, 0b1110, 0b0010, 0b000>;
+def : RWSysReg<"CNTP_CTL_EL0", 0b11, 0b011, 0b1110, 0b0010, 0b001>;
+def : RWSysReg<"CNTHP_CTL_EL2", 0b11, 0b100, 0b1110, 0b0010, 0b001>;
+def : RWSysReg<"CNTPS_CTL_EL1", 0b11, 0b111, 0b1110, 0b0010, 0b001>;
+def : RWSysReg<"CNTP_CVAL_EL0", 0b11, 0b011, 0b1110, 0b0010, 0b010>;
+def : RWSysReg<"CNTHP_CVAL_EL2", 0b11, 0b100, 0b1110, 0b0010, 0b010>;
+def : RWSysReg<"CNTPS_CVAL_EL1", 0b11, 0b111, 0b1110, 0b0010, 0b010>;
+def : RWSysReg<"CNTV_TVAL_EL0", 0b11, 0b011, 0b1110, 0b0011, 0b000>;
+def : RWSysReg<"CNTV_CTL_EL0", 0b11, 0b011, 0b1110, 0b0011, 0b001>;
+def : RWSysReg<"CNTV_CVAL_EL0", 0b11, 0b011, 0b1110, 0b0011, 0b010>;
+def : RWSysReg<"PMEVCNTR0_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b000>;
+def : RWSysReg<"PMEVCNTR1_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b001>;
+def : RWSysReg<"PMEVCNTR2_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b010>;
+def : RWSysReg<"PMEVCNTR3_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b011>;
+def : RWSysReg<"PMEVCNTR4_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b100>;
+def : RWSysReg<"PMEVCNTR5_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b101>;
+def : RWSysReg<"PMEVCNTR6_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b110>;
+def : RWSysReg<"PMEVCNTR7_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b111>;
+def : RWSysReg<"PMEVCNTR8_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b000>;
+def : RWSysReg<"PMEVCNTR9_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b001>;
+def : RWSysReg<"PMEVCNTR10_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b010>;
+def : RWSysReg<"PMEVCNTR11_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b011>;
+def : RWSysReg<"PMEVCNTR12_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b100>;
+def : RWSysReg<"PMEVCNTR13_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b101>;
+def : RWSysReg<"PMEVCNTR14_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b110>;
+def : RWSysReg<"PMEVCNTR15_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b111>;
+def : RWSysReg<"PMEVCNTR16_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b000>;
+def : RWSysReg<"PMEVCNTR17_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b001>;
+def : RWSysReg<"PMEVCNTR18_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b010>;
+def : RWSysReg<"PMEVCNTR19_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b011>;
+def : RWSysReg<"PMEVCNTR20_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b100>;
+def : RWSysReg<"PMEVCNTR21_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b101>;
+def : RWSysReg<"PMEVCNTR22_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b110>;
+def : RWSysReg<"PMEVCNTR23_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b111>;
+def : RWSysReg<"PMEVCNTR24_EL0", 0b11, 0b011, 0b1110, 0b1011, 0b000>;
+def : RWSysReg<"PMEVCNTR25_EL0", 0b11, 0b011, 0b1110, 0b1011, 0b001>;
+def : RWSysReg<"PMEVCNTR26_EL0", 0b11, 0b011, 0b1110, 0b1011, 0b010>;
+def : RWSysReg<"PMEVCNTR27_EL0", 0b11, 0b011, 0b1110, 0b1011, 0b011>;
+def : RWSysReg<"PMEVCNTR28_EL0", 0b11, 0b011, 0b1110, 0b1011, 0b100>;
+def : RWSysReg<"PMEVCNTR29_EL0", 0b11, 0b011, 0b1110, 0b1011, 0b101>;
+def : RWSysReg<"PMEVCNTR30_EL0", 0b11, 0b011, 0b1110, 0b1011, 0b110>;
+def : RWSysReg<"PMCCFILTR_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b111>;
+def : RWSysReg<"PMEVTYPER0_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b000>;
+def : RWSysReg<"PMEVTYPER1_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b001>;
+def : RWSysReg<"PMEVTYPER2_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b010>;
+def : RWSysReg<"PMEVTYPER3_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b011>;
+def : RWSysReg<"PMEVTYPER4_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b100>;
+def : RWSysReg<"PMEVTYPER5_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b101>;
+def : RWSysReg<"PMEVTYPER6_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b110>;
+def : RWSysReg<"PMEVTYPER7_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b111>;
+def : RWSysReg<"PMEVTYPER8_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b000>;
+def : RWSysReg<"PMEVTYPER9_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b001>;
+def : RWSysReg<"PMEVTYPER10_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b010>;
+def : RWSysReg<"PMEVTYPER11_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b011>;
+def : RWSysReg<"PMEVTYPER12_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b100>;
+def : RWSysReg<"PMEVTYPER13_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b101>;
+def : RWSysReg<"PMEVTYPER14_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b110>;
+def : RWSysReg<"PMEVTYPER15_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b111>;
+def : RWSysReg<"PMEVTYPER16_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b000>;
+def : RWSysReg<"PMEVTYPER17_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b001>;
+def : RWSysReg<"PMEVTYPER18_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b010>;
+def : RWSysReg<"PMEVTYPER19_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b011>;
+def : RWSysReg<"PMEVTYPER20_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b100>;
+def : RWSysReg<"PMEVTYPER21_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b101>;
+def : RWSysReg<"PMEVTYPER22_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b110>;
+def : RWSysReg<"PMEVTYPER23_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b111>;
+def : RWSysReg<"PMEVTYPER24_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b000>;
+def : RWSysReg<"PMEVTYPER25_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b001>;
+def : RWSysReg<"PMEVTYPER26_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b010>;
+def : RWSysReg<"PMEVTYPER27_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b011>;
+def : RWSysReg<"PMEVTYPER28_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b100>;
+def : RWSysReg<"PMEVTYPER29_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b101>;
+def : RWSysReg<"PMEVTYPER30_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b110>;
+
+// Trace registers
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"TRCPRGCTLR", 0b10, 0b001, 0b0000, 0b0001, 0b000>;
+def : RWSysReg<"TRCPROCSELR", 0b10, 0b001, 0b0000, 0b0010, 0b000>;
+def : RWSysReg<"TRCCONFIGR", 0b10, 0b001, 0b0000, 0b0100, 0b000>;
+def : RWSysReg<"TRCAUXCTLR", 0b10, 0b001, 0b0000, 0b0110, 0b000>;
+def : RWSysReg<"TRCEVENTCTL0R", 0b10, 0b001, 0b0000, 0b1000, 0b000>;
+def : RWSysReg<"TRCEVENTCTL1R", 0b10, 0b001, 0b0000, 0b1001, 0b000>;
+def : RWSysReg<"TRCSTALLCTLR", 0b10, 0b001, 0b0000, 0b1011, 0b000>;
+def : RWSysReg<"TRCTSCTLR", 0b10, 0b001, 0b0000, 0b1100, 0b000>;
+def : RWSysReg<"TRCSYNCPR", 0b10, 0b001, 0b0000, 0b1101, 0b000>;
+def : RWSysReg<"TRCCCCTLR", 0b10, 0b001, 0b0000, 0b1110, 0b000>;
+def : RWSysReg<"TRCBBCTLR", 0b10, 0b001, 0b0000, 0b1111, 0b000>;
+def : RWSysReg<"TRCTRACEIDR", 0b10, 0b001, 0b0000, 0b0000, 0b001>;
+def : RWSysReg<"TRCQCTLR", 0b10, 0b001, 0b0000, 0b0001, 0b001>;
+def : RWSysReg<"TRCVICTLR", 0b10, 0b001, 0b0000, 0b0000, 0b010>;
+def : RWSysReg<"TRCVIIECTLR", 0b10, 0b001, 0b0000, 0b0001, 0b010>;
+def : RWSysReg<"TRCVISSCTLR", 0b10, 0b001, 0b0000, 0b0010, 0b010>;
+def : RWSysReg<"TRCVIPCSSCTLR", 0b10, 0b001, 0b0000, 0b0011, 0b010>;
+def : RWSysReg<"TRCVDCTLR", 0b10, 0b001, 0b0000, 0b1000, 0b010>;
+def : RWSysReg<"TRCVDSACCTLR", 0b10, 0b001, 0b0000, 0b1001, 0b010>;
+def : RWSysReg<"TRCVDARCCTLR", 0b10, 0b001, 0b0000, 0b1010, 0b010>;
+def : RWSysReg<"TRCSEQEVR0", 0b10, 0b001, 0b0000, 0b0000, 0b100>;
+def : RWSysReg<"TRCSEQEVR1", 0b10, 0b001, 0b0000, 0b0001, 0b100>;
+def : RWSysReg<"TRCSEQEVR2", 0b10, 0b001, 0b0000, 0b0010, 0b100>;
+def : RWSysReg<"TRCSEQRSTEVR", 0b10, 0b001, 0b0000, 0b0110, 0b100>;
+def : RWSysReg<"TRCSEQSTR", 0b10, 0b001, 0b0000, 0b0111, 0b100>;
+def : RWSysReg<"TRCEXTINSELR", 0b10, 0b001, 0b0000, 0b1000, 0b100>;
+def : RWSysReg<"TRCCNTRLDVR0", 0b10, 0b001, 0b0000, 0b0000, 0b101>;
+def : RWSysReg<"TRCCNTRLDVR1", 0b10, 0b001, 0b0000, 0b0001, 0b101>;
+def : RWSysReg<"TRCCNTRLDVR2", 0b10, 0b001, 0b0000, 0b0010, 0b101>;
+def : RWSysReg<"TRCCNTRLDVR3", 0b10, 0b001, 0b0000, 0b0011, 0b101>;
+def : RWSysReg<"TRCCNTCTLR0", 0b10, 0b001, 0b0000, 0b0100, 0b101>;
+def : RWSysReg<"TRCCNTCTLR1", 0b10, 0b001, 0b0000, 0b0101, 0b101>;
+def : RWSysReg<"TRCCNTCTLR2", 0b10, 0b001, 0b0000, 0b0110, 0b101>;
+def : RWSysReg<"TRCCNTCTLR3", 0b10, 0b001, 0b0000, 0b0111, 0b101>;
+def : RWSysReg<"TRCCNTVR0", 0b10, 0b001, 0b0000, 0b1000, 0b101>;
+def : RWSysReg<"TRCCNTVR1", 0b10, 0b001, 0b0000, 0b1001, 0b101>;
+def : RWSysReg<"TRCCNTVR2", 0b10, 0b001, 0b0000, 0b1010, 0b101>;
+def : RWSysReg<"TRCCNTVR3", 0b10, 0b001, 0b0000, 0b1011, 0b101>;
+def : RWSysReg<"TRCIMSPEC0", 0b10, 0b001, 0b0000, 0b0000, 0b111>;
+def : RWSysReg<"TRCIMSPEC1", 0b10, 0b001, 0b0000, 0b0001, 0b111>;
+def : RWSysReg<"TRCIMSPEC2", 0b10, 0b001, 0b0000, 0b0010, 0b111>;
+def : RWSysReg<"TRCIMSPEC3", 0b10, 0b001, 0b0000, 0b0011, 0b111>;
+def : RWSysReg<"TRCIMSPEC4", 0b10, 0b001, 0b0000, 0b0100, 0b111>;
+def : RWSysReg<"TRCIMSPEC5", 0b10, 0b001, 0b0000, 0b0101, 0b111>;
+def : RWSysReg<"TRCIMSPEC6", 0b10, 0b001, 0b0000, 0b0110, 0b111>;
+def : RWSysReg<"TRCIMSPEC7", 0b10, 0b001, 0b0000, 0b0111, 0b111>;
+def : RWSysReg<"TRCRSCTLR2", 0b10, 0b001, 0b0001, 0b0010, 0b000>;
+def : RWSysReg<"TRCRSCTLR3", 0b10, 0b001, 0b0001, 0b0011, 0b000>;
+def : RWSysReg<"TRCRSCTLR4", 0b10, 0b001, 0b0001, 0b0100, 0b000>;
+def : RWSysReg<"TRCRSCTLR5", 0b10, 0b001, 0b0001, 0b0101, 0b000>;
+def : RWSysReg<"TRCRSCTLR6", 0b10, 0b001, 0b0001, 0b0110, 0b000>;
+def : RWSysReg<"TRCRSCTLR7", 0b10, 0b001, 0b0001, 0b0111, 0b000>;
+def : RWSysReg<"TRCRSCTLR8", 0b10, 0b001, 0b0001, 0b1000, 0b000>;
+def : RWSysReg<"TRCRSCTLR9", 0b10, 0b001, 0b0001, 0b1001, 0b000>;
+def : RWSysReg<"TRCRSCTLR10", 0b10, 0b001, 0b0001, 0b1010, 0b000>;
+def : RWSysReg<"TRCRSCTLR11", 0b10, 0b001, 0b0001, 0b1011, 0b000>;
+def : RWSysReg<"TRCRSCTLR12", 0b10, 0b001, 0b0001, 0b1100, 0b000>;
+def : RWSysReg<"TRCRSCTLR13", 0b10, 0b001, 0b0001, 0b1101, 0b000>;
+def : RWSysReg<"TRCRSCTLR14", 0b10, 0b001, 0b0001, 0b1110, 0b000>;
+def : RWSysReg<"TRCRSCTLR15", 0b10, 0b001, 0b0001, 0b1111, 0b000>;
+def : RWSysReg<"TRCRSCTLR16", 0b10, 0b001, 0b0001, 0b0000, 0b001>;
+def : RWSysReg<"TRCRSCTLR17", 0b10, 0b001, 0b0001, 0b0001, 0b001>;
+def : RWSysReg<"TRCRSCTLR18", 0b10, 0b001, 0b0001, 0b0010, 0b001>;
+def : RWSysReg<"TRCRSCTLR19", 0b10, 0b001, 0b0001, 0b0011, 0b001>;
+def : RWSysReg<"TRCRSCTLR20", 0b10, 0b001, 0b0001, 0b0100, 0b001>;
+def : RWSysReg<"TRCRSCTLR21", 0b10, 0b001, 0b0001, 0b0101, 0b001>;
+def : RWSysReg<"TRCRSCTLR22", 0b10, 0b001, 0b0001, 0b0110, 0b001>;
+def : RWSysReg<"TRCRSCTLR23", 0b10, 0b001, 0b0001, 0b0111, 0b001>;
+def : RWSysReg<"TRCRSCTLR24", 0b10, 0b001, 0b0001, 0b1000, 0b001>;
+def : RWSysReg<"TRCRSCTLR25", 0b10, 0b001, 0b0001, 0b1001, 0b001>;
+def : RWSysReg<"TRCRSCTLR26", 0b10, 0b001, 0b0001, 0b1010, 0b001>;
+def : RWSysReg<"TRCRSCTLR27", 0b10, 0b001, 0b0001, 0b1011, 0b001>;
+def : RWSysReg<"TRCRSCTLR28", 0b10, 0b001, 0b0001, 0b1100, 0b001>;
+def : RWSysReg<"TRCRSCTLR29", 0b10, 0b001, 0b0001, 0b1101, 0b001>;
+def : RWSysReg<"TRCRSCTLR30", 0b10, 0b001, 0b0001, 0b1110, 0b001>;
+def : RWSysReg<"TRCRSCTLR31", 0b10, 0b001, 0b0001, 0b1111, 0b001>;
+def : RWSysReg<"TRCSSCCR0", 0b10, 0b001, 0b0001, 0b0000, 0b010>;
+def : RWSysReg<"TRCSSCCR1", 0b10, 0b001, 0b0001, 0b0001, 0b010>;
+def : RWSysReg<"TRCSSCCR2", 0b10, 0b001, 0b0001, 0b0010, 0b010>;
+def : RWSysReg<"TRCSSCCR3", 0b10, 0b001, 0b0001, 0b0011, 0b010>;
+def : RWSysReg<"TRCSSCCR4", 0b10, 0b001, 0b0001, 0b0100, 0b010>;
+def : RWSysReg<"TRCSSCCR5", 0b10, 0b001, 0b0001, 0b0101, 0b010>;
+def : RWSysReg<"TRCSSCCR6", 0b10, 0b001, 0b0001, 0b0110, 0b010>;
+def : RWSysReg<"TRCSSCCR7", 0b10, 0b001, 0b0001, 0b0111, 0b010>;
+def : RWSysReg<"TRCSSCSR0", 0b10, 0b001, 0b0001, 0b1000, 0b010>;
+def : RWSysReg<"TRCSSCSR1", 0b10, 0b001, 0b0001, 0b1001, 0b010>;
+def : RWSysReg<"TRCSSCSR2", 0b10, 0b001, 0b0001, 0b1010, 0b010>;
+def : RWSysReg<"TRCSSCSR3", 0b10, 0b001, 0b0001, 0b1011, 0b010>;
+def : RWSysReg<"TRCSSCSR4", 0b10, 0b001, 0b0001, 0b1100, 0b010>;
+def : RWSysReg<"TRCSSCSR5", 0b10, 0b001, 0b0001, 0b1101, 0b010>;
+def : RWSysReg<"TRCSSCSR6", 0b10, 0b001, 0b0001, 0b1110, 0b010>;
+def : RWSysReg<"TRCSSCSR7", 0b10, 0b001, 0b0001, 0b1111, 0b010>;
+def : RWSysReg<"TRCSSPCICR0", 0b10, 0b001, 0b0001, 0b0000, 0b011>;
+def : RWSysReg<"TRCSSPCICR1", 0b10, 0b001, 0b0001, 0b0001, 0b011>;
+def : RWSysReg<"TRCSSPCICR2", 0b10, 0b001, 0b0001, 0b0010, 0b011>;
+def : RWSysReg<"TRCSSPCICR3", 0b10, 0b001, 0b0001, 0b0011, 0b011>;
+def : RWSysReg<"TRCSSPCICR4", 0b10, 0b001, 0b0001, 0b0100, 0b011>;
+def : RWSysReg<"TRCSSPCICR5", 0b10, 0b001, 0b0001, 0b0101, 0b011>;
+def : RWSysReg<"TRCSSPCICR6", 0b10, 0b001, 0b0001, 0b0110, 0b011>;
+def : RWSysReg<"TRCSSPCICR7", 0b10, 0b001, 0b0001, 0b0111, 0b011>;
+def : RWSysReg<"TRCPDCR", 0b10, 0b001, 0b0001, 0b0100, 0b100>;
+def : RWSysReg<"TRCACVR0", 0b10, 0b001, 0b0010, 0b0000, 0b000>;
+def : RWSysReg<"TRCACVR1", 0b10, 0b001, 0b0010, 0b0010, 0b000>;
+def : RWSysReg<"TRCACVR2", 0b10, 0b001, 0b0010, 0b0100, 0b000>;
+def : RWSysReg<"TRCACVR3", 0b10, 0b001, 0b0010, 0b0110, 0b000>;
+def : RWSysReg<"TRCACVR4", 0b10, 0b001, 0b0010, 0b1000, 0b000>;
+def : RWSysReg<"TRCACVR5", 0b10, 0b001, 0b0010, 0b1010, 0b000>;
+def : RWSysReg<"TRCACVR6", 0b10, 0b001, 0b0010, 0b1100, 0b000>;
+def : RWSysReg<"TRCACVR7", 0b10, 0b001, 0b0010, 0b1110, 0b000>;
+def : RWSysReg<"TRCACVR8", 0b10, 0b001, 0b0010, 0b0000, 0b001>;
+def : RWSysReg<"TRCACVR9", 0b10, 0b001, 0b0010, 0b0010, 0b001>;
+def : RWSysReg<"TRCACVR10", 0b10, 0b001, 0b0010, 0b0100, 0b001>;
+def : RWSysReg<"TRCACVR11", 0b10, 0b001, 0b0010, 0b0110, 0b001>;
+def : RWSysReg<"TRCACVR12", 0b10, 0b001, 0b0010, 0b1000, 0b001>;
+def : RWSysReg<"TRCACVR13", 0b10, 0b001, 0b0010, 0b1010, 0b001>;
+def : RWSysReg<"TRCACVR14", 0b10, 0b001, 0b0010, 0b1100, 0b001>;
+def : RWSysReg<"TRCACVR15", 0b10, 0b001, 0b0010, 0b1110, 0b001>;
+def : RWSysReg<"TRCACATR0", 0b10, 0b001, 0b0010, 0b0000, 0b010>;
+def : RWSysReg<"TRCACATR1", 0b10, 0b001, 0b0010, 0b0010, 0b010>;
+def : RWSysReg<"TRCACATR2", 0b10, 0b001, 0b0010, 0b0100, 0b010>;
+def : RWSysReg<"TRCACATR3", 0b10, 0b001, 0b0010, 0b0110, 0b010>;
+def : RWSysReg<"TRCACATR4", 0b10, 0b001, 0b0010, 0b1000, 0b010>;
+def : RWSysReg<"TRCACATR5", 0b10, 0b001, 0b0010, 0b1010, 0b010>;
+def : RWSysReg<"TRCACATR6", 0b10, 0b001, 0b0010, 0b1100, 0b010>;
+def : RWSysReg<"TRCACATR7", 0b10, 0b001, 0b0010, 0b1110, 0b010>;
+def : RWSysReg<"TRCACATR8", 0b10, 0b001, 0b0010, 0b0000, 0b011>;
+def : RWSysReg<"TRCACATR9", 0b10, 0b001, 0b0010, 0b0010, 0b011>;
+def : RWSysReg<"TRCACATR10", 0b10, 0b001, 0b0010, 0b0100, 0b011>;
+def : RWSysReg<"TRCACATR11", 0b10, 0b001, 0b0010, 0b0110, 0b011>;
+def : RWSysReg<"TRCACATR12", 0b10, 0b001, 0b0010, 0b1000, 0b011>;
+def : RWSysReg<"TRCACATR13", 0b10, 0b001, 0b0010, 0b1010, 0b011>;
+def : RWSysReg<"TRCACATR14", 0b10, 0b001, 0b0010, 0b1100, 0b011>;
+def : RWSysReg<"TRCACATR15", 0b10, 0b001, 0b0010, 0b1110, 0b011>;
+def : RWSysReg<"TRCDVCVR0", 0b10, 0b001, 0b0010, 0b0000, 0b100>;
+def : RWSysReg<"TRCDVCVR1", 0b10, 0b001, 0b0010, 0b0100, 0b100>;
+def : RWSysReg<"TRCDVCVR2", 0b10, 0b001, 0b0010, 0b1000, 0b100>;
+def : RWSysReg<"TRCDVCVR3", 0b10, 0b001, 0b0010, 0b1100, 0b100>;
+def : RWSysReg<"TRCDVCVR4", 0b10, 0b001, 0b0010, 0b0000, 0b101>;
+def : RWSysReg<"TRCDVCVR5", 0b10, 0b001, 0b0010, 0b0100, 0b101>;
+def : RWSysReg<"TRCDVCVR6", 0b10, 0b001, 0b0010, 0b1000, 0b101>;
+def : RWSysReg<"TRCDVCVR7", 0b10, 0b001, 0b0010, 0b1100, 0b101>;
+def : RWSysReg<"TRCDVCMR0", 0b10, 0b001, 0b0010, 0b0000, 0b110>;
+def : RWSysReg<"TRCDVCMR1", 0b10, 0b001, 0b0010, 0b0100, 0b110>;
+def : RWSysReg<"TRCDVCMR2", 0b10, 0b001, 0b0010, 0b1000, 0b110>;
+def : RWSysReg<"TRCDVCMR3", 0b10, 0b001, 0b0010, 0b1100, 0b110>;
+def : RWSysReg<"TRCDVCMR4", 0b10, 0b001, 0b0010, 0b0000, 0b111>;
+def : RWSysReg<"TRCDVCMR5", 0b10, 0b001, 0b0010, 0b0100, 0b111>;
+def : RWSysReg<"TRCDVCMR6", 0b10, 0b001, 0b0010, 0b1000, 0b111>;
+def : RWSysReg<"TRCDVCMR7", 0b10, 0b001, 0b0010, 0b1100, 0b111>;
+def : RWSysReg<"TRCCIDCVR0", 0b10, 0b001, 0b0011, 0b0000, 0b000>;
+def : RWSysReg<"TRCCIDCVR1", 0b10, 0b001, 0b0011, 0b0010, 0b000>;
+def : RWSysReg<"TRCCIDCVR2", 0b10, 0b001, 0b0011, 0b0100, 0b000>;
+def : RWSysReg<"TRCCIDCVR3", 0b10, 0b001, 0b0011, 0b0110, 0b000>;
+def : RWSysReg<"TRCCIDCVR4", 0b10, 0b001, 0b0011, 0b1000, 0b000>;
+def : RWSysReg<"TRCCIDCVR5", 0b10, 0b001, 0b0011, 0b1010, 0b000>;
+def : RWSysReg<"TRCCIDCVR6", 0b10, 0b001, 0b0011, 0b1100, 0b000>;
+def : RWSysReg<"TRCCIDCVR7", 0b10, 0b001, 0b0011, 0b1110, 0b000>;
+def : RWSysReg<"TRCVMIDCVR0", 0b10, 0b001, 0b0011, 0b0000, 0b001>;
+def : RWSysReg<"TRCVMIDCVR1", 0b10, 0b001, 0b0011, 0b0010, 0b001>;
+def : RWSysReg<"TRCVMIDCVR2", 0b10, 0b001, 0b0011, 0b0100, 0b001>;
+def : RWSysReg<"TRCVMIDCVR3", 0b10, 0b001, 0b0011, 0b0110, 0b001>;
+def : RWSysReg<"TRCVMIDCVR4", 0b10, 0b001, 0b0011, 0b1000, 0b001>;
+def : RWSysReg<"TRCVMIDCVR5", 0b10, 0b001, 0b0011, 0b1010, 0b001>;
+def : RWSysReg<"TRCVMIDCVR6", 0b10, 0b001, 0b0011, 0b1100, 0b001>;
+def : RWSysReg<"TRCVMIDCVR7", 0b10, 0b001, 0b0011, 0b1110, 0b001>;
+def : RWSysReg<"TRCCIDCCTLR0", 0b10, 0b001, 0b0011, 0b0000, 0b010>;
+def : RWSysReg<"TRCCIDCCTLR1", 0b10, 0b001, 0b0011, 0b0001, 0b010>;
+def : RWSysReg<"TRCVMIDCCTLR0", 0b10, 0b001, 0b0011, 0b0010, 0b010>;
+def : RWSysReg<"TRCVMIDCCTLR1", 0b10, 0b001, 0b0011, 0b0011, 0b010>;
+def : RWSysReg<"TRCITCTRL", 0b10, 0b001, 0b0111, 0b0000, 0b100>;
+def : RWSysReg<"TRCCLAIMSET", 0b10, 0b001, 0b0111, 0b1000, 0b110>;
+def : RWSysReg<"TRCCLAIMCLR", 0b10, 0b001, 0b0111, 0b1001, 0b110>;
+
+// GICv3 registers
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"ICC_BPR1_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b011>;
+def : RWSysReg<"ICC_BPR0_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b011>;
+def : RWSysReg<"ICC_PMR_EL1", 0b11, 0b000, 0b0100, 0b0110, 0b000>;
+def : RWSysReg<"ICC_CTLR_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b100>;
+def : RWSysReg<"ICC_CTLR_EL3", 0b11, 0b110, 0b1100, 0b1100, 0b100>;
+def : RWSysReg<"ICC_SRE_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b101>;
+def : RWSysReg<"ICC_SRE_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b101>;
+def : RWSysReg<"ICC_SRE_EL3", 0b11, 0b110, 0b1100, 0b1100, 0b101>;
+def : RWSysReg<"ICC_IGRPEN0_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b110>;
+def : RWSysReg<"ICC_IGRPEN1_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b111>;
+def : RWSysReg<"ICC_IGRPEN1_EL3", 0b11, 0b110, 0b1100, 0b1100, 0b111>;
+def : RWSysReg<"ICC_SEIEN_EL1", 0b11, 0b000, 0b1100, 0b1101, 0b000>;
+def : RWSysReg<"ICC_AP0R0_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b100>;
+def : RWSysReg<"ICC_AP0R1_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b101>;
+def : RWSysReg<"ICC_AP0R2_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b110>;
+def : RWSysReg<"ICC_AP0R3_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b111>;
+def : RWSysReg<"ICC_AP1R0_EL1", 0b11, 0b000, 0b1100, 0b1001, 0b000>;
+def : RWSysReg<"ICC_AP1R1_EL1", 0b11, 0b000, 0b1100, 0b1001, 0b001>;
+def : RWSysReg<"ICC_AP1R2_EL1", 0b11, 0b000, 0b1100, 0b1001, 0b010>;
+def : RWSysReg<"ICC_AP1R3_EL1", 0b11, 0b000, 0b1100, 0b1001, 0b011>;
+def : RWSysReg<"ICH_AP0R0_EL2", 0b11, 0b100, 0b1100, 0b1000, 0b000>;
+def : RWSysReg<"ICH_AP0R1_EL2", 0b11, 0b100, 0b1100, 0b1000, 0b001>;
+def : RWSysReg<"ICH_AP0R2_EL2", 0b11, 0b100, 0b1100, 0b1000, 0b010>;
+def : RWSysReg<"ICH_AP0R3_EL2", 0b11, 0b100, 0b1100, 0b1000, 0b011>;
+def : RWSysReg<"ICH_AP1R0_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b000>;
+def : RWSysReg<"ICH_AP1R1_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b001>;
+def : RWSysReg<"ICH_AP1R2_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b010>;
+def : RWSysReg<"ICH_AP1R3_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b011>;
+def : RWSysReg<"ICH_HCR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b000>;
+def : RWSysReg<"ICH_MISR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b010>;
+def : RWSysReg<"ICH_VMCR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b111>;
+def : RWSysReg<"ICH_VSEIR_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b100>;
+def : RWSysReg<"ICH_LR0_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b000>;
+def : RWSysReg<"ICH_LR1_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b001>;
+def : RWSysReg<"ICH_LR2_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b010>;
+def : RWSysReg<"ICH_LR3_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b011>;
+def : RWSysReg<"ICH_LR4_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b100>;
+def : RWSysReg<"ICH_LR5_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b101>;
+def : RWSysReg<"ICH_LR6_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b110>;
+def : RWSysReg<"ICH_LR7_EL2", 0b11, 0b100, 0b1100, 0b1100, 0b111>;
+def : RWSysReg<"ICH_LR8_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b000>;
+def : RWSysReg<"ICH_LR9_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b001>;
+def : RWSysReg<"ICH_LR10_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b010>;
+def : RWSysReg<"ICH_LR11_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b011>;
+def : RWSysReg<"ICH_LR12_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b100>;
+def : RWSysReg<"ICH_LR13_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b101>;
+def : RWSysReg<"ICH_LR14_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b110>;
+def : RWSysReg<"ICH_LR15_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b111>;
+
+// v8.1a "Privileged Access Never" extension-specific system registers
+let Requires = [{ {AArch64::HasV8_1aOps} }] in
+def : RWSysReg<"PAN", 0b11, 0b000, 0b0100, 0b0010, 0b011>;
+
+// v8.1a "Limited Ordering Regions" extension-specific system registers
+// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::HasV8_1aOps} }] in {
+def : RWSysReg<"LORSA_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b000>;
+def : RWSysReg<"LOREA_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b001>;
+def : RWSysReg<"LORN_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b010>;
+def : RWSysReg<"LORC_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b011>;
+}
+
+// v8.1a "Virtualization hos extensions" system registers
+// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::HasV8_1aOps} }] in {
+def : RWSysReg<"TTBR1_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b001>;
+def : RWSysReg<"CONTEXTIDR_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b001>;
+def : RWSysReg<"CNTHV_TVAL_EL2", 0b11, 0b100, 0b1110, 0b0011, 0b000>;
+def : RWSysReg<"CNTHV_CVAL_EL2", 0b11, 0b100, 0b1110, 0b0011, 0b010>;
+def : RWSysReg<"CNTHV_CTL_EL2", 0b11, 0b100, 0b1110, 0b0011, 0b001>;
+def : RWSysReg<"SCTLR_EL12", 0b11, 0b101, 0b0001, 0b0000, 0b000>;
+def : RWSysReg<"CPACR_EL12", 0b11, 0b101, 0b0001, 0b0000, 0b010>;
+def : RWSysReg<"TTBR0_EL12", 0b11, 0b101, 0b0010, 0b0000, 0b000>;
+def : RWSysReg<"TTBR1_EL12", 0b11, 0b101, 0b0010, 0b0000, 0b001>;
+def : RWSysReg<"TCR_EL12", 0b11, 0b101, 0b0010, 0b0000, 0b010>;
+def : RWSysReg<"AFSR0_EL12", 0b11, 0b101, 0b0101, 0b0001, 0b000>;
+def : RWSysReg<"AFSR1_EL12", 0b11, 0b101, 0b0101, 0b0001, 0b001>;
+def : RWSysReg<"ESR_EL12", 0b11, 0b101, 0b0101, 0b0010, 0b000>;
+def : RWSysReg<"FAR_EL12", 0b11, 0b101, 0b0110, 0b0000, 0b000>;
+def : RWSysReg<"MAIR_EL12", 0b11, 0b101, 0b1010, 0b0010, 0b000>;
+def : RWSysReg<"AMAIR_EL12", 0b11, 0b101, 0b1010, 0b0011, 0b000>;
+def : RWSysReg<"VBAR_EL12", 0b11, 0b101, 0b1100, 0b0000, 0b000>;
+def : RWSysReg<"CONTEXTIDR_EL12", 0b11, 0b101, 0b1101, 0b0000, 0b001>;
+def : RWSysReg<"CNTKCTL_EL12", 0b11, 0b101, 0b1110, 0b0001, 0b000>;
+def : RWSysReg<"CNTP_TVAL_EL02", 0b11, 0b101, 0b1110, 0b0010, 0b000>;
+def : RWSysReg<"CNTP_CTL_EL02", 0b11, 0b101, 0b1110, 0b0010, 0b001>;
+def : RWSysReg<"CNTP_CVAL_EL02", 0b11, 0b101, 0b1110, 0b0010, 0b010>;
+def : RWSysReg<"CNTV_TVAL_EL02", 0b11, 0b101, 0b1110, 0b0011, 0b000>;
+def : RWSysReg<"CNTV_CTL_EL02", 0b11, 0b101, 0b1110, 0b0011, 0b001>;
+def : RWSysReg<"CNTV_CVAL_EL02", 0b11, 0b101, 0b1110, 0b0011, 0b010>;
+def : RWSysReg<"SPSR_EL12", 0b11, 0b101, 0b0100, 0b0000, 0b000>;
+def : RWSysReg<"ELR_EL12", 0b11, 0b101, 0b0100, 0b0000, 0b001>;
+}
+// v8.2a registers
+// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::HasV8_2aOps} }] in
+def : RWSysReg<"UAO", 0b11, 0b000, 0b0100, 0b0010, 0b100>;
+
+// v8.2a "Statistical Profiling extension" registers
+// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureSPE} }] in {
+def : RWSysReg<"PMBLIMITR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b000>;
+def : RWSysReg<"PMBPTR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b001>;
+def : RWSysReg<"PMBSR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b011>;
+def : RWSysReg<"PMBIDR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b111>;
+def : RWSysReg<"PMSCR_EL2", 0b11, 0b100, 0b1001, 0b1001, 0b000>;
+def : RWSysReg<"PMSCR_EL12", 0b11, 0b101, 0b1001, 0b1001, 0b000>;
+def : RWSysReg<"PMSCR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b000>;
+def : RWSysReg<"PMSICR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b010>;
+def : RWSysReg<"PMSIRR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b011>;
+def : RWSysReg<"PMSFCR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b100>;
+def : RWSysReg<"PMSEVFR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b101>;
+def : RWSysReg<"PMSLATFR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b110>;
+def : RWSysReg<"PMSIDR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b111>;
+}
+
+// v8.2a "RAS extension" registers
+// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureRAS} }] in {
+def : RWSysReg<"ERRSELR_EL1", 0b11, 0b000, 0b0101, 0b0011, 0b001>;
+def : RWSysReg<"ERXCTLR_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b001>;
+def : RWSysReg<"ERXSTATUS_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b010>;
+def : RWSysReg<"ERXADDR_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b011>;
+def : RWSysReg<"ERXMISC0_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b000>;
+def : RWSysReg<"ERXMISC1_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b001>;
+def : RWSysReg<"DISR_EL1", 0b11, 0b000, 0b1100, 0b0001, 0b001>;
+def : RWSysReg<"VDISR_EL2", 0b11, 0b100, 0b1100, 0b0001, 0b001>;
+def : RWSysReg<"VSESR_EL2", 0b11, 0b100, 0b0101, 0b0010, 0b011>;
+}
+
+// v8.3a "Pointer authentication extension" registers
+// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::HasV8_3aOps} }] in {
+def : RWSysReg<"APIAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b000>;
+def : RWSysReg<"APIAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b001>;
+def : RWSysReg<"APIBKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b010>;
+def : RWSysReg<"APIBKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b011>;
+def : RWSysReg<"APDAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0010, 0b000>;
+def : RWSysReg<"APDAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0010, 0b001>;
+def : RWSysReg<"APDBKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0010, 0b010>;
+def : RWSysReg<"APDBKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0010, 0b011>;
+def : RWSysReg<"APGAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0011, 0b000>;
+def : RWSysReg<"APGAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0011, 0b001>;
+}
+
+let Requires = [{ {AArch64::HasV8_4aOps} }] in {
+
+// v8.4a "Virtualization secure second stage translation" registers
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"VSTCR_EL2" , 0b11, 0b100, 0b0010, 0b0110, 0b010>;
+def : RWSysReg<"VSTTBR_EL2", 0b11, 0b100, 0b0010, 0b0110, 0b000>;
+
+// v8.4a "Virtualization timer" registers
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"CNTHVS_TVAL_EL2", 0b11, 0b100, 0b1110, 0b0100, 0b000>;
+def : RWSysReg<"CNTHVS_CVAL_EL2", 0b11, 0b100, 0b1110, 0b0100, 0b010>;
+def : RWSysReg<"CNTHVS_CTL_EL2", 0b11, 0b100, 0b1110, 0b0100, 0b001>;
+def : RWSysReg<"CNTHPS_TVAL_EL2", 0b11, 0b100, 0b1110, 0b0101, 0b000>;
+def : RWSysReg<"CNTHPS_CVAL_EL2", 0b11, 0b100, 0b1110, 0b0101, 0b010>;
+def : RWSysReg<"CNTHPS_CTL_EL2", 0b11, 0b100, 0b1110, 0b0101, 0b001>;
+
+// v8.4a "Virtualization debug state" registers
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"SDER32_EL2", 0b11, 0b100, 0b0001, 0b0011, 0b001>;
+
+// v8.4a RAS registers
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"ERXPFGCTL_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b101>;
+def : RWSysReg<"ERXPFGCDN_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b110>;
+def : RWSysReg<"ERXTS_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b111>;
+def : RWSysReg<"ERXMISC2_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b010>;
+def : RWSysReg<"ERXMISC3_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b011>;
+def : ROSysReg<"ERXPFGF_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b100>;
+
+// v8.4a MPAM registers
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"MPAM0_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b001>;
+def : RWSysReg<"MPAM1_EL1", 0b11, 0b000, 0b1010, 0b0101, 0b000>;
+def : RWSysReg<"MPAM2_EL2", 0b11, 0b100, 0b1010, 0b0101, 0b000>;
+def : RWSysReg<"MPAM3_EL3", 0b11, 0b110, 0b1010, 0b0101, 0b000>;
+def : RWSysReg<"MPAM1_EL12", 0b11, 0b101, 0b1010, 0b0101, 0b000>;
+def : RWSysReg<"MPAMHCR_EL2", 0b11, 0b100, 0b1010, 0b0100, 0b000>;
+def : RWSysReg<"MPAMVPMV_EL2", 0b11, 0b100, 0b1010, 0b0100, 0b001>;
+def : RWSysReg<"MPAMVPM0_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b000>;
+def : RWSysReg<"MPAMVPM1_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b001>;
+def : RWSysReg<"MPAMVPM2_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b010>;
+def : RWSysReg<"MPAMVPM3_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b011>;
+def : RWSysReg<"MPAMVPM4_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b100>;
+def : RWSysReg<"MPAMVPM5_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b101>;
+def : RWSysReg<"MPAMVPM6_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b110>;
+def : RWSysReg<"MPAMVPM7_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b111>;
+def : ROSysReg<"MPAMIDR_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b100>;
+
+// v8.4a Activity monitor registers
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"AMCR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b000>;
+def : ROSysReg<"AMCFGR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b001>;
+def : ROSysReg<"AMCGCR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b010>;
+def : RWSysReg<"AMUSERENR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b011>;
+def : RWSysReg<"AMCNTENCLR0_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b100>;
+def : RWSysReg<"AMCNTENSET0_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b101>;
+def : RWSysReg<"AMEVCNTR00_EL0", 0b11, 0b011, 0b1101, 0b0100, 0b000>;
+def : RWSysReg<"AMEVCNTR01_EL0", 0b11, 0b011, 0b1101, 0b0100, 0b001>;
+def : RWSysReg<"AMEVCNTR02_EL0", 0b11, 0b011, 0b1101, 0b0100, 0b010>;
+def : RWSysReg<"AMEVCNTR03_EL0", 0b11, 0b011, 0b1101, 0b0100, 0b011>;
+def : ROSysReg<"AMEVTYPER00_EL0", 0b11, 0b011, 0b1101, 0b0110, 0b000>;
+def : ROSysReg<"AMEVTYPER01_EL0", 0b11, 0b011, 0b1101, 0b0110, 0b001>;
+def : ROSysReg<"AMEVTYPER02_EL0", 0b11, 0b011, 0b1101, 0b0110, 0b010>;
+def : ROSysReg<"AMEVTYPER03_EL0", 0b11, 0b011, 0b1101, 0b0110, 0b011>;
+def : RWSysReg<"AMCNTENCLR1_EL0", 0b11, 0b011, 0b1101, 0b0011, 0b000>;
+def : RWSysReg<"AMCNTENSET1_EL0", 0b11, 0b011, 0b1101, 0b0011, 0b001>;
+def : RWSysReg<"AMEVCNTR10_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b000>;
+def : RWSysReg<"AMEVCNTR11_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b001>;
+def : RWSysReg<"AMEVCNTR12_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b010>;
+def : RWSysReg<"AMEVCNTR13_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b011>;
+def : RWSysReg<"AMEVCNTR14_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b100>;
+def : RWSysReg<"AMEVCNTR15_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b101>;
+def : RWSysReg<"AMEVCNTR16_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b110>;
+def : RWSysReg<"AMEVCNTR17_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b111>;
+def : RWSysReg<"AMEVCNTR18_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b000>;
+def : RWSysReg<"AMEVCNTR19_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b001>;
+def : RWSysReg<"AMEVCNTR110_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b010>;
+def : RWSysReg<"AMEVCNTR111_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b011>;
+def : RWSysReg<"AMEVCNTR112_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b100>;
+def : RWSysReg<"AMEVCNTR113_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b101>;
+def : RWSysReg<"AMEVCNTR114_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b110>;
+def : RWSysReg<"AMEVCNTR115_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b111>;
+def : RWSysReg<"AMEVTYPER10_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b000>;
+def : RWSysReg<"AMEVTYPER11_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b001>;
+def : RWSysReg<"AMEVTYPER12_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b010>;
+def : RWSysReg<"AMEVTYPER13_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b011>;
+def : RWSysReg<"AMEVTYPER14_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b100>;
+def : RWSysReg<"AMEVTYPER15_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b101>;
+def : RWSysReg<"AMEVTYPER16_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b110>;
+def : RWSysReg<"AMEVTYPER17_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b111>;
+def : RWSysReg<"AMEVTYPER18_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b000>;
+def : RWSysReg<"AMEVTYPER19_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b001>;
+def : RWSysReg<"AMEVTYPER110_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b010>;
+def : RWSysReg<"AMEVTYPER111_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b011>;
+def : RWSysReg<"AMEVTYPER112_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b100>;
+def : RWSysReg<"AMEVTYPER113_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b101>;
+def : RWSysReg<"AMEVTYPER114_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b110>;
+def : RWSysReg<"AMEVTYPER115_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b111>;
+
+// v8.4a Trace Extension registers
+//
+// Please note that the 8.4 spec also defines these registers:
+// TRCIDR1, ID_DFR0_EL1, ID_AA64DFR0_EL1, MDSCR_EL1, MDCR_EL2, and MDCR_EL3,
+// but they are already defined above.
+//
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"TRFCR_EL1", 0b11, 0b000, 0b0001, 0b0010, 0b001>;
+def : RWSysReg<"TRFCR_EL2", 0b11, 0b100, 0b0001, 0b0010, 0b001>;
+def : RWSysReg<"TRFCR_EL12", 0b11, 0b101, 0b0001, 0b0010, 0b001>;
+
+// v8.4a Timing insensitivity of data processing instructions
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"DIT", 0b11, 0b011, 0b0100, 0b0010, 0b101>;
+
+// v8.4a Enhanced Support for Nested Virtualization
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"VNCR_EL2", 0b11, 0b100, 0b0010, 0b0010, 0b000>;
+
+} // HasV8_4aOps
+
+// SVE control registers
+// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::FeatureSVE} }] in {
+def : RWSysReg<"ZCR_EL1", 0b11, 0b000, 0b0001, 0b0010, 0b000>;
+def : RWSysReg<"ZCR_EL2", 0b11, 0b100, 0b0001, 0b0010, 0b000>;
+def : RWSysReg<"ZCR_EL3", 0b11, 0b110, 0b0001, 0b0010, 0b000>;
+def : RWSysReg<"ZCR_EL12", 0b11, 0b101, 0b0001, 0b0010, 0b000>;
+}
+
+// Cyclone specific system registers
+// Op0 Op1 CRn CRm Op2
+let Requires = [{ {AArch64::ProcCyclone} }] in
+def : RWSysReg<"CPM_IOACC_CTL_EL3", 0b11, 0b111, 0b1111, 0b0010, 0b000>;
diff --git a/capstone/suite/synctools/tablegen/AArch64/SVEInstrFormats.td b/capstone/suite/synctools/tablegen/AArch64/SVEInstrFormats.td
new file mode 100644
index 000000000..7a8dd8bc5
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/AArch64/SVEInstrFormats.td
@@ -0,0 +1,4456 @@
+//=-- SVEInstrFormats.td - AArch64 SVE Instruction classes -*- tablegen -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// AArch64 Scalable Vector Extension (SVE) Instruction Class Definitions.
+//
+//===----------------------------------------------------------------------===//
+
+def SVEPatternOperand : AsmOperandClass {
+ let Name = "SVEPattern";
+ let ParserMethod = "tryParseSVEPattern";
+ let PredicateMethod = "isSVEPattern";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "InvalidSVEPattern";
+}
+
+def sve_pred_enum : Operand<i32>, ImmLeaf<i32, [{
+ return (((uint32_t)Imm) < 32);
+ }]> {
+
+ let PrintMethod = "printSVEPattern";
+ let ParserMatchClass = SVEPatternOperand;
+}
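+// Note: $pattern is the 5-bit SVE predicate-pattern immediate (0-31). The
+// value 0b11111 is the all-lanes ("all") pattern, and it is what the
+// InstAliases further down substitute when the pattern operand is omitted.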
+
+def SVEPrefetchOperand : AsmOperandClass {
+ let Name = "SVEPrefetch";
+ let ParserMethod = "tryParsePrefetch<true>";
+ let PredicateMethod = "isPrefetch";
+ let RenderMethod = "addPrefetchOperands";
+}
+
+def sve_prfop : Operand<i32>, ImmLeaf<i32, [{
+ return (((uint32_t)Imm) <= 15);
+ }]> {
+ let PrintMethod = "printPrefetchOp<true>";
+ let ParserMatchClass = SVEPrefetchOperand;
+}
+
+class SVELogicalImmOperand<int Width> : AsmOperandClass {
+ let Name = "SVELogicalImm" # Width;
+ let DiagnosticType = "LogicalSecondSource";
+ let PredicateMethod = "isLogicalImm<int" # Width # "_t>";
+ let RenderMethod = "addLogicalImmOperands<int" # Width # "_t>";
+}
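+// The sve_logical_imm* operands below carry the standard 13-bit encoded
+// logical immediate; their MCOperandPredicates decode it to a 64-bit mask and
+// additionally require that the mask replicates as identical 8/16/32-bit
+// elements, matching the element width of the instruction.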
+
+def sve_logical_imm8 : Operand<i64> {
+ let ParserMatchClass = SVELogicalImmOperand<8>;
+ let PrintMethod = "printLogicalImm<int8_t>";
+
+ let MCOperandPredicate = [{
+ if (!MCOp.isImm())
+ return false;
+ int64_t Val = AArch64_AM::decodeLogicalImmediate(MCOp.getImm(), 64);
+ return AArch64_AM::isSVEMaskOfIdenticalElements<int8_t>(Val);
+ }];
+}
+
+def sve_logical_imm16 : Operand<i64> {
+ let ParserMatchClass = SVELogicalImmOperand<16>;
+ let PrintMethod = "printLogicalImm<int16_t>";
+
+ let MCOperandPredicate = [{
+ if (!MCOp.isImm())
+ return false;
+ int64_t Val = AArch64_AM::decodeLogicalImmediate(MCOp.getImm(), 64);
+ return AArch64_AM::isSVEMaskOfIdenticalElements<int16_t>(Val);
+ }];
+}
+
+def sve_logical_imm32 : Operand<i64> {
+ let ParserMatchClass = SVELogicalImmOperand<32>;
+ let PrintMethod = "printLogicalImm<int32_t>";
+
+ let MCOperandPredicate = [{
+ if (!MCOp.isImm())
+ return false;
+ int64_t Val = AArch64_AM::decodeLogicalImmediate(MCOp.getImm(), 64);
+ return AArch64_AM::isSVEMaskOfIdenticalElements<int32_t>(Val);
+ }];
+}
+
+class SVEPreferredLogicalImmOperand<int Width> : AsmOperandClass {
+ let Name = "SVEPreferredLogicalImm" # Width;
+ let PredicateMethod = "isSVEPreferredLogicalImm<int" # Width # "_t>";
+ let RenderMethod = "addLogicalImmOperands<int" # Width # "_t>";
+}
+
+def sve_preferred_logical_imm16 : Operand<i64> {
+ let ParserMatchClass = SVEPreferredLogicalImmOperand<16>;
+ let PrintMethod = "printSVELogicalImm<int16_t>";
+
+ let MCOperandPredicate = [{
+ if (!MCOp.isImm())
+ return false;
+ int64_t Val = AArch64_AM::decodeLogicalImmediate(MCOp.getImm(), 64);
+ return AArch64_AM::isSVEMaskOfIdenticalElements<int16_t>(Val) &&
+ AArch64_AM::isSVEMoveMaskPreferredLogicalImmediate(Val);
+ }];
+}
+
+def sve_preferred_logical_imm32 : Operand<i64> {
+ let ParserMatchClass = SVEPreferredLogicalImmOperand<32>;
+ let PrintMethod = "printSVELogicalImm<int32_t>";
+
+ let MCOperandPredicate = [{
+ if (!MCOp.isImm())
+ return false;
+ int64_t Val = AArch64_AM::decodeLogicalImmediate(MCOp.getImm(), 64);
+ return AArch64_AM::isSVEMaskOfIdenticalElements<int32_t>(Val) &&
+ AArch64_AM::isSVEMoveMaskPreferredLogicalImmediate(Val);
+ }];
+}
+
+def sve_preferred_logical_imm64 : Operand<i64> {
+ let ParserMatchClass = SVEPreferredLogicalImmOperand<64>;
+ let PrintMethod = "printSVELogicalImm<int64_t>";
+
+ let MCOperandPredicate = [{
+ if (!MCOp.isImm())
+ return false;
+ int64_t Val = AArch64_AM::decodeLogicalImmediate(MCOp.getImm(), 64);
+ return AArch64_AM::isSVEMaskOfIdenticalElements<int64_t>(Val) &&
+ AArch64_AM::isSVEMoveMaskPreferredLogicalImmediate(Val);
+ }];
+}
+
+class SVELogicalImmNotOperand<int Width> : AsmOperandClass {
+ let Name = "SVELogicalImm" # Width # "Not";
+ let DiagnosticType = "LogicalSecondSource";
+ let PredicateMethod = "isLogicalImm<int" # Width # "_t>";
+ let RenderMethod = "addLogicalImmNotOperands<int" # Width # "_t>";
+}
+
+def sve_logical_imm8_not : Operand<i64> {
+ let ParserMatchClass = SVELogicalImmNotOperand<8>;
+}
+
+def sve_logical_imm16_not : Operand<i64> {
+ let ParserMatchClass = SVELogicalImmNotOperand<16>;
+}
+
+def sve_logical_imm32_not : Operand<i64> {
+ let ParserMatchClass = SVELogicalImmNotOperand<32>;
+}
+
+class SVEShiftedImmOperand<int ElementWidth, string Infix, string Predicate>
+ : AsmOperandClass {
+ let Name = "SVE" # Infix # "Imm" # ElementWidth;
+ let DiagnosticType = "Invalid" # Name;
+ let RenderMethod = "addImmWithOptionalShiftOperands<8>";
+ let ParserMethod = "tryParseImmWithOptionalShift";
+ let PredicateMethod = Predicate;
+}
+
+def SVECpyImmOperand8 : SVEShiftedImmOperand<8, "Cpy", "isSVECpyImm<int8_t>">;
+def SVECpyImmOperand16 : SVEShiftedImmOperand<16, "Cpy", "isSVECpyImm<int16_t>">;
+def SVECpyImmOperand32 : SVEShiftedImmOperand<32, "Cpy", "isSVECpyImm<int32_t>">;
+def SVECpyImmOperand64 : SVEShiftedImmOperand<64, "Cpy", "isSVECpyImm<int64_t>">;
+
+def SVEAddSubImmOperand8 : SVEShiftedImmOperand<8, "AddSub", "isSVEAddSubImm<int8_t>">;
+def SVEAddSubImmOperand16 : SVEShiftedImmOperand<16, "AddSub", "isSVEAddSubImm<int16_t>">;
+def SVEAddSubImmOperand32 : SVEShiftedImmOperand<32, "AddSub", "isSVEAddSubImm<int32_t>">;
+def SVEAddSubImmOperand64 : SVEShiftedImmOperand<64, "AddSub", "isSVEAddSubImm<int64_t>">;
+
+class imm8_opt_lsl<int ElementWidth, string printType,
+ AsmOperandClass OpndClass, code Predicate>
+ : Operand<i32>, ImmLeaf<i32, Predicate> {
+ let EncoderMethod = "getImm8OptLsl";
+ let DecoderMethod = "DecodeImm8OptLsl<" # ElementWidth # ">";
+ let PrintMethod = "printImm8OptLsl<" # printType # ">";
+ let ParserMatchClass = OpndClass;
+ let MIOperandInfo = (ops i32imm, i32imm);
+}
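+// imm8_opt_lsl models an 8-bit immediate with an optional "lsl #8": the MI
+// operand is the (immediate, shift) pair declared in MIOperandInfo, and the
+// custom encoder/printer fold the optional shift back into the asm syntax.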
+
+def cpy_imm8_opt_lsl_i8 : imm8_opt_lsl<8, "int8_t", SVECpyImmOperand8, [{
+ return AArch64_AM::isSVECpyImm<int8_t>(Imm);
+}]>;
+def cpy_imm8_opt_lsl_i16 : imm8_opt_lsl<16, "int16_t", SVECpyImmOperand16, [{
+ return AArch64_AM::isSVECpyImm<int16_t>(Imm);
+}]>;
+def cpy_imm8_opt_lsl_i32 : imm8_opt_lsl<32, "int32_t", SVECpyImmOperand32, [{
+ return AArch64_AM::isSVECpyImm<int32_t>(Imm);
+}]>;
+def cpy_imm8_opt_lsl_i64 : imm8_opt_lsl<64, "int64_t", SVECpyImmOperand64, [{
+ return AArch64_AM::isSVECpyImm<int64_t>(Imm);
+}]>;
+
+def addsub_imm8_opt_lsl_i8 : imm8_opt_lsl<8, "uint8_t", SVEAddSubImmOperand8, [{
+ return AArch64_AM::isSVEAddSubImm<int8_t>(Imm);
+}]>;
+def addsub_imm8_opt_lsl_i16 : imm8_opt_lsl<16, "uint16_t", SVEAddSubImmOperand16, [{
+ return AArch64_AM::isSVEAddSubImm<int16_t>(Imm);
+}]>;
+def addsub_imm8_opt_lsl_i32 : imm8_opt_lsl<32, "uint32_t", SVEAddSubImmOperand32, [{
+ return AArch64_AM::isSVEAddSubImm<int32_t>(Imm);
+}]>;
+def addsub_imm8_opt_lsl_i64 : imm8_opt_lsl<64, "uint64_t", SVEAddSubImmOperand64, [{
+ return AArch64_AM::isSVEAddSubImm<int64_t>(Imm);
+}]>;
+
+class SVEExactFPImm<string Suffix, string ValA, string ValB> : AsmOperandClass {
+ let Name = "SVEExactFPImmOperand" # Suffix;
+ let DiagnosticType = "Invalid" # Name;
+ let ParserMethod = "tryParseFPImm<false>";
+ let PredicateMethod = "isExactFPImm<" # ValA # ", " # ValB # ">";
+ let RenderMethod = "addExactFPImmOperands<" # ValA # ", " # ValB # ">";
+}
+
+class SVEExactFPImmOperand<string Suffix, string ValA, string ValB> : Operand<i32> {
+ let PrintMethod = "printExactFPImm<" # ValA # ", " # ValB # ">";
+ let ParserMatchClass = SVEExactFPImm<Suffix, ValA, ValB>;
+}
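+// The operands below restrict the FP immediate to one of exactly two values
+// (for example #0.5 vs #1.0), so matching and rendering are effectively a
+// choice between ValA and ValB rather than an arbitrary FP constant.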
+
+def sve_fpimm_half_one
+ : SVEExactFPImmOperand<"HalfOne", "AArch64ExactFPImm::half",
+ "AArch64ExactFPImm::one">;
+def sve_fpimm_half_two
+ : SVEExactFPImmOperand<"HalfTwo", "AArch64ExactFPImm::half",
+ "AArch64ExactFPImm::two">;
+def sve_fpimm_zero_one
+ : SVEExactFPImmOperand<"ZeroOne", "AArch64ExactFPImm::zero",
+ "AArch64ExactFPImm::one">;
+
+def sve_incdec_imm : Operand<i32>, ImmLeaf<i32, [{
+ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17);
+}]> {
+ let ParserMatchClass = Imm1_16Operand;
+ let EncoderMethod = "getSVEIncDecImm";
+ let DecoderMethod = "DecodeSVEIncDecImm";
+}
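+// sve_incdec_imm is the "mul #imm" multiplier in the range 1-16; the custom
+// encoder/decoder presumably map it onto the 4-bit imm4 field as (value - 1).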
+
+//===----------------------------------------------------------------------===//
+// SVE PTrue - These are used extensively throughout the pattern matching, so
+// it's important that we define them first.
+//===----------------------------------------------------------------------===//
+
+class sve_int_ptrue<bits<2> sz8_64, bits<3> opc, string asm, PPRRegOp pprty>
+: I<(outs pprty:$Pd), (ins sve_pred_enum:$pattern),
+ asm, "\t$Pd, $pattern",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pd;
+ bits<5> pattern;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-19} = 0b011;
+ let Inst{18-17} = opc{2-1};
+ let Inst{16} = opc{0};
+ let Inst{15-10} = 0b111000;
+ let Inst{9-5} = pattern;
+ let Inst{4} = 0b0;
+ let Inst{3-0} = Pd;
+
+ let Defs = !if(!eq (opc{0}, 1), [NZCV], []);
+}
+
+multiclass sve_int_ptrue<bits<3> opc, string asm> {
+ def _B : sve_int_ptrue<0b00, opc, asm, PPR8>;
+ def _H : sve_int_ptrue<0b01, opc, asm, PPR16>;
+ def _S : sve_int_ptrue<0b10, opc, asm, PPR32>;
+ def _D : sve_int_ptrue<0b11, opc, asm, PPR64>;
+
+ def : InstAlias<asm # "\t$Pd",
+ (!cast<Instruction>(NAME # _B) PPR8:$Pd, 0b11111), 1>;
+ def : InstAlias<asm # "\t$Pd",
+ (!cast<Instruction>(NAME # _H) PPR16:$Pd, 0b11111), 1>;
+ def : InstAlias<asm # "\t$Pd",
+ (!cast<Instruction>(NAME # _S) PPR32:$Pd, 0b11111), 1>;
+ def : InstAlias<asm # "\t$Pd",
+ (!cast<Instruction>(NAME # _D) PPR64:$Pd, 0b11111), 1>;
+}
+
+let Predicates = [HasSVE] in {
+ defm PTRUE : sve_int_ptrue<0b000, "ptrue">;
+ defm PTRUES : sve_int_ptrue<0b001, "ptrues">;
+}
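+// Illustrative assembly accepted by the definitions above (shown only as an
+// example, not encoded in this file): "ptrue p0.s, vl4", or "ptrue p0.b" via
+// the aliases with the pattern defaulting to all lanes. PTRUES (opc{0} = 1)
+// additionally sets NZCV, per the conditional Defs in sve_int_ptrue.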
+
+
+//===----------------------------------------------------------------------===//
+// SVE Predicate Misc Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_pfalse<bits<6> opc, string asm>
+: I<(outs PPR8:$Pd), (ins),
+ asm, "\t$Pd",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pd;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = opc{5-4};
+ let Inst{21-19} = 0b011;
+ let Inst{18-16} = opc{3-1};
+ let Inst{15-10} = 0b111001;
+ let Inst{9} = opc{0};
+ let Inst{8-4} = 0b00000;
+ let Inst{3-0} = Pd;
+}
+
+class sve_int_ptest<bits<6> opc, string asm>
+: I<(outs), (ins PPRAny:$Pg, PPR8:$Pn),
+ asm, "\t$Pg, $Pn",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pg;
+ bits<4> Pn;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = opc{5-4};
+ let Inst{21-19} = 0b010;
+ let Inst{18-16} = opc{3-1};
+ let Inst{15-14} = 0b11;
+ let Inst{13-10} = Pg;
+ let Inst{9} = opc{0};
+ let Inst{8-5} = Pn;
+ let Inst{4-0} = 0b00000;
+
+ let Defs = [NZCV];
+}
+
+class sve_int_pfirst_next<bits<2> sz8_64, bits<5> opc, string asm,
+ PPRRegOp pprty>
+: I<(outs pprty:$Pdn), (ins PPRAny:$Pg, pprty:$_Pdn),
+ asm, "\t$Pdn, $Pg, $_Pdn",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pdn;
+ bits<4> Pg;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-19} = 0b011;
+ let Inst{18-16} = opc{4-2};
+ let Inst{15-11} = 0b11000;
+ let Inst{10-9} = opc{1-0};
+ let Inst{8-5} = Pg;
+ let Inst{4} = 0;
+ let Inst{3-0} = Pdn;
+
+ let Constraints = "$Pdn = $_Pdn";
+ let Defs = [NZCV];
+}
+
+multiclass sve_int_pfirst<bits<5> opc, string asm> {
+ def : sve_int_pfirst_next<0b01, opc, asm, PPR8>;
+}
+
+multiclass sve_int_pnext<bits<5> opc, string asm> {
+ def _B : sve_int_pfirst_next<0b00, opc, asm, PPR8>;
+ def _H : sve_int_pfirst_next<0b01, opc, asm, PPR16>;
+ def _S : sve_int_pfirst_next<0b10, opc, asm, PPR32>;
+ def _D : sve_int_pfirst_next<0b11, opc, asm, PPR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Predicate Count Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_count_r<bits<2> sz8_64, bits<5> opc, string asm,
+ RegisterOperand dty, PPRRegOp pprty, RegisterOperand sty>
+: I<(outs dty:$Rdn), (ins pprty:$Pg, sty:$_Rdn),
+ asm, "\t$Rdn, $Pg",
+ "",
+ []>, Sched<[]> {
+ bits<5> Rdn;
+ bits<4> Pg;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-19} = 0b101;
+ let Inst{18-16} = opc{4-2};
+ let Inst{15-11} = 0b10001;
+ let Inst{10-9} = opc{1-0};
+ let Inst{8-5} = Pg;
+ let Inst{4-0} = Rdn;
+
+  // Signed 32-bit forms require their tied GPR source operand to be printed.
+ let AsmString = !if(!eq(opc{4,2-0}, 0b0000),
+ !strconcat(asm, "\t$Rdn, $Pg, $_Rdn"),
+ !strconcat(asm, "\t$Rdn, $Pg"));
+ let Constraints = "$Rdn = $_Rdn";
+}
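+// dty/sty let the variants below pair different destination and tied-source
+// register classes, e.g. the signed 32-bit forms write a 64-bit Rdn while
+// reading it as a 32-bit source (GPR64z destination, GPR64as32 source).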
+
+multiclass sve_int_count_r_s32<bits<5> opc, string asm> {
+ def _B : sve_int_count_r<0b00, opc, asm, GPR64z, PPR8, GPR64as32>;
+ def _H : sve_int_count_r<0b01, opc, asm, GPR64z, PPR16, GPR64as32>;
+ def _S : sve_int_count_r<0b10, opc, asm, GPR64z, PPR32, GPR64as32>;
+ def _D : sve_int_count_r<0b11, opc, asm, GPR64z, PPR64, GPR64as32>;
+}
+
+multiclass sve_int_count_r_u32<bits<5> opc, string asm> {
+ def _B : sve_int_count_r<0b00, opc, asm, GPR32z, PPR8, GPR32z>;
+ def _H : sve_int_count_r<0b01, opc, asm, GPR32z, PPR16, GPR32z>;
+ def _S : sve_int_count_r<0b10, opc, asm, GPR32z, PPR32, GPR32z>;
+ def _D : sve_int_count_r<0b11, opc, asm, GPR32z, PPR64, GPR32z>;
+}
+
+multiclass sve_int_count_r_x64<bits<5> opc, string asm> {
+ def _B : sve_int_count_r<0b00, opc, asm, GPR64z, PPR8, GPR64z>;
+ def _H : sve_int_count_r<0b01, opc, asm, GPR64z, PPR16, GPR64z>;
+ def _S : sve_int_count_r<0b10, opc, asm, GPR64z, PPR32, GPR64z>;
+ def _D : sve_int_count_r<0b11, opc, asm, GPR64z, PPR64, GPR64z>;
+}
+
+class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm,
+ ZPRRegOp zprty>
+: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, PPRAny:$Pg),
+ asm, "\t$Zdn, $Pg",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pg;
+ bits<5> Zdn;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-19} = 0b101;
+ let Inst{18-16} = opc{4-2};
+ let Inst{15-11} = 0b10000;
+ let Inst{10-9} = opc{1-0};
+ let Inst{8-5} = Pg;
+ let Inst{4-0} = Zdn;
+
+ let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
+}
+
+multiclass sve_int_count_v<bits<5> opc, string asm> {
+ def _H : sve_int_count_v<0b01, opc, asm, ZPR16>;
+ def _S : sve_int_count_v<0b10, opc, asm, ZPR32>;
+ def _D : sve_int_count_v<0b11, opc, asm, ZPR64>;
+}
+
+class sve_int_pcount_pred<bits<2> sz8_64, bits<4> opc, string asm,
+ PPRRegOp pprty>
+: I<(outs GPR64:$Rd), (ins PPRAny:$Pg, pprty:$Pn),
+ asm, "\t$Rd, $Pg, $Pn",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pg;
+ bits<4> Pn;
+ bits<5> Rd;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-19} = 0b100;
+ let Inst{18-16} = opc{3-1};
+ let Inst{15-14} = 0b10;
+ let Inst{13-10} = Pg;
+ let Inst{9} = opc{0};
+ let Inst{8-5} = Pn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass sve_int_pcount_pred<bits<4> opc, string asm> {
+ def _B : sve_int_pcount_pred<0b00, opc, asm, PPR8>;
+ def _H : sve_int_pcount_pred<0b01, opc, asm, PPR16>;
+ def _S : sve_int_pcount_pred<0b10, opc, asm, PPR32>;
+ def _D : sve_int_pcount_pred<0b11, opc, asm, PPR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Element Count Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_count<bits<3> opc, string asm>
+: I<(outs GPR64:$Rd), (ins sve_pred_enum:$pattern, sve_incdec_imm:$imm4),
+ asm, "\t$Rd, $pattern, mul $imm4",
+ "",
+ []>, Sched<[]> {
+ bits<5> Rd;
+ bits<4> imm4;
+ bits<5> pattern;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = opc{2-1};
+ let Inst{21-20} = 0b10;
+ let Inst{19-16} = imm4;
+ let Inst{15-11} = 0b11100;
+ let Inst{10} = opc{0};
+ let Inst{9-5} = pattern;
+ let Inst{4-0} = Rd;
+}
+
+multiclass sve_int_count<bits<3> opc, string asm> {
+ def NAME : sve_int_count<opc, asm>;
+
+ def : InstAlias<asm # "\t$Rd, $pattern",
+ (!cast<Instruction>(NAME) GPR64:$Rd, sve_pred_enum:$pattern, 1), 1>;
+ def : InstAlias<asm # "\t$Rd",
+ (!cast<Instruction>(NAME) GPR64:$Rd, 0b11111, 1), 2>;
+}
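+// The two aliases make the multiplier (default 1) and the pattern (default
+// all lanes) optional for the element-count instructions presumably built
+// from this multiclass elsewhere (e.g. CNTB/CNTH/CNTW/CNTD).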
+
+class sve_int_countvlv<bits<5> opc, string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, sve_pred_enum:$pattern, sve_incdec_imm:$imm4),
+ asm, "\t$Zdn, $pattern, mul $imm4",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zdn;
+ bits<5> pattern;
+ bits<4> imm4;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = opc{4-3};
+ let Inst{21} = 0b1;
+ let Inst{20} = opc{2};
+ let Inst{19-16} = imm4;
+ let Inst{15-12} = 0b1100;
+ let Inst{11-10} = opc{1-0};
+ let Inst{9-5} = pattern;
+ let Inst{4-0} = Zdn;
+
+ let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
+}
+
+multiclass sve_int_countvlv<bits<5> opc, string asm, ZPRRegOp zprty> {
+ def NAME : sve_int_countvlv<opc, asm, zprty>;
+
+ def : InstAlias<asm # "\t$Zdn, $pattern",
+ (!cast<Instruction>(NAME) zprty:$Zdn, sve_pred_enum:$pattern, 1), 1>;
+ def : InstAlias<asm # "\t$Zdn",
+ (!cast<Instruction>(NAME) zprty:$Zdn, 0b11111, 1), 2>;
+}
+
+class sve_int_pred_pattern_a<bits<3> opc, string asm>
+: I<(outs GPR64:$Rdn), (ins GPR64:$_Rdn, sve_pred_enum:$pattern, sve_incdec_imm:$imm4),
+ asm, "\t$Rdn, $pattern, mul $imm4",
+ "",
+ []>, Sched<[]> {
+ bits<5> Rdn;
+ bits<5> pattern;
+ bits<4> imm4;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = opc{2-1};
+ let Inst{21-20} = 0b11;
+ let Inst{19-16} = imm4;
+ let Inst{15-11} = 0b11100;
+ let Inst{10} = opc{0};
+ let Inst{9-5} = pattern;
+ let Inst{4-0} = Rdn;
+
+ let Constraints = "$Rdn = $_Rdn";
+}
+
+multiclass sve_int_pred_pattern_a<bits<3> opc, string asm> {
+ def NAME : sve_int_pred_pattern_a<opc, asm>;
+
+ def : InstAlias<asm # "\t$Rdn, $pattern",
+ (!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, 1), 1>;
+ def : InstAlias<asm # "\t$Rdn",
+ (!cast<Instruction>(NAME) GPR64:$Rdn, 0b11111, 1), 2>;
+}
+
+class sve_int_pred_pattern_b<bits<5> opc, string asm, RegisterOperand dt,
+ RegisterOperand st>
+: I<(outs dt:$Rdn), (ins st:$_Rdn, sve_pred_enum:$pattern, sve_incdec_imm:$imm4),
+ asm, "\t$Rdn, $pattern, mul $imm4",
+ "",
+ []>, Sched<[]> {
+ bits<5> Rdn;
+ bits<5> pattern;
+ bits<4> imm4;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = opc{4-3};
+ let Inst{21} = 0b1;
+ let Inst{20} = opc{2};
+ let Inst{19-16} = imm4;
+ let Inst{15-12} = 0b1111;
+ let Inst{11-10} = opc{1-0};
+ let Inst{9-5} = pattern;
+ let Inst{4-0} = Rdn;
+
+  // Signed 32-bit forms require their tied GPR source operand to be printed.
+ let AsmString = !if(!eq(opc{2,0}, 0b00),
+ !strconcat(asm, "\t$Rdn, $_Rdn, $pattern, mul $imm4"),
+ !strconcat(asm, "\t$Rdn, $pattern, mul $imm4"));
+
+ let Constraints = "$Rdn = $_Rdn";
+}
+
+multiclass sve_int_pred_pattern_b_s32<bits<5> opc, string asm> {
+ def NAME : sve_int_pred_pattern_b<opc, asm, GPR64z, GPR64as32>;
+
+ def : InstAlias<asm # "\t$Rd, $Rn, $pattern",
+ (!cast<Instruction>(NAME) GPR64z:$Rd, GPR64as32:$Rn, sve_pred_enum:$pattern, 1), 1>;
+ def : InstAlias<asm # "\t$Rd, $Rn",
+ (!cast<Instruction>(NAME) GPR64z:$Rd, GPR64as32:$Rn, 0b11111, 1), 2>;
+}
+
+multiclass sve_int_pred_pattern_b_u32<bits<5> opc, string asm> {
+ def NAME : sve_int_pred_pattern_b<opc, asm, GPR32z, GPR32z>;
+
+ def : InstAlias<asm # "\t$Rdn, $pattern",
+ (!cast<Instruction>(NAME) GPR32z:$Rdn, sve_pred_enum:$pattern, 1), 1>;
+ def : InstAlias<asm # "\t$Rdn",
+ (!cast<Instruction>(NAME) GPR32z:$Rdn, 0b11111, 1), 2>;
+}
+
+multiclass sve_int_pred_pattern_b_x64<bits<5> opc, string asm> {
+ def NAME : sve_int_pred_pattern_b<opc, asm, GPR64z, GPR64z>;
+
+ def : InstAlias<asm # "\t$Rdn, $pattern",
+ (!cast<Instruction>(NAME) GPR64z:$Rdn, sve_pred_enum:$pattern, 1), 1>;
+ def : InstAlias<asm # "\t$Rdn",
+ (!cast<Instruction>(NAME) GPR64z:$Rdn, 0b11111, 1), 2>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// SVE Permute - Cross Lane Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_perm_dup_r<bits<2> sz8_64, string asm, ZPRRegOp zprty,
+ RegisterClass srcRegType>
+: I<(outs zprty:$Zd), (ins srcRegType:$Rn),
+ asm, "\t$Zd, $Rn",
+ "",
+ []>, Sched<[]> {
+ bits<5> Rn;
+ bits<5> Zd;
+ let Inst{31-24} = 0b00000101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-10} = 0b100000001110;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve_int_perm_dup_r<string asm> {
+ def _B : sve_int_perm_dup_r<0b00, asm, ZPR8, GPR32sp>;
+ def _H : sve_int_perm_dup_r<0b01, asm, ZPR16, GPR32sp>;
+ def _S : sve_int_perm_dup_r<0b10, asm, ZPR32, GPR32sp>;
+ def _D : sve_int_perm_dup_r<0b11, asm, ZPR64, GPR64sp>;
+
+ def : InstAlias<"mov $Zd, $Rn",
+ (!cast<Instruction>(NAME # _B) ZPR8:$Zd, GPR32sp:$Rn), 1>;
+ def : InstAlias<"mov $Zd, $Rn",
+ (!cast<Instruction>(NAME # _H) ZPR16:$Zd, GPR32sp:$Rn), 1>;
+ def : InstAlias<"mov $Zd, $Rn",
+ (!cast<Instruction>(NAME # _S) ZPR32:$Zd, GPR32sp:$Rn), 1>;
+ def : InstAlias<"mov $Zd, $Rn",
+ (!cast<Instruction>(NAME # _D) ZPR64:$Zd, GPR64sp:$Rn), 1>;
+}
+
+class sve_int_perm_dup_i<bits<5> tsz, Operand immtype, string asm,
+ ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins zprty:$Zn, immtype:$idx),
+ asm, "\t$Zd, $Zn$idx",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ bits<7> idx;
+ let Inst{31-24} = 0b00000101;
+ let Inst{23-22} = {?,?}; // imm3h
+ let Inst{21} = 0b1;
+ let Inst{20-16} = tsz;
+ let Inst{15-10} = 0b001000;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve_int_perm_dup_i<string asm> {
+ def _B : sve_int_perm_dup_i<{?,?,?,?,1}, sve_elm_idx_extdup_b, asm, ZPR8> {
+ let Inst{23-22} = idx{5-4};
+ let Inst{20-17} = idx{3-0};
+ }
+ def _H : sve_int_perm_dup_i<{?,?,?,1,0}, sve_elm_idx_extdup_h, asm, ZPR16> {
+ let Inst{23-22} = idx{4-3};
+ let Inst{20-18} = idx{2-0};
+ }
+ def _S : sve_int_perm_dup_i<{?,?,1,0,0}, sve_elm_idx_extdup_s, asm, ZPR32> {
+ let Inst{23-22} = idx{3-2};
+ let Inst{20-19} = idx{1-0};
+ }
+ def _D : sve_int_perm_dup_i<{?,1,0,0,0}, sve_elm_idx_extdup_d, asm, ZPR64> {
+ let Inst{23-22} = idx{2-1};
+ let Inst{20} = idx{0};
+ }
+ def _Q : sve_int_perm_dup_i<{1,0,0,0,0}, sve_elm_idx_extdup_q, asm, ZPR128> {
+ let Inst{23-22} = idx{1-0};
+ }
+
+ def : InstAlias<"mov $Zd, $Zn$idx",
+ (!cast<Instruction>(NAME # _B) ZPR8:$Zd, ZPR8:$Zn, sve_elm_idx_extdup_b:$idx), 1>;
+ def : InstAlias<"mov $Zd, $Zn$idx",
+ (!cast<Instruction>(NAME # _H) ZPR16:$Zd, ZPR16:$Zn, sve_elm_idx_extdup_h:$idx), 1>;
+ def : InstAlias<"mov $Zd, $Zn$idx",
+ (!cast<Instruction>(NAME # _S) ZPR32:$Zd, ZPR32:$Zn, sve_elm_idx_extdup_s:$idx), 1>;
+ def : InstAlias<"mov $Zd, $Zn$idx",
+ (!cast<Instruction>(NAME # _D) ZPR64:$Zd, ZPR64:$Zn, sve_elm_idx_extdup_d:$idx), 1>;
+ def : InstAlias<"mov $Zd, $Zn$idx",
+ (!cast<Instruction>(NAME # _Q) ZPR128:$Zd, ZPR128:$Zn, sve_elm_idx_extdup_q:$idx), 1>;
+ def : InstAlias<"mov $Zd, $Bn",
+ (!cast<Instruction>(NAME # _B) ZPR8:$Zd, FPR8asZPR:$Bn, 0), 2>;
+ def : InstAlias<"mov $Zd, $Hn",
+ (!cast<Instruction>(NAME # _H) ZPR16:$Zd, FPR16asZPR:$Hn, 0), 2>;
+ def : InstAlias<"mov $Zd, $Sn",
+ (!cast<Instruction>(NAME # _S) ZPR32:$Zd, FPR32asZPR:$Sn, 0), 2>;
+ def : InstAlias<"mov $Zd, $Dn",
+ (!cast<Instruction>(NAME # _D) ZPR64:$Zd, FPR64asZPR:$Dn, 0), 2>;
+ def : InstAlias<"mov $Zd, $Qn",
+ (!cast<Instruction>(NAME # _Q) ZPR128:$Zd, FPR128asZPR:$Qn, 0), 2>;
+}
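+// The per-size lets above pack the element index into the unused upper bits
+// of the tsz field (and imm3h in Inst{23-22}): the position of the lowest set
+// bit of tsz selects the element size, and the bits above it hold the index.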
+
+class sve_int_perm_tbl<bits<2> sz8_64, string asm, ZPRRegOp zprty,
+ RegisterOperand VecList>
+: I<(outs zprty:$Zd), (ins VecList:$Zn, zprty:$Zm),
+ asm, "\t$Zd, $Zn, $Zm",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zm;
+ bits<5> Zn;
+ let Inst{31-24} = 0b00000101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Zm;
+ let Inst{15-10} = 0b001100;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve_int_perm_tbl<string asm> {
+ def _B : sve_int_perm_tbl<0b00, asm, ZPR8, Z_b>;
+ def _H : sve_int_perm_tbl<0b01, asm, ZPR16, Z_h>;
+ def _S : sve_int_perm_tbl<0b10, asm, ZPR32, Z_s>;
+ def _D : sve_int_perm_tbl<0b11, asm, ZPR64, Z_d>;
+
+ def : InstAlias<asm # "\t$Zd, $Zn, $Zm",
+ (!cast<Instruction>(NAME # _B) ZPR8:$Zd, ZPR8:$Zn, ZPR8:$Zm), 0>;
+ def : InstAlias<asm # "\t$Zd, $Zn, $Zm",
+ (!cast<Instruction>(NAME # _H) ZPR16:$Zd, ZPR16:$Zn, ZPR16:$Zm), 0>;
+ def : InstAlias<asm # "\t$Zd, $Zn, $Zm",
+ (!cast<Instruction>(NAME # _S) ZPR32:$Zd, ZPR32:$Zn, ZPR32:$Zm), 0>;
+ def : InstAlias<asm # "\t$Zd, $Zn, $Zm",
+ (!cast<Instruction>(NAME # _D) ZPR64:$Zd, ZPR64:$Zn, ZPR64:$Zm), 0>;
+}
+
+class sve_int_perm_reverse_z<bits<2> sz8_64, string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins zprty:$Zn),
+ asm, "\t$Zd, $Zn",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ let Inst{31-24} = 0b00000101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-10} = 0b111000001110;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve_int_perm_reverse_z<string asm> {
+ def _B : sve_int_perm_reverse_z<0b00, asm, ZPR8>;
+ def _H : sve_int_perm_reverse_z<0b01, asm, ZPR16>;
+ def _S : sve_int_perm_reverse_z<0b10, asm, ZPR32>;
+ def _D : sve_int_perm_reverse_z<0b11, asm, ZPR64>;
+}
+
+class sve_int_perm_reverse_p<bits<2> sz8_64, string asm, PPRRegOp pprty>
+: I<(outs pprty:$Pd), (ins pprty:$Pn),
+ asm, "\t$Pd, $Pn",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pd;
+ bits<4> Pn;
+ let Inst{31-24} = 0b00000101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-9} = 0b1101000100000;
+ let Inst{8-5} = Pn;
+ let Inst{4} = 0b0;
+ let Inst{3-0} = Pd;
+}
+
+multiclass sve_int_perm_reverse_p<string asm> {
+ def _B : sve_int_perm_reverse_p<0b00, asm, PPR8>;
+ def _H : sve_int_perm_reverse_p<0b01, asm, PPR16>;
+ def _S : sve_int_perm_reverse_p<0b10, asm, PPR32>;
+ def _D : sve_int_perm_reverse_p<0b11, asm, PPR64>;
+}
+
+class sve_int_perm_unpk<bits<2> sz16_64, bits<2> opc, string asm,
+ ZPRRegOp zprty1, ZPRRegOp zprty2>
+: I<(outs zprty1:$Zd), (ins zprty2:$Zn),
+ asm, "\t$Zd, $Zn",
+ "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ let Inst{31-24} = 0b00000101;
+ let Inst{23-22} = sz16_64;
+ let Inst{21-18} = 0b1100;
+ let Inst{17-16} = opc;
+ let Inst{15-10} = 0b001110;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve_int_perm_unpk<bits<2> opc, string asm> {
+ def _H : sve_int_perm_unpk<0b01, opc, asm, ZPR16, ZPR8>;
+ def _S : sve_int_perm_unpk<0b10, opc, asm, ZPR32, ZPR16>;
+ def _D : sve_int_perm_unpk<0b11, opc, asm, ZPR64, ZPR32>;
+}
+
+class sve_int_perm_insrs<bits<2> sz8_64, string asm, ZPRRegOp zprty,
+ RegisterClass srcRegType>
+: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, srcRegType:$Rm),
+ asm, "\t$Zdn, $Rm",
+ "",
+ []>, Sched<[]> {
+ bits<5> Rm;
+ bits<5> Zdn;
+ let Inst{31-24} = 0b00000101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-10} = 0b100100001110;
+ let Inst{9-5} = Rm;
+ let Inst{4-0} = Zdn;
+
+ let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
+}
+
+multiclass sve_int_perm_insrs<string asm> {
+ def _B : sve_int_perm_insrs<0b00, asm, ZPR8, GPR32>;
+ def _H : sve_int_perm_insrs<0b01, asm, ZPR16, GPR32>;
+ def _S : sve_int_perm_insrs<0b10, asm, ZPR32, GPR32>;
+ def _D : sve_int_perm_insrs<0b11, asm, ZPR64, GPR64>;
+}
+
+class sve_int_perm_insrv<bits<2> sz8_64, string asm, ZPRRegOp zprty,
+ RegisterClass srcRegType>
+: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, srcRegType:$Vm),
+ asm, "\t$Zdn, $Vm",
+ "",
+ []>, Sched<[]> {
+ bits<5> Vm;
+ bits<5> Zdn;
+ let Inst{31-24} = 0b00000101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-10} = 0b110100001110;
+ let Inst{9-5} = Vm;
+ let Inst{4-0} = Zdn;
+
+ let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
+}
+
+multiclass sve_int_perm_insrv<string asm> {
+ def _B : sve_int_perm_insrv<0b00, asm, ZPR8, FPR8>;
+ def _H : sve_int_perm_insrv<0b01, asm, ZPR16, FPR16>;
+ def _S : sve_int_perm_insrv<0b10, asm, ZPR32, FPR32>;
+ def _D : sve_int_perm_insrv<0b11, asm, ZPR64, FPR64>;
+}
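+
+// The two INSR-style layouts above differ only in Inst{21-10} and in the
+// source operand: sve_int_perm_insrs takes a general-purpose register,
+// sve_int_perm_insrv a SIMD & FP scalar register.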
+
+//===----------------------------------------------------------------------===//
+// SVE Permute - Extract Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_perm_extract_i<string asm>
+: I<(outs ZPR8:$Zdn), (ins ZPR8:$_Zdn, ZPR8:$Zm, imm0_255:$imm8),
+ asm, "\t$Zdn, $_Zdn, $Zm, $imm8",
+ "", []>, Sched<[]> {
+ bits<5> Zdn;
+ bits<5> Zm;
+ bits<8> imm8;
+ let Inst{31-21} = 0b00000101001;
+ let Inst{20-16} = imm8{7-3};
+ let Inst{15-13} = 0b000;
+ let Inst{12-10} = imm8{2-0};
+ let Inst{9-5} = Zm;
+ let Inst{4-0} = Zdn;
+
+ let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Vector Select Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_sel_vvv<bits<2> sz8_64, string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins PPRAny:$Pg, zprty:$Zn, zprty:$Zm),
+ asm, "\t$Zd, $Pg, $Zn, $Zm",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pg;
+ bits<5> Zd;
+ bits<5> Zm;
+ bits<5> Zn;
+ let Inst{31-24} = 0b00000101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Zm;
+ let Inst{15-14} = 0b11;
+ let Inst{13-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve_int_sel_vvv<string asm> {
+ def _B : sve_int_sel_vvv<0b00, asm, ZPR8>;
+ def _H : sve_int_sel_vvv<0b01, asm, ZPR16>;
+ def _S : sve_int_sel_vvv<0b10, asm, ZPR32>;
+ def _D : sve_int_sel_vvv<0b11, asm, ZPR64>;
+
+ def : InstAlias<"mov $Zd, $Pg/m, $Zn",
+ (!cast<Instruction>(NAME # _B) ZPR8:$Zd, PPRAny:$Pg, ZPR8:$Zn, ZPR8:$Zd), 1>;
+ def : InstAlias<"mov $Zd, $Pg/m, $Zn",
+ (!cast<Instruction>(NAME # _H) ZPR16:$Zd, PPRAny:$Pg, ZPR16:$Zn, ZPR16:$Zd), 1>;
+ def : InstAlias<"mov $Zd, $Pg/m, $Zn",
+ (!cast<Instruction>(NAME # _S) ZPR32:$Zd, PPRAny:$Pg, ZPR32:$Zn, ZPR32:$Zd), 1>;
+ def : InstAlias<"mov $Zd, $Pg/m, $Zn",
+ (!cast<Instruction>(NAME # _D) ZPR64:$Zd, PPRAny:$Pg, ZPR64:$Zn, ZPR64:$Zd), 1>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// SVE Predicate Logical Operations Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_pred_log<bits<4> opc, string asm>
+: I<(outs PPR8:$Pd), (ins PPRAny:$Pg, PPR8:$Pn, PPR8:$Pm),
+ asm, "\t$Pd, $Pg/z, $Pn, $Pm",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pd;
+ bits<4> Pg;
+ bits<4> Pm;
+ bits<4> Pn;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = opc{3-2};
+ let Inst{21-20} = 0b00;
+ let Inst{19-16} = Pm;
+ let Inst{15-14} = 0b01;
+ let Inst{13-10} = Pg;
+ let Inst{9} = opc{1};
+ let Inst{8-5} = Pn;
+ let Inst{4} = opc{0};
+ let Inst{3-0} = Pd;
+
+ // SEL has no predication qualifier.
+ let AsmString = !if(!eq(opc, 0b0011),
+ !strconcat(asm, "\t$Pd, $Pg, $Pn, $Pm"),
+ !strconcat(asm, "\t$Pd, $Pg/z, $Pn, $Pm"));
+
+ let Defs = !if(!eq (opc{2}, 1), [NZCV], []);
+}
+
+
+//===----------------------------------------------------------------------===//
+// SVE Logical Mask Immediate Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_log_imm<bits<2> opc, string asm>
+: I<(outs ZPR64:$Zdn), (ins ZPR64:$_Zdn, logical_imm64:$imms13),
+ asm, "\t$Zdn, $_Zdn, $imms13",
+ "", []>, Sched<[]> {
+ bits<5> Zdn;
+ bits<13> imms13;
+ let Inst{31-24} = 0b00000101;
+ let Inst{23-22} = opc;
+ let Inst{21-18} = 0b0000;
+ let Inst{17-5} = imms13;
+ let Inst{4-0} = Zdn;
+
+ let Constraints = "$Zdn = $_Zdn";
+ let DecoderMethod = "DecodeSVELogicalImmInstruction";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
+}
+
+multiclass sve_int_log_imm<bits<2> opc, string asm, string alias> {
+ def NAME : sve_int_log_imm<opc, asm>;
+
+ def : InstAlias<asm # "\t$Zdn, $Zdn, $imm",
+ (!cast<Instruction>(NAME) ZPR8:$Zdn, sve_logical_imm8:$imm), 4>;
+ def : InstAlias<asm # "\t$Zdn, $Zdn, $imm",
+ (!cast<Instruction>(NAME) ZPR16:$Zdn, sve_logical_imm16:$imm), 3>;
+ def : InstAlias<asm # "\t$Zdn, $Zdn, $imm",
+ (!cast<Instruction>(NAME) ZPR32:$Zdn, sve_logical_imm32:$imm), 2>;
+
+ def : InstAlias<alias # "\t$Zdn, $Zdn, $imm",
+ (!cast<Instruction>(NAME) ZPR8:$Zdn, sve_logical_imm8_not:$imm), 0>;
+ def : InstAlias<alias # "\t$Zdn, $Zdn, $imm",
+ (!cast<Instruction>(NAME) ZPR16:$Zdn, sve_logical_imm16_not:$imm), 0>;
+ def : InstAlias<alias # "\t$Zdn, $Zdn, $imm",
+ (!cast<Instruction>(NAME) ZPR32:$Zdn, sve_logical_imm32_not:$imm), 0>;
+ def : InstAlias<alias # "\t$Zdn, $Zdn, $imm",
+ (!cast<Instruction>(NAME) ZPR64:$Zdn, logical_imm64_not:$imm), 0>;
+}
+
+class sve_int_dup_mask_imm<string asm>
+: I<(outs ZPR64:$Zd), (ins logical_imm64:$imms),
+ asm, "\t$Zd, $imms",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zd;
+ bits<13> imms;
+ let Inst{31-18} = 0b00000101110000;
+ let Inst{17-5} = imms;
+ let Inst{4-0} = Zd;
+
+ let isReMaterializable = 1;
+ let DecoderMethod = "DecodeSVELogicalImmInstruction";
+}
+
+multiclass sve_int_dup_mask_imm<string asm> {
+ def NAME : sve_int_dup_mask_imm<asm>;
+
+ def : InstAlias<"dupm $Zd, $imm",
+ (!cast<Instruction>(NAME) ZPR8:$Zd, sve_logical_imm8:$imm), 4>;
+ def : InstAlias<"dupm $Zd, $imm",
+ (!cast<Instruction>(NAME) ZPR16:$Zd, sve_logical_imm16:$imm), 3>;
+ def : InstAlias<"dupm $Zd, $imm",
+ (!cast<Instruction>(NAME) ZPR32:$Zd, sve_logical_imm32:$imm), 2>;
+
+ // All Zd.b forms have a CPY/DUP equivalent, hence no byte alias here.
+ def : InstAlias<"mov $Zd, $imm",
+ (!cast<Instruction>(NAME) ZPR16:$Zd, sve_preferred_logical_imm16:$imm), 7>;
+ def : InstAlias<"mov $Zd, $imm",
+ (!cast<Instruction>(NAME) ZPR32:$Zd, sve_preferred_logical_imm32:$imm), 6>;
+ def : InstAlias<"mov $Zd, $imm",
+ (!cast<Instruction>(NAME) ZPR64:$Zd, sve_preferred_logical_imm64:$imm), 5>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Integer Arithmetic - Unpredicated Group

+//===----------------------------------------------------------------------===//
+
+class sve_int_bin_cons_arit_0<bits<2> sz8_64, bits<3> opc, string asm,
+ ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm),
+ asm, "\t$Zd, $Zn, $Zm",
+ "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zm;
+ bits<5> Zn;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = sz8_64;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Zm;
+ let Inst{15-13} = 0b000;
+ let Inst{12-10} = opc;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve_int_bin_cons_arit_0<bits<3> opc, string asm> {
+ def _B : sve_int_bin_cons_arit_0<0b00, opc, asm, ZPR8>;
+ def _H : sve_int_bin_cons_arit_0<0b01, opc, asm, ZPR16>;
+ def _S : sve_int_bin_cons_arit_0<0b10, opc, asm, ZPR32>;
+ def _D : sve_int_bin_cons_arit_0<0b11, opc, asm, ZPR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Floating Point Arithmetic - Predicated Group
+//===----------------------------------------------------------------------===//
+
+class sve_fp_2op_i_p_zds<bits<2> sz, bits<3> opc, string asm,
+ ZPRRegOp zprty,
+ Operand imm_ty>
+: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, imm_ty:$i1),
+ asm, "\t$Zdn, $Pg/m, $_Zdn, $i1",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Zdn;
+ bit i1;
+ let Inst{31-24} = 0b01100101;
+ let Inst{23-22} = sz;
+ let Inst{21-19} = 0b011;
+ let Inst{18-16} = opc;
+ let Inst{15-13} = 0b100;
+ let Inst{12-10} = Pg;
+ let Inst{9-6} = 0b0000;
+ let Inst{5} = i1;
+ let Inst{4-0} = Zdn;
+
+ let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
+}
+
+multiclass sve_fp_2op_i_p_zds<bits<3> opc, string asm, Operand imm_ty> {
+ def _H : sve_fp_2op_i_p_zds<0b01, opc, asm, ZPR16, imm_ty>;
+ def _S : sve_fp_2op_i_p_zds<0b10, opc, asm, ZPR32, imm_ty>;
+ def _D : sve_fp_2op_i_p_zds<0b11, opc, asm, ZPR64, imm_ty>;
+}
+
+class sve_fp_2op_p_zds<bits<2> sz, bits<4> opc, string asm,
+ ZPRRegOp zprty>
+: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm),
+ asm, "\t$Zdn, $Pg/m, $_Zdn, $Zm",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Zdn;
+ bits<5> Zm;
+ let Inst{31-24} = 0b01100101;
+ let Inst{23-22} = sz;
+ let Inst{21-20} = 0b00;
+ let Inst{19-16} = opc;
+ let Inst{15-13} = 0b100;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zm;
+ let Inst{4-0} = Zdn;
+
+ let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
+}
+
+multiclass sve_fp_2op_p_zds<bits<4> opc, string asm> {
+ def _H : sve_fp_2op_p_zds<0b01, opc, asm, ZPR16>;
+ def _S : sve_fp_2op_p_zds<0b10, opc, asm, ZPR32>;
+ def _D : sve_fp_2op_p_zds<0b11, opc, asm, ZPR64>;
+}
+
+class sve_fp_ftmad<bits<2> sz, string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, imm0_7:$imm3),
+ asm, "\t$Zdn, $_Zdn, $Zm, $imm3",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zdn;
+ bits<5> Zm;
+ bits<3> imm3;
+ let Inst{31-24} = 0b01100101;
+ let Inst{23-22} = sz;
+ let Inst{21-19} = 0b010;
+ let Inst{18-16} = imm3;
+ let Inst{15-10} = 0b100000;
+ let Inst{9-5} = Zm;
+ let Inst{4-0} = Zdn;
+
+ let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
+}
+
+multiclass sve_fp_ftmad<string asm> {
+ def _H : sve_fp_ftmad<0b01, asm, ZPR16>;
+ def _S : sve_fp_ftmad<0b10, asm, ZPR32>;
+ def _D : sve_fp_ftmad<0b11, asm, ZPR64>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// SVE Floating Point Arithmetic - Unpredicated Group
+//===----------------------------------------------------------------------===//
+
+class sve_fp_3op_u_zd<bits<2> sz, bits<3> opc, string asm,
+ ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm),
+ asm, "\t$Zd, $Zn, $Zm",
+ "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zm;
+ bits<5> Zn;
+ let Inst{31-24} = 0b01100101;
+ let Inst{23-22} = sz;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = Zm;
+ let Inst{15-13} = 0b000;
+ let Inst{12-10} = opc;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve_fp_3op_u_zd<bits<3> opc, string asm> {
+ def _H : sve_fp_3op_u_zd<0b01, opc, asm, ZPR16>;
+ def _S : sve_fp_3op_u_zd<0b10, opc, asm, ZPR32>;
+ def _D : sve_fp_3op_u_zd<0b11, opc, asm, ZPR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Floating Point Fused Multiply-Add Group
+//===----------------------------------------------------------------------===//
+
+class sve_fp_3op_p_zds_a<bits<2> sz, bits<2> opc, string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zda), (ins PPR3bAny:$Pg, zprty:$_Zda, zprty:$Zn, zprty:$Zm),
+ asm, "\t$Zda, $Pg/m, $Zn, $Zm",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Zda;
+ bits<5> Zm;
+ bits<5> Zn;
+ let Inst{31-24} = 0b01100101;
+ let Inst{23-22} = sz;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Zm;
+ let Inst{15} = 0b0;
+ let Inst{14-13} = opc;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
+}
+
+multiclass sve_fp_3op_p_zds_a<bits<2> opc, string asm> {
+ def _H : sve_fp_3op_p_zds_a<0b01, opc, asm, ZPR16>;
+ def _S : sve_fp_3op_p_zds_a<0b10, opc, asm, ZPR32>;
+ def _D : sve_fp_3op_p_zds_a<0b11, opc, asm, ZPR64>;
+}
+
+class sve_fp_3op_p_zds_b<bits<2> sz, bits<2> opc, string asm,
+ ZPRRegOp zprty>
+: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm, zprty:$Za),
+ asm, "\t$Zdn, $Pg/m, $Zm, $Za",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Za;
+ bits<5> Zdn;
+ bits<5> Zm;
+ let Inst{31-24} = 0b01100101;
+ let Inst{23-22} = sz;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Za;
+ let Inst{15} = 0b1;
+ let Inst{14-13} = opc;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zm;
+ let Inst{4-0} = Zdn;
+
+ let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
+}
+
+multiclass sve_fp_3op_p_zds_b<bits<2> opc, string asm> {
+ def _H : sve_fp_3op_p_zds_b<0b01, opc, asm, ZPR16>;
+ def _S : sve_fp_3op_p_zds_b<0b10, opc, asm, ZPR32>;
+ def _D : sve_fp_3op_p_zds_b<0b11, opc, asm, ZPR64>;
+}
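+
+// The two predicated multiply-add layouts above differ in Inst{15} and in
+// which operand is tied to the destination: the "_a" form accumulates into
+// $Zda, while the "_b" form is destructive on $Zdn and takes the separate
+// addend $Za in the register field that the "_a" form uses for $Zm.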
+
+//===----------------------------------------------------------------------===//
+// SVE Floating Point Multiply-Add - Indexed Group
+//===----------------------------------------------------------------------===//
+
+class sve_fp_fma_by_indexed_elem<bits<2> sz, bit opc, string asm,
+ ZPRRegOp zprty1,
+ ZPRRegOp zprty2, Operand itype>
+: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty1:$Zn, zprty2:$Zm, itype:$iop),
+ asm, "\t$Zda, $Zn, $Zm$iop", "", []>, Sched<[]> {
+ bits<5> Zda;
+ bits<5> Zn;
+ let Inst{31-24} = 0b01100100;
+ let Inst{23-22} = sz;
+ let Inst{21} = 0b1;
+ let Inst{15-11} = 0;
+ let Inst{10} = opc;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
+}
+
+multiclass sve_fp_fma_by_indexed_elem<bit opc, string asm> {
+ def _H : sve_fp_fma_by_indexed_elem<{0, ?}, opc, asm, ZPR16, ZPR3b16, VectorIndexH> {
+ bits<3> Zm;
+ bits<3> iop;
+ let Inst{22} = iop{2};
+ let Inst{20-19} = iop{1-0};
+ let Inst{18-16} = Zm;
+ }
+ def _S : sve_fp_fma_by_indexed_elem<0b10, opc, asm, ZPR32, ZPR3b32, VectorIndexS> {
+ bits<3> Zm;
+ bits<2> iop;
+ let Inst{20-19} = iop;
+ let Inst{18-16} = Zm;
+ }
+ def _D : sve_fp_fma_by_indexed_elem<0b11, opc, asm, ZPR64, ZPR4b64, VectorIndexD> {
+ bits<4> Zm;
+ bit iop;
+ let Inst{20} = iop;
+ let Inst{19-16} = Zm;
+ }
+}
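+
+// For the indexed multiply-add forms the element index shares encoding space
+// with the Zm field: each per-size definition above routes its index bits
+// into Inst{20-19}/Inst{20}, and the half-word form additionally reuses size
+// bit Inst{22} as the top index bit, which is why its sz parameter is only
+// partially specified as {0, ?}.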
+
+
+//===----------------------------------------------------------------------===//
+// SVE Floating Point Multiply - Indexed Group
+//===----------------------------------------------------------------------===//
+
+class sve_fp_fmul_by_indexed_elem<bits<2> sz, string asm, ZPRRegOp zprty,
+ ZPRRegOp zprty2, Operand itype>
+: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty2:$Zm, itype:$iop),
+ asm, "\t$Zd, $Zn, $Zm$iop", "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ let Inst{31-24} = 0b01100100;
+ let Inst{23-22} = sz;
+ let Inst{21} = 0b1;
+ let Inst{15-10} = 0b001000;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve_fp_fmul_by_indexed_elem<string asm> {
+ def _H : sve_fp_fmul_by_indexed_elem<{0, ?}, asm, ZPR16, ZPR3b16, VectorIndexH> {
+ bits<3> Zm;
+ bits<3> iop;
+ let Inst{22} = iop{2};
+ let Inst{20-19} = iop{1-0};
+ let Inst{18-16} = Zm;
+ }
+ def _S : sve_fp_fmul_by_indexed_elem<0b10, asm, ZPR32, ZPR3b32, VectorIndexS> {
+ bits<3> Zm;
+ bits<2> iop;
+ let Inst{20-19} = iop;
+ let Inst{18-16} = Zm;
+ }
+ def _D : sve_fp_fmul_by_indexed_elem<0b11, asm, ZPR64, ZPR4b64, VectorIndexD> {
+ bits<4> Zm;
+ bit iop;
+ let Inst{20} = iop;
+ let Inst{19-16} = Zm;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Floating Point Complex Multiply-Add Group
+//===----------------------------------------------------------------------===//
+
+class sve_fp_fcmla<bits<2> sz, string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zda), (ins PPR3bAny:$Pg, zprty:$_Zda, zprty:$Zn, zprty:$Zm,
+ complexrotateop:$imm),
+ asm, "\t$Zda, $Pg/m, $Zn, $Zm, $imm",
+ "", []>, Sched<[]> {
+ bits<5> Zda;
+ bits<3> Pg;
+ bits<5> Zn;
+ bits<5> Zm;
+ bits<2> imm;
+ let Inst{31-24} = 0b01100100;
+ let Inst{23-22} = sz;
+ let Inst{21} = 0;
+ let Inst{20-16} = Zm;
+ let Inst{15} = 0;
+ let Inst{14-13} = imm;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
+}
+
+multiclass sve_fp_fcmla<string asm> {
+ def _H : sve_fp_fcmla<0b01, asm, ZPR16>;
+ def _S : sve_fp_fcmla<0b10, asm, ZPR32>;
+ def _D : sve_fp_fcmla<0b11, asm, ZPR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Floating Point Complex Multiply-Add - Indexed Group
+//===----------------------------------------------------------------------===//
+
+class sve_fp_fcmla_by_indexed_elem<bits<2> sz, string asm,
+ ZPRRegOp zprty,
+ ZPRRegOp zprty2, Operand itype>
+: I<(outs zprty:$Zda), (ins zprty:$_Zda, zprty:$Zn, zprty2:$Zm, itype:$iop,
+ complexrotateop:$imm),
+ asm, "\t$Zda, $Zn, $Zm$iop, $imm",
+ "", []>, Sched<[]> {
+ bits<5> Zda;
+ bits<5> Zn;
+ bits<2> imm;
+ let Inst{31-24} = 0b01100100;
+ let Inst{23-22} = sz;
+ let Inst{21} = 0b1;
+ let Inst{15-12} = 0b0001;
+ let Inst{11-10} = imm;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
+}
+
+multiclass sve_fp_fcmla_by_indexed_elem<string asm> {
+ def _H : sve_fp_fcmla_by_indexed_elem<0b10, asm, ZPR16, ZPR3b16, VectorIndexS> {
+ bits<3> Zm;
+ bits<2> iop;
+ let Inst{20-19} = iop;
+ let Inst{18-16} = Zm;
+ }
+ def _S : sve_fp_fcmla_by_indexed_elem<0b11, asm, ZPR32, ZPR4b32, VectorIndexD> {
+ bits<4> Zm;
+ bits<1> iop;
+ let Inst{20} = iop;
+ let Inst{19-16} = Zm;
+ }
+}
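+
+// Indexed FCMLA is only defined here for half-word and word elements; the
+// wider element uses a larger Zm field and a narrower index (Zm<3>/iop<2>
+// for _H versus Zm<4>/iop<1> for _S), as set by the overrides above.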
+
+//===----------------------------------------------------------------------===//
+// SVE Floating Point Complex Addition Group
+//===----------------------------------------------------------------------===//
+
+class sve_fp_fcadd<bits<2> sz, string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm,
+ complexrotateopodd:$imm),
+ asm, "\t$Zdn, $Pg/m, $_Zdn, $Zm, $imm",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zdn;
+ bits<5> Zm;
+ bits<3> Pg;
+ bit imm;
+ let Inst{31-24} = 0b01100100;
+ let Inst{23-22} = sz;
+ let Inst{21-17} = 0;
+ let Inst{16} = imm;
+ let Inst{15-13} = 0b100;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zm;
+ let Inst{4-0} = Zdn;
+
+ let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
+}
+
+multiclass sve_fp_fcadd<string asm> {
+ def _H : sve_fp_fcadd<0b01, asm, ZPR16>;
+ def _S : sve_fp_fcadd<0b10, asm, ZPR32>;
+ def _D : sve_fp_fcadd<0b11, asm, ZPR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Stack Allocation Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_arith_vl<bit opc, string asm>
+: I<(outs GPR64sp:$Rd), (ins GPR64sp:$Rn, simm6_32b:$imm6),
+ asm, "\t$Rd, $Rn, $imm6",
+ "",
+ []>, Sched<[]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<6> imm6;
+ let Inst{31-23} = 0b000001000;
+ let Inst{22} = opc;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rn;
+ let Inst{15-11} = 0b01010;
+ let Inst{10-5} = imm6;
+ let Inst{4-0} = Rd;
+}
+
+class sve_int_read_vl_a<bit op, bits<5> opc2, string asm>
+: I<(outs GPR64:$Rd), (ins simm6_32b:$imm6),
+ asm, "\t$Rd, $imm6",
+ "",
+ []>, Sched<[]> {
+ bits<5> Rd;
+ bits<6> imm6;
+ let Inst{31-23} = 0b000001001;
+ let Inst{22} = op;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = opc2{4-0};
+ let Inst{15-11} = 0b01010;
+ let Inst{10-5} = imm6;
+ let Inst{4-0} = Rd;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Permute - In Lane Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_perm_bin_perm_zz<bits<3> opc, bits<2> sz8_64, string asm,
+ ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm),
+ asm, "\t$Zd, $Zn, $Zm",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zm;
+ bits<5> Zn;
+ let Inst{31-24} = 0b00000101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Zm;
+ let Inst{15-13} = 0b011;
+ let Inst{12-10} = opc;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve_int_perm_bin_perm_zz<bits<3> opc, string asm> {
+ def _B : sve_int_perm_bin_perm_zz<opc, 0b00, asm, ZPR8>;
+ def _H : sve_int_perm_bin_perm_zz<opc, 0b01, asm, ZPR16>;
+ def _S : sve_int_perm_bin_perm_zz<opc, 0b10, asm, ZPR32>;
+ def _D : sve_int_perm_bin_perm_zz<opc, 0b11, asm, ZPR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Floating Point Unary Operations Group
+//===----------------------------------------------------------------------===//
+
+class sve_fp_2op_p_zd<bits<7> opc, string asm, RegisterOperand i_zprtype,
+ RegisterOperand o_zprtype, ElementSizeEnum size>
+: I<(outs o_zprtype:$Zd), (ins i_zprtype:$_Zd, PPR3bAny:$Pg, i_zprtype:$Zn),
+ asm, "\t$Zd, $Pg/m, $Zn",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Zd;
+ bits<5> Zn;
+ let Inst{31-24} = 0b01100101;
+ let Inst{23-22} = opc{6-5};
+ let Inst{21} = 0b0;
+ let Inst{20-16} = opc{4-0};
+ let Inst{15-13} = 0b101;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+
+ let Constraints = "$Zd = $_Zd";
+ let DestructiveInstType = Destructive;
+ let ElementSize = size;
+}
+
+multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm> {
+ def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>;
+ def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>;
+ def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Floating Point Unary Operations - Unpredicated Group
+//===----------------------------------------------------------------------===//
+
+class sve_fp_2op_u_zd<bits<2> sz, bits<3> opc, string asm,
+ ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins zprty:$Zn),
+ asm, "\t$Zd, $Zn",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ let Inst{31-24} = 0b01100101;
+ let Inst{23-22} = sz;
+ let Inst{21-19} = 0b001;
+ let Inst{18-16} = opc;
+ let Inst{15-10} = 0b001100;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve_fp_2op_u_zd<bits<3> opc, string asm> {
+ def _H : sve_fp_2op_u_zd<0b01, opc, asm, ZPR16>;
+ def _S : sve_fp_2op_u_zd<0b10, opc, asm, ZPR32>;
+ def _D : sve_fp_2op_u_zd<0b11, opc, asm, ZPR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Integer Arithmetic - Binary Predicated Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_bin_pred_arit_log<bits<2> sz8_64, bits<2> fmt, bits<3> opc,
+ string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm),
+ asm, "\t$Zdn, $Pg/m, $_Zdn, $Zm", "", []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Zdn;
+ bits<5> Zm;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = sz8_64;
+ let Inst{21} = 0b0;
+ let Inst{20-19} = fmt;
+ let Inst{18-16} = opc;
+ let Inst{15-13} = 0b000;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zm;
+ let Inst{4-0} = Zdn;
+
+ let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
+}
+
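+// Inst{20-19} ("fmt" in the class above) selects the sub-group within this
+// encoding space: the multiclasses below use 0b00, 0b01 and 0b10 for the
+// arithmetic variants and 0b11 for the predicated logical operations.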
+multiclass sve_int_bin_pred_log<bits<3> opc, string asm> {
+ def _B : sve_int_bin_pred_arit_log<0b00, 0b11, opc, asm, ZPR8>;
+ def _H : sve_int_bin_pred_arit_log<0b01, 0b11, opc, asm, ZPR16>;
+ def _S : sve_int_bin_pred_arit_log<0b10, 0b11, opc, asm, ZPR32>;
+ def _D : sve_int_bin_pred_arit_log<0b11, 0b11, opc, asm, ZPR64>;
+}
+
+multiclass sve_int_bin_pred_arit_0<bits<3> opc, string asm> {
+ def _B : sve_int_bin_pred_arit_log<0b00, 0b00, opc, asm, ZPR8>;
+ def _H : sve_int_bin_pred_arit_log<0b01, 0b00, opc, asm, ZPR16>;
+ def _S : sve_int_bin_pred_arit_log<0b10, 0b00, opc, asm, ZPR32>;
+ def _D : sve_int_bin_pred_arit_log<0b11, 0b00, opc, asm, ZPR64>;
+}
+
+multiclass sve_int_bin_pred_arit_1<bits<3> opc, string asm> {
+ def _B : sve_int_bin_pred_arit_log<0b00, 0b01, opc, asm, ZPR8>;
+ def _H : sve_int_bin_pred_arit_log<0b01, 0b01, opc, asm, ZPR16>;
+ def _S : sve_int_bin_pred_arit_log<0b10, 0b01, opc, asm, ZPR32>;
+ def _D : sve_int_bin_pred_arit_log<0b11, 0b01, opc, asm, ZPR64>;
+}
+
+multiclass sve_int_bin_pred_arit_2<bits<3> opc, string asm> {
+ def _B : sve_int_bin_pred_arit_log<0b00, 0b10, opc, asm, ZPR8>;
+ def _H : sve_int_bin_pred_arit_log<0b01, 0b10, opc, asm, ZPR16>;
+ def _S : sve_int_bin_pred_arit_log<0b10, 0b10, opc, asm, ZPR32>;
+ def _D : sve_int_bin_pred_arit_log<0b11, 0b10, opc, asm, ZPR64>;
+}
+
+// Special case for divides which are not defined for 8b/16b elements.
+multiclass sve_int_bin_pred_arit_2_div<bits<3> opc, string asm> {
+ def _S : sve_int_bin_pred_arit_log<0b10, 0b10, opc, asm, ZPR32>;
+ def _D : sve_int_bin_pred_arit_log<0b11, 0b10, opc, asm, ZPR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Integer Multiply-Add Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_mladdsub_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm,
+ ZPRRegOp zprty>
+: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm, zprty:$Za),
+ asm, "\t$Zdn, $Pg/m, $Zm, $Za",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Zdn;
+ bits<5> Za;
+ bits<5> Zm;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = sz8_64;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = Zm;
+ let Inst{15-14} = 0b11;
+ let Inst{13} = opc;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Za;
+ let Inst{4-0} = Zdn;
+
+ let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
+}
+
+multiclass sve_int_mladdsub_vvv_pred<bits<1> opc, string asm> {
+ def _B : sve_int_mladdsub_vvv_pred<0b00, opc, asm, ZPR8>;
+ def _H : sve_int_mladdsub_vvv_pred<0b01, opc, asm, ZPR16>;
+ def _S : sve_int_mladdsub_vvv_pred<0b10, opc, asm, ZPR32>;
+ def _D : sve_int_mladdsub_vvv_pred<0b11, opc, asm, ZPR64>;
+}
+
+class sve_int_mlas_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm,
+ ZPRRegOp zprty>
+: I<(outs zprty:$Zda), (ins PPR3bAny:$Pg, zprty:$_Zda, zprty:$Zn, zprty:$Zm),
+ asm, "\t$Zda, $Pg/m, $Zn, $Zm",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Zda;
+ bits<5> Zm;
+ bits<5> Zn;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = sz8_64;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = Zm;
+ let Inst{15-14} = 0b01;
+ let Inst{13} = opc;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
+}
+
+multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm> {
+ def _B : sve_int_mlas_vvv_pred<0b00, opc, asm, ZPR8>;
+ def _H : sve_int_mlas_vvv_pred<0b01, opc, asm, ZPR16>;
+ def _S : sve_int_mlas_vvv_pred<0b10, opc, asm, ZPR32>;
+ def _D : sve_int_mlas_vvv_pred<0b11, opc, asm, ZPR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Integer Dot Product Group
+//===----------------------------------------------------------------------===//
+
+class sve_intx_dot<bit sz, bit U, string asm, ZPRRegOp zprty1,
+ ZPRRegOp zprty2>
+: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty2:$Zm), asm,
+ "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> {
+ bits<5> Zda;
+ bits<5> Zn;
+ bits<5> Zm;
+ let Inst{31-23} = 0b010001001;
+ let Inst{22} = sz;
+ let Inst{21} = 0;
+ let Inst{20-16} = Zm;
+ let Inst{15-11} = 0;
+ let Inst{10} = U;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty1.ElementSize;
+}
+
+multiclass sve_intx_dot<bit opc, string asm> {
+ def _S : sve_intx_dot<0b0, opc, asm, ZPR32, ZPR8>;
+ def _D : sve_intx_dot<0b1, opc, asm, ZPR64, ZPR16>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Integer Dot Product - Indexed Group
+//===----------------------------------------------------------------------===//
+
+class sve_intx_dot_by_indexed_elem<bit sz, bit U, string asm,
+ ZPRRegOp zprty1, ZPRRegOp zprty2,
+ ZPRRegOp zprty3, Operand itype>
+: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty3:$Zm, itype:$iop),
+ asm, "\t$Zda, $Zn, $Zm$iop",
+ "", []>, Sched<[]> {
+ bits<5> Zda;
+ bits<5> Zn;
+ let Inst{31-23} = 0b010001001;
+ let Inst{22} = sz;
+ let Inst{21} = 0b1;
+ let Inst{15-11} = 0;
+ let Inst{10} = U;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
+}
+
+multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm> {
+ def _S : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS> {
+ bits<2> iop;
+ bits<3> Zm;
+ let Inst{20-19} = iop;
+ let Inst{18-16} = Zm;
+ }
+ def _D : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD> {
+ bits<1> iop;
+ bits<4> Zm;
+ let Inst{20} = iop;
+ let Inst{19-16} = Zm;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Integer Arithmetic - Unary Predicated Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_un_pred_arit<bits<2> sz8_64, bits<4> opc,
+ string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins zprty:$_Zd, PPR3bAny:$Pg, zprty:$Zn),
+ asm, "\t$Zd, $Pg/m, $Zn",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Zd;
+ bits<5> Zn;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-20} = 0b01;
+ let Inst{19} = opc{0};
+ let Inst{18-16} = opc{3-1};
+ let Inst{15-13} = 0b101;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+
+ let Constraints = "$Zd = $_Zd";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
+}
+
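+// The 4-bit opc above is split across Inst{19} (opc{0}) and Inst{18-16}
+// (opc{3-1}); the "_0" and "_1" multiclasses below fix that low bit and vary
+// only the remaining three bits.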
+multiclass sve_int_un_pred_arit_0<bits<3> opc, string asm> {
+ def _B : sve_int_un_pred_arit<0b00, { opc, 0b0 }, asm, ZPR8>;
+ def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>;
+ def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>;
+ def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
+}
+
+multiclass sve_int_un_pred_arit_0_h<bits<3> opc, string asm> {
+ def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>;
+ def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>;
+ def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
+}
+
+multiclass sve_int_un_pred_arit_0_w<bits<3> opc, string asm> {
+ def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>;
+ def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
+}
+
+multiclass sve_int_un_pred_arit_0_d<bits<3> opc, string asm> {
+ def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
+}
+
+multiclass sve_int_un_pred_arit_1<bits<3> opc, string asm> {
+ def _B : sve_int_un_pred_arit<0b00, { opc, 0b1 }, asm, ZPR8>;
+ def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>;
+ def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>;
+ def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>;
+}
+
+multiclass sve_int_un_pred_arit_1_fp<bits<3> opc, string asm> {
+ def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>;
+ def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>;
+ def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Integer Wide Immediate - Unpredicated Group
+//===----------------------------------------------------------------------===//
+class sve_int_dup_imm<bits<2> sz8_64, string asm,
+ ZPRRegOp zprty, Operand immtype>
+: I<(outs zprty:$Zd), (ins immtype:$imm),
+ asm, "\t$Zd, $imm",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zd;
+ bits<9> imm;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-14} = 0b11100011;
+ let Inst{13} = imm{8}; // sh
+ let Inst{12-5} = imm{7-0}; // imm8
+ let Inst{4-0} = Zd;
+
+ let isReMaterializable = 1;
+}
+
+multiclass sve_int_dup_imm<string asm> {
+ def _B : sve_int_dup_imm<0b00, asm, ZPR8, cpy_imm8_opt_lsl_i8>;
+ def _H : sve_int_dup_imm<0b01, asm, ZPR16, cpy_imm8_opt_lsl_i16>;
+ def _S : sve_int_dup_imm<0b10, asm, ZPR32, cpy_imm8_opt_lsl_i32>;
+ def _D : sve_int_dup_imm<0b11, asm, ZPR64, cpy_imm8_opt_lsl_i64>;
+
+ def : InstAlias<"mov $Zd, $imm",
+ (!cast<Instruction>(NAME # _B) ZPR8:$Zd, cpy_imm8_opt_lsl_i8:$imm), 1>;
+ def : InstAlias<"mov $Zd, $imm",
+ (!cast<Instruction>(NAME # _H) ZPR16:$Zd, cpy_imm8_opt_lsl_i16:$imm), 1>;
+ def : InstAlias<"mov $Zd, $imm",
+ (!cast<Instruction>(NAME # _S) ZPR32:$Zd, cpy_imm8_opt_lsl_i32:$imm), 1>;
+ def : InstAlias<"mov $Zd, $imm",
+ (!cast<Instruction>(NAME # _D) ZPR64:$Zd, cpy_imm8_opt_lsl_i64:$imm), 1>;
+
+ def : InstAlias<"fmov $Zd, #0.0",
+ (!cast<Instruction>(NAME # _H) ZPR16:$Zd, 0, 0), 1>;
+ def : InstAlias<"fmov $Zd, #0.0",
+ (!cast<Instruction>(NAME # _S) ZPR32:$Zd, 0, 0), 1>;
+ def : InstAlias<"fmov $Zd, #0.0",
+ (!cast<Instruction>(NAME # _D) ZPR64:$Zd, 0, 0), 1>;
+}
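+
+// "fmov <Zd>, #0.0" is accepted above as an alias of the integer dup with
+// imm8 = 0 and no shift, so a floating-point zero splat maps onto the same
+// encoding.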
+
+class sve_int_dup_fpimm<bits<2> sz8_64, Operand fpimmtype,
+ string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins fpimmtype:$imm8),
+ asm, "\t$Zd, $imm8",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zd;
+ bits<8> imm8;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-14} = 0b11100111;
+ let Inst{13} = 0b0;
+ let Inst{12-5} = imm8;
+ let Inst{4-0} = Zd;
+
+ let isReMaterializable = 1;
+}
+
+multiclass sve_int_dup_fpimm<string asm> {
+ def _H : sve_int_dup_fpimm<0b01, fpimm16, asm, ZPR16>;
+ def _S : sve_int_dup_fpimm<0b10, fpimm32, asm, ZPR32>;
+ def _D : sve_int_dup_fpimm<0b11, fpimm64, asm, ZPR64>;
+
+ def : InstAlias<"fmov $Zd, $imm8",
+ (!cast<Instruction>(NAME # _H) ZPR16:$Zd, fpimm16:$imm8), 1>;
+ def : InstAlias<"fmov $Zd, $imm8",
+ (!cast<Instruction>(NAME # _S) ZPR32:$Zd, fpimm32:$imm8), 1>;
+ def : InstAlias<"fmov $Zd, $imm8",
+ (!cast<Instruction>(NAME # _D) ZPR64:$Zd, fpimm64:$imm8), 1>;
+}
+
+class sve_int_arith_imm0<bits<2> sz8_64, bits<3> opc, string asm,
+ ZPRRegOp zprty, Operand immtype>
+: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, immtype:$imm),
+ asm, "\t$Zdn, $_Zdn, $imm",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zdn;
+ bits<9> imm;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-19} = 0b100;
+ let Inst{18-16} = opc;
+ let Inst{15-14} = 0b11;
+ let Inst{13} = imm{8}; // sh
+ let Inst{12-5} = imm{7-0}; // imm8
+ let Inst{4-0} = Zdn;
+
+ let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
+}
+
+multiclass sve_int_arith_imm0<bits<3> opc, string asm> {
+ def _B : sve_int_arith_imm0<0b00, opc, asm, ZPR8, addsub_imm8_opt_lsl_i8>;
+ def _H : sve_int_arith_imm0<0b01, opc, asm, ZPR16, addsub_imm8_opt_lsl_i16>;
+ def _S : sve_int_arith_imm0<0b10, opc, asm, ZPR32, addsub_imm8_opt_lsl_i32>;
+ def _D : sve_int_arith_imm0<0b11, opc, asm, ZPR64, addsub_imm8_opt_lsl_i64>;
+}
+
+class sve_int_arith_imm<bits<2> sz8_64, bits<6> opc, string asm,
+ ZPRRegOp zprty, Operand immtype>
+: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, immtype:$imm),
+ asm, "\t$Zdn, $_Zdn, $imm",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zdn;
+ bits<8> imm;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-16} = opc;
+ let Inst{15-13} = 0b110;
+ let Inst{12-5} = imm;
+ let Inst{4-0} = Zdn;
+
+ let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
+}
+
+multiclass sve_int_arith_imm1<bits<2> opc, string asm, Operand immtype> {
+ def _B : sve_int_arith_imm<0b00, { 0b1010, opc }, asm, ZPR8, immtype>;
+ def _H : sve_int_arith_imm<0b01, { 0b1010, opc }, asm, ZPR16, immtype>;
+ def _S : sve_int_arith_imm<0b10, { 0b1010, opc }, asm, ZPR32, immtype>;
+ def _D : sve_int_arith_imm<0b11, { 0b1010, opc }, asm, ZPR64, immtype>;
+}
+
+multiclass sve_int_arith_imm2<string asm> {
+ def _B : sve_int_arith_imm<0b00, 0b110000, asm, ZPR8, simm8>;
+ def _H : sve_int_arith_imm<0b01, 0b110000, asm, ZPR16, simm8>;
+ def _S : sve_int_arith_imm<0b10, 0b110000, asm, ZPR32, simm8>;
+ def _D : sve_int_arith_imm<0b11, 0b110000, asm, ZPR64, simm8>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Bitwise Logical - Unpredicated Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_bin_cons_log<bits<2> opc, string asm>
+: I<(outs ZPR64:$Zd), (ins ZPR64:$Zn, ZPR64:$Zm),
+ asm, "\t$Zd, $Zn, $Zm",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zm;
+ bits<5> Zn;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = opc{1-0};
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Zm;
+ let Inst{15-10} = 0b001100;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
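+
+// These unpredicated bitwise operations are element-size agnostic: the
+// operands are always ZPR64 and Inst{23-22} carries the 2-bit opcode rather
+// than an element size.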
+
+
+//===----------------------------------------------------------------------===//
+// SVE Integer Wide Immediate - Predicated Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_dup_fpimm_pred<bits<2> sz, Operand fpimmtype,
+ string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins zprty:$_Zd, PPRAny:$Pg, fpimmtype:$imm8),
+ asm, "\t$Zd, $Pg/m, $imm8",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pg;
+ bits<5> Zd;
+ bits<8> imm8;
+ let Inst{31-24} = 0b00000101;
+ let Inst{23-22} = sz;
+ let Inst{21-20} = 0b01;
+ let Inst{19-16} = Pg;
+ let Inst{15-13} = 0b110;
+ let Inst{12-5} = imm8;
+ let Inst{4-0} = Zd;
+
+ let Constraints = "$Zd = $_Zd";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
+}
+
+multiclass sve_int_dup_fpimm_pred<string asm> {
+ def _H : sve_int_dup_fpimm_pred<0b01, fpimm16, asm, ZPR16>;
+ def _S : sve_int_dup_fpimm_pred<0b10, fpimm32, asm, ZPR32>;
+ def _D : sve_int_dup_fpimm_pred<0b11, fpimm64, asm, ZPR64>;
+
+ def : InstAlias<"fmov $Zd, $Pg/m, $imm8",
+ (!cast<Instruction>(NAME # _H) ZPR16:$Zd, PPRAny:$Pg, fpimm16:$imm8), 1>;
+ def : InstAlias<"fmov $Zd, $Pg/m, $imm8",
+ (!cast<Instruction>(NAME # _S) ZPR32:$Zd, PPRAny:$Pg, fpimm32:$imm8), 1>;
+ def : InstAlias<"fmov $Zd, $Pg/m, $imm8",
+ (!cast<Instruction>(NAME # _D) ZPR64:$Zd, PPRAny:$Pg, fpimm64:$imm8), 1>;
+}
+
+class sve_int_dup_imm_pred<bits<2> sz8_64, bit m, string asm,
+ ZPRRegOp zprty, string pred_qual, dag iops>
+: I<(outs zprty:$Zd), iops,
+ asm, "\t$Zd, $Pg"#pred_qual#", $imm",
+ "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<4> Pg;
+ bits<9> imm;
+ let Inst{31-24} = 0b00000101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-20} = 0b01;
+ let Inst{19-16} = Pg;
+ let Inst{15} = 0b0;
+ let Inst{14} = m;
+ let Inst{13} = imm{8}; // sh
+ let Inst{12-5} = imm{7-0}; // imm8
+ let Inst{4-0} = Zd;
+
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
+}
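+
+// Inst{14} ("m" above) distinguishes the merging and zeroing forms defined
+// below; only the merging multiclass ties $Zd to an input vector operand.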
+
+multiclass sve_int_dup_imm_pred_merge<string asm> {
+ let Constraints = "$Zd = $_Zd" in {
+ def _B : sve_int_dup_imm_pred<0b00, 1, asm, ZPR8, "/m", (ins ZPR8:$_Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i8:$imm)>;
+ def _H : sve_int_dup_imm_pred<0b01, 1, asm, ZPR16, "/m", (ins ZPR16:$_Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i16:$imm)>;
+ def _S : sve_int_dup_imm_pred<0b10, 1, asm, ZPR32, "/m", (ins ZPR32:$_Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i32:$imm)>;
+ def _D : sve_int_dup_imm_pred<0b11, 1, asm, ZPR64, "/m", (ins ZPR64:$_Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i64:$imm)>;
+ }
+
+ def : InstAlias<"mov $Zd, $Pg/m, $imm",
+ (!cast<Instruction>(NAME # _B) ZPR8:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i8:$imm), 1>;
+ def : InstAlias<"mov $Zd, $Pg/m, $imm",
+ (!cast<Instruction>(NAME # _H) ZPR16:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i16:$imm), 1>;
+ def : InstAlias<"mov $Zd, $Pg/m, $imm",
+ (!cast<Instruction>(NAME # _S) ZPR32:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i32:$imm), 1>;
+ def : InstAlias<"mov $Zd, $Pg/m, $imm",
+ (!cast<Instruction>(NAME # _D) ZPR64:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i64:$imm), 1>;
+
+ def : InstAlias<"fmov $Zd, $Pg/m, #0.0",
+ (!cast<Instruction>(NAME # _H) ZPR16:$Zd, PPRAny:$Pg, 0, 0), 0>;
+ def : InstAlias<"fmov $Zd, $Pg/m, #0.0",
+ (!cast<Instruction>(NAME # _S) ZPR32:$Zd, PPRAny:$Pg, 0, 0), 0>;
+ def : InstAlias<"fmov $Zd, $Pg/m, #0.0",
+ (!cast<Instruction>(NAME # _D) ZPR64:$Zd, PPRAny:$Pg, 0, 0), 0>;
+}
+
+multiclass sve_int_dup_imm_pred_zero<string asm> {
+ def _B : sve_int_dup_imm_pred<0b00, 0, asm, ZPR8, "/z", (ins PPRAny:$Pg, cpy_imm8_opt_lsl_i8:$imm)>;
+ def _H : sve_int_dup_imm_pred<0b01, 0, asm, ZPR16, "/z", (ins PPRAny:$Pg, cpy_imm8_opt_lsl_i16:$imm)>;
+ def _S : sve_int_dup_imm_pred<0b10, 0, asm, ZPR32, "/z", (ins PPRAny:$Pg, cpy_imm8_opt_lsl_i32:$imm)>;
+ def _D : sve_int_dup_imm_pred<0b11, 0, asm, ZPR64, "/z", (ins PPRAny:$Pg, cpy_imm8_opt_lsl_i64:$imm)>;
+
+ def : InstAlias<"mov $Zd, $Pg/z, $imm",
+ (!cast<Instruction>(NAME # _B) ZPR8:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i8:$imm), 1>;
+ def : InstAlias<"mov $Zd, $Pg/z, $imm",
+ (!cast<Instruction>(NAME # _H) ZPR16:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i16:$imm), 1>;
+ def : InstAlias<"mov $Zd, $Pg/z, $imm",
+ (!cast<Instruction>(NAME # _S) ZPR32:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i32:$imm), 1>;
+ def : InstAlias<"mov $Zd, $Pg/z, $imm",
+ (!cast<Instruction>(NAME # _D) ZPR64:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i64:$imm), 1>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Integer Compare - Vectors Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_cmp<bit cmp_1, bits<2> sz8_64, bits<3> opc, string asm,
+ PPRRegOp pprty, ZPRRegOp zprty1, ZPRRegOp zprty2>
+: I<(outs pprty:$Pd), (ins PPR3bAny:$Pg, zprty1:$Zn, zprty2:$Zm),
+ asm, "\t$Pd, $Pg/z, $Zn, $Zm",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pd;
+ bits<3> Pg;
+ bits<5> Zm;
+ bits<5> Zn;
+ let Inst{31-24} = 0b00100100;
+ let Inst{23-22} = sz8_64;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = Zm;
+ let Inst{15} = opc{2};
+ let Inst{14} = cmp_1;
+ let Inst{13} = opc{1};
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4} = opc{0};
+ let Inst{3-0} = Pd;
+
+ let Defs = [NZCV];
+}
+
+multiclass sve_int_cmp_0<bits<3> opc, string asm> {
+ def _B : sve_int_cmp<0b0, 0b00, opc, asm, PPR8, ZPR8, ZPR8>;
+ def _H : sve_int_cmp<0b0, 0b01, opc, asm, PPR16, ZPR16, ZPR16>;
+ def _S : sve_int_cmp<0b0, 0b10, opc, asm, PPR32, ZPR32, ZPR32>;
+ def _D : sve_int_cmp<0b0, 0b11, opc, asm, PPR64, ZPR64, ZPR64>;
+}
+
+multiclass sve_int_cmp_0_wide<bits<3> opc, string asm> {
+ def _B : sve_int_cmp<0b0, 0b00, opc, asm, PPR8, ZPR8, ZPR64>;
+ def _H : sve_int_cmp<0b0, 0b01, opc, asm, PPR16, ZPR16, ZPR64>;
+ def _S : sve_int_cmp<0b0, 0b10, opc, asm, PPR32, ZPR32, ZPR64>;
+}
+
+multiclass sve_int_cmp_1_wide<bits<3> opc, string asm> {
+ def _B : sve_int_cmp<0b1, 0b00, opc, asm, PPR8, ZPR8, ZPR64>;
+ def _H : sve_int_cmp<0b1, 0b01, opc, asm, PPR16, ZPR16, ZPR64>;
+ def _S : sve_int_cmp<0b1, 0b10, opc, asm, PPR32, ZPR32, ZPR64>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// SVE Integer Compare - Signed Immediate Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_scmp_vi<bits<2> sz8_64, bits<3> opc, string asm, PPRRegOp pprty,
+ ZPRRegOp zprty,
+ Operand immtype>
+: I<(outs pprty:$Pd), (ins PPR3bAny:$Pg, zprty:$Zn, immtype:$imm5),
+ asm, "\t$Pd, $Pg/z, $Zn, $imm5",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pd;
+ bits<3> Pg;
+ bits<5> Zn;
+ bits<5> imm5;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = imm5;
+ let Inst{15} = opc{2};
+ let Inst{14} = 0b0;
+ let Inst{13} = opc{1};
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4} = opc{0};
+ let Inst{3-0} = Pd;
+
+ let Defs = [NZCV];
+}
+
+multiclass sve_int_scmp_vi<bits<3> opc, string asm> {
+ def _B : sve_int_scmp_vi<0b00, opc, asm, PPR8, ZPR8, simm5_32b>;
+ def _H : sve_int_scmp_vi<0b01, opc, asm, PPR16, ZPR16, simm5_32b>;
+ def _S : sve_int_scmp_vi<0b10, opc, asm, PPR32, ZPR32, simm5_32b>;
+ def _D : sve_int_scmp_vi<0b11, opc, asm, PPR64, ZPR64, simm5_64b>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// SVE Integer Compare - Unsigned Immediate Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_ucmp_vi<bits<2> sz8_64, bits<2> opc, string asm, PPRRegOp pprty,
+ ZPRRegOp zprty, Operand immtype>
+: I<(outs pprty:$Pd), (ins PPR3bAny:$Pg, zprty:$Zn, immtype:$imm7),
+ asm, "\t$Pd, $Pg/z, $Zn, $imm7",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pd;
+ bits<3> Pg;
+ bits<5> Zn;
+ bits<7> imm7;
+ let Inst{31-24} = 0b00100100;
+ let Inst{23-22} = sz8_64;
+ let Inst{21} = 1;
+ let Inst{20-14} = imm7;
+ let Inst{13} = opc{1};
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4} = opc{0};
+ let Inst{3-0} = Pd;
+
+ let Defs = [NZCV];
+}
+
+multiclass sve_int_ucmp_vi<bits<2> opc, string asm> {
+ def _B : sve_int_ucmp_vi<0b00, opc, asm, PPR8, ZPR8, imm0_127>;
+ def _H : sve_int_ucmp_vi<0b01, opc, asm, PPR16, ZPR16, imm0_127>;
+ def _S : sve_int_ucmp_vi<0b10, opc, asm, PPR32, ZPR32, imm0_127>;
+ def _D : sve_int_ucmp_vi<0b11, opc, asm, PPR64, ZPR64, imm0_127>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// SVE Integer Compare - Scalars Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_cterm<bit sz, bit opc, string asm, RegisterClass rt>
+: I<(outs), (ins rt:$Rn, rt:$Rm),
+ asm, "\t$Rn, $Rm",
+ "",
+ []>, Sched<[]> {
+ bits<5> Rm;
+ bits<5> Rn;
+ let Inst{31-23} = 0b001001011;
+ let Inst{22} = sz;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rm;
+ let Inst{15-10} = 0b001000;
+ let Inst{9-5} = Rn;
+ let Inst{4} = opc;
+ let Inst{3-0} = 0b0000;
+
+ let Defs = [NZCV];
+}
+
+class sve_int_while_rr<bits<2> sz8_64, bits<4> opc, string asm,
+ RegisterClass gprty, PPRRegOp pprty>
+: I<(outs pprty:$Pd), (ins gprty:$Rn, gprty:$Rm),
+ asm, "\t$Pd, $Rn, $Rm",
+ "", []>, Sched<[]> {
+ bits<4> Pd;
+ bits<5> Rm;
+ bits<5> Rn;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rm;
+ let Inst{15-13} = 0b000;
+ let Inst{12-10} = opc{3-1};
+ let Inst{9-5} = Rn;
+ let Inst{4} = opc{0};
+ let Inst{3-0} = Pd;
+
+ let Defs = [NZCV];
+}
+
+multiclass sve_int_while4_rr<bits<3> opc, string asm> {
+ def _B : sve_int_while_rr<0b00, { 0, opc }, asm, GPR32, PPR8>;
+ def _H : sve_int_while_rr<0b01, { 0, opc }, asm, GPR32, PPR16>;
+ def _S : sve_int_while_rr<0b10, { 0, opc }, asm, GPR32, PPR32>;
+ def _D : sve_int_while_rr<0b11, { 0, opc }, asm, GPR32, PPR64>;
+}
+
+multiclass sve_int_while8_rr<bits<3> opc, string asm> {
+ def _B : sve_int_while_rr<0b00, { 1, opc }, asm, GPR64, PPR8>;
+ def _H : sve_int_while_rr<0b01, { 1, opc }, asm, GPR64, PPR16>;
+ def _S : sve_int_while_rr<0b10, { 1, opc }, asm, GPR64, PPR32>;
+ def _D : sve_int_while_rr<0b11, { 1, opc }, asm, GPR64, PPR64>;
+}
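+
+// The extra opcode bit prepended by the two multiclasses above lands in
+// Inst{12} and selects the scalar operand width: 0 for the GPR32 forms,
+// 1 for the GPR64 forms.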
+
+
+//===----------------------------------------------------------------------===//
+// SVE Floating Point Fast Reduction Group
+//===----------------------------------------------------------------------===//
+
+class sve_fp_fast_red<bits<2> sz, bits<3> opc, string asm,
+ ZPRRegOp zprty, RegisterClass dstRegClass>
+: I<(outs dstRegClass:$Vd), (ins PPR3bAny:$Pg, zprty:$Zn),
+ asm, "\t$Vd, $Pg, $Zn",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zn;
+ bits<5> Vd;
+ bits<3> Pg;
+ let Inst{31-24} = 0b01100101;
+ let Inst{23-22} = sz;
+ let Inst{21-19} = 0b000;
+ let Inst{18-16} = opc;
+ let Inst{15-13} = 0b001;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Vd;
+}
+
+multiclass sve_fp_fast_red<bits<3> opc, string asm> {
+ def _H : sve_fp_fast_red<0b01, opc, asm, ZPR16, FPR16>;
+ def _S : sve_fp_fast_red<0b10, opc, asm, ZPR32, FPR32>;
+ def _D : sve_fp_fast_red<0b11, opc, asm, ZPR64, FPR64>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// SVE Floating Point Accumulating Reduction Group
+//===----------------------------------------------------------------------===//
+
+class sve_fp_2op_p_vd<bits<2> sz, bits<3> opc, string asm,
+ ZPRRegOp zprty, RegisterClass dstRegClass>
+: I<(outs dstRegClass:$Vdn), (ins PPR3bAny:$Pg, dstRegClass:$_Vdn, zprty:$Zm),
+ asm, "\t$Vdn, $Pg, $_Vdn, $Zm",
+ "",
+ []>,
+ Sched<[]> {
+ bits<3> Pg;
+ bits<5> Vdn;
+ bits<5> Zm;
+ let Inst{31-24} = 0b01100101;
+ let Inst{23-22} = sz;
+ let Inst{21-19} = 0b011;
+ let Inst{18-16} = opc;
+ let Inst{15-13} = 0b001;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zm;
+ let Inst{4-0} = Vdn;
+
+ let Constraints = "$Vdn = $_Vdn";
+}
+
+multiclass sve_fp_2op_p_vd<bits<3> opc, string asm> {
+ def _H : sve_fp_2op_p_vd<0b01, opc, asm, ZPR16, FPR16>;
+ def _S : sve_fp_2op_p_vd<0b10, opc, asm, ZPR32, FPR32>;
+ def _D : sve_fp_2op_p_vd<0b11, opc, asm, ZPR64, FPR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Floating Point Compare - Vectors Group
+//===----------------------------------------------------------------------===//
+
+class sve_fp_3op_p_pd<bits<2> sz, bits<3> opc, string asm, PPRRegOp pprty,
+ ZPRRegOp zprty>
+: I<(outs pprty:$Pd), (ins PPR3bAny:$Pg, zprty:$Zn, zprty:$Zm),
+ asm, "\t$Pd, $Pg/z, $Zn, $Zm",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pd;
+ bits<3> Pg;
+ bits<5> Zm;
+ bits<5> Zn;
+ let Inst{31-24} = 0b01100101;
+ let Inst{23-22} = sz;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = Zm;
+ let Inst{15} = opc{2};
+ let Inst{14} = 0b1;
+ let Inst{13} = opc{1};
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4} = opc{0};
+ let Inst{3-0} = Pd;
+}
+
+multiclass sve_fp_3op_p_pd<bits<3> opc, string asm> {
+ def _H : sve_fp_3op_p_pd<0b01, opc, asm, PPR16, ZPR16>;
+ def _S : sve_fp_3op_p_pd<0b10, opc, asm, PPR32, ZPR32>;
+ def _D : sve_fp_3op_p_pd<0b11, opc, asm, PPR64, ZPR64>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// SVE Floating Point Compare - with Zero Group
+//===----------------------------------------------------------------------===//
+
+class sve_fp_2op_p_pd<bits<2> sz, bits<3> opc, string asm, PPRRegOp pprty,
+ ZPRRegOp zprty>
+: I<(outs pprty:$Pd), (ins PPR3bAny:$Pg, zprty:$Zn),
+ asm, "\t$Pd, $Pg/z, $Zn, #0.0",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pd;
+ bits<3> Pg;
+ bits<5> Zn;
+ let Inst{31-24} = 0b01100101;
+ let Inst{23-22} = sz;
+ let Inst{21-18} = 0b0100;
+ let Inst{17-16} = opc{2-1};
+ let Inst{15-13} = 0b001;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4} = opc{0};
+ let Inst{3-0} = Pd;
+}
+
+multiclass sve_fp_2op_p_pd<bits<3> opc, string asm> {
+ def _H : sve_fp_2op_p_pd<0b01, opc, asm, PPR16, ZPR16>;
+ def _S : sve_fp_2op_p_pd<0b10, opc, asm, PPR32, ZPR32>;
+ def _D : sve_fp_2op_p_pd<0b11, opc, asm, PPR64, ZPR64>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// SVE Index Generation Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_index_ii<bits<2> sz8_64, string asm, ZPRRegOp zprty,
+ Operand imm_ty>
+: I<(outs zprty:$Zd), (ins imm_ty:$imm5, imm_ty:$imm5b),
+ asm, "\t$Zd, $imm5, $imm5b",
+ "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> imm5;
+ bits<5> imm5b;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = sz8_64;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = imm5b;
+ let Inst{15-10} = 0b010000;
+ let Inst{9-5} = imm5;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve_int_index_ii<string asm> {
+ def _B : sve_int_index_ii<0b00, asm, ZPR8, simm5_32b>;
+ def _H : sve_int_index_ii<0b01, asm, ZPR16, simm5_32b>;
+ def _S : sve_int_index_ii<0b10, asm, ZPR32, simm5_32b>;
+ def _D : sve_int_index_ii<0b11, asm, ZPR64, simm5_64b>;
+}
+
+class sve_int_index_ir<bits<2> sz8_64, string asm, ZPRRegOp zprty,
+ RegisterClass srcRegType, Operand imm_ty>
+: I<(outs zprty:$Zd), (ins imm_ty:$imm5, srcRegType:$Rm),
+ asm, "\t$Zd, $imm5, $Rm",
+ "", []>, Sched<[]> {
+ bits<5> Rm;
+ bits<5> Zd;
+ bits<5> imm5;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = sz8_64;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rm;
+ let Inst{15-10} = 0b010010;
+ let Inst{9-5} = imm5;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve_int_index_ir<string asm> {
+ def _B : sve_int_index_ir<0b00, asm, ZPR8, GPR32, simm5_32b>;
+ def _H : sve_int_index_ir<0b01, asm, ZPR16, GPR32, simm5_32b>;
+ def _S : sve_int_index_ir<0b10, asm, ZPR32, GPR32, simm5_32b>;
+ def _D : sve_int_index_ir<0b11, asm, ZPR64, GPR64, simm5_64b>;
+}
+
+class sve_int_index_ri<bits<2> sz8_64, string asm, ZPRRegOp zprty,
+ RegisterClass srcRegType, Operand imm_ty>
+: I<(outs zprty:$Zd), (ins srcRegType:$Rn, imm_ty:$imm5),
+ asm, "\t$Zd, $Rn, $imm5",
+ "", []>, Sched<[]> {
+ bits<5> Rn;
+ bits<5> Zd;
+ bits<5> imm5;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = sz8_64;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = imm5;
+ let Inst{15-10} = 0b010001;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve_int_index_ri<string asm> {
+ def _B : sve_int_index_ri<0b00, asm, ZPR8, GPR32, simm5_32b>;
+ def _H : sve_int_index_ri<0b01, asm, ZPR16, GPR32, simm5_32b>;
+ def _S : sve_int_index_ri<0b10, asm, ZPR32, GPR32, simm5_32b>;
+ def _D : sve_int_index_ri<0b11, asm, ZPR64, GPR64, simm5_64b>;
+}
+
+class sve_int_index_rr<bits<2> sz8_64, string asm, ZPRRegOp zprty,
+ RegisterClass srcRegType>
+: I<(outs zprty:$Zd), (ins srcRegType:$Rn, srcRegType:$Rm),
+ asm, "\t$Zd, $Rn, $Rm",
+ "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Rm;
+ bits<5> Rn;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = sz8_64;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rm;
+ let Inst{15-10} = 0b010011;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve_int_index_rr<string asm> {
+ def _B : sve_int_index_rr<0b00, asm, ZPR8, GPR32>;
+ def _H : sve_int_index_rr<0b01, asm, ZPR16, GPR32>;
+ def _S : sve_int_index_rr<0b10, asm, ZPR32, GPR32>;
+ def _D : sve_int_index_rr<0b11, asm, ZPR64, GPR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Bitwise Shift - Predicated Group
+//===----------------------------------------------------------------------===//
+class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<3> opc, string asm,
+ ZPRRegOp zprty, Operand immtype,
+ ElementSizeEnum size>
+: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, immtype:$imm),
+ asm, "\t$Zdn, $Pg/m, $_Zdn, $imm",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Zdn;
+ bits<6> imm;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = tsz8_64{3-2};
+ let Inst{21-19} = 0b000;
+ let Inst{18-16} = opc;
+ let Inst{15-13} = 0b100;
+ let Inst{12-10} = Pg;
+ let Inst{9-8} = tsz8_64{1-0};
+ let Inst{7-5} = imm{2-0}; // imm3
+ let Inst{4-0} = Zdn;
+
+ let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = size;
+}
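+
+// The shift amount shares encoding space with the element size: tsz8_64
+// occupies Inst{23-22} and Inst{9-8} while the low immediate bits sit in
+// Inst{7-5}. Wider element sizes leave parts of tsz8_64 unspecified ("?")
+// so the multiclasses below can route the extra immediate bits into them.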
+
+multiclass sve_int_bin_pred_shift_imm_left<bits<3> opc, string asm> {
+ def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8,
+ ElementSizeB>;
+ def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16,
+ ElementSizeH> {
+ let Inst{8} = imm{3};
+ }
+ def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32,
+ ElementSizeS> {
+ let Inst{9-8} = imm{4-3};
+ }
+ def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64,
+ ElementSizeD> {
+ let Inst{22} = imm{5};
+ let Inst{9-8} = imm{4-3};
+ }
+}
+
+multiclass sve_int_bin_pred_shift_imm_right<bits<3> opc, string asm> {
+ def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8,
+ ElementSizeB>;
+ def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16,
+ ElementSizeH> {
+ let Inst{8} = imm{3};
+ }
+ def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32,
+ ElementSizeS> {
+ let Inst{9-8} = imm{4-3};
+ }
+ def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64,
+ ElementSizeD> {
+ let Inst{22} = imm{5};
+ let Inst{9-8} = imm{4-3};
+ }
+}
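+
+// In the two multiclasses above, the '?' placeholder bits of tsz8_64 are
+// overwritten per element size with the high bits of the shift immediate
+// (Inst{8}, Inst{9-8} and Inst{22}), so one 6-bit $imm field serves every
+// element width.
+// Usage sketch (illustrative only; defm name and opcode value are assumed,
+// the real records live in AArch64SVEInstrInfo.td):
+//   defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right<0b000, "asr">;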
+
+class sve_int_bin_pred_shift<bits<2> sz8_64, bit wide, bits<3> opc,
+ string asm, ZPRRegOp zprty, ZPRRegOp zprty2>
+: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty2:$Zm),
+ asm, "\t$Zdn, $Pg/m, $_Zdn, $Zm",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Zdn;
+ bits<5> Zm;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-20} = 0b01;
+ let Inst{19} = wide;
+ let Inst{18-16} = opc;
+ let Inst{15-13} = 0b100;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zm;
+ let Inst{4-0} = Zdn;
+
+ let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
+}
+
+multiclass sve_int_bin_pred_shift<bits<3> opc, string asm> {
+ def _B : sve_int_bin_pred_shift<0b00, 0b0, opc, asm, ZPR8, ZPR8>;
+ def _H : sve_int_bin_pred_shift<0b01, 0b0, opc, asm, ZPR16, ZPR16>;
+ def _S : sve_int_bin_pred_shift<0b10, 0b0, opc, asm, ZPR32, ZPR32>;
+ def _D : sve_int_bin_pred_shift<0b11, 0b0, opc, asm, ZPR64, ZPR64>;
+}
+
+multiclass sve_int_bin_pred_shift_wide<bits<3> opc, string asm> {
+ def _B : sve_int_bin_pred_shift<0b00, 0b1, opc, asm, ZPR8, ZPR64>;
+ def _H : sve_int_bin_pred_shift<0b01, 0b1, opc, asm, ZPR16, ZPR64>;
+ def _S : sve_int_bin_pred_shift<0b10, 0b1, opc, asm, ZPR32, ZPR64>;
+}
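+
+// The wide forms shift B/H/S elements by a 64-bit vector of shift amounts
+// (zprty2 = ZPR64); no _D variant is needed since doubleword elements already
+// take 64-bit shift amounts in the plain multiclass above.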
+
+//===----------------------------------------------------------------------===//
+// SVE Shift - Unpredicated Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_bin_cons_shift_wide<bits<2> sz8_64, bits<2> opc, string asm,
+ ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins zprty:$Zn, ZPR64:$Zm),
+ asm, "\t$Zd, $Zn, $Zm",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zm;
+ bits<5> Zn;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = sz8_64;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Zm;
+ let Inst{15-12} = 0b1000;
+ let Inst{11-10} = opc;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve_int_bin_cons_shift_wide<bits<2> opc, string asm> {
+ def _B : sve_int_bin_cons_shift_wide<0b00, opc, asm, ZPR8>;
+ def _H : sve_int_bin_cons_shift_wide<0b01, opc, asm, ZPR16>;
+ def _S : sve_int_bin_cons_shift_wide<0b10, opc, asm, ZPR32>;
+}
+
+class sve_int_bin_cons_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
+ ZPRRegOp zprty, Operand immtype>
+: I<(outs zprty:$Zd), (ins zprty:$Zn, immtype:$imm),
+ asm, "\t$Zd, $Zn, $imm",
+ "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ bits<6> imm;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = tsz8_64{3-2};
+ let Inst{21} = 0b1;
+ let Inst{20-19} = tsz8_64{1-0};
+ let Inst{18-16} = imm{2-0}; // imm3
+ let Inst{15-12} = 0b1001;
+ let Inst{11-10} = opc;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve_int_bin_cons_shift_imm_left<bits<2> opc, string asm> {
+ def _B : sve_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
+ def _H : sve_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
+ let Inst{19} = imm{3};
+ }
+ def _S : sve_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> {
+ let Inst{20-19} = imm{4-3};
+ }
+ def _D : sve_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> {
+ let Inst{22} = imm{5};
+ let Inst{20-19} = imm{4-3};
+ }
+}
+
+multiclass sve_int_bin_cons_shift_imm_right<bits<2> opc, string asm> {
+ def _B : sve_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
+ def _H : sve_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
+ let Inst{19} = imm{3};
+ }
+ def _S : sve_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
+ let Inst{20-19} = imm{4-3};
+ }
+ def _D : sve_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
+ let Inst{22} = imm{5};
+ let Inst{20-19} = imm{4-3};
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Memory - Store Group
+//===----------------------------------------------------------------------===//
+
+class sve_mem_cst_si<bits<2> msz, bits<2> esz, string asm,
+ RegisterOperand VecList>
+: I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4),
+ asm, "\t$Zt, $Pg, [$Rn, $imm4, mul vl]",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Rn;
+ bits<5> Zt;
+ bits<4> imm4;
+ let Inst{31-25} = 0b1110010;
+ let Inst{24-23} = msz;
+ let Inst{22-21} = esz;
+ let Inst{20} = 0;
+ let Inst{19-16} = imm4;
+ let Inst{15-13} = 0b111;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Zt;
+
+ let mayStore = 1;
+}
+
+multiclass sve_mem_cst_si<bits<2> msz, bits<2> esz, string asm,
+ RegisterOperand listty, ZPRRegOp zprty>
+{
+ def NAME : sve_mem_cst_si<msz, esz, asm, listty>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $imm4, mul vl]",
+ (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Rn]",
+ (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Rn]",
+ (!cast<Instruction>(NAME) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>;
+}
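+
+// Alias pattern used throughout this store group: the priority-0 aliases let
+// the assembler accept a bare ZPR operand and an "[$Rn]" form with the offset
+// defaulted to 0, while the final priority-1 alias is the form chosen when
+// printing the zero-offset case with the register-list syntax.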
+
+class sve_mem_est_si<bits<2> sz, bits<2> nregs, RegisterOperand VecList,
+ string asm, Operand immtype>
+: I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, immtype:$imm4),
+ asm, "\t$Zt, $Pg, [$Rn, $imm4, mul vl]",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Rn;
+ bits<5> Zt;
+ bits<4> imm4;
+ let Inst{31-25} = 0b1110010;
+ let Inst{24-23} = sz;
+ let Inst{22-21} = nregs;
+ let Inst{20} = 1;
+ let Inst{19-16} = imm4;
+ let Inst{15-13} = 0b111;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Zt;
+
+ let mayStore = 1;
+}
+
+multiclass sve_mem_est_si<bits<2> sz, bits<2> nregs, RegisterOperand VecList,
+ string asm, Operand immtype> {
+ def NAME : sve_mem_est_si<sz, nregs, VecList, asm, immtype>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Rn]",
+ (!cast<Instruction>(NAME) VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>;
+}
+
+class sve_mem_est_ss<bits<2> sz, bits<2> nregs, RegisterOperand VecList,
+ string asm, RegisterOperand gprty>
+: I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm),
+ asm, "\t$Zt, $Pg, [$Rn, $Rm]",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Rm;
+ bits<5> Rn;
+ bits<5> Zt;
+ let Inst{31-25} = 0b1110010;
+ let Inst{24-23} = sz;
+ let Inst{22-21} = nregs;
+ let Inst{20-16} = Rm;
+ let Inst{15-13} = 0b011;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Zt;
+
+ let mayStore = 1;
+}
+
+class sve_mem_cst_ss_base<bits<4> dtype, string asm,
+ RegisterOperand listty, RegisterOperand gprty>
+: I<(outs), (ins listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm),
+ asm, "\t$Zt, $Pg, [$Rn, $Rm]",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Rm;
+ bits<5> Rn;
+ bits<5> Zt;
+ let Inst{31-25} = 0b1110010;
+ let Inst{24-21} = dtype;
+ let Inst{20-16} = Rm;
+ let Inst{15-13} = 0b010;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Zt;
+
+ let mayStore = 1;
+}
+
+multiclass sve_mem_cst_ss<bits<4> dtype, string asm,
+ RegisterOperand listty, ZPRRegOp zprty,
+ RegisterOperand gprty> {
+ def NAME : sve_mem_cst_ss_base<dtype, asm, listty, gprty>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Rm]",
+ (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>;
+}
+
+class sve_mem_cstnt_si<bits<2> msz, string asm, RegisterOperand VecList>
+: I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4),
+ asm, "\t$Zt, $Pg, [$Rn, $imm4, mul vl]",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Rn;
+ bits<5> Zt;
+ bits<4> imm4;
+ let Inst{31-25} = 0b1110010;
+ let Inst{24-23} = msz;
+ let Inst{22-20} = 0b001;
+ let Inst{19-16} = imm4;
+ let Inst{15-13} = 0b111;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Zt;
+
+ let mayStore = 1;
+}
+
+multiclass sve_mem_cstnt_si<bits<2> msz, string asm, RegisterOperand listty,
+ ZPRRegOp zprty> {
+ def NAME : sve_mem_cstnt_si<msz, asm, listty>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Rn]",
+ (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $imm4, mul vl]",
+ (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Rn]",
+ (!cast<Instruction>(NAME) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>;
+}
+
+class sve_mem_cstnt_ss_base<bits<2> msz, string asm, RegisterOperand listty,
+ RegisterOperand gprty>
+: I<(outs), (ins listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm),
+ asm, "\t$Zt, $Pg, [$Rn, $Rm]",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Rm;
+ bits<5> Rn;
+ bits<5> Zt;
+ let Inst{31-25} = 0b1110010;
+ let Inst{24-23} = msz;
+ let Inst{22-21} = 0b00;
+ let Inst{20-16} = Rm;
+ let Inst{15-13} = 0b011;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Zt;
+
+ let mayStore = 1;
+}
+
+multiclass sve_mem_cstnt_ss<bits<2> msz, string asm, RegisterOperand listty,
+ ZPRRegOp zprty, RegisterOperand gprty> {
+ def NAME : sve_mem_cstnt_ss_base<msz, asm, listty, gprty>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Rm]",
+ (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>;
+}
+
+class sve_mem_sst_sv<bits<3> opc, bit xs, bit scaled, string asm,
+ RegisterOperand VecList, RegisterOperand zprext>
+: I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm),
+ asm, "\t$Zt, $Pg, [$Rn, $Zm]",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Rn;
+ bits<5> Zm;
+ bits<5> Zt;
+ let Inst{31-25} = 0b1110010;
+ let Inst{24-22} = opc;
+ let Inst{21} = scaled;
+ let Inst{20-16} = Zm;
+ let Inst{15} = 0b1;
+ let Inst{14} = xs;
+ let Inst{13} = 0;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Zt;
+
+ let mayStore = 1;
+}
+
+multiclass sve_mem_sst_sv_32_scaled<bits<3> opc, string asm,
+ RegisterOperand listty,
+ ZPRRegOp zprty,
+ RegisterOperand sxtw_opnd,
+ RegisterOperand uxtw_opnd > {
+ def _UXTW_SCALED : sve_mem_sst_sv<opc, 0, 1, asm, listty, uxtw_opnd>;
+ def _SXTW_SCALED : sve_mem_sst_sv<opc, 1, 1, asm, listty, sxtw_opnd>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
+ (!cast<Instruction>(NAME # _UXTW_SCALED) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
+ (!cast<Instruction>(NAME # _SXTW_SCALED) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
+}
+
+multiclass sve_mem_sst_sv_32_unscaled<bits<3> opc, string asm,
+ RegisterOperand listty,
+ ZPRRegOp zprty,
+ RegisterOperand sxtw_opnd,
+ RegisterOperand uxtw_opnd> {
+ def _UXTW : sve_mem_sst_sv<opc, 0, 0, asm, listty, uxtw_opnd>;
+ def _SXTW : sve_mem_sst_sv<opc, 1, 0, asm, listty, sxtw_opnd>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
+ (!cast<Instruction>(NAME # _UXTW) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
+ (!cast<Instruction>(NAME # _SXTW) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
+}
+
+class sve_mem_sst_sv2<bits<2> msz, bit scaled, string asm,
+ RegisterOperand zprext>
+: I<(outs), (ins Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm),
+ asm, "\t$Zt, $Pg, [$Rn, $Zm]",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Rn;
+ bits<5> Zm;
+ bits<5> Zt;
+ let Inst{31-25} = 0b1110010;
+ let Inst{24-23} = msz;
+ let Inst{22} = 0b0;
+ let Inst{21} = scaled;
+ let Inst{20-16} = Zm;
+ let Inst{15-13} = 0b101;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Zt;
+
+ let mayStore = 1;
+}
+
+multiclass sve_mem_sst_sv_64_scaled<bits<2> msz, string asm,
+ RegisterOperand zprext> {
+ def "" : sve_mem_sst_sv2<msz, 1, asm, zprext>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
+ (!cast<Instruction>(NAME) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), 0>;
+
+}
+
+multiclass sve_mem_sst_sv_64_unscaled<bits<2> msz, string asm> {
+ def "" : sve_mem_sst_sv2<msz, 0, asm, ZPR64ExtLSL8>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
+ (!cast<Instruction>(NAME) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), 0>;
+}
+
+class sve_mem_sst_vi<bits<3> opc, string asm, ZPRRegOp zprty,
+ RegisterOperand VecList, Operand imm_ty>
+: I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, zprty:$Zn, imm_ty:$imm5),
+ asm, "\t$Zt, $Pg, [$Zn, $imm5]",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> imm5;
+ bits<5> Zn;
+ bits<5> Zt;
+ let Inst{31-25} = 0b1110010;
+ let Inst{24-23} = opc{2-1};
+ let Inst{22} = 0b1;
+ let Inst{21} = opc{0};
+ let Inst{20-16} = imm5;
+ let Inst{15-13} = 0b101;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zt;
+
+ let mayStore = 1;
+}
+
+multiclass sve_mem_sst_vi_ptrs<bits<3> opc, string asm, RegisterOperand listty,
+ ZPRRegOp zprty, Operand imm_ty> {
+ def _IMM : sve_mem_sst_vi<opc, asm, zprty, listty, imm_ty>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
+ (!cast<Instruction>(NAME # _IMM) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, 0), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $imm5]",
+ (!cast<Instruction>(NAME # _IMM) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, imm_ty:$imm5), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
+ (!cast<Instruction>(NAME # _IMM) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, 0), 1>;
+}
+
+class sve_mem_z_spill<string asm>
+: I<(outs), (ins ZPRAny:$Zt, GPR64sp:$Rn, simm9:$imm9),
+ asm, "\t$Zt, [$Rn, $imm9, mul vl]",
+ "",
+ []>, Sched<[]> {
+ bits<5> Rn;
+ bits<5> Zt;
+ bits<9> imm9;
+ let Inst{31-22} = 0b1110010110;
+ let Inst{21-16} = imm9{8-3};
+ let Inst{15-13} = 0b010;
+ let Inst{12-10} = imm9{2-0};
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Zt;
+
+ let mayStore = 1;
+}
+
+multiclass sve_mem_z_spill<string asm> {
+ def NAME : sve_mem_z_spill<asm>;
+
+ def : InstAlias<asm # "\t$Zt, [$Rn]",
+ (!cast<Instruction>(NAME) ZPRAny:$Zt, GPR64sp:$Rn, 0), 1>;
+}
+
+class sve_mem_p_spill<string asm>
+: I<(outs), (ins PPRAny:$Pt, GPR64sp:$Rn, simm9:$imm9),
+ asm, "\t$Pt, [$Rn, $imm9, mul vl]",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pt;
+ bits<5> Rn;
+ bits<9> imm9;
+ let Inst{31-22} = 0b1110010110;
+ let Inst{21-16} = imm9{8-3};
+ let Inst{15-13} = 0b000;
+ let Inst{12-10} = imm9{2-0};
+ let Inst{9-5} = Rn;
+ let Inst{4} = 0b0;
+ let Inst{3-0} = Pt;
+
+ let mayStore = 1;
+}
+
+multiclass sve_mem_p_spill<string asm> {
+ def NAME : sve_mem_p_spill<asm>;
+
+ def : InstAlias<asm # "\t$Pt, [$Rn]",
+ (!cast<Instruction>(NAME) PPRAny:$Pt, GPR64sp:$Rn, 0), 1>;
+}
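+
+// Both spill forms above split the 9-bit VL-scaled offset across Inst{21-16}
+// (imm9{8-3}) and Inst{12-10} (imm9{2-0}); only bits 15-13 and the register
+// file (Z versus P) differ. They are presumed to back the STR (vector) and
+// STR (predicate) spill syntax.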
+
+//===----------------------------------------------------------------------===//
+// SVE Permute - Predicates Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_perm_bin_perm_pp<bits<3> opc, bits<2> sz8_64, string asm,
+ PPRRegOp pprty>
+: I<(outs pprty:$Pd), (ins pprty:$Pn, pprty:$Pm),
+ asm, "\t$Pd, $Pn, $Pm",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pd;
+ bits<4> Pm;
+ bits<4> Pn;
+ let Inst{31-24} = 0b00000101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-20} = 0b10;
+ let Inst{19-16} = Pm;
+ let Inst{15-13} = 0b010;
+ let Inst{12-10} = opc;
+ let Inst{9} = 0b0;
+ let Inst{8-5} = Pn;
+ let Inst{4} = 0b0;
+ let Inst{3-0} = Pd;
+}
+
+multiclass sve_int_perm_bin_perm_pp<bits<3> opc, string asm> {
+ def _B : sve_int_perm_bin_perm_pp<opc, 0b00, asm, PPR8>;
+ def _H : sve_int_perm_bin_perm_pp<opc, 0b01, asm, PPR16>;
+ def _S : sve_int_perm_bin_perm_pp<opc, 0b10, asm, PPR32>;
+ def _D : sve_int_perm_bin_perm_pp<opc, 0b11, asm, PPR64>;
+}
+
+class sve_int_perm_punpk<bit opc, string asm>
+: I<(outs PPR16:$Pd), (ins PPR8:$Pn),
+ asm, "\t$Pd, $Pn",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pd;
+ bits<4> Pn;
+ let Inst{31-17} = 0b000001010011000;
+ let Inst{16} = opc;
+ let Inst{15-9} = 0b0100000;
+ let Inst{8-5} = Pn;
+ let Inst{4} = 0b0;
+ let Inst{3-0} = Pd;
+}
+
+class sve_int_rdffr_pred<bit s, string asm>
+: I<(outs PPR8:$Pd), (ins PPRAny:$Pg),
+ asm, "\t$Pd, $Pg/z",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pd;
+ bits<4> Pg;
+ let Inst{31-23} = 0b001001010;
+ let Inst{22} = s;
+ let Inst{21-9} = 0b0110001111000;
+ let Inst{8-5} = Pg;
+ let Inst{4} = 0;
+ let Inst{3-0} = Pd;
+
+ let Defs = !if(!eq (s, 1), [NZCV], []);
+ let Uses = [FFR];
+}
+
+class sve_int_rdffr_unpred<string asm> : I<
+ (outs PPR8:$Pd), (ins),
+ asm, "\t$Pd",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pd;
+ let Inst{31-4} = 0b0010010100011001111100000000;
+ let Inst{3-0} = Pd;
+
+ let Uses = [FFR];
+}
+
+class sve_int_wrffr<string asm>
+: I<(outs), (ins PPR8:$Pn),
+ asm, "\t$Pn",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pn;
+ let Inst{31-9} = 0b00100101001010001001000;
+ let Inst{8-5} = Pn;
+ let Inst{4-0} = 0b00000;
+
+ let hasSideEffects = 1;
+ let Defs = [FFR];
+}
+
+class sve_int_setffr<string asm>
+: I<(outs), (ins),
+ asm, "",
+ "",
+ []>, Sched<[]> {
+ let Inst{31-0} = 0b00100101001011001001000000000000;
+
+ let hasSideEffects = 1;
+ let Defs = [FFR];
+}
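+
+// The sve_int_rdffr_*, sve_int_wrffr and sve_int_setffr classes model the FFR
+// management instructions: a predicated read (setting NZCV when s = 1), an
+// unpredicated read, a write from a predicate register and a set-all form
+// with no operands (RDFFR/RDFFRS/WRFFR/SETFFR; mnemonic mapping assumed).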
+
+//===----------------------------------------------------------------------===//
+// SVE Permute Vector - Predicated Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_perm_clast_rz<bits<2> sz8_64, bit ab, string asm,
+ ZPRRegOp zprty, RegisterClass rt>
+: I<(outs rt:$Rdn), (ins PPR3bAny:$Pg, rt:$_Rdn, zprty:$Zm),
+ asm, "\t$Rdn, $Pg, $_Rdn, $Zm",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Rdn;
+ bits<5> Zm;
+ let Inst{31-24} = 0b00000101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-17} = 0b11000;
+ let Inst{16} = ab;
+ let Inst{15-13} = 0b101;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zm;
+ let Inst{4-0} = Rdn;
+
+ let Constraints = "$Rdn = $_Rdn";
+}
+
+multiclass sve_int_perm_clast_rz<bit ab, string asm> {
+ def _B : sve_int_perm_clast_rz<0b00, ab, asm, ZPR8, GPR32>;
+ def _H : sve_int_perm_clast_rz<0b01, ab, asm, ZPR16, GPR32>;
+ def _S : sve_int_perm_clast_rz<0b10, ab, asm, ZPR32, GPR32>;
+ def _D : sve_int_perm_clast_rz<0b11, ab, asm, ZPR64, GPR64>;
+}
+
+class sve_int_perm_clast_vz<bits<2> sz8_64, bit ab, string asm,
+ ZPRRegOp zprty, RegisterClass rt>
+: I<(outs rt:$Vdn), (ins PPR3bAny:$Pg, rt:$_Vdn, zprty:$Zm),
+ asm, "\t$Vdn, $Pg, $_Vdn, $Zm",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Vdn;
+ bits<5> Zm;
+ let Inst{31-24} = 0b00000101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-17} = 0b10101;
+ let Inst{16} = ab;
+ let Inst{15-13} = 0b100;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zm;
+ let Inst{4-0} = Vdn;
+
+ let Constraints = "$Vdn = $_Vdn";
+}
+
+multiclass sve_int_perm_clast_vz<bit ab, string asm> {
+ def _B : sve_int_perm_clast_vz<0b00, ab, asm, ZPR8, FPR8>;
+ def _H : sve_int_perm_clast_vz<0b01, ab, asm, ZPR16, FPR16>;
+ def _S : sve_int_perm_clast_vz<0b10, ab, asm, ZPR32, FPR32>;
+ def _D : sve_int_perm_clast_vz<0b11, ab, asm, ZPR64, FPR64>;
+}
+
+class sve_int_perm_clast_zz<bits<2> sz8_64, bit ab, string asm,
+ ZPRRegOp zprty>
+: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm),
+ asm, "\t$Zdn, $Pg, $_Zdn, $Zm",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Zdn;
+ bits<5> Zm;
+ let Inst{31-24} = 0b00000101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-17} = 0b10100;
+ let Inst{16} = ab;
+ let Inst{15-13} = 0b100;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zm;
+ let Inst{4-0} = Zdn;
+
+ let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
+}
+
+multiclass sve_int_perm_clast_zz<bit ab, string asm> {
+ def _B : sve_int_perm_clast_zz<0b00, ab, asm, ZPR8>;
+ def _H : sve_int_perm_clast_zz<0b01, ab, asm, ZPR16>;
+ def _S : sve_int_perm_clast_zz<0b10, ab, asm, ZPR32>;
+ def _D : sve_int_perm_clast_zz<0b11, ab, asm, ZPR64>;
+}
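+
+// In the CLAST-style forms above, the ab bit is presumed to select between
+// the "A" (element after the last active one) and "B" (last active element)
+// variants; _rz writes a general-purpose register, _vz a SIMD&FP scalar and
+// _zz a vector, each tied to its source accumulator operand.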
+
+class sve_int_perm_last_r<bits<2> sz8_64, bit ab, string asm,
+ ZPRRegOp zprty, RegisterClass resultRegType>
+: I<(outs resultRegType:$Rd), (ins PPR3bAny:$Pg, zprty:$Zn),
+ asm, "\t$Rd, $Pg, $Zn",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Rd;
+ bits<5> Zn;
+ let Inst{31-24} = 0b00000101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-17} = 0b10000;
+ let Inst{16} = ab;
+ let Inst{15-13} = 0b101;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass sve_int_perm_last_r<bit ab, string asm> {
+ def _B : sve_int_perm_last_r<0b00, ab, asm, ZPR8, GPR32>;
+ def _H : sve_int_perm_last_r<0b01, ab, asm, ZPR16, GPR32>;
+ def _S : sve_int_perm_last_r<0b10, ab, asm, ZPR32, GPR32>;
+ def _D : sve_int_perm_last_r<0b11, ab, asm, ZPR64, GPR64>;
+}
+
+class sve_int_perm_last_v<bits<2> sz8_64, bit ab, string asm,
+ ZPRRegOp zprty, RegisterClass dstRegtype>
+: I<(outs dstRegtype:$Vd), (ins PPR3bAny:$Pg, zprty:$Zn),
+ asm, "\t$Vd, $Pg, $Zn",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Vd;
+ bits<5> Zn;
+ let Inst{31-24} = 0b00000101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-17} = 0b10001;
+ let Inst{16} = ab;
+ let Inst{15-13} = 0b100;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Vd;
+}
+
+multiclass sve_int_perm_last_v<bit ab, string asm> {
+ def _B : sve_int_perm_last_v<0b00, ab, asm, ZPR8, FPR8>;
+ def _H : sve_int_perm_last_v<0b01, ab, asm, ZPR16, FPR16>;
+ def _S : sve_int_perm_last_v<0b10, ab, asm, ZPR32, FPR32>;
+ def _D : sve_int_perm_last_v<0b11, ab, asm, ZPR64, FPR64>;
+}
+
+class sve_int_perm_splice<bits<2> sz8_64, string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm),
+ asm, "\t$Zdn, $Pg, $_Zdn, $Zm",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Zdn;
+ bits<5> Zm;
+ let Inst{31-24} = 0b00000101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-13} = 0b101100100;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zm;
+ let Inst{4-0} = Zdn;
+
+ let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
+}
+
+multiclass sve_int_perm_splice<string asm> {
+ def _B : sve_int_perm_splice<0b00, asm, ZPR8>;
+ def _H : sve_int_perm_splice<0b01, asm, ZPR16>;
+ def _S : sve_int_perm_splice<0b10, asm, ZPR32>;
+ def _D : sve_int_perm_splice<0b11, asm, ZPR64>;
+}
+
+class sve_int_perm_rev<bits<2> sz8_64, bits<2> opc, string asm,
+ ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins zprty:$_Zd, PPR3bAny:$Pg, zprty:$Zn),
+ asm, "\t$Zd, $Pg/m, $Zn",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zd;
+ bits<3> Pg;
+ bits<5> Zn;
+ let Inst{31-24} = 0b00000101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-18} = 0b1001;
+ let Inst{17-16} = opc;
+ let Inst{15-13} = 0b100;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+
+ let Constraints = "$Zd = $_Zd";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
+}
+
+multiclass sve_int_perm_rev_rbit<string asm> {
+ def _B : sve_int_perm_rev<0b00, 0b11, asm, ZPR8>;
+ def _H : sve_int_perm_rev<0b01, 0b11, asm, ZPR16>;
+ def _S : sve_int_perm_rev<0b10, 0b11, asm, ZPR32>;
+ def _D : sve_int_perm_rev<0b11, 0b11, asm, ZPR64>;
+}
+
+multiclass sve_int_perm_rev_revb<string asm> {
+ def _H : sve_int_perm_rev<0b01, 0b00, asm, ZPR16>;
+ def _S : sve_int_perm_rev<0b10, 0b00, asm, ZPR32>;
+ def _D : sve_int_perm_rev<0b11, 0b00, asm, ZPR64>;
+}
+
+multiclass sve_int_perm_rev_revh<string asm> {
+ def _S : sve_int_perm_rev<0b10, 0b01, asm, ZPR32>;
+ def _D : sve_int_perm_rev<0b11, 0b01, asm, ZPR64>;
+}
+
+multiclass sve_int_perm_rev_revw<string asm> {
+ def _D : sve_int_perm_rev<0b11, 0b10, asm, ZPR64>;
+}
+
+class sve_int_perm_cpy_r<bits<2> sz8_64, string asm, ZPRRegOp zprty,
+ RegisterClass srcRegType>
+: I<(outs zprty:$Zd), (ins zprty:$_Zd, PPR3bAny:$Pg, srcRegType:$Rn),
+ asm, "\t$Zd, $Pg/m, $Rn",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Rn;
+ bits<5> Zd;
+ let Inst{31-24} = 0b00000101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-13} = 0b101000101;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Zd;
+
+ let Constraints = "$Zd = $_Zd";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
+}
+
+multiclass sve_int_perm_cpy_r<string asm> {
+ def _B : sve_int_perm_cpy_r<0b00, asm, ZPR8, GPR32sp>;
+ def _H : sve_int_perm_cpy_r<0b01, asm, ZPR16, GPR32sp>;
+ def _S : sve_int_perm_cpy_r<0b10, asm, ZPR32, GPR32sp>;
+ def _D : sve_int_perm_cpy_r<0b11, asm, ZPR64, GPR64sp>;
+
+ def : InstAlias<"mov $Zd, $Pg/m, $Rn",
+ (!cast<Instruction>(NAME # _B) ZPR8:$Zd, PPR3bAny:$Pg, GPR32sp:$Rn), 1>;
+ def : InstAlias<"mov $Zd, $Pg/m, $Rn",
+ (!cast<Instruction>(NAME # _H) ZPR16:$Zd, PPR3bAny:$Pg, GPR32sp:$Rn), 1>;
+ def : InstAlias<"mov $Zd, $Pg/m, $Rn",
+ (!cast<Instruction>(NAME # _S) ZPR32:$Zd, PPR3bAny:$Pg, GPR32sp:$Rn), 1>;
+ def : InstAlias<"mov $Zd, $Pg/m, $Rn",
+ (!cast<Instruction>(NAME # _D) ZPR64:$Zd, PPR3bAny:$Pg, GPR64sp:$Rn), 1>;
+}
+
+class sve_int_perm_cpy_v<bits<2> sz8_64, string asm, ZPRRegOp zprty,
+ RegisterClass srcRegtype>
+: I<(outs zprty:$Zd), (ins zprty:$_Zd, PPR3bAny:$Pg, srcRegtype:$Vn),
+ asm, "\t$Zd, $Pg/m, $Vn",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Vn;
+ bits<5> Zd;
+ let Inst{31-24} = 0b00000101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-13} = 0b100000100;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Vn;
+ let Inst{4-0} = Zd;
+
+ let Constraints = "$Zd = $_Zd";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
+}
+
+multiclass sve_int_perm_cpy_v<string asm> {
+ def _B : sve_int_perm_cpy_v<0b00, asm, ZPR8, FPR8>;
+ def _H : sve_int_perm_cpy_v<0b01, asm, ZPR16, FPR16>;
+ def _S : sve_int_perm_cpy_v<0b10, asm, ZPR32, FPR32>;
+ def _D : sve_int_perm_cpy_v<0b11, asm, ZPR64, FPR64>;
+
+ def : InstAlias<"mov $Zd, $Pg/m, $Vn",
+ (!cast<Instruction>(NAME # _B) ZPR8:$Zd, PPR3bAny:$Pg, FPR8:$Vn), 1>;
+ def : InstAlias<"mov $Zd, $Pg/m, $Vn",
+ (!cast<Instruction>(NAME # _H) ZPR16:$Zd, PPR3bAny:$Pg, FPR16:$Vn), 1>;
+ def : InstAlias<"mov $Zd, $Pg/m, $Vn",
+ (!cast<Instruction>(NAME # _S) ZPR32:$Zd, PPR3bAny:$Pg, FPR32:$Vn), 1>;
+ def : InstAlias<"mov $Zd, $Pg/m, $Vn",
+ (!cast<Instruction>(NAME # _D) ZPR64:$Zd, PPR3bAny:$Pg, FPR64:$Vn), 1>;
+}
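+
+// For both predicated copy forms the "mov" InstAlias is registered with
+// priority 1, so the disassembler prints these encodings as "mov" rather than
+// the underlying mnemonic passed in through the asm parameter.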
+
+class sve_int_perm_compact<bit sz, string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins PPR3bAny:$Pg, zprty:$Zn),
+ asm, "\t$Zd, $Pg, $Zn",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Zd;
+ bits<5> Zn;
+ let Inst{31-23} = 0b000001011;
+ let Inst{22} = sz;
+ let Inst{21-13} = 0b100001100;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve_int_perm_compact<string asm> {
+ def _S : sve_int_perm_compact<0b0, asm, ZPR32>;
+ def _D : sve_int_perm_compact<0b1, asm, ZPR64>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// SVE Memory - Contiguous Load Group
+//===----------------------------------------------------------------------===//
+
+class sve_mem_cld_si_base<bits<4> dtype, bit nf, string asm,
+ RegisterOperand VecList>
+: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4),
+ asm, "\t$Zt, $Pg/z, [$Rn, $imm4, mul vl]",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Rn;
+ bits<5> Zt;
+ bits<4> imm4;
+ let Inst{31-25} = 0b1010010;
+ let Inst{24-21} = dtype;
+ let Inst{20} = nf;
+ let Inst{19-16} = imm4;
+ let Inst{15-13} = 0b101;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Zt;
+
+ let mayLoad = 1;
+ let Uses = !if(!eq(nf, 1), [FFR], []);
+ let Defs = !if(!eq(nf, 1), [FFR], []);
+}
+
+multiclass sve_mem_cld_si_base<bits<4> dtype, bit nf, string asm,
+ RegisterOperand listty, ZPRRegOp zprty> {
+ def _REAL : sve_mem_cld_si_base<dtype, nf, asm, listty>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
+ (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $imm4, mul vl]",
+ (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
+ (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>;
+}
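+
+// The _REAL suffix marks the record that carries the encoding; nf = 1 selects
+// the non-faulting form, which conditionally reads and writes FFR (see the
+// Uses/Defs above), and the aliases default the VL-scaled offset to 0 for the
+// "[$Rn]" syntax.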
+
+multiclass sve_mem_cld_si<bits<4> dtype, string asm, RegisterOperand listty,
+ ZPRRegOp zprty>
+: sve_mem_cld_si_base<dtype, 0, asm, listty, zprty>;
+
+class sve_mem_cldnt_si_base<bits<2> msz, string asm, RegisterOperand VecList>
+: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4),
+ asm, "\t$Zt, $Pg/z, [$Rn, $imm4, mul vl]",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zt;
+ bits<3> Pg;
+ bits<5> Rn;
+ bits<4> imm4;
+ let Inst{31-25} = 0b1010010;
+ let Inst{24-23} = msz;
+ let Inst{22-20} = 0b000;
+ let Inst{19-16} = imm4;
+ let Inst{15-13} = 0b111;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Zt;
+
+ let mayLoad = 1;
+}
+
+multiclass sve_mem_cldnt_si<bits<2> msz, string asm, RegisterOperand listty,
+ ZPRRegOp zprty> {
+ def NAME : sve_mem_cldnt_si_base<msz, asm, listty>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
+ (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $imm4, mul vl]",
+ (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
+ (!cast<Instruction>(NAME) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>;
+}
+
+class sve_mem_cldnt_ss_base<bits<2> msz, string asm, RegisterOperand VecList,
+ RegisterOperand gprty>
+: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm),
+ asm, "\t$Zt, $Pg/z, [$Rn, $Rm]",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Rm;
+ bits<5> Rn;
+ bits<5> Zt;
+ let Inst{31-25} = 0b1010010;
+ let Inst{24-23} = msz;
+ let Inst{22-21} = 0b00;
+ let Inst{20-16} = Rm;
+ let Inst{15-13} = 0b110;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Zt;
+
+ let mayLoad = 1;
+}
+
+multiclass sve_mem_cldnt_ss<bits<2> msz, string asm, RegisterOperand listty,
+ ZPRRegOp zprty, RegisterOperand gprty> {
+ def NAME : sve_mem_cldnt_ss_base<msz, asm, listty, gprty>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Rm]",
+ (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>;
+}
+
+class sve_mem_ldqr_si<bits<2> sz, string asm, RegisterOperand VecList>
+: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, simm4s16:$imm4),
+ asm, "\t$Zt, $Pg/z, [$Rn, $imm4]", "", []>, Sched<[]> {
+ bits<5> Zt;
+ bits<5> Rn;
+ bits<3> Pg;
+ bits<4> imm4;
+ let Inst{31-25} = 0b1010010;
+ let Inst{24-23} = sz;
+ let Inst{22-20} = 0;
+ let Inst{19-16} = imm4;
+ let Inst{15-13} = 0b001;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Zt;
+
+ let mayLoad = 1;
+}
+
+multiclass sve_mem_ldqr_si<bits<2> sz, string asm, RegisterOperand listty,
+ ZPRRegOp zprty> {
+ def NAME : sve_mem_ldqr_si<sz, asm, listty>;
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
+ (!cast<Instruction>(NAME) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>;
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
+ (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $imm4]",
+ (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s16:$imm4), 0>;
+}
+
+class sve_mem_ldqr_ss<bits<2> sz, string asm, RegisterOperand VecList,
+ RegisterOperand gprty>
+: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm),
+ asm, "\t$Zt, $Pg/z, [$Rn, $Rm]", "", []>, Sched<[]> {
+ bits<5> Zt;
+ bits<3> Pg;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31-25} = 0b1010010;
+ let Inst{24-23} = sz;
+ let Inst{22-21} = 0;
+ let Inst{20-16} = Rm;
+ let Inst{15-13} = 0;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Zt;
+
+ let mayLoad = 1;
+}
+
+multiclass sve_mem_ldqr_ss<bits<2> sz, string asm, RegisterOperand listty,
+ ZPRRegOp zprty, RegisterOperand gprty> {
+ def NAME : sve_mem_ldqr_ss<sz, asm, listty, gprty>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Rm]",
+ (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>;
+}
+
+class sve_mem_ld_dup<bits<2> dtypeh, bits<2> dtypel, string asm,
+ RegisterOperand VecList, Operand immtype>
+: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, immtype:$imm6),
+ asm, "\t$Zt, $Pg/z, [$Rn, $imm6]",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Rn;
+ bits<5> Zt;
+ bits<6> imm6;
+ let Inst{31-25} = 0b1000010;
+ let Inst{24-23} = dtypeh;
+ let Inst{22} = 1;
+ let Inst{21-16} = imm6;
+ let Inst{15} = 0b1;
+ let Inst{14-13} = dtypel;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Zt;
+
+ let mayLoad = 1;
+}
+
+multiclass sve_mem_ld_dup<bits<2> dtypeh, bits<2> dtypel, string asm,
+ RegisterOperand zlistty, ZPRRegOp zprty, Operand immtype> {
+ def NAME : sve_mem_ld_dup<dtypeh, dtypel, asm, zlistty, immtype>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
+ (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $imm6]",
+ (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, immtype:$imm6), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
+ (!cast<Instruction>(NAME) zlistty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>;
+}
+
+class sve_mem_cld_ss_base<bits<4> dtype, bit ff, dag iops, string asm,
+ RegisterOperand VecList>
+: I<(outs VecList:$Zt), iops,
+ asm, "\t$Zt, $Pg/z, [$Rn, $Rm]",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zt;
+ bits<3> Pg;
+ bits<5> Rm;
+ bits<5> Rn;
+ let Inst{31-25} = 0b1010010;
+ let Inst{24-21} = dtype;
+ let Inst{20-16} = Rm;
+ let Inst{15-14} = 0b01;
+ let Inst{13} = ff;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Zt;
+
+ let mayLoad = 1;
+ let Uses = !if(!eq(ff, 1), [FFR], []);
+ let Defs = !if(!eq(ff, 1), [FFR], []);
+}
+
+multiclass sve_mem_cld_ss<bits<4> dtype, string asm, RegisterOperand listty,
+ ZPRRegOp zprty, RegisterOperand gprty> {
+ def "" : sve_mem_cld_ss_base<dtype, 0, (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm),
+ asm, listty>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Rm]",
+ (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>;
+}
+
+multiclass sve_mem_cldff_ss<bits<4> dtype, string asm, RegisterOperand listty,
+ ZPRRegOp zprty, RegisterOperand gprty> {
+ def _REAL : sve_mem_cld_ss_base<dtype, 1, (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm),
+ asm, listty>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Rm]",
+ (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
+ (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 1>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
+ (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 0>;
+}
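+
+// For the first-faulting register-offset loads, the "[$Rn]" aliases pass XZR
+// as the index register, matching the assembly shorthand in which the offset
+// register may be omitted.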
+
+multiclass sve_mem_cldnf_si<bits<4> dtype, string asm, RegisterOperand listty,
+ ZPRRegOp zprty>
+: sve_mem_cld_si_base<dtype, 1, asm, listty, zprty>;
+
+class sve_mem_eld_si<bits<2> sz, bits<2> nregs, RegisterOperand VecList,
+ string asm, Operand immtype>
+: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, immtype:$imm4),
+ asm, "\t$Zt, $Pg/z, [$Rn, $imm4, mul vl]",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zt;
+ bits<3> Pg;
+ bits<5> Rn;
+ bits<4> imm4;
+ let Inst{31-25} = 0b1010010;
+ let Inst{24-23} = sz;
+ let Inst{22-21} = nregs;
+ let Inst{20} = 0;
+ let Inst{19-16} = imm4;
+ let Inst{15-13} = 0b111;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Zt;
+
+ let mayLoad = 1;
+}
+
+multiclass sve_mem_eld_si<bits<2> sz, bits<2> nregs, RegisterOperand VecList,
+ string asm, Operand immtype> {
+ def NAME : sve_mem_eld_si<sz, nregs, VecList, asm, immtype>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
+ (!cast<Instruction>(NAME) VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>;
+}
+
+class sve_mem_eld_ss<bits<2> sz, bits<2> nregs, RegisterOperand VecList,
+ string asm, RegisterOperand gprty>
+: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm),
+ asm, "\t$Zt, $Pg/z, [$Rn, $Rm]",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Rm;
+ bits<5> Rn;
+ bits<5> Zt;
+ let Inst{31-25} = 0b1010010;
+ let Inst{24-23} = sz;
+ let Inst{22-21} = nregs;
+ let Inst{20-16} = Rm;
+ let Inst{15-13} = 0b110;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Zt;
+
+ let mayLoad = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Memory - 32-bit Gather and Unsized Contiguous Group
+//===----------------------------------------------------------------------===//
+
+// bit xs is '1' if offsets are signed
+// bit scaled is '1' if the offsets are scaled
+class sve_mem_32b_gld_sv<bits<4> opc, bit xs, bit scaled, string asm,
+ RegisterOperand zprext>
+: I<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm),
+ asm, "\t$Zt, $Pg/z, [$Rn, $Zm]",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Rn;
+ bits<5> Zm;
+ bits<5> Zt;
+ let Inst{31-25} = 0b1000010;
+ let Inst{24-23} = opc{3-2};
+ let Inst{22} = xs;
+ let Inst{21} = scaled;
+ let Inst{20-16} = Zm;
+ let Inst{15} = 0b0;
+ let Inst{14-13} = opc{1-0};
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Zt;
+
+ let mayLoad = 1;
+ let Defs = !if(!eq(opc{0}, 1), [FFR], []);
+ let Uses = !if(!eq(opc{0}, 1), [FFR], []);
+}
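+
+// opc{3-2} (Inst{24-23}) and opc{1} (Inst{14}) carry the memory size and
+// extension kind (mapping assumed), while opc{0} = 1 marks the first-faulting
+// form, hence the conditional FFR Uses/Defs above.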
+
+multiclass sve_mem_32b_gld_sv_32_scaled<bits<4> opc, string asm,
+ RegisterOperand sxtw_opnd,
+ RegisterOperand uxtw_opnd> {
+ def _UXTW_SCALED_REAL : sve_mem_32b_gld_sv<opc, 0, 1, asm, uxtw_opnd>;
+ def _SXTW_SCALED_REAL : sve_mem_32b_gld_sv<opc, 1, 1, asm, sxtw_opnd>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
+ (!cast<Instruction>(NAME # _UXTW_SCALED_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
+ (!cast<Instruction>(NAME # _SXTW_SCALED_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
+}
+
+multiclass sve_mem_32b_gld_vs_32_unscaled<bits<4> opc, string asm,
+ RegisterOperand sxtw_opnd,
+ RegisterOperand uxtw_opnd> {
+ def _UXTW_REAL : sve_mem_32b_gld_sv<opc, 0, 0, asm, uxtw_opnd>;
+ def _SXTW_REAL : sve_mem_32b_gld_sv<opc, 1, 0, asm, sxtw_opnd>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
+ (!cast<Instruction>(NAME # _UXTW_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
+ (!cast<Instruction>(NAME # _SXTW_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
+}
+
+
+class sve_mem_32b_gld_vi<bits<4> opc, string asm, Operand imm_ty>
+: I<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5),
+ asm, "\t$Zt, $Pg/z, [$Zn, $imm5]",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Zn;
+ bits<5> Zt;
+ bits<5> imm5;
+ let Inst{31-25} = 0b1000010;
+ let Inst{24-23} = opc{3-2};
+ let Inst{22-21} = 0b01;
+ let Inst{20-16} = imm5;
+ let Inst{15} = 0b1;
+ let Inst{14-13} = opc{1-0};
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zt;
+
+ let mayLoad = 1;
+ let Defs = !if(!eq(opc{0}, 1), [FFR], []);
+ let Uses = !if(!eq(opc{0}, 1), [FFR], []);
+}
+
+multiclass sve_mem_32b_gld_vi_32_ptrs<bits<4> opc, string asm, Operand imm_ty> {
+ def _IMM_REAL : sve_mem_32b_gld_vi<opc, asm, imm_ty>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
+ (!cast<Instruction>(NAME # _IMM_REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, 0), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $imm5]",
+ (!cast<Instruction>(NAME # _IMM_REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
+ (!cast<Instruction>(NAME # _IMM_REAL) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, 0), 1>;
+}
+
+class sve_mem_prfm_si<bits<2> msz, string asm>
+: I<(outs), (ins sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, simm6s1:$imm6),
+ asm, "\t$prfop, $Pg, [$Rn, $imm6, mul vl]",
+ "",
+ []>, Sched<[]> {
+ bits<5> Rn;
+ bits<3> Pg;
+ bits<6> imm6;
+ bits<4> prfop;
+ let Inst{31-22} = 0b1000010111;
+ let Inst{21-16} = imm6;
+ let Inst{15} = 0b0;
+ let Inst{14-13} = msz;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Rn;
+ let Inst{4} = 0b0;
+ let Inst{3-0} = prfop;
+
+ let hasSideEffects = 1;
+}
+
+multiclass sve_mem_prfm_si<bits<2> msz, string asm> {
+ def NAME : sve_mem_prfm_si<msz, asm>;
+
+ def : InstAlias<asm # "\t$prfop, $Pg, [$Rn]",
+ (!cast<Instruction>(NAME) sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>;
+}
+
+class sve_mem_prfm_ss<bits<3> opc, string asm, RegisterOperand gprty>
+: I<(outs), (ins sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm),
+ asm, "\t$prfop, $Pg, [$Rn, $Rm]",
+ "",
+ []>, Sched<[]> {
+ bits<5> Rm;
+ bits<5> Rn;
+ bits<3> Pg;
+ bits<4> prfop;
+ let Inst{31-25} = 0b1000010;
+ let Inst{24-23} = opc{2-1};
+ let Inst{22-21} = 0b00;
+ let Inst{20-16} = Rm;
+ let Inst{15} = 0b1;
+ let Inst{14} = opc{0};
+ let Inst{13} = 0b0;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Rn;
+ let Inst{4} = 0b0;
+ let Inst{3-0} = prfop;
+
+ let hasSideEffects = 1;
+}
+
+class sve_mem_32b_prfm_sv<bits<2> msz, bit xs, string asm,
+ RegisterOperand zprext>
+: I<(outs), (ins sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm),
+ asm, "\t$prfop, $Pg, [$Rn, $Zm]",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Rn;
+ bits<5> Zm;
+ bits<4> prfop;
+ let Inst{31-23} = 0b100001000;
+ let Inst{22} = xs;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Zm;
+ let Inst{15} = 0b0;
+ let Inst{14-13} = msz;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Rn;
+ let Inst{4} = 0b0;
+ let Inst{3-0} = prfop;
+
+ let hasSideEffects = 1;
+}
+
+multiclass sve_mem_32b_prfm_sv_scaled<bits<2> msz, string asm,
+ RegisterOperand sxtw_opnd,
+ RegisterOperand uxtw_opnd> {
+ def _UXTW_SCALED : sve_mem_32b_prfm_sv<msz, 0, asm, uxtw_opnd>;
+ def _SXTW_SCALED : sve_mem_32b_prfm_sv<msz, 1, asm, sxtw_opnd>;
+}
+
+class sve_mem_32b_prfm_vi<bits<2> msz, string asm, Operand imm_ty>
+: I<(outs), (ins sve_prfop:$prfop, PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5),
+ asm, "\t$prfop, $Pg, [$Zn, $imm5]",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Zn;
+ bits<5> imm5;
+ bits<4> prfop;
+ let Inst{31-25} = 0b1000010;
+ let Inst{24-23} = msz;
+ let Inst{22-21} = 0b00;
+ let Inst{20-16} = imm5;
+ let Inst{15-13} = 0b111;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4} = 0b0;
+ let Inst{3-0} = prfop;
+}
+
+multiclass sve_mem_32b_prfm_vi<bits<2> msz, string asm, Operand imm_ty> {
+ def NAME : sve_mem_32b_prfm_vi<msz, asm, imm_ty>;
+
+ def : InstAlias<asm # "\t$prfop, $Pg, [$Zn]",
+ (!cast<Instruction>(NAME) sve_prfop:$prfop, PPR3bAny:$Pg, ZPR32:$Zn, 0), 1>;
+}
+
+class sve_mem_z_fill<string asm>
+: I<(outs ZPRAny:$Zt), (ins GPR64sp:$Rn, simm9:$imm9),
+ asm, "\t$Zt, [$Rn, $imm9, mul vl]",
+ "",
+ []>, Sched<[]> {
+ bits<5> Rn;
+ bits<5> Zt;
+ bits<9> imm9;
+ let Inst{31-22} = 0b1000010110;
+ let Inst{21-16} = imm9{8-3};
+ let Inst{15-13} = 0b010;
+ let Inst{12-10} = imm9{2-0};
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Zt;
+
+ let mayLoad = 1;
+}
+
+multiclass sve_mem_z_fill<string asm> {
+ def NAME : sve_mem_z_fill<asm>;
+
+ def : InstAlias<asm # "\t$Zt, [$Rn]",
+ (!cast<Instruction>(NAME) ZPRAny:$Zt, GPR64sp:$Rn, 0), 1>;
+}
+
+class sve_mem_p_fill<string asm>
+: I<(outs PPRAny:$Pt), (ins GPR64sp:$Rn, simm9:$imm9),
+ asm, "\t$Pt, [$Rn, $imm9, mul vl]",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pt;
+ bits<5> Rn;
+ bits<9> imm9;
+ let Inst{31-22} = 0b1000010110;
+ let Inst{21-16} = imm9{8-3};
+ let Inst{15-13} = 0b000;
+ let Inst{12-10} = imm9{2-0};
+ let Inst{9-5} = Rn;
+ let Inst{4} = 0b0;
+ let Inst{3-0} = Pt;
+
+ let mayLoad = 1;
+}
+
+multiclass sve_mem_p_fill<string asm> {
+ def NAME : sve_mem_p_fill<asm>;
+
+ def : InstAlias<asm # "\t$Pt, [$Rn]",
+ (!cast<Instruction>(NAME) PPRAny:$Pt, GPR64sp:$Rn, 0), 1>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Memory - 64-bit Gather Group
+//===----------------------------------------------------------------------===//
+
+// bit xs is '1' if offsets are signed
+// bit scaled is '1' if the offsets are scaled
+// bit lsl is '0' if the offsets are extended (uxtw/sxtw), '1' if shifted (lsl)
+class sve_mem_64b_gld_sv<bits<4> opc, bit xs, bit scaled, bit lsl, string asm,
+ RegisterOperand zprext>
+: I<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm),
+ asm, "\t$Zt, $Pg/z, [$Rn, $Zm]",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Rn;
+ bits<5> Zm;
+ bits<5> Zt;
+ let Inst{31-25} = 0b1100010;
+ let Inst{24-23} = opc{3-2};
+ let Inst{22} = xs;
+ let Inst{21} = scaled;
+ let Inst{20-16} = Zm;
+ let Inst{15} = lsl;
+ let Inst{14-13} = opc{1-0};
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Zt;
+
+ let mayLoad = 1;
+ let Defs = !if(!eq(opc{0}, 1), [FFR], []);
+ let Uses = !if(!eq(opc{0}, 1), [FFR], []);
+}
+
+multiclass sve_mem_64b_gld_sv_32_scaled<bits<4> opc, string asm,
+ RegisterOperand sxtw_opnd,
+ RegisterOperand uxtw_opnd> {
+ def _UXTW_SCALED_REAL : sve_mem_64b_gld_sv<opc, 0, 1, 0, asm, uxtw_opnd>;
+ def _SXTW_SCALED_REAL : sve_mem_64b_gld_sv<opc, 1, 1, 0, asm, sxtw_opnd>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
+ (!cast<Instruction>(NAME # _UXTW_SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
+ (!cast<Instruction>(NAME # _SXTW_SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
+}
+
+multiclass sve_mem_64b_gld_vs_32_unscaled<bits<4> opc, string asm,
+ RegisterOperand sxtw_opnd,
+ RegisterOperand uxtw_opnd> {
+ def _UXTW_REAL : sve_mem_64b_gld_sv<opc, 0, 0, 0, asm, uxtw_opnd>;
+ def _SXTW_REAL : sve_mem_64b_gld_sv<opc, 1, 0, 0, asm, sxtw_opnd>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
+ (!cast<Instruction>(NAME # _UXTW_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
+ (!cast<Instruction>(NAME # _SXTW_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
+}
+
+multiclass sve_mem_64b_gld_sv2_64_scaled<bits<4> opc, string asm,
+ RegisterOperand zprext> {
+ def _SCALED_REAL : sve_mem_64b_gld_sv<opc, 1, 1, 1, asm, zprext>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
+ (!cast<Instruction>(NAME # _SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), 0>;
+}
+
+multiclass sve_mem_64b_gld_vs2_64_unscaled<bits<4> opc, string asm> {
+ def _REAL : sve_mem_64b_gld_sv<opc, 1, 0, 1, asm, ZPR64ExtLSL8>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
+ (!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), 0>;
+}
+
+class sve_mem_64b_gld_vi<bits<4> opc, string asm, Operand imm_ty>
+: I<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5),
+ asm, "\t$Zt, $Pg/z, [$Zn, $imm5]",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Zn;
+ bits<5> Zt;
+ bits<5> imm5;
+ let Inst{31-25} = 0b1100010;
+ let Inst{24-23} = opc{3-2};
+ let Inst{22-21} = 0b01;
+ let Inst{20-16} = imm5;
+ let Inst{15} = 0b1;
+ let Inst{14-13} = opc{1-0};
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zt;
+
+ let mayLoad = 1;
+ let Defs = !if(!eq(opc{0}, 1), [FFR], []);
+ let Uses = !if(!eq(opc{0}, 1), [FFR], []);
+}
+
+multiclass sve_mem_64b_gld_vi_64_ptrs<bits<4> opc, string asm, Operand imm_ty> {
+ def _IMM_REAL : sve_mem_64b_gld_vi<opc, asm, imm_ty>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
+ (!cast<Instruction>(NAME # _IMM_REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $imm5]",
+ (!cast<Instruction>(NAME # _IMM_REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
+ (!cast<Instruction>(NAME # _IMM_REAL) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 1>;
+}
+
+// bit lsl is '0' if the offsets are extended (uxtw/sxtw), '1' if shifted (lsl)
+class sve_mem_64b_prfm_sv<bits<2> msz, bit xs, bit lsl, string asm,
+ RegisterOperand zprext>
+: I<(outs), (ins sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm),
+ asm, "\t$prfop, $Pg, [$Rn, $Zm]",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Rn;
+ bits<5> Zm;
+ bits<4> prfop;
+ let Inst{31-23} = 0b110001000;
+ let Inst{22} = xs;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Zm;
+ let Inst{15} = lsl;
+ let Inst{14-13} = msz;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Rn;
+ let Inst{4} = 0b0;
+ let Inst{3-0} = prfop;
+
+ let hasSideEffects = 1;
+}
+
+multiclass sve_mem_64b_prfm_sv_ext_scaled<bits<2> msz, string asm,
+ RegisterOperand sxtw_opnd,
+ RegisterOperand uxtw_opnd> {
+ def _UXTW_SCALED : sve_mem_64b_prfm_sv<msz, 0, 0, asm, uxtw_opnd>;
+ def _SXTW_SCALED : sve_mem_64b_prfm_sv<msz, 1, 0, asm, sxtw_opnd>;
+}
+
+multiclass sve_mem_64b_prfm_sv_lsl_scaled<bits<2> msz, string asm,
+ RegisterOperand zprext> {
+ def NAME : sve_mem_64b_prfm_sv<msz, 1, 1, asm, zprext>;
+}
+
+
+class sve_mem_64b_prfm_vi<bits<2> msz, string asm, Operand imm_ty>
+: I<(outs), (ins sve_prfop:$prfop, PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5),
+ asm, "\t$prfop, $Pg, [$Zn, $imm5]",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Zn;
+ bits<5> imm5;
+ bits<4> prfop;
+ let Inst{31-25} = 0b1100010;
+ let Inst{24-23} = msz;
+ let Inst{22-21} = 0b00;
+ let Inst{20-16} = imm5;
+ let Inst{15-13} = 0b111;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4} = 0b0;
+ let Inst{3-0} = prfop;
+
+ let hasSideEffects = 1;
+}
+
+multiclass sve_mem_64b_prfm_vi<bits<2> msz, string asm, Operand imm_ty> {
+ def NAME : sve_mem_64b_prfm_vi<msz, asm, imm_ty>;
+
+ def : InstAlias<asm # "\t$prfop, $Pg, [$Zn]",
+ (!cast<Instruction>(NAME) sve_prfop:$prfop, PPR3bAny:$Pg, ZPR64:$Zn, 0), 1>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// SVE Compute Vector Address Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_bin_cons_misc_0_a<bits<2> opc, bits<2> msz, string asm,
+ ZPRRegOp zprty, RegisterOperand zprext>
+: I<(outs zprty:$Zd), (ins zprty:$Zn, zprext:$Zm),
+ asm, "\t$Zd, [$Zn, $Zm]",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ bits<5> Zm;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Zm;
+ let Inst{15-12} = 0b1010;
+ let Inst{11-10} = msz;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve_int_bin_cons_misc_0_a_uxtw<bits<2> opc, string asm> {
+ def _0 : sve_int_bin_cons_misc_0_a<opc, 0b00, asm, ZPR64, ZPR64ExtUXTW8>;
+ def _1 : sve_int_bin_cons_misc_0_a<opc, 0b01, asm, ZPR64, ZPR64ExtUXTW16>;
+ def _2 : sve_int_bin_cons_misc_0_a<opc, 0b10, asm, ZPR64, ZPR64ExtUXTW32>;
+ def _3 : sve_int_bin_cons_misc_0_a<opc, 0b11, asm, ZPR64, ZPR64ExtUXTW64>;
+}
+
+multiclass sve_int_bin_cons_misc_0_a_sxtw<bits<2> opc, string asm> {
+ def _0 : sve_int_bin_cons_misc_0_a<opc, 0b00, asm, ZPR64, ZPR64ExtSXTW8>;
+ def _1 : sve_int_bin_cons_misc_0_a<opc, 0b01, asm, ZPR64, ZPR64ExtSXTW16>;
+ def _2 : sve_int_bin_cons_misc_0_a<opc, 0b10, asm, ZPR64, ZPR64ExtSXTW32>;
+ def _3 : sve_int_bin_cons_misc_0_a<opc, 0b11, asm, ZPR64, ZPR64ExtSXTW64>;
+}
+
+multiclass sve_int_bin_cons_misc_0_a_32_lsl<bits<2> opc, string asm> {
+ def _0 : sve_int_bin_cons_misc_0_a<opc, 0b00, asm, ZPR32, ZPR32ExtLSL8>;
+ def _1 : sve_int_bin_cons_misc_0_a<opc, 0b01, asm, ZPR32, ZPR32ExtLSL16>;
+ def _2 : sve_int_bin_cons_misc_0_a<opc, 0b10, asm, ZPR32, ZPR32ExtLSL32>;
+ def _3 : sve_int_bin_cons_misc_0_a<opc, 0b11, asm, ZPR32, ZPR32ExtLSL64>;
+}
+
+multiclass sve_int_bin_cons_misc_0_a_64_lsl<bits<2> opc, string asm> {
+ def _0 : sve_int_bin_cons_misc_0_a<opc, 0b00, asm, ZPR64, ZPR64ExtLSL8>;
+ def _1 : sve_int_bin_cons_misc_0_a<opc, 0b01, asm, ZPR64, ZPR64ExtLSL16>;
+ def _2 : sve_int_bin_cons_misc_0_a<opc, 0b10, asm, ZPR64, ZPR64ExtLSL32>;
+ def _3 : sve_int_bin_cons_misc_0_a<opc, 0b11, asm, ZPR64, ZPR64ExtLSL64>;
+}
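+
+// Compute-vector-address usage sketch (illustrative only; defm name and
+// opcode value are assumed, the real records live in AArch64SVEInstrInfo.td):
+//   defm ADR_LSL_ZZZ_D : sve_int_bin_cons_misc_0_a_64_lsl<0b11, "adr">;
+// The msz parameter selects the shift (0-3) applied to the offset vector,
+// matching the ZPR*ExtLSL8/16/32/64 operand classes used above.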
+
+
+//===----------------------------------------------------------------------===//
+// SVE Integer Misc - Unpredicated Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_bin_cons_misc_0_b<bits<2> sz, string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm),
+ asm, "\t$Zd, $Zn, $Zm",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zm;
+ bits<5> Zn;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = sz;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Zm;
+ let Inst{15-10} = 0b101100;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve_int_bin_cons_misc_0_b<string asm> {
+ def _H : sve_int_bin_cons_misc_0_b<0b01, asm, ZPR16>;
+ def _S : sve_int_bin_cons_misc_0_b<0b10, asm, ZPR32>;
+ def _D : sve_int_bin_cons_misc_0_b<0b11, asm, ZPR64>;
+}
+
+class sve_int_bin_cons_misc_0_c<bits<8> opc, string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins zprty:$Zn),
+ asm, "\t$Zd, $Zn",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = opc{7-6};
+ let Inst{21} = 0b1;
+ let Inst{20-16} = opc{5-1};
+ let Inst{15-11} = 0b10111;
+ let Inst{10} = opc{0};
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Integer Reduction Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_reduce<bits<2> sz8_32, bits<2> fmt, bits<3> opc, string asm,
+ ZPRRegOp zprty, RegisterClass regtype>
+: I<(outs regtype:$Vd), (ins PPR3bAny:$Pg, zprty:$Zn),
+ asm, "\t$Vd, $Pg, $Zn",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Vd;
+ bits<5> Zn;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = sz8_32;
+ let Inst{21} = 0b0;
+ let Inst{20-19} = fmt;
+ let Inst{18-16} = opc;
+ let Inst{15-13} = 0b001;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Vd;
+}
+
+multiclass sve_int_reduce_0_saddv<bits<3> opc, string asm> {
+ def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64>;
+ def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64>;
+ def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64>;
+}
+
+multiclass sve_int_reduce_0_uaddv<bits<3> opc, string asm> {
+ def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64>;
+ def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64>;
+ def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64>;
+ def _D : sve_int_reduce<0b11, 0b00, opc, asm, ZPR64, FPR64>;
+}
+
+multiclass sve_int_reduce_1<bits<3> opc, string asm> {
+ def _B : sve_int_reduce<0b00, 0b01, opc, asm, ZPR8, FPR8>;
+ def _H : sve_int_reduce<0b01, 0b01, opc, asm, ZPR16, FPR16>;
+ def _S : sve_int_reduce<0b10, 0b01, opc, asm, ZPR32, FPR32>;
+ def _D : sve_int_reduce<0b11, 0b01, opc, asm, ZPR64, FPR64>;
+}
+
+multiclass sve_int_reduce_2<bits<3> opc, string asm> {
+ def _B : sve_int_reduce<0b00, 0b11, opc, asm, ZPR8, FPR8>;
+ def _H : sve_int_reduce<0b01, 0b11, opc, asm, ZPR16, FPR16>;
+ def _S : sve_int_reduce<0b10, 0b11, opc, asm, ZPR32, FPR32>;
+ def _D : sve_int_reduce<0b11, 0b11, opc, asm, ZPR64, FPR64>;
+}
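+
+// The SADDV/UADDV multiclasses accumulate into a 64-bit D register regardless
+// of the source element size (the signed form has no _D variant), whereas the
+// _1 and _2 reduction groups return a scalar of the same width as the source
+// elements.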
+
+class sve_int_movprfx_pred<bits<2> sz8_32, bits<3> opc, string asm,
+ ZPRRegOp zprty, string pg_suffix, dag iops>
+: I<(outs zprty:$Zd), iops,
+ asm, "\t$Zd, $Pg"#pg_suffix#", $Zn",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Zd;
+ bits<5> Zn;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = sz8_32;
+ let Inst{21-19} = 0b010;
+ let Inst{18-16} = opc;
+ let Inst{15-13} = 0b001;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+
+ let ElementSize = zprty.ElementSize;
+}
+
+multiclass sve_int_movprfx_pred_merge<bits<3> opc, string asm> {
+let Constraints = "$Zd = $_Zd" in {
+ def _B : sve_int_movprfx_pred<0b00, opc, asm, ZPR8, "/m",
+ (ins ZPR8:$_Zd, PPR3bAny:$Pg, ZPR8:$Zn)>;
+ def _H : sve_int_movprfx_pred<0b01, opc, asm, ZPR16, "/m",
+ (ins ZPR16:$_Zd, PPR3bAny:$Pg, ZPR16:$Zn)>;
+ def _S : sve_int_movprfx_pred<0b10, opc, asm, ZPR32, "/m",
+ (ins ZPR32:$_Zd, PPR3bAny:$Pg, ZPR32:$Zn)>;
+ def _D : sve_int_movprfx_pred<0b11, opc, asm, ZPR64, "/m",
+ (ins ZPR64:$_Zd, PPR3bAny:$Pg, ZPR64:$Zn)>;
+}
+}
+
+multiclass sve_int_movprfx_pred_zero<bits<3> opc, string asm> {
+ def _B : sve_int_movprfx_pred<0b00, opc, asm, ZPR8, "/z",
+ (ins PPR3bAny:$Pg, ZPR8:$Zn)>;
+ def _H : sve_int_movprfx_pred<0b01, opc, asm, ZPR16, "/z",
+ (ins PPR3bAny:$Pg, ZPR16:$Zn)>;
+ def _S : sve_int_movprfx_pred<0b10, opc, asm, ZPR32, "/z",
+ (ins PPR3bAny:$Pg, ZPR32:$Zn)>;
+ def _D : sve_int_movprfx_pred<0b11, opc, asm, ZPR64, "/z",
+ (ins PPR3bAny:$Pg, ZPR64:$Zn)>;
+}
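+
+// The merging variant ties the destination to an explicit $_Zd input via the
+// shared Constraints, preserving inactive lanes, while the zeroing variant
+// takes only the governing predicate and the source vector; both are presumed
+// to model the predicated MOVPRFX forms.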
+
+//===----------------------------------------------------------------------===//
+// SVE Propagate Break Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_brkp<bits<2> opc, string asm>
+: I<(outs PPR8:$Pd), (ins PPRAny:$Pg, PPR8:$Pn, PPR8:$Pm),
+ asm, "\t$Pd, $Pg/z, $Pn, $Pm",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pd;
+ bits<4> Pg;
+ bits<4> Pm;
+ bits<4> Pn;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23} = 0b0;
+ let Inst{22} = opc{1};
+ let Inst{21-20} = 0b00;
+ let Inst{19-16} = Pm;
+ let Inst{15-14} = 0b11;
+ let Inst{13-10} = Pg;
+ let Inst{9} = 0b0;
+ let Inst{8-5} = Pn;
+ let Inst{4} = opc{0};
+ let Inst{3-0} = Pd;
+
+ let Defs = !if(!eq (opc{1}, 1), [NZCV], []);
+}
+
+
+//===----------------------------------------------------------------------===//
+// SVE Partition Break Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_brkn<bit S, string asm>
+: I<(outs PPR8:$Pdm), (ins PPRAny:$Pg, PPR8:$Pn, PPR8:$_Pdm),
+ asm, "\t$Pdm, $Pg/z, $Pn, $_Pdm",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pdm;
+ bits<4> Pg;
+ bits<4> Pn;
+ let Inst{31-23} = 0b001001010;
+ let Inst{22} = S;
+ let Inst{21-14} = 0b01100001;
+ let Inst{13-10} = Pg;
+ let Inst{9} = 0b0;
+ let Inst{8-5} = Pn;
+ let Inst{4} = 0b0;
+ let Inst{3-0} = Pdm;
+
+ let Constraints = "$Pdm = $_Pdm";
+ let Defs = !if(!eq (S, 0b1), [NZCV], []);
+}
+
+class sve_int_break<bits<3> opc, string asm, string suffix, dag iops>
+: I<(outs PPR8:$Pd), iops,
+ asm, "\t$Pd, $Pg"#suffix#", $Pn",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pd;
+ bits<4> Pg;
+ bits<4> Pn;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = opc{2-1};
+ let Inst{21-14} = 0b01000001;
+ let Inst{13-10} = Pg;
+ let Inst{9} = 0b0;
+ let Inst{8-5} = Pn;
+ let Inst{4} = opc{0};
+ let Inst{3-0} = Pd;
+
+ let Constraints = !if(!eq (opc{0}, 1), "$Pd = $_Pd", "");
+ let Defs = !if(!eq (opc{1}, 1), [NZCV], []);
+}
+
+multiclass sve_int_break_m<bits<3> opc, string asm> {
+ def NAME : sve_int_break<opc, asm, "/m", (ins PPR8:$_Pd, PPRAny:$Pg, PPR8:$Pn)>;
+}
+
+multiclass sve_int_break_z<bits<3> opc, string asm> {
+ def NAME : sve_int_break<opc, asm, "/z", (ins PPRAny:$Pg, PPR8:$Pn)>;
+}
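+
+// For sve_int_break, opc{0} = 1 selects the merging form, which is why the
+// Constraints string ties $Pd to $_Pd only in that case, and opc{1} = 1 marks
+// the flag-setting variant that defines NZCV.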
+