Diffstat (limited to 'capstone/suite/synctools/tablegen/AArch64')
23 files changed, 36567 insertions, 0 deletions
diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64.td b/capstone/suite/synctools/tablegen/AArch64/AArch64.td new file mode 100644 index 000000000..a69d38144 --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64.td @@ -0,0 +1,579 @@ +//=- AArch64.td - Describe the AArch64 Target Machine --------*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces which we are implementing. +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// AArch64 Subtarget features. +// + +def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true", + "Enable ARMv8 FP">; + +def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", + "Enable Advanced SIMD instructions", [FeatureFPARMv8]>; + +def FeatureSM4 : SubtargetFeature< + "sm4", "HasSM4", "true", + "Enable SM3 and SM4 support", [FeatureNEON]>; + +def FeatureSHA2 : SubtargetFeature< + "sha2", "HasSHA2", "true", + "Enable SHA1 and SHA256 support", [FeatureNEON]>; + +def FeatureSHA3 : SubtargetFeature< + "sha3", "HasSHA3", "true", + "Enable SHA512 and SHA3 support", [FeatureNEON, FeatureSHA2]>; + +def FeatureAES : SubtargetFeature< + "aes", "HasAES", "true", + "Enable AES support", [FeatureNEON]>; + +// Crypto has been split up and any combination is now valid (see the +// crypto defintions above). Also, crypto is now context sensitive: +// it has a different meaning for e.g. Armv8.4 than it has for Armv8.2. +// Therefore, we rely on Clang, the user interacing tool, to pass on the +// appropriate crypto options. But here in the backend, crypto has very little +// meaning anymore. We kept the Crypto defintion here for backward +// compatibility, and now imply features SHA2 and AES, which was the +// "traditional" meaning of Crypto. +def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", + "Enable cryptographic instructions", [FeatureNEON, FeatureSHA2, FeatureAES]>; + +def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", + "Enable ARMv8 CRC-32 checksum instructions">; + +def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", + "Enable ARMv8 Reliability, Availability and Serviceability Extensions">; + +def FeatureLSE : SubtargetFeature<"lse", "HasLSE", "true", + "Enable ARMv8.1 Large System Extension (LSE) atomic instructions">; + +def FeatureRDM : SubtargetFeature<"rdm", "HasRDM", "true", + "Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions">; + +def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", + "Enable ARMv8 PMUv3 Performance Monitors extension">; + +def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", + "Full FP16", [FeatureFPARMv8]>; + +def FeatureSPE : SubtargetFeature<"spe", "HasSPE", "true", + "Enable Statistical Profiling extension">; + +def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true", + "Enable Scalable Vector Extension (SVE) instructions">; + +/// Cyclone has register move instructions which are "free". 
+def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", + "Has zero-cycle register moves">; + +/// Cyclone has instructions which zero registers for "free". +def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", + "Has zero-cycle zeroing instructions">; + +/// ... but the floating-point version doesn't quite work in rare cases on older +/// CPUs. +def FeatureZCZeroingFPWorkaround : SubtargetFeature<"zcz-fp-workaround", + "HasZeroCycleZeroingFPWorkaround", "true", + "The zero-cycle floating-point zeroing instruction has a bug">; + +def FeatureStrictAlign : SubtargetFeature<"strict-align", + "StrictAlign", "true", + "Disallow all unaligned memory " + "access">; + +def FeatureReserveX18 : SubtargetFeature<"reserve-x18", "ReserveX18", "true", + "Reserve X18, making it unavailable " + "as a GPR">; + +def FeatureReserveX20 : SubtargetFeature<"reserve-x20", "ReserveX20", "true", + "Reserve X20, making it unavailable " + "as a GPR">; + +def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true", + "Use alias analysis during codegen">; + +def FeatureBalanceFPOps : SubtargetFeature<"balance-fp-ops", "BalanceFPOps", + "true", + "balance mix of odd and even D-registers for fp multiply(-accumulate) ops">; + +def FeaturePredictableSelectIsExpensive : SubtargetFeature< + "predictable-select-expensive", "PredictableSelectIsExpensive", "true", + "Prefer likely predicted branches over selects">; + +def FeatureCustomCheapAsMoveHandling : SubtargetFeature<"custom-cheap-as-move", + "CustomAsCheapAsMove", "true", + "Use custom code for TargetInstrInfo::isAsCheapAsAMove()">; + +def FeatureExynosCheapAsMoveHandling : SubtargetFeature<"exynos-cheap-as-move", + "ExynosAsCheapAsMove", "true", + "Use Exynos specific code in TargetInstrInfo::isAsCheapAsAMove()", + [FeatureCustomCheapAsMoveHandling]>; + +def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler", + "UsePostRAScheduler", "true", "Schedule again after register allocation">; + +def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store", + "Misaligned128StoreIsSlow", "true", "Misaligned 128 bit stores are slow">; + +def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128", + "Paired128IsSlow", "true", "Paired 128 bit loads and stores are slow">; + +def FeatureSlowSTRQro : SubtargetFeature<"slow-strqro-store", "STRQroIsSlow", + "true", "STR of Q register with register offset is slow">; + +def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature< + "alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern", + "true", "Use alternative pattern for sextload convert to f32">; + +def FeatureArithmeticBccFusion : SubtargetFeature< + "arith-bcc-fusion", "HasArithmeticBccFusion", "true", + "CPU fuses arithmetic+bcc operations">; + +def FeatureArithmeticCbzFusion : SubtargetFeature< + "arith-cbz-fusion", "HasArithmeticCbzFusion", "true", + "CPU fuses arithmetic + cbz/cbnz operations">; + +def FeatureFuseAddress : SubtargetFeature< + "fuse-address", "HasFuseAddress", "true", + "CPU fuses address generation and memory operations">; + +def FeatureFuseAES : SubtargetFeature< + "fuse-aes", "HasFuseAES", "true", + "CPU fuses AES crypto operations">; + +def FeatureFuseCCSelect : SubtargetFeature< + "fuse-csel", "HasFuseCCSelect", "true", + "CPU fuses conditional select operations">; + +def FeatureFuseLiterals : SubtargetFeature< + "fuse-literals", "HasFuseLiterals", "true", + "CPU fuses literal generation operations">; + +def FeatureDisableLatencySchedHeuristic : 
SubtargetFeature< + "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true", + "Disable latency scheduling heuristic">; + +def FeatureRCPC : SubtargetFeature<"rcpc", "HasRCPC", "true", + "Enable support for RCPC extension">; + +def FeatureUseRSqrt : SubtargetFeature< + "use-reciprocal-square-root", "UseRSqrt", "true", + "Use the reciprocal square root approximation">; + +def FeatureDotProd : SubtargetFeature< + "dotprod", "HasDotProd", "true", + "Enable dot product support">; + +def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates", + "NegativeImmediates", "false", + "Convert immediates and instructions " + "to their negated or complemented " + "equivalent when the immediate does " + "not fit in the encoding.">; + +def FeatureLSLFast : SubtargetFeature< + "lsl-fast", "HasLSLFast", "true", + "CPU has a fastpath logical shift of up to 3 places">; + +def FeatureAggressiveFMA : + SubtargetFeature<"aggressive-fma", + "HasAggressiveFMA", + "true", + "Enable Aggressive FMA for floating-point.">; + +//===----------------------------------------------------------------------===// +// Architectures. +// + +def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", + "Support ARM v8.1a instructions", [FeatureCRC, FeatureLSE, FeatureRDM]>; + +def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true", + "Support ARM v8.2a instructions", [HasV8_1aOps, FeatureRAS]>; + +def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true", + "Support ARM v8.3a instructions", [HasV8_2aOps, FeatureRCPC]>; + +def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true", + "Support ARM v8.4a instructions", [HasV8_3aOps, FeatureDotProd]>; + +//===----------------------------------------------------------------------===// +// Register File Description +//===----------------------------------------------------------------------===// + +include "AArch64RegisterInfo.td" +include "AArch64RegisterBanks.td" +include "AArch64CallingConvention.td" + +//===----------------------------------------------------------------------===// +// Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "AArch64Schedule.td" +include "AArch64InstrInfo.td" + +def AArch64InstrInfo : InstrInfo; + +//===----------------------------------------------------------------------===// +// Named operands for MRS/MSR/TLBI/... +//===----------------------------------------------------------------------===// + +include "AArch64SystemOperands.td" + +//===----------------------------------------------------------------------===// +// AArch64 Processors supported. 
+// +include "AArch64SchedA53.td" +include "AArch64SchedA57.td" +include "AArch64SchedCyclone.td" +include "AArch64SchedFalkor.td" +include "AArch64SchedKryo.td" +include "AArch64SchedExynosM1.td" +include "AArch64SchedExynosM3.td" +include "AArch64SchedThunderX.td" +include "AArch64SchedThunderX2T99.td" + +def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35", + "Cortex-A35 ARM processors", [ + FeatureCRC, + FeatureCrypto, + FeatureFPARMv8, + FeatureNEON, + FeaturePerfMon + ]>; + +def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", + "Cortex-A53 ARM processors", [ + FeatureBalanceFPOps, + FeatureCRC, + FeatureCrypto, + FeatureCustomCheapAsMoveHandling, + FeatureFPARMv8, + FeatureFuseAES, + FeatureNEON, + FeaturePerfMon, + FeaturePostRAScheduler, + FeatureUseAA + ]>; + +def ProcA55 : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55", + "Cortex-A55 ARM processors", [ + HasV8_2aOps, + FeatureCrypto, + FeatureFPARMv8, + FeatureFuseAES, + FeatureNEON, + FeatureFullFP16, + FeatureDotProd, + FeatureRCPC, + FeaturePerfMon + ]>; + +def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", + "Cortex-A57 ARM processors", [ + FeatureBalanceFPOps, + FeatureCRC, + FeatureCrypto, + FeatureCustomCheapAsMoveHandling, + FeatureFPARMv8, + FeatureFuseAES, + FeatureFuseLiterals, + FeatureNEON, + FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive + ]>; + +def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72", + "Cortex-A72 ARM processors", [ + FeatureCRC, + FeatureCrypto, + FeatureFPARMv8, + FeatureFuseAES, + FeatureNEON, + FeaturePerfMon + ]>; + +def ProcA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73", + "Cortex-A73 ARM processors", [ + FeatureCRC, + FeatureCrypto, + FeatureFPARMv8, + FeatureFuseAES, + FeatureNEON, + FeaturePerfMon + ]>; + +def ProcA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75", + "Cortex-A75 ARM processors", [ + HasV8_2aOps, + FeatureCrypto, + FeatureFPARMv8, + FeatureFuseAES, + FeatureNEON, + FeatureFullFP16, + FeatureDotProd, + FeatureRCPC, + FeaturePerfMon + ]>; + +// Note that cyclone does not fuse AES instructions, but newer apple chips do +// perform the fusion and cyclone is used by default when targetting apple OSes. 
+def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone", + "Cyclone", [ + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureCrypto, + FeatureDisableLatencySchedHeuristic, + FeatureFPARMv8, + FeatureFuseAES, + FeatureNEON, + FeaturePerfMon, + FeatureZCRegMove, + FeatureZCZeroing, + FeatureZCZeroingFPWorkaround + ]>; + +def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1", + "Samsung Exynos-M1 processors", + [FeatureSlowPaired128, + FeatureCRC, + FeatureCrypto, + FeatureExynosCheapAsMoveHandling, + FeatureFPARMv8, + FeatureFuseAES, + FeatureNEON, + FeaturePerfMon, + FeaturePostRAScheduler, + FeatureSlowMisaligned128Store, + FeatureUseRSqrt, + FeatureZCZeroing]>; + +def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1", + "Samsung Exynos-M2 processors", + [FeatureSlowPaired128, + FeatureCRC, + FeatureCrypto, + FeatureExynosCheapAsMoveHandling, + FeatureFPARMv8, + FeatureFuseAES, + FeatureNEON, + FeaturePerfMon, + FeaturePostRAScheduler, + FeatureSlowMisaligned128Store, + FeatureZCZeroing]>; + +def ProcExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3", + "Samsung Exynos-M3 processors", + [FeatureCRC, + FeatureCrypto, + FeatureExynosCheapAsMoveHandling, + FeatureFPARMv8, + FeatureFuseAddress, + FeatureFuseAES, + FeatureFuseCCSelect, + FeatureFuseLiterals, + FeatureLSLFast, + FeatureNEON, + FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureZCZeroing]>; + +def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo", + "Qualcomm Kryo processors", [ + FeatureCRC, + FeatureCrypto, + FeatureCustomCheapAsMoveHandling, + FeatureFPARMv8, + FeatureNEON, + FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureZCZeroing, + FeatureLSLFast + ]>; + +def ProcFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor", + "Qualcomm Falkor processors", [ + FeatureCRC, + FeatureCrypto, + FeatureCustomCheapAsMoveHandling, + FeatureFPARMv8, + FeatureNEON, + FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureRDM, + FeatureZCZeroing, + FeatureLSLFast, + FeatureSlowSTRQro + ]>; + +def ProcSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira", + "Qualcomm Saphira processors", [ + FeatureCrypto, + FeatureCustomCheapAsMoveHandling, + FeatureFPARMv8, + FeatureNEON, + FeatureSPE, + FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureZCZeroing, + FeatureLSLFast, + HasV8_3aOps]>; + +def ProcThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily", + "ThunderX2T99", + "Cavium ThunderX2 processors", [ + FeatureAggressiveFMA, + FeatureCRC, + FeatureCrypto, + FeatureFPARMv8, + FeatureArithmeticBccFusion, + FeatureNEON, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureLSE, + HasV8_1aOps]>; + +def ProcThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX", + "Cavium ThunderX processors", [ + FeatureCRC, + FeatureCrypto, + FeatureFPARMv8, + FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureNEON]>; + +def ProcThunderXT88 : SubtargetFeature<"thunderxt88", "ARMProcFamily", + "ThunderXT88", + "Cavium ThunderX processors", [ + FeatureCRC, + FeatureCrypto, + FeatureFPARMv8, + FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureNEON]>; + +def ProcThunderXT81 : SubtargetFeature<"thunderxt81", "ARMProcFamily", + 
"ThunderXT81", + "Cavium ThunderX processors", [ + FeatureCRC, + FeatureCrypto, + FeatureFPARMv8, + FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureNEON]>; + +def ProcThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily", + "ThunderXT83", + "Cavium ThunderX processors", [ + FeatureCRC, + FeatureCrypto, + FeatureFPARMv8, + FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureNEON]>; + +def : ProcessorModel<"generic", NoSchedModel, [ + FeatureFPARMv8, + FeatureFuseAES, + FeatureNEON, + FeaturePerfMon, + FeaturePostRAScheduler + ]>; + +// FIXME: Cortex-A35 and Cortex-A55 are currently modeled as a Cortex-A53. +def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>; +def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>; +def : ProcessorModel<"cortex-a55", CortexA53Model, [ProcA55]>; +def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>; +// FIXME: Cortex-A72, Cortex-A73 and Cortex-A75 are currently modeled as a Cortex-A57. +def : ProcessorModel<"cortex-a72", CortexA57Model, [ProcA72]>; +def : ProcessorModel<"cortex-a73", CortexA57Model, [ProcA73]>; +def : ProcessorModel<"cortex-a75", CortexA57Model, [ProcA75]>; +def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>; +def : ProcessorModel<"exynos-m1", ExynosM1Model, [ProcExynosM1]>; +def : ProcessorModel<"exynos-m2", ExynosM1Model, [ProcExynosM2]>; +def : ProcessorModel<"exynos-m3", ExynosM3Model, [ProcExynosM3]>; +def : ProcessorModel<"exynos-m4", ExynosM3Model, [ProcExynosM3]>; +def : ProcessorModel<"falkor", FalkorModel, [ProcFalkor]>; +def : ProcessorModel<"saphira", FalkorModel, [ProcSaphira]>; +def : ProcessorModel<"kryo", KryoModel, [ProcKryo]>; +// Cavium ThunderX/ThunderX T8X Processors +def : ProcessorModel<"thunderx", ThunderXT8XModel, [ProcThunderX]>; +def : ProcessorModel<"thunderxt88", ThunderXT8XModel, [ProcThunderXT88]>; +def : ProcessorModel<"thunderxt81", ThunderXT8XModel, [ProcThunderXT81]>; +def : ProcessorModel<"thunderxt83", ThunderXT8XModel, [ProcThunderXT83]>; +// Cavium ThunderX2T9X Processors. Formerly Broadcom Vulcan. +def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, [ProcThunderX2T99]>; + +//===----------------------------------------------------------------------===// +// Assembly parser +//===----------------------------------------------------------------------===// + +def GenericAsmParserVariant : AsmParserVariant { + int Variant = 0; + string Name = "generic"; + string BreakCharacters = "."; + string TokenizingCharacters = "[]*!/"; +} + +def AppleAsmParserVariant : AsmParserVariant { + int Variant = 1; + string Name = "apple-neon"; + string BreakCharacters = "."; + string TokenizingCharacters = "[]*!/"; +} + +//===----------------------------------------------------------------------===// +// Assembly printer +//===----------------------------------------------------------------------===// +// AArch64 Uses the MC printer for asm output, so make sure the TableGen +// AsmWriter bits get associated with the correct class. 
+def GenericAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + int PassSubtarget = 1; + int Variant = 0; + bit isMCAsmWriter = 1; +} + +def AppleAsmWriter : AsmWriter { + let AsmWriterClassName = "AppleInstPrinter"; + int PassSubtarget = 1; + int Variant = 1; + int isMCAsmWriter = 1; +} + +//===----------------------------------------------------------------------===// +// Target Declaration +//===----------------------------------------------------------------------===// + +def AArch64 : Target { + let InstructionSet = AArch64InstrInfo; + let AssemblyParserVariants = [GenericAsmParserVariant, AppleAsmParserVariant]; + let AssemblyWriters = [GenericAsmWriter, AppleAsmWriter]; + let AllowRegisterRenaming = 1; +} diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64CallingConvention.td b/capstone/suite/synctools/tablegen/AArch64/AArch64CallingConvention.td new file mode 100644 index 000000000..30492003d --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64CallingConvention.td @@ -0,0 +1,366 @@ +//=- AArch64CallingConv.td - Calling Conventions for AArch64 -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This describes the calling conventions for AArch64 architecture. +// +//===----------------------------------------------------------------------===// + +/// CCIfAlign - Match of the original alignment of the arg +class CCIfAlign<string Align, CCAction A> : + CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>; +/// CCIfBigEndian - Match only if we're in big endian mode. +class CCIfBigEndian<CCAction A> : + CCIf<"State.getMachineFunction().getDataLayout().isBigEndian()", A>; + +//===----------------------------------------------------------------------===// +// ARM AAPCS64 Calling Convention +//===----------------------------------------------------------------------===// + +def CC_AArch64_AAPCS : CallingConv<[ + CCIfType<[iPTR], CCBitConvertToType<i64>>, + CCIfType<[v2f32], CCBitConvertToType<v2i32>>, + CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>, + + // Big endian vectors must be passed as if they were 1-element vectors so that + // their lanes are in a consistent order. + CCIfBigEndian<CCIfType<[v2i32, v2f32, v4i16, v4f16, v8i8], + CCBitConvertToType<f64>>>, + CCIfBigEndian<CCIfType<[v2i64, v2f64, v4i32, v4f32, v8i16, v8f16, v16i8], + CCBitConvertToType<f128>>>, + + // An SRet is passed in X8, not X0 like a normal pointer parameter. + CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X8], [W8]>>>, + + // Put ByVal arguments directly on the stack. Minimum size and alignment of a + // slot is 64-bit. + CCIfByVal<CCPassByVal<8, 8>>, + + // The 'nest' parameter, if any, is passed in X18. + // Darwin uses X18 as the platform register and hence 'nest' isn't currently + // supported there. + CCIfNest<CCAssignToReg<[X18]>>, + + // Pass SwiftSelf in a callee saved register. + CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>, + + // A SwiftError is passed in X21. + CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>, + + CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>, + + // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, + // up to eight each of GPR and FPR. 
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, + CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], + [X0, X1, X2, X3, X4, X5, X6, X7]>>, + // i128 is split to two i64s, we can't fit half to register X7. + CCIfType<[i64], CCIfSplit<CCAssignToRegWithShadow<[X0, X2, X4, X6], + [X0, X1, X3, X5]>>>, + + // i128 is split to two i64s, and its stack alignment is 16 bytes. + CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [X7]>>>, + + CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], + [W0, W1, W2, W3, W4, W5, W6, W7]>>, + CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16], + CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16], + CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + + // If more than will fit in registers, pass them on the stack instead. + CCIfType<[i1, i8, i16, f16], CCAssignToStack<8, 8>>, + CCIfType<[i32, f32], CCAssignToStack<8, 8>>, + CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16], + CCAssignToStack<8, 8>>, + CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16], + CCAssignToStack<16, 16>> +]>; + +def RetCC_AArch64_AAPCS : CallingConv<[ + CCIfType<[iPTR], CCBitConvertToType<i64>>, + CCIfType<[v2f32], CCBitConvertToType<v2i32>>, + CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>, + + CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>, + + // Big endian vectors must be passed as if they were 1-element vectors so that + // their lanes are in a consistent order. + CCIfBigEndian<CCIfType<[v2i32, v2f32, v4i16, v4f16, v8i8], + CCBitConvertToType<f64>>>, + CCIfBigEndian<CCIfType<[v2i64, v2f64, v4i32, v4f32, v8i16, v8f16, v16i8], + CCBitConvertToType<f128>>>, + + CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, + CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], + [X0, X1, X2, X3, X4, X5, X6, X7]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], + [W0, W1, W2, W3, W4, W5, W6, W7]>>, + CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16], + CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16], + CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>> +]>; + +// Vararg functions on windows pass floats in integer registers +def CC_AArch64_Win64_VarArg : CallingConv<[ + CCIfType<[f16, f32], CCPromoteToType<f64>>, + CCIfType<[f64], CCBitConvertToType<i64>>, + CCDelegateTo<CC_AArch64_AAPCS> +]>; + + +// Darwin uses a calling convention which differs in only two ways +// from the standard one at this level: +// + i128s (i.e. split i64s) don't need even registers. +// + Stack slots are sized as needed rather than being at least 64-bit. 
+def CC_AArch64_DarwinPCS : CallingConv<[ + CCIfType<[iPTR], CCBitConvertToType<i64>>, + CCIfType<[v2f32], CCBitConvertToType<v2i32>>, + CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>, + + // An SRet is passed in X8, not X0 like a normal pointer parameter. + CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X8], [W8]>>>, + + // Put ByVal arguments directly on the stack. Minimum size and alignment of a + // slot is 64-bit. + CCIfByVal<CCPassByVal<8, 8>>, + + // Pass SwiftSelf in a callee saved register. + CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>, + + // A SwiftError is passed in X21. + CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>, + + CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>, + + // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, + // up to eight each of GPR and FPR. + CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, + CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], + [X0, X1, X2, X3, X4, X5, X6, X7]>>, + // i128 is split to two i64s, we can't fit half to register X7. + CCIfType<[i64], + CCIfSplit<CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6], + [W0, W1, W2, W3, W4, W5, W6]>>>, + // i128 is split to two i64s, and its stack alignment is 16 bytes. + CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [X7]>>>, + + CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], + [W0, W1, W2, W3, W4, W5, W6, W7]>>, + CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16], + CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16], + CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + + // If more than will fit in registers, pass them on the stack instead. + CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>, + CCIf<"ValVT == MVT::i16 || ValVT == MVT::f16", CCAssignToStack<2, 2>>, + CCIfType<[i32, f32], CCAssignToStack<4, 4>>, + CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16], + CCAssignToStack<8, 8>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16], + CCAssignToStack<16, 16>> +]>; + +def CC_AArch64_DarwinPCS_VarArg : CallingConv<[ + CCIfType<[iPTR], CCBitConvertToType<i64>>, + CCIfType<[v2f32], CCBitConvertToType<v2i32>>, + CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>, + + CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Stack_Block">>, + + // Handle all scalar types as either i64 or f64. + CCIfType<[i8, i16, i32], CCPromoteToType<i64>>, + CCIfType<[f16, f32], CCPromoteToType<f64>>, + + // Everything is on the stack. + // i128 is split to two i64s, and its stack alignment is 16 bytes. + CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>, + CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16], + CCAssignToStack<8, 8>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16], + CCAssignToStack<16, 16>> +]>; + +// The WebKit_JS calling convention only passes the first argument (the callee) +// in register and the remaining arguments on stack. 
We allow 32bit stack slots, +// so that WebKit can write partial values in the stack and define the other +// 32bit quantity as undef. +def CC_AArch64_WebKit_JS : CallingConv<[ + // Handle i1, i8, i16, i32, and i64 passing in register X0 (W0). + CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, + CCIfType<[i32], CCAssignToRegWithShadow<[W0], [X0]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[X0], [W0]>>, + + // Pass the remaining arguments on the stack instead. + CCIfType<[i32, f32], CCAssignToStack<4, 4>>, + CCIfType<[i64, f64], CCAssignToStack<8, 8>> +]>; + +def RetCC_AArch64_WebKit_JS : CallingConv<[ + CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], + [X0, X1, X2, X3, X4, X5, X6, X7]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], + [W0, W1, W2, W3, W4, W5, W6, W7]>>, + CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>> +]>; + +//===----------------------------------------------------------------------===// +// ARM64 Calling Convention for GHC +//===----------------------------------------------------------------------===// + +// This calling convention is specific to the Glasgow Haskell Compiler. +// The only documentation is the GHC source code, specifically the C header +// file: +// +// https://github.com/ghc/ghc/blob/master/includes/stg/MachRegs.h +// +// which defines the registers for the Spineless Tagless G-Machine (STG) that +// GHC uses to implement lazy evaluation. The generic STG machine has a set of +// registers which are mapped to appropriate set of architecture specific +// registers for each CPU architecture. +// +// The STG Machine is documented here: +// +// https://ghc.haskell.org/trac/ghc/wiki/Commentary/Compiler/GeneratedCode +// +// The AArch64 register mapping is under the heading "The ARMv8/AArch64 ABI +// register mapping". + +def CC_AArch64_GHC : CallingConv<[ + CCIfType<[iPTR], CCBitConvertToType<i64>>, + + // Handle all vector types as either f64 or v2f64. + CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, f128], CCBitConvertToType<v2f64>>, + + CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>, + CCIfType<[f32], CCAssignToReg<[S8, S9, S10, S11]>>, + CCIfType<[f64], CCAssignToReg<[D12, D13, D14, D15]>>, + + // Promote i8/i16/i32 arguments to i64. + CCIfType<[i8, i16, i32], CCPromoteToType<i64>>, + + // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, SpLim + CCIfType<[i64], CCAssignToReg<[X19, X20, X21, X22, X23, X24, X25, X26, X27, X28]>> +]>; + +// FIXME: LR is only callee-saved in the sense that *we* preserve it and are +// presumably a callee to someone. External functions may not do so, but this +// is currently safe since BL has LR as an implicit-def and what happens after a +// tail call doesn't matter. +// +// It would be better to model its preservation semantics properly (create a +// vreg on entry, use it in RET & tail call generation; make that vreg def if we +// end up saving LR as part of a call frame). Watch this space... 
+def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, + X23, X24, X25, X26, X27, X28, + D8, D9, D10, D11, + D12, D13, D14, D15)>; + +// Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since +// 'this' and the pointer return value are both passed in X0 in these cases, +// this can be partially modelled by treating X0 as a callee-saved register; +// only the resulting RegMask is used; the SaveList is ignored +// +// (For generic ARM 64-bit ABI code, clang will not generate constructors or +// destructors with 'this' returns, so this RegMask will not be used in that +// case) +def CSR_AArch64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>; + +def CSR_AArch64_AAPCS_SwiftError + : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X21)>; + +// The function used by Darwin to obtain the address of a thread-local variable +// guarantees more than a normal AAPCS function. x16 and x17 are used on the +// fast path for calculation, but other registers except X0 (argument/return) +// and LR (it is a call, after all) are preserved. +def CSR_AArch64_TLS_Darwin + : CalleeSavedRegs<(add (sub (sequence "X%u", 1, 28), X16, X17), + FP, + (sequence "Q%u", 0, 31))>; + +// We can only handle a register pair with adjacent registers, the register pair +// should belong to the same class as well. Since the access function on the +// fast path calls a function that follows CSR_AArch64_TLS_Darwin, +// CSR_AArch64_CXX_TLS_Darwin should be a subset of CSR_AArch64_TLS_Darwin. +def CSR_AArch64_CXX_TLS_Darwin + : CalleeSavedRegs<(add CSR_AArch64_AAPCS, + (sub (sequence "X%u", 1, 28), X15, X16, X17, X18), + (sequence "D%u", 0, 31))>; + +// CSRs that are handled by prologue, epilogue. +def CSR_AArch64_CXX_TLS_Darwin_PE + : CalleeSavedRegs<(add LR, FP)>; + +// CSRs that are handled explicitly via copies. +def CSR_AArch64_CXX_TLS_Darwin_ViaCopy + : CalleeSavedRegs<(sub CSR_AArch64_CXX_TLS_Darwin, LR, FP)>; + +// The ELF stub used for TLS-descriptor access saves every feasible +// register. Only X0 and LR are clobbered. +def CSR_AArch64_TLS_ELF + : CalleeSavedRegs<(add (sequence "X%u", 1, 28), FP, + (sequence "Q%u", 0, 31))>; + +def CSR_AArch64_AllRegs + : CalleeSavedRegs<(add (sequence "W%u", 0, 30), WSP, + (sequence "X%u", 0, 28), FP, LR, SP, + (sequence "B%u", 0, 31), (sequence "H%u", 0, 31), + (sequence "S%u", 0, 31), (sequence "D%u", 0, 31), + (sequence "Q%u", 0, 31))>; + +def CSR_AArch64_NoRegs : CalleeSavedRegs<(add)>; + +def CSR_AArch64_RT_MostRegs : CalleeSavedRegs<(add CSR_AArch64_AAPCS, + (sequence "X%u", 9, 15))>; + +def CSR_AArch64_StackProbe_Windows + : CalleeSavedRegs<(add (sequence "X%u", 0, 15), + (sequence "X%u", 18, 28), FP, SP, + (sequence "Q%u", 0, 31))>; + +// Variants of the standard calling conventions for shadow call stack. +// These all preserve x18 in addition to any other registers. 
+def CSR_AArch64_NoRegs_SCS + : CalleeSavedRegs<(add CSR_AArch64_NoRegs, X18)>; +def CSR_AArch64_AllRegs_SCS + : CalleeSavedRegs<(add CSR_AArch64_AllRegs, X18)>; +def CSR_AArch64_CXX_TLS_Darwin_SCS + : CalleeSavedRegs<(add CSR_AArch64_CXX_TLS_Darwin, X18)>; +def CSR_AArch64_AAPCS_SwiftError_SCS + : CalleeSavedRegs<(add CSR_AArch64_AAPCS_SwiftError, X18)>; +def CSR_AArch64_RT_MostRegs_SCS + : CalleeSavedRegs<(add CSR_AArch64_RT_MostRegs, X18)>; +def CSR_AArch64_AAPCS_SCS + : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X18)>; diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64InstrAtomics.td b/capstone/suite/synctools/tablegen/AArch64/AArch64InstrAtomics.td new file mode 100644 index 000000000..35cd7735c --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64InstrAtomics.td @@ -0,0 +1,426 @@ +//=- AArch64InstrAtomics.td - AArch64 Atomic codegen support -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// AArch64 Atomic operand code-gen constructs. +// +//===----------------------------------------------------------------------===// + +//===---------------------------------- +// Atomic fences +//===---------------------------------- +let AddedComplexity = 15, Size = 0 in +def CompilerBarrier : Pseudo<(outs), (ins i32imm:$ordering), + [(atomic_fence imm:$ordering, 0)]>, Sched<[]>; +def : Pat<(atomic_fence (i64 4), (imm)), (DMB (i32 0x9))>; +def : Pat<(atomic_fence (imm), (imm)), (DMB (i32 0xb))>; + +//===---------------------------------- +// Atomic loads +//===---------------------------------- + +// When they're actually atomic, only one addressing mode (GPR64sp) is +// supported, but when they're relaxed and anything can be used, all the +// standard modes would be valid and may give efficiency gains. + +// A atomic load operation that actually needs acquire semantics. +class acquiring_load<PatFrag base> + : PatFrag<(ops node:$ptr), (base node:$ptr)> { + let IsAtomic = 1; + let IsAtomicOrderingAcquireOrStronger = 1; +} + +// An atomic load operation that does not need either acquire or release +// semantics. 
+class relaxed_load<PatFrag base> + : PatFrag<(ops node:$ptr), (base node:$ptr)> { + let IsAtomic = 1; + let IsAtomicOrderingAcquireOrStronger = 0; +} + +// 8-bit loads +def : Pat<(acquiring_load<atomic_load_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>; +def : Pat<(relaxed_load<atomic_load_8> (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend8:$offset)), + (LDRBBroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$offset)>; +def : Pat<(relaxed_load<atomic_load_8> (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend8:$offset)), + (LDRBBroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$offset)>; +def : Pat<(relaxed_load<atomic_load_8> (am_indexed8 GPR64sp:$Rn, + uimm12s1:$offset)), + (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; +def : Pat<(relaxed_load<atomic_load_8> + (am_unscaled8 GPR64sp:$Rn, simm9:$offset)), + (LDURBBi GPR64sp:$Rn, simm9:$offset)>; + +// 16-bit loads +def : Pat<(acquiring_load<atomic_load_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>; +def : Pat<(relaxed_load<atomic_load_16> (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend16:$extend)), + (LDRHHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend)>; +def : Pat<(relaxed_load<atomic_load_16> (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend)), + (LDRHHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend)>; +def : Pat<(relaxed_load<atomic_load_16> (am_indexed16 GPR64sp:$Rn, + uimm12s2:$offset)), + (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>; +def : Pat<(relaxed_load<atomic_load_16> + (am_unscaled16 GPR64sp:$Rn, simm9:$offset)), + (LDURHHi GPR64sp:$Rn, simm9:$offset)>; + +// 32-bit loads +def : Pat<(acquiring_load<atomic_load_32> GPR64sp:$ptr), (LDARW GPR64sp:$ptr)>; +def : Pat<(relaxed_load<atomic_load_32> (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend32:$extend)), + (LDRWroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>; +def : Pat<(relaxed_load<atomic_load_32> (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend32:$extend)), + (LDRWroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>; +def : Pat<(relaxed_load<atomic_load_32> (am_indexed32 GPR64sp:$Rn, + uimm12s4:$offset)), + (LDRWui GPR64sp:$Rn, uimm12s4:$offset)>; +def : Pat<(relaxed_load<atomic_load_32> + (am_unscaled32 GPR64sp:$Rn, simm9:$offset)), + (LDURWi GPR64sp:$Rn, simm9:$offset)>; + +// 64-bit loads +def : Pat<(acquiring_load<atomic_load_64> GPR64sp:$ptr), (LDARX GPR64sp:$ptr)>; +def : Pat<(relaxed_load<atomic_load_64> (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend64:$extend)), + (LDRXroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>; +def : Pat<(relaxed_load<atomic_load_64> (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend64:$extend)), + (LDRXroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>; +def : Pat<(relaxed_load<atomic_load_64> (am_indexed64 GPR64sp:$Rn, + uimm12s8:$offset)), + (LDRXui GPR64sp:$Rn, uimm12s8:$offset)>; +def : Pat<(relaxed_load<atomic_load_64> + (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), + (LDURXi GPR64sp:$Rn, simm9:$offset)>; + +//===---------------------------------- +// Atomic stores +//===---------------------------------- + +// When they're actually atomic, only one addressing mode (GPR64sp) is +// supported, but when they're relaxed and anything can be used, all the +// standard modes would be valid and may give efficiency gains. + +// A store operation that actually needs release semantics. +class releasing_store<PatFrag base> + : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val)> { + let IsAtomic = 1; + let IsAtomicOrderingReleaseOrStronger = 1; +} + +// An atomic store operation that doesn't actually need to be atomic on AArch64. 
+class relaxed_store<PatFrag base> + : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val)> { + let IsAtomic = 1; + let IsAtomicOrderingReleaseOrStronger = 0; +} + +// 8-bit stores +def : Pat<(releasing_store<atomic_store_8> GPR64sp:$ptr, GPR32:$val), + (STLRB GPR32:$val, GPR64sp:$ptr)>; +def : Pat<(relaxed_store<atomic_store_8> + (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$extend), + GPR32:$val), + (STRBBroW GPR32:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$extend)>; +def : Pat<(relaxed_store<atomic_store_8> + (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$extend), + GPR32:$val), + (STRBBroX GPR32:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$extend)>; +def : Pat<(relaxed_store<atomic_store_8> + (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset), GPR32:$val), + (STRBBui GPR32:$val, GPR64sp:$Rn, uimm12s1:$offset)>; +def : Pat<(relaxed_store<atomic_store_8> + (am_unscaled8 GPR64sp:$Rn, simm9:$offset), GPR32:$val), + (STURBBi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>; + +// 16-bit stores +def : Pat<(releasing_store<atomic_store_16> GPR64sp:$ptr, GPR32:$val), + (STLRH GPR32:$val, GPR64sp:$ptr)>; +def : Pat<(relaxed_store<atomic_store_16> (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend16:$extend), + GPR32:$val), + (STRHHroW GPR32:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend)>; +def : Pat<(relaxed_store<atomic_store_16> (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend), + GPR32:$val), + (STRHHroX GPR32:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend)>; +def : Pat<(relaxed_store<atomic_store_16> + (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset), GPR32:$val), + (STRHHui GPR32:$val, GPR64sp:$Rn, uimm12s2:$offset)>; +def : Pat<(relaxed_store<atomic_store_16> + (am_unscaled16 GPR64sp:$Rn, simm9:$offset), GPR32:$val), + (STURHHi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>; + +// 32-bit stores +def : Pat<(releasing_store<atomic_store_32> GPR64sp:$ptr, GPR32:$val), + (STLRW GPR32:$val, GPR64sp:$ptr)>; +def : Pat<(relaxed_store<atomic_store_32> (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend32:$extend), + GPR32:$val), + (STRWroW GPR32:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>; +def : Pat<(relaxed_store<atomic_store_32> (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend32:$extend), + GPR32:$val), + (STRWroX GPR32:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>; +def : Pat<(relaxed_store<atomic_store_32> + (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset), GPR32:$val), + (STRWui GPR32:$val, GPR64sp:$Rn, uimm12s4:$offset)>; +def : Pat<(relaxed_store<atomic_store_32> + (am_unscaled32 GPR64sp:$Rn, simm9:$offset), GPR32:$val), + (STURWi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>; + +// 64-bit stores +def : Pat<(releasing_store<atomic_store_64> GPR64sp:$ptr, GPR64:$val), + (STLRX GPR64:$val, GPR64sp:$ptr)>; +def : Pat<(relaxed_store<atomic_store_64> (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend16:$extend), + GPR64:$val), + (STRXroW GPR64:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>; +def : Pat<(relaxed_store<atomic_store_64> (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend), + GPR64:$val), + (STRXroX GPR64:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>; +def : Pat<(relaxed_store<atomic_store_64> + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset), GPR64:$val), + (STRXui GPR64:$val, GPR64sp:$Rn, uimm12s8:$offset)>; +def : Pat<(relaxed_store<atomic_store_64> + (am_unscaled64 GPR64sp:$Rn, simm9:$offset), GPR64:$val), + (STURXi GPR64:$val, GPR64sp:$Rn, simm9:$offset)>; + +//===---------------------------------- +// Low-level exclusive 
operations +//===---------------------------------- + +// Load-exclusives. + +def ldxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8; +}]>; + +def ldxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16; +}]>; + +def ldxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32; +}]>; + +def ldxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64; +}]>; + +def : Pat<(ldxr_1 GPR64sp:$addr), + (SUBREG_TO_REG (i64 0), (LDXRB GPR64sp:$addr), sub_32)>; +def : Pat<(ldxr_2 GPR64sp:$addr), + (SUBREG_TO_REG (i64 0), (LDXRH GPR64sp:$addr), sub_32)>; +def : Pat<(ldxr_4 GPR64sp:$addr), + (SUBREG_TO_REG (i64 0), (LDXRW GPR64sp:$addr), sub_32)>; +def : Pat<(ldxr_8 GPR64sp:$addr), (LDXRX GPR64sp:$addr)>; + +def : Pat<(and (ldxr_1 GPR64sp:$addr), 0xff), + (SUBREG_TO_REG (i64 0), (LDXRB GPR64sp:$addr), sub_32)>; +def : Pat<(and (ldxr_2 GPR64sp:$addr), 0xffff), + (SUBREG_TO_REG (i64 0), (LDXRH GPR64sp:$addr), sub_32)>; +def : Pat<(and (ldxr_4 GPR64sp:$addr), 0xffffffff), + (SUBREG_TO_REG (i64 0), (LDXRW GPR64sp:$addr), sub_32)>; + +// Load-exclusives. + +def ldaxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8; +}]>; + +def ldaxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16; +}]>; + +def ldaxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32; +}]>; + +def ldaxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64; +}]>; + +def : Pat<(ldaxr_1 GPR64sp:$addr), + (SUBREG_TO_REG (i64 0), (LDAXRB GPR64sp:$addr), sub_32)>; +def : Pat<(ldaxr_2 GPR64sp:$addr), + (SUBREG_TO_REG (i64 0), (LDAXRH GPR64sp:$addr), sub_32)>; +def : Pat<(ldaxr_4 GPR64sp:$addr), + (SUBREG_TO_REG (i64 0), (LDAXRW GPR64sp:$addr), sub_32)>; +def : Pat<(ldaxr_8 GPR64sp:$addr), (LDAXRX GPR64sp:$addr)>; + +def : Pat<(and (ldaxr_1 GPR64sp:$addr), 0xff), + (SUBREG_TO_REG (i64 0), (LDAXRB GPR64sp:$addr), sub_32)>; +def : Pat<(and (ldaxr_2 GPR64sp:$addr), 0xffff), + (SUBREG_TO_REG (i64 0), (LDAXRH GPR64sp:$addr), sub_32)>; +def : Pat<(and (ldaxr_4 GPR64sp:$addr), 0xffffffff), + (SUBREG_TO_REG (i64 0), (LDAXRW GPR64sp:$addr), sub_32)>; + +// Store-exclusives. 
+ +def stxr_1 : PatFrag<(ops node:$val, node:$ptr), + (int_aarch64_stxr node:$val, node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8; +}]>; + +def stxr_2 : PatFrag<(ops node:$val, node:$ptr), + (int_aarch64_stxr node:$val, node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16; +}]>; + +def stxr_4 : PatFrag<(ops node:$val, node:$ptr), + (int_aarch64_stxr node:$val, node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32; +}]>; + +def stxr_8 : PatFrag<(ops node:$val, node:$ptr), + (int_aarch64_stxr node:$val, node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64; +}]>; + + +def : Pat<(stxr_1 GPR64:$val, GPR64sp:$addr), + (STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; +def : Pat<(stxr_2 GPR64:$val, GPR64sp:$addr), + (STXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; +def : Pat<(stxr_4 GPR64:$val, GPR64sp:$addr), + (STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; +def : Pat<(stxr_8 GPR64:$val, GPR64sp:$addr), + (STXRX GPR64:$val, GPR64sp:$addr)>; + +def : Pat<(stxr_1 (zext (and GPR32:$val, 0xff)), GPR64sp:$addr), + (STXRB GPR32:$val, GPR64sp:$addr)>; +def : Pat<(stxr_2 (zext (and GPR32:$val, 0xffff)), GPR64sp:$addr), + (STXRH GPR32:$val, GPR64sp:$addr)>; +def : Pat<(stxr_4 (zext GPR32:$val), GPR64sp:$addr), + (STXRW GPR32:$val, GPR64sp:$addr)>; + +def : Pat<(stxr_1 (and GPR64:$val, 0xff), GPR64sp:$addr), + (STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; +def : Pat<(stxr_2 (and GPR64:$val, 0xffff), GPR64sp:$addr), + (STXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; +def : Pat<(stxr_4 (and GPR64:$val, 0xffffffff), GPR64sp:$addr), + (STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; + +// Store-release-exclusives. 
+ +def stlxr_1 : PatFrag<(ops node:$val, node:$ptr), + (int_aarch64_stlxr node:$val, node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8; +}]>; + +def stlxr_2 : PatFrag<(ops node:$val, node:$ptr), + (int_aarch64_stlxr node:$val, node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16; +}]>; + +def stlxr_4 : PatFrag<(ops node:$val, node:$ptr), + (int_aarch64_stlxr node:$val, node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32; +}]>; + +def stlxr_8 : PatFrag<(ops node:$val, node:$ptr), + (int_aarch64_stlxr node:$val, node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64; +}]>; + + +def : Pat<(stlxr_1 GPR64:$val, GPR64sp:$addr), + (STLXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; +def : Pat<(stlxr_2 GPR64:$val, GPR64sp:$addr), + (STLXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; +def : Pat<(stlxr_4 GPR64:$val, GPR64sp:$addr), + (STLXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; +def : Pat<(stlxr_8 GPR64:$val, GPR64sp:$addr), + (STLXRX GPR64:$val, GPR64sp:$addr)>; + +def : Pat<(stlxr_1 (zext (and GPR32:$val, 0xff)), GPR64sp:$addr), + (STLXRB GPR32:$val, GPR64sp:$addr)>; +def : Pat<(stlxr_2 (zext (and GPR32:$val, 0xffff)), GPR64sp:$addr), + (STLXRH GPR32:$val, GPR64sp:$addr)>; +def : Pat<(stlxr_4 (zext GPR32:$val), GPR64sp:$addr), + (STLXRW GPR32:$val, GPR64sp:$addr)>; + +def : Pat<(stlxr_1 (and GPR64:$val, 0xff), GPR64sp:$addr), + (STLXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; +def : Pat<(stlxr_2 (and GPR64:$val, 0xffff), GPR64sp:$addr), + (STLXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; +def : Pat<(stlxr_4 (and GPR64:$val, 0xffffffff), GPR64sp:$addr), + (STLXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>; + + +// And clear exclusive. + +def : Pat<(int_aarch64_clrex), (CLREX 0xf)>; + +//===---------------------------------- +// Atomic cmpxchg for -O0 +//===---------------------------------- + +// The fast register allocator used during -O0 inserts spills to cover any VRegs +// live across basic block boundaries. When this happens between an LDXR and an +// STXR it can clear the exclusive monitor, causing all cmpxchg attempts to +// fail. + +// Unfortunately, this means we have to have an alternative (expanded +// post-regalloc) path for -O0 compilations. Fortunately this path can be +// significantly more naive than the standard expansion: we conservatively +// assume seq_cst, strong cmpxchg and omit clrex on failure. 
+ +let Constraints = "@earlyclobber $Rd,@earlyclobber $scratch", + mayLoad = 1, mayStore = 1 in { +def CMP_SWAP_8 : Pseudo<(outs GPR32:$Rd, GPR32:$scratch), + (ins GPR64:$addr, GPR32:$desired, GPR32:$new), []>, + Sched<[WriteAtomic]>; + +def CMP_SWAP_16 : Pseudo<(outs GPR32:$Rd, GPR32:$scratch), + (ins GPR64:$addr, GPR32:$desired, GPR32:$new), []>, + Sched<[WriteAtomic]>; + +def CMP_SWAP_32 : Pseudo<(outs GPR32:$Rd, GPR32:$scratch), + (ins GPR64:$addr, GPR32:$desired, GPR32:$new), []>, + Sched<[WriteAtomic]>; + +def CMP_SWAP_64 : Pseudo<(outs GPR64:$Rd, GPR32:$scratch), + (ins GPR64:$addr, GPR64:$desired, GPR64:$new), []>, + Sched<[WriteAtomic]>; +} + +let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi,@earlyclobber $scratch", + mayLoad = 1, mayStore = 1 in +def CMP_SWAP_128 : Pseudo<(outs GPR64:$RdLo, GPR64:$RdHi, GPR32:$scratch), + (ins GPR64:$addr, GPR64:$desiredLo, GPR64:$desiredHi, + GPR64:$newLo, GPR64:$newHi), []>, + Sched<[WriteAtomic]>; + +// v8.1 Atomic instructions: +let Predicates = [HasLSE] in { + defm : LDOPregister_patterns<"LDADD", "atomic_load_add">; + defm : LDOPregister_patterns<"LDSET", "atomic_load_or">; + defm : LDOPregister_patterns<"LDEOR", "atomic_load_xor">; + defm : LDOPregister_patterns<"LDCLR", "atomic_load_clr">; + defm : LDOPregister_patterns<"LDSMAX", "atomic_load_max">; + defm : LDOPregister_patterns<"LDSMIN", "atomic_load_min">; + defm : LDOPregister_patterns<"LDUMAX", "atomic_load_umax">; + defm : LDOPregister_patterns<"LDUMIN", "atomic_load_umin">; + defm : LDOPregister_patterns<"SWP", "atomic_swap">; + defm : CASregister_patterns<"CAS", "atomic_cmp_swap">; + + // These two patterns are only needed for global isel, selection dag isel + // converts atomic load-sub into a sub and atomic load-add, and likewise for + // and -> clr. + defm : LDOPregister_patterns_mod<"LDADD", "atomic_load_sub", "SUB">; + defm : LDOPregister_patterns_mod<"LDCLR", "atomic_load_and", "ORN">; +} + diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64InstrFormats.td b/capstone/suite/synctools/tablegen/AArch64/AArch64InstrFormats.td new file mode 100644 index 000000000..7caf32dbd --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64InstrFormats.td @@ -0,0 +1,10402 @@ +//===- AArch64InstrFormats.td - AArch64 Instruction Formats --*- tblgen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Describe AArch64 instructions format here +// + +// Format specifies the encoding used by the instruction. This is part of the +// ad-hoc solution used to emit machine instruction encodings by our machine +// code emitter. +class Format<bits<2> val> { + bits<2> Value = val; +} + +def PseudoFrm : Format<0>; +def NormalFrm : Format<1>; // Do we need any others? + +// AArch64 Instruction Format +class AArch64Inst<Format f, string cstr> : Instruction { + field bits<32> Inst; // Instruction encoding. + // Mask of bits that cause an encoding to be UNPREDICTABLE. + // If a bit is set, then if the corresponding bit in the + // target encoding differs from its value in the "Inst" field, + // the instruction is UNPREDICTABLE (SoftFail in abstract parlance). 
+ field bits<32> Unpredictable = 0; + // SoftFail is the generic name for this field, but we alias it so + // as to make it more obvious what it means in ARM-land. + field bits<32> SoftFail = Unpredictable; + let Namespace = "AArch64"; + Format F = f; + bits<2> Form = F.Value; + let Pattern = []; + let Constraints = cstr; +} + +class InstSubst<string Asm, dag Result, bit EmitPriority = 0> + : InstAlias<Asm, Result, EmitPriority>, Requires<[UseNegativeImmediates]>; + +// Pseudo instructions (don't have encoding information) +class Pseudo<dag oops, dag iops, list<dag> pattern, string cstr = ""> + : AArch64Inst<PseudoFrm, cstr> { + dag OutOperandList = oops; + dag InOperandList = iops; + let Pattern = pattern; + let isCodeGenOnly = 1; +} + +// Real instructions (have encoding information) +class EncodedI<string cstr, list<dag> pattern> : AArch64Inst<NormalFrm, cstr> { + let Pattern = pattern; + let Size = 4; +} + +// Enum describing whether an instruction is +// destructive in its first source operand. +class DestructiveInstTypeEnum<bits<1> val> { + bits<1> Value = val; +} +def NotDestructive : DestructiveInstTypeEnum<0>; +def Destructive : DestructiveInstTypeEnum<1>; + +// Normal instructions +class I<dag oops, dag iops, string asm, string operands, string cstr, + list<dag> pattern> + : EncodedI<cstr, pattern> { + dag OutOperandList = oops; + dag InOperandList = iops; + let AsmString = !strconcat(asm, operands); + + // Destructive operations (SVE) + DestructiveInstTypeEnum DestructiveInstType = NotDestructive; + ElementSizeEnum ElementSize = ElementSizeB; + + let TSFlags{3} = DestructiveInstType.Value; + let TSFlags{2-0} = ElementSize.Value; +} + +class TriOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$MHS, node:$RHS), res>; +class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>; +class UnOpFrag<dag res> : PatFrag<(ops node:$LHS), res>; + +// Helper fragment for an extract of the high portion of a 128-bit vector. +def extract_high_v16i8 : + UnOpFrag<(extract_subvector (v16i8 node:$LHS), (i64 8))>; +def extract_high_v8i16 : + UnOpFrag<(extract_subvector (v8i16 node:$LHS), (i64 4))>; +def extract_high_v4i32 : + UnOpFrag<(extract_subvector (v4i32 node:$LHS), (i64 2))>; +def extract_high_v2i64 : + UnOpFrag<(extract_subvector (v2i64 node:$LHS), (i64 1))>; + +//===----------------------------------------------------------------------===// +// Asm Operand Classes. +// + +// Shifter operand for arithmetic shifted encodings. +def ShifterOperand : AsmOperandClass { + let Name = "Shifter"; +} + +// Shifter operand for mov immediate encodings. +def MovImm32ShifterOperand : AsmOperandClass { + let SuperClasses = [ShifterOperand]; + let Name = "MovImm32Shifter"; + let RenderMethod = "addShifterOperands"; + let DiagnosticType = "InvalidMovImm32Shift"; +} +def MovImm64ShifterOperand : AsmOperandClass { + let SuperClasses = [ShifterOperand]; + let Name = "MovImm64Shifter"; + let RenderMethod = "addShifterOperands"; + let DiagnosticType = "InvalidMovImm64Shift"; +} + +// Shifter operand for arithmetic register shifted encodings. 
+class ArithmeticShifterOperand<int width> : AsmOperandClass { + let SuperClasses = [ShifterOperand]; + let Name = "ArithmeticShifter" # width; + let PredicateMethod = "isArithmeticShifter<" # width # ">"; + let RenderMethod = "addShifterOperands"; + let DiagnosticType = "AddSubRegShift" # width; +} + +def ArithmeticShifterOperand32 : ArithmeticShifterOperand<32>; +def ArithmeticShifterOperand64 : ArithmeticShifterOperand<64>; + +// Shifter operand for logical register shifted encodings. +class LogicalShifterOperand<int width> : AsmOperandClass { + let SuperClasses = [ShifterOperand]; + let Name = "LogicalShifter" # width; + let PredicateMethod = "isLogicalShifter<" # width # ">"; + let RenderMethod = "addShifterOperands"; + let DiagnosticType = "AddSubRegShift" # width; +} + +def LogicalShifterOperand32 : LogicalShifterOperand<32>; +def LogicalShifterOperand64 : LogicalShifterOperand<64>; + +// Shifter operand for logical vector 128/64-bit shifted encodings. +def LogicalVecShifterOperand : AsmOperandClass { + let SuperClasses = [ShifterOperand]; + let Name = "LogicalVecShifter"; + let RenderMethod = "addShifterOperands"; +} +def LogicalVecHalfWordShifterOperand : AsmOperandClass { + let SuperClasses = [LogicalVecShifterOperand]; + let Name = "LogicalVecHalfWordShifter"; + let RenderMethod = "addShifterOperands"; +} + +// The "MSL" shifter on the vector MOVI instruction. +def MoveVecShifterOperand : AsmOperandClass { + let SuperClasses = [ShifterOperand]; + let Name = "MoveVecShifter"; + let RenderMethod = "addShifterOperands"; +} + +// Extend operand for arithmetic encodings. +def ExtendOperand : AsmOperandClass { + let Name = "Extend"; + let DiagnosticType = "AddSubRegExtendLarge"; +} +def ExtendOperand64 : AsmOperandClass { + let SuperClasses = [ExtendOperand]; + let Name = "Extend64"; + let DiagnosticType = "AddSubRegExtendSmall"; +} +// 'extend' that's a lsl of a 64-bit register. +def ExtendOperandLSL64 : AsmOperandClass { + let SuperClasses = [ExtendOperand]; + let Name = "ExtendLSL64"; + let RenderMethod = "addExtend64Operands"; + let DiagnosticType = "AddSubRegExtendLarge"; +} + +// 8-bit floating-point immediate encodings. +def FPImmOperand : AsmOperandClass { + let Name = "FPImm"; + let ParserMethod = "tryParseFPImm<true>"; + let DiagnosticType = "InvalidFPImm"; +} + +def CondCode : AsmOperandClass { + let Name = "CondCode"; + let DiagnosticType = "InvalidCondCode"; +} + +// A 32-bit register pasrsed as 64-bit +def GPR32as64Operand : AsmOperandClass { + let Name = "GPR32as64"; + let ParserMethod = + "tryParseGPROperand<false, RegConstraintEqualityTy::EqualsSubReg>"; +} +def GPR32as64 : RegisterOperand<GPR32> { + let ParserMatchClass = GPR32as64Operand; +} + +// A 64-bit register pasrsed as 32-bit +def GPR64as32Operand : AsmOperandClass { + let Name = "GPR64as32"; + let ParserMethod = + "tryParseGPROperand<false, RegConstraintEqualityTy::EqualsSuperReg>"; +} +def GPR64as32 : RegisterOperand<GPR64, "printGPR64as32"> { + let ParserMatchClass = GPR64as32Operand; +} + +// 8-bit immediate for AdvSIMD where 64-bit values of the form: +// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh +// are encoded as the eight bit value 'abcdefgh'. 
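+// For illustration: each of the eight bits selects whether the corresponding
+// byte of the 64-bit value is all-ones or all-zeros, so 'abcdefgh' = 0xF0
+// expands to 0xFFFFFFFF00000000 and 0x01 expands to 0x00000000000000FF.
+// This is the per-byte-mask modified immediate used by MOVI (type 10 below).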
+def SIMDImmType10Operand : AsmOperandClass { let Name = "SIMDImmType10"; } + +class UImmScaledMemoryIndexed<int Width, int Scale> : AsmOperandClass { + let Name = "UImm" # Width # "s" # Scale; + let DiagnosticType = "InvalidMemoryIndexed" # Scale # "UImm" # Width; + let RenderMethod = "addImmScaledOperands<" # Scale # ">"; + let PredicateMethod = "isUImmScaled<" # Width # ", " # Scale # ">"; +} + +class SImmScaledMemoryIndexed<int Width, int Scale> : AsmOperandClass { + let Name = "SImm" # Width # "s" # Scale; + let DiagnosticType = "InvalidMemoryIndexed" # Scale # "SImm" # Width; + let RenderMethod = "addImmScaledOperands<" # Scale # ">"; + let PredicateMethod = "isSImmScaled<" # Width # ", " # Scale # ">"; +} + +//===----------------------------------------------------------------------===// +// Operand Definitions. +// + +// ADR[P] instruction labels. +def AdrpOperand : AsmOperandClass { + let Name = "AdrpLabel"; + let ParserMethod = "tryParseAdrpLabel"; + let DiagnosticType = "InvalidLabel"; +} +def adrplabel : Operand<i64> { + let EncoderMethod = "getAdrLabelOpValue"; + let PrintMethod = "printAdrpLabel"; + let ParserMatchClass = AdrpOperand; +} + +def AdrOperand : AsmOperandClass { + let Name = "AdrLabel"; + let ParserMethod = "tryParseAdrLabel"; + let DiagnosticType = "InvalidLabel"; +} +def adrlabel : Operand<i64> { + let EncoderMethod = "getAdrLabelOpValue"; + let ParserMatchClass = AdrOperand; +} + +class SImmOperand<int width> : AsmOperandClass { + let Name = "SImm" # width; + let DiagnosticType = "InvalidMemoryIndexedSImm" # width; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isSImm<" # width # ">"; +} + +// Authenticated loads for v8.3 can have scaled 10-bit immediate offsets. +def SImm10s8Operand : SImmScaledMemoryIndexed<10, 8>; +def simm10Scaled : Operand<i64> { + let ParserMatchClass = SImm10s8Operand; + let DecoderMethod = "DecodeSImm<10>"; + let PrintMethod = "printImmScale<8>"; +} + +// uimm6 predicate - True if the immediate is in the range [0, 63]. +def UImm6Operand : AsmOperandClass { + let Name = "UImm6"; + let DiagnosticType = "InvalidImm0_63"; +} + +def uimm6 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm < 64; }]> { + let ParserMatchClass = UImm6Operand; +} + +def SImm9Operand : SImmOperand<9>; +def simm9 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -256 && Imm < 256; }]> { + let ParserMatchClass = SImm9Operand; + let DecoderMethod = "DecodeSImm<9>"; +} + +def SImm8Operand : SImmOperand<8>; +def simm8 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -128 && Imm < 127; }]> { + let ParserMatchClass = SImm8Operand; + let DecoderMethod = "DecodeSImm<8>"; +} + +def SImm6Operand : SImmOperand<6>; +def simm6_32b : Operand<i32>, ImmLeaf<i32, [{ return Imm >= -32 && Imm < 32; }]> { + let ParserMatchClass = SImm6Operand; + let DecoderMethod = "DecodeSImm<6>"; +} + +def SImm5Operand : SImmOperand<5>; +def simm5_64b : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -16 && Imm < 16; }]> { + let ParserMatchClass = SImm5Operand; + let DecoderMethod = "DecodeSImm<5>"; +} + +def simm5_32b : Operand<i32>, ImmLeaf<i32, [{ return Imm >= -16 && Imm < 16; }]> { + let ParserMatchClass = SImm5Operand; + let DecoderMethod = "DecodeSImm<5>"; +} + +// simm7sN predicate - True if the immediate is a multiple of N in the range +// [-64 * N, 63 * N]. 
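+// For illustration: with N = 8 (simm7s8, the offset form used for 64-bit
+// register pairs) the accepted byte offsets are -512, -504, ..., 496, 504;
+// the value is divided by 8 before encoding, so an offset of #-512 is stored
+// as imm7 = -64 and printed back via printImmScale<8>.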
+ +def SImm7s4Operand : SImmScaledMemoryIndexed<7, 4>; +def SImm7s8Operand : SImmScaledMemoryIndexed<7, 8>; +def SImm7s16Operand : SImmScaledMemoryIndexed<7, 16>; + +def simm7s4 : Operand<i32> { + let ParserMatchClass = SImm7s4Operand; + let PrintMethod = "printImmScale<4>"; +} + +def simm7s8 : Operand<i32> { + let ParserMatchClass = SImm7s8Operand; + let PrintMethod = "printImmScale<8>"; +} + +def simm7s16 : Operand<i32> { + let ParserMatchClass = SImm7s16Operand; + let PrintMethod = "printImmScale<16>"; +} + +def am_indexed7s8 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S8", []>; +def am_indexed7s16 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S16", []>; +def am_indexed7s32 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S32", []>; +def am_indexed7s64 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S64", []>; +def am_indexed7s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S128", []>; + +// uimm5sN predicate - True if the immediate is a multiple of N in the range +// [0 * N, 32 * N]. +def UImm5s2Operand : UImmScaledMemoryIndexed<5, 2>; +def UImm5s4Operand : UImmScaledMemoryIndexed<5, 4>; +def UImm5s8Operand : UImmScaledMemoryIndexed<5, 8>; + +def uimm5s2 : Operand<i64>, ImmLeaf<i64, + [{ return Imm >= 0 && Imm < (32*2) && ((Imm % 2) == 0); }]> { + let ParserMatchClass = UImm5s2Operand; + let PrintMethod = "printImmScale<2>"; +} +def uimm5s4 : Operand<i64>, ImmLeaf<i64, + [{ return Imm >= 0 && Imm < (32*4) && ((Imm % 4) == 0); }]> { + let ParserMatchClass = UImm5s4Operand; + let PrintMethod = "printImmScale<4>"; +} +def uimm5s8 : Operand<i64>, ImmLeaf<i64, + [{ return Imm >= 0 && Imm < (32*8) && ((Imm % 8) == 0); }]> { + let ParserMatchClass = UImm5s8Operand; + let PrintMethod = "printImmScale<8>"; +} + +// uimm6sN predicate - True if the immediate is a multiple of N in the range +// [0 * N, 64 * N]. +def UImm6s1Operand : UImmScaledMemoryIndexed<6, 1>; +def UImm6s2Operand : UImmScaledMemoryIndexed<6, 2>; +def UImm6s4Operand : UImmScaledMemoryIndexed<6, 4>; +def UImm6s8Operand : UImmScaledMemoryIndexed<6, 8>; + +def uimm6s1 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm < 64; }]> { + let ParserMatchClass = UImm6s1Operand; +} +def uimm6s2 : Operand<i64>, ImmLeaf<i64, +[{ return Imm >= 0 && Imm < (64*2) && ((Imm % 2) == 0); }]> { + let PrintMethod = "printImmScale<2>"; + let ParserMatchClass = UImm6s2Operand; +} +def uimm6s4 : Operand<i64>, ImmLeaf<i64, +[{ return Imm >= 0 && Imm < (64*4) && ((Imm % 4) == 0); }]> { + let PrintMethod = "printImmScale<4>"; + let ParserMatchClass = UImm6s4Operand; +} +def uimm6s8 : Operand<i64>, ImmLeaf<i64, +[{ return Imm >= 0 && Imm < (64*8) && ((Imm % 8) == 0); }]> { + let PrintMethod = "printImmScale<8>"; + let ParserMatchClass = UImm6s8Operand; +} + +// simm6sN predicate - True if the immediate is a multiple of N in the range +// [-32 * N, 31 * N]. +def SImm6s1Operand : SImmScaledMemoryIndexed<6, 1>; +def simm6s1 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -32 && Imm < 32; }]> { + let ParserMatchClass = SImm6s1Operand; + let DecoderMethod = "DecodeSImm<6>"; +} + +// simm4sN predicate - True if the immediate is a multiple of N in the range +// [ -8* N, 7 * N]. 
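+// For illustration: with N = 4 (simm4s4) the accepted values are
+// -32, -28, ..., 24, 28; a value of #20 is stored as the signed 4-bit
+// field 5 (20 / 4) and scaled back up by printImmScale<4> when printed.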
+def SImm4s1Operand : SImmScaledMemoryIndexed<4, 1>; +def SImm4s2Operand : SImmScaledMemoryIndexed<4, 2>; +def SImm4s3Operand : SImmScaledMemoryIndexed<4, 3>; +def SImm4s4Operand : SImmScaledMemoryIndexed<4, 4>; +def SImm4s16Operand : SImmScaledMemoryIndexed<4, 16>; + +def simm4s1 : Operand<i64>, ImmLeaf<i64, +[{ return Imm >=-8 && Imm <= 7; }]> { + let ParserMatchClass = SImm4s1Operand; + let DecoderMethod = "DecodeSImm<4>"; +} + +def simm4s2 : Operand<i64>, ImmLeaf<i64, +[{ return Imm >=-16 && Imm <= 14 && (Imm % 2) == 0x0; }]> { + let PrintMethod = "printImmScale<2>"; + let ParserMatchClass = SImm4s2Operand; + let DecoderMethod = "DecodeSImm<4>"; +} + +def simm4s3 : Operand<i64>, ImmLeaf<i64, +[{ return Imm >=-24 && Imm <= 21 && (Imm % 3) == 0x0; }]> { + let PrintMethod = "printImmScale<3>"; + let ParserMatchClass = SImm4s3Operand; + let DecoderMethod = "DecodeSImm<4>"; +} + +def simm4s4 : Operand<i64>, ImmLeaf<i64, +[{ return Imm >=-32 && Imm <= 28 && (Imm % 4) == 0x0; }]> { + let PrintMethod = "printImmScale<4>"; + let ParserMatchClass = SImm4s4Operand; + let DecoderMethod = "DecodeSImm<4>"; +} +def simm4s16 : Operand<i64>, ImmLeaf<i64, +[{ return Imm >=-128 && Imm <= 112 && (Imm % 16) == 0x0; }]> { + let PrintMethod = "printImmScale<16>"; + let ParserMatchClass = SImm4s16Operand; + let DecoderMethod = "DecodeSImm<4>"; +} + +class AsmImmRange<int Low, int High> : AsmOperandClass { + let Name = "Imm" # Low # "_" # High; + let DiagnosticType = "InvalidImm" # Low # "_" # High; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isImmInRange<" # Low # "," # High # ">"; +} + +def Imm1_8Operand : AsmImmRange<1, 8>; +def Imm1_16Operand : AsmImmRange<1, 16>; +def Imm1_32Operand : AsmImmRange<1, 32>; +def Imm1_64Operand : AsmImmRange<1, 64>; + +class BranchTarget<int N> : AsmOperandClass { + let Name = "BranchTarget" # N; + let DiagnosticType = "InvalidLabel"; + let PredicateMethod = "isBranchTarget<" # N # ">"; +} + +class PCRelLabel<int N> : BranchTarget<N> { + let Name = "PCRelLabel" # N; +} + +def BranchTarget14Operand : BranchTarget<14>; +def BranchTarget26Operand : BranchTarget<26>; +def PCRelLabel19Operand : PCRelLabel<19>; + +def MovZSymbolG3AsmOperand : AsmOperandClass { + let Name = "MovZSymbolG3"; + let RenderMethod = "addImmOperands"; +} + +def movz_symbol_g3 : Operand<i32> { + let ParserMatchClass = MovZSymbolG3AsmOperand; +} + +def MovZSymbolG2AsmOperand : AsmOperandClass { + let Name = "MovZSymbolG2"; + let RenderMethod = "addImmOperands"; +} + +def movz_symbol_g2 : Operand<i32> { + let ParserMatchClass = MovZSymbolG2AsmOperand; +} + +def MovZSymbolG1AsmOperand : AsmOperandClass { + let Name = "MovZSymbolG1"; + let RenderMethod = "addImmOperands"; +} + +def movz_symbol_g1 : Operand<i32> { + let ParserMatchClass = MovZSymbolG1AsmOperand; +} + +def MovZSymbolG0AsmOperand : AsmOperandClass { + let Name = "MovZSymbolG0"; + let RenderMethod = "addImmOperands"; +} + +def movz_symbol_g0 : Operand<i32> { + let ParserMatchClass = MovZSymbolG0AsmOperand; +} + +def MovKSymbolG3AsmOperand : AsmOperandClass { + let Name = "MovKSymbolG3"; + let RenderMethod = "addImmOperands"; +} + +def movk_symbol_g3 : Operand<i32> { + let ParserMatchClass = MovKSymbolG3AsmOperand; +} + +def MovKSymbolG2AsmOperand : AsmOperandClass { + let Name = "MovKSymbolG2"; + let RenderMethod = "addImmOperands"; +} + +def movk_symbol_g2 : Operand<i32> { + let ParserMatchClass = MovKSymbolG2AsmOperand; +} + +def MovKSymbolG1AsmOperand : AsmOperandClass { + let Name = "MovKSymbolG1"; + let RenderMethod = 
"addImmOperands"; +} + +def movk_symbol_g1 : Operand<i32> { + let ParserMatchClass = MovKSymbolG1AsmOperand; +} + +def MovKSymbolG0AsmOperand : AsmOperandClass { + let Name = "MovKSymbolG0"; + let RenderMethod = "addImmOperands"; +} + +def movk_symbol_g0 : Operand<i32> { + let ParserMatchClass = MovKSymbolG0AsmOperand; +} + +class fixedpoint_i32<ValueType FloatVT> + : Operand<FloatVT>, + ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<32>", [fpimm, ld]> { + let EncoderMethod = "getFixedPointScaleOpValue"; + let DecoderMethod = "DecodeFixedPointScaleImm32"; + let ParserMatchClass = Imm1_32Operand; +} + +class fixedpoint_i64<ValueType FloatVT> + : Operand<FloatVT>, + ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<64>", [fpimm, ld]> { + let EncoderMethod = "getFixedPointScaleOpValue"; + let DecoderMethod = "DecodeFixedPointScaleImm64"; + let ParserMatchClass = Imm1_64Operand; +} + +def fixedpoint_f16_i32 : fixedpoint_i32<f16>; +def fixedpoint_f32_i32 : fixedpoint_i32<f32>; +def fixedpoint_f64_i32 : fixedpoint_i32<f64>; + +def fixedpoint_f16_i64 : fixedpoint_i64<f16>; +def fixedpoint_f32_i64 : fixedpoint_i64<f32>; +def fixedpoint_f64_i64 : fixedpoint_i64<f64>; + +def vecshiftR8 : Operand<i32>, ImmLeaf<i32, [{ + return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9); +}]> { + let EncoderMethod = "getVecShiftR8OpValue"; + let DecoderMethod = "DecodeVecShiftR8Imm"; + let ParserMatchClass = Imm1_8Operand; +} +def vecshiftR16 : Operand<i32>, ImmLeaf<i32, [{ + return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17); +}]> { + let EncoderMethod = "getVecShiftR16OpValue"; + let DecoderMethod = "DecodeVecShiftR16Imm"; + let ParserMatchClass = Imm1_16Operand; +} +def vecshiftR16Narrow : Operand<i32>, ImmLeaf<i32, [{ + return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9); +}]> { + let EncoderMethod = "getVecShiftR16OpValue"; + let DecoderMethod = "DecodeVecShiftR16ImmNarrow"; + let ParserMatchClass = Imm1_8Operand; +} +def vecshiftR32 : Operand<i32>, ImmLeaf<i32, [{ + return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33); +}]> { + let EncoderMethod = "getVecShiftR32OpValue"; + let DecoderMethod = "DecodeVecShiftR32Imm"; + let ParserMatchClass = Imm1_32Operand; +} +def vecshiftR32Narrow : Operand<i32>, ImmLeaf<i32, [{ + return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17); +}]> { + let EncoderMethod = "getVecShiftR32OpValue"; + let DecoderMethod = "DecodeVecShiftR32ImmNarrow"; + let ParserMatchClass = Imm1_16Operand; +} +def vecshiftR64 : Operand<i32>, ImmLeaf<i32, [{ + return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 65); +}]> { + let EncoderMethod = "getVecShiftR64OpValue"; + let DecoderMethod = "DecodeVecShiftR64Imm"; + let ParserMatchClass = Imm1_64Operand; +} +def vecshiftR64Narrow : Operand<i32>, ImmLeaf<i32, [{ + return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33); +}]> { + let EncoderMethod = "getVecShiftR64OpValue"; + let DecoderMethod = "DecodeVecShiftR64ImmNarrow"; + let ParserMatchClass = Imm1_32Operand; +} + +def Imm0_1Operand : AsmImmRange<0, 1>; +def Imm0_7Operand : AsmImmRange<0, 7>; +def Imm0_15Operand : AsmImmRange<0, 15>; +def Imm0_31Operand : AsmImmRange<0, 31>; +def Imm0_63Operand : AsmImmRange<0, 63>; + +def vecshiftL8 : Operand<i32>, ImmLeaf<i32, [{ + return (((uint32_t)Imm) < 8); +}]> { + let EncoderMethod = "getVecShiftL8OpValue"; + let DecoderMethod = "DecodeVecShiftL8Imm"; + let ParserMatchClass = Imm0_7Operand; +} +def vecshiftL16 : Operand<i32>, ImmLeaf<i32, [{ + return (((uint32_t)Imm) < 16); +}]> { + let EncoderMethod = "getVecShiftL16OpValue"; + let 
DecoderMethod = "DecodeVecShiftL16Imm"; + let ParserMatchClass = Imm0_15Operand; +} +def vecshiftL32 : Operand<i32>, ImmLeaf<i32, [{ + return (((uint32_t)Imm) < 32); +}]> { + let EncoderMethod = "getVecShiftL32OpValue"; + let DecoderMethod = "DecodeVecShiftL32Imm"; + let ParserMatchClass = Imm0_31Operand; +} +def vecshiftL64 : Operand<i32>, ImmLeaf<i32, [{ + return (((uint32_t)Imm) < 64); +}]> { + let EncoderMethod = "getVecShiftL64OpValue"; + let DecoderMethod = "DecodeVecShiftL64Imm"; + let ParserMatchClass = Imm0_63Operand; +} + + +// Crazy immediate formats used by 32-bit and 64-bit logical immediate +// instructions for splatting repeating bit patterns across the immediate. +def logical_imm32_XFORM : SDNodeXForm<imm, [{ + uint64_t enc = AArch64_AM::encodeLogicalImmediate(N->getZExtValue(), 32); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); +}]>; +def logical_imm64_XFORM : SDNodeXForm<imm, [{ + uint64_t enc = AArch64_AM::encodeLogicalImmediate(N->getZExtValue(), 64); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); +}]>; + +let DiagnosticType = "LogicalSecondSource" in { + def LogicalImm32Operand : AsmOperandClass { + let Name = "LogicalImm32"; + let PredicateMethod = "isLogicalImm<int32_t>"; + let RenderMethod = "addLogicalImmOperands<int32_t>"; + } + def LogicalImm64Operand : AsmOperandClass { + let Name = "LogicalImm64"; + let PredicateMethod = "isLogicalImm<int64_t>"; + let RenderMethod = "addLogicalImmOperands<int64_t>"; + } + def LogicalImm32NotOperand : AsmOperandClass { + let Name = "LogicalImm32Not"; + let PredicateMethod = "isLogicalImm<int32_t>"; + let RenderMethod = "addLogicalImmNotOperands<int32_t>"; + } + def LogicalImm64NotOperand : AsmOperandClass { + let Name = "LogicalImm64Not"; + let PredicateMethod = "isLogicalImm<int64_t>"; + let RenderMethod = "addLogicalImmNotOperands<int64_t>"; + } +} +def logical_imm32 : Operand<i32>, IntImmLeaf<i32, [{ + return AArch64_AM::isLogicalImmediate(Imm.getZExtValue(), 32); +}], logical_imm32_XFORM> { + let PrintMethod = "printLogicalImm<int32_t>"; + let ParserMatchClass = LogicalImm32Operand; +} +def logical_imm64 : Operand<i64>, IntImmLeaf<i64, [{ + return AArch64_AM::isLogicalImmediate(Imm.getZExtValue(), 64); +}], logical_imm64_XFORM> { + let PrintMethod = "printLogicalImm<int64_t>"; + let ParserMatchClass = LogicalImm64Operand; +} +def logical_imm32_not : Operand<i32> { + let ParserMatchClass = LogicalImm32NotOperand; +} +def logical_imm64_not : Operand<i64> { + let ParserMatchClass = LogicalImm64NotOperand; +} + +// imm0_65535 predicate - True if the immediate is in the range [0,65535]. +def Imm0_65535Operand : AsmImmRange<0, 65535>; +def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{ + return ((uint32_t)Imm) < 65536; +}]> { + let ParserMatchClass = Imm0_65535Operand; + let PrintMethod = "printImmHex"; +} + +// imm0_255 predicate - True if the immediate is in the range [0,255]. +def Imm0_255Operand : AsmImmRange<0,255>; + +def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ + return ((uint32_t)Imm) < 256; +}]> { + let ParserMatchClass = Imm0_255Operand; + let PrintMethod = "printImm"; +} + +// imm0_127 predicate - True if the immediate is in the range [0,127] +def Imm0_127Operand : AsmImmRange<0, 127>; +def imm0_127 : Operand<i32>, ImmLeaf<i32, [{ + return ((uint32_t)Imm) < 128; +}]> { + let ParserMatchClass = Imm0_127Operand; + let PrintMethod = "printImm"; +} + +// NOTE: These imm0_N operands have to be of type i64 because i64 is the size +// for all shift-amounts. 
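+// (For example, a DAG node such as (shl GPR64:$Rn, imm0_63:$imm) carries its
+// shift amount as i64, so defining these operands as i32 would make the
+// ImmLeaf type mismatch the selection patterns that use them.)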
+ +// imm0_63 predicate - True if the immediate is in the range [0,63] +def imm0_63 : Operand<i64>, ImmLeaf<i64, [{ + return ((uint64_t)Imm) < 64; +}]> { + let ParserMatchClass = Imm0_63Operand; +} + +// imm0_31 predicate - True if the immediate is in the range [0,31] +def imm0_31 : Operand<i64>, ImmLeaf<i64, [{ + return ((uint64_t)Imm) < 32; +}]> { + let ParserMatchClass = Imm0_31Operand; +} + +// True if the 32-bit immediate is in the range [0,31] +def imm32_0_31 : Operand<i32>, ImmLeaf<i32, [{ + return ((uint64_t)Imm) < 32; +}]> { + let ParserMatchClass = Imm0_31Operand; +} + +// imm0_1 predicate - True if the immediate is in the range [0,1] +def imm0_1 : Operand<i64>, ImmLeaf<i64, [{ + return ((uint64_t)Imm) < 2; +}]> { + let ParserMatchClass = Imm0_1Operand; +} + +// imm0_15 predicate - True if the immediate is in the range [0,15] +def imm0_15 : Operand<i64>, ImmLeaf<i64, [{ + return ((uint64_t)Imm) < 16; +}]> { + let ParserMatchClass = Imm0_15Operand; +} + +// imm0_7 predicate - True if the immediate is in the range [0,7] +def imm0_7 : Operand<i64>, ImmLeaf<i64, [{ + return ((uint64_t)Imm) < 8; +}]> { + let ParserMatchClass = Imm0_7Operand; +} + +// imm32_0_15 predicate - True if the 32-bit immediate is in the range [0,15] +def imm32_0_15 : Operand<i32>, ImmLeaf<i32, [{ + return ((uint32_t)Imm) < 16; +}]> { + let ParserMatchClass = Imm0_15Operand; +} + +// An arithmetic shifter operand: +// {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr +// {5-0} - imm6 +class arith_shift<ValueType Ty, int width> : Operand<Ty> { + let PrintMethod = "printShifter"; + let ParserMatchClass = !cast<AsmOperandClass>( + "ArithmeticShifterOperand" # width); +} + +def arith_shift32 : arith_shift<i32, 32>; +def arith_shift64 : arith_shift<i64, 64>; + +class arith_shifted_reg<ValueType Ty, RegisterClass regclass, int width> + : Operand<Ty>, + ComplexPattern<Ty, 2, "SelectArithShiftedRegister", []> { + let PrintMethod = "printShiftedRegister"; + let MIOperandInfo = (ops regclass, !cast<Operand>("arith_shift" # width)); +} + +def arith_shifted_reg32 : arith_shifted_reg<i32, GPR32, 32>; +def arith_shifted_reg64 : arith_shifted_reg<i64, GPR64, 64>; + +// An arithmetic shifter operand: +// {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr, 11 = ror +// {5-0} - imm6 +class logical_shift<int width> : Operand<i32> { + let PrintMethod = "printShifter"; + let ParserMatchClass = !cast<AsmOperandClass>( + "LogicalShifterOperand" # width); +} + +def logical_shift32 : logical_shift<32>; +def logical_shift64 : logical_shift<64>; + +class logical_shifted_reg<ValueType Ty, RegisterClass regclass, Operand shiftop> + : Operand<Ty>, + ComplexPattern<Ty, 2, "SelectLogicalShiftedRegister", []> { + let PrintMethod = "printShiftedRegister"; + let MIOperandInfo = (ops regclass, shiftop); +} + +def logical_shifted_reg32 : logical_shifted_reg<i32, GPR32, logical_shift32>; +def logical_shifted_reg64 : logical_shifted_reg<i64, GPR64, logical_shift64>; + +// A logical vector shifter operand: +// {7-6} - shift type: 00 = lsl +// {5-0} - imm6: #0, #8, #16, or #24 +def logical_vec_shift : Operand<i32> { + let PrintMethod = "printShifter"; + let EncoderMethod = "getVecShifterOpValue"; + let ParserMatchClass = LogicalVecShifterOperand; +} + +// A logical vector half-word shifter operand: +// {7-6} - shift type: 00 = lsl +// {5-0} - imm6: #0 or #8 +def logical_vec_hw_shift : Operand<i32> { + let PrintMethod = "printShifter"; + let EncoderMethod = "getVecShifterOpValue"; + let ParserMatchClass = LogicalVecHalfWordShifterOperand; +} + +// A 
vector move shifter operand: +// {0} - imm1: #8 or #16 +def move_vec_shift : Operand<i32> { + let PrintMethod = "printShifter"; + let EncoderMethod = "getMoveVecShifterOpValue"; + let ParserMatchClass = MoveVecShifterOperand; +} + +let DiagnosticType = "AddSubSecondSource" in { + def AddSubImmOperand : AsmOperandClass { + let Name = "AddSubImm"; + let ParserMethod = "tryParseImmWithOptionalShift"; + let RenderMethod = "addImmWithOptionalShiftOperands<12>"; + } + def AddSubImmNegOperand : AsmOperandClass { + let Name = "AddSubImmNeg"; + let ParserMethod = "tryParseImmWithOptionalShift"; + let RenderMethod = "addImmNegWithOptionalShiftOperands<12>"; + } +} +// An ADD/SUB immediate shifter operand: +// second operand: +// {7-6} - shift type: 00 = lsl +// {5-0} - imm6: #0 or #12 +class addsub_shifted_imm<ValueType Ty> + : Operand<Ty>, ComplexPattern<Ty, 2, "SelectArithImmed", [imm]> { + let PrintMethod = "printAddSubImm"; + let EncoderMethod = "getAddSubImmOpValue"; + let ParserMatchClass = AddSubImmOperand; + let MIOperandInfo = (ops i32imm, i32imm); +} + +class addsub_shifted_imm_neg<ValueType Ty> + : Operand<Ty> { + let EncoderMethod = "getAddSubImmOpValue"; + let ParserMatchClass = AddSubImmNegOperand; + let MIOperandInfo = (ops i32imm, i32imm); +} + +def addsub_shifted_imm32 : addsub_shifted_imm<i32>; +def addsub_shifted_imm64 : addsub_shifted_imm<i64>; +def addsub_shifted_imm32_neg : addsub_shifted_imm_neg<i32>; +def addsub_shifted_imm64_neg : addsub_shifted_imm_neg<i64>; + +def gi_addsub_shifted_imm32 : + GIComplexOperandMatcher<s32, "selectArithImmed">, + GIComplexPatternEquiv<addsub_shifted_imm32>; + +def gi_addsub_shifted_imm64 : + GIComplexOperandMatcher<s64, "selectArithImmed">, + GIComplexPatternEquiv<addsub_shifted_imm64>; + +class neg_addsub_shifted_imm<ValueType Ty> + : Operand<Ty>, ComplexPattern<Ty, 2, "SelectNegArithImmed", [imm]> { + let PrintMethod = "printAddSubImm"; + let EncoderMethod = "getAddSubImmOpValue"; + let ParserMatchClass = AddSubImmOperand; + let MIOperandInfo = (ops i32imm, i32imm); +} + +def neg_addsub_shifted_imm32 : neg_addsub_shifted_imm<i32>; +def neg_addsub_shifted_imm64 : neg_addsub_shifted_imm<i64>; + +// An extend operand: +// {5-3} - extend type +// {2-0} - imm3 +def arith_extend : Operand<i32> { + let PrintMethod = "printArithExtend"; + let ParserMatchClass = ExtendOperand; +} +def arith_extend64 : Operand<i32> { + let PrintMethod = "printArithExtend"; + let ParserMatchClass = ExtendOperand64; +} + +// 'extend' that's a lsl of a 64-bit register. +def arith_extendlsl64 : Operand<i32> { + let PrintMethod = "printArithExtend"; + let ParserMatchClass = ExtendOperandLSL64; +} + +class arith_extended_reg32<ValueType Ty> : Operand<Ty>, + ComplexPattern<Ty, 2, "SelectArithExtendedRegister", []> { + let PrintMethod = "printExtendedRegister"; + let MIOperandInfo = (ops GPR32, arith_extend); +} + +class arith_extended_reg32to64<ValueType Ty> : Operand<Ty>, + ComplexPattern<Ty, 2, "SelectArithExtendedRegister", []> { + let PrintMethod = "printExtendedRegister"; + let MIOperandInfo = (ops GPR32, arith_extend64); +} + +// Floating-point immediate. 
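+// For illustration: the 8-bit FP immediate abcdefgh expands to
+// (-1)^a * (1 + efgh/16) * 2^n with n in [-3, 4], so only a small set of
+// values is representable; e.g. #1.0 encodes as imm8 = 0x70 and #2.0 as 0x00.
+// The getFP16/32/64Imm helpers used below return -1 for anything else.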
+def fpimm16 : Operand<f16>, + FPImmLeaf<f16, [{ + return AArch64_AM::getFP16Imm(Imm) != -1; + }], SDNodeXForm<fpimm, [{ + APFloat InVal = N->getValueAPF(); + uint32_t enc = AArch64_AM::getFP16Imm(InVal); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); + }]>> { + let ParserMatchClass = FPImmOperand; + let PrintMethod = "printFPImmOperand"; +} +def fpimm32 : Operand<f32>, + FPImmLeaf<f32, [{ + return AArch64_AM::getFP32Imm(Imm) != -1; + }], SDNodeXForm<fpimm, [{ + APFloat InVal = N->getValueAPF(); + uint32_t enc = AArch64_AM::getFP32Imm(InVal); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); + }]>> { + let ParserMatchClass = FPImmOperand; + let PrintMethod = "printFPImmOperand"; +} +def fpimm64 : Operand<f64>, + FPImmLeaf<f64, [{ + return AArch64_AM::getFP64Imm(Imm) != -1; + }], SDNodeXForm<fpimm, [{ + APFloat InVal = N->getValueAPF(); + uint32_t enc = AArch64_AM::getFP64Imm(InVal); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); + }]>> { + let ParserMatchClass = FPImmOperand; + let PrintMethod = "printFPImmOperand"; +} + +def fpimm8 : Operand<i32> { + let ParserMatchClass = FPImmOperand; + let PrintMethod = "printFPImmOperand"; +} + +def fpimm0 : FPImmLeaf<fAny, [{ + return Imm.isExactlyValue(+0.0); +}]>; + +// Vector lane operands +class AsmVectorIndex<int Min, int Max, string NamePrefix=""> : AsmOperandClass { + let Name = NamePrefix # "IndexRange" # Min # "_" # Max; + let DiagnosticType = "Invalid" # Name; + let PredicateMethod = "isVectorIndex<" # Min # ", " # Max # ">"; + let RenderMethod = "addVectorIndexOperands"; +} + +class AsmVectorIndexOpnd<AsmOperandClass mc, code pred> + : Operand<i64>, ImmLeaf<i64, pred> { + let ParserMatchClass = mc; + let PrintMethod = "printVectorIndex"; +} + +def VectorIndex1Operand : AsmVectorIndex<1, 1>; +def VectorIndexBOperand : AsmVectorIndex<0, 15>; +def VectorIndexHOperand : AsmVectorIndex<0, 7>; +def VectorIndexSOperand : AsmVectorIndex<0, 3>; +def VectorIndexDOperand : AsmVectorIndex<0, 1>; + +def VectorIndex1 : AsmVectorIndexOpnd<VectorIndex1Operand, [{ return ((uint64_t)Imm) == 1; }]>; +def VectorIndexB : AsmVectorIndexOpnd<VectorIndexBOperand, [{ return ((uint64_t)Imm) < 16; }]>; +def VectorIndexH : AsmVectorIndexOpnd<VectorIndexHOperand, [{ return ((uint64_t)Imm) < 8; }]>; +def VectorIndexS : AsmVectorIndexOpnd<VectorIndexSOperand, [{ return ((uint64_t)Imm) < 4; }]>; +def VectorIndexD : AsmVectorIndexOpnd<VectorIndexDOperand, [{ return ((uint64_t)Imm) < 2; }]>; + +def SVEVectorIndexExtDupBOperand : AsmVectorIndex<0, 63, "SVE">; +def SVEVectorIndexExtDupHOperand : AsmVectorIndex<0, 31, "SVE">; +def SVEVectorIndexExtDupSOperand : AsmVectorIndex<0, 15, "SVE">; +def SVEVectorIndexExtDupDOperand : AsmVectorIndex<0, 7, "SVE">; +def SVEVectorIndexExtDupQOperand : AsmVectorIndex<0, 3, "SVE">; + +def sve_elm_idx_extdup_b + : AsmVectorIndexOpnd<SVEVectorIndexExtDupBOperand, [{ return ((uint64_t)Imm) < 64; }]>; +def sve_elm_idx_extdup_h + : AsmVectorIndexOpnd<SVEVectorIndexExtDupHOperand, [{ return ((uint64_t)Imm) < 32; }]>; +def sve_elm_idx_extdup_s + : AsmVectorIndexOpnd<SVEVectorIndexExtDupSOperand, [{ return ((uint64_t)Imm) < 16; }]>; +def sve_elm_idx_extdup_d + : AsmVectorIndexOpnd<SVEVectorIndexExtDupDOperand, [{ return ((uint64_t)Imm) < 8; }]>; +def sve_elm_idx_extdup_q + : AsmVectorIndexOpnd<SVEVectorIndexExtDupQOperand, [{ return ((uint64_t)Imm) < 4; }]>; + +// 8-bit immediate for AdvSIMD where 64-bit values of the form: +// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh +// are 
encoded as the eight bit value 'abcdefgh'. +def simdimmtype10 : Operand<i32>, + FPImmLeaf<f64, [{ + return AArch64_AM::isAdvSIMDModImmType10( + Imm.bitcastToAPInt().getZExtValue()); + }], SDNodeXForm<fpimm, [{ + APFloat InVal = N->getValueAPF(); + uint32_t enc = AArch64_AM::encodeAdvSIMDModImmType10(N->getValueAPF() + .bitcastToAPInt() + .getZExtValue()); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); + }]>> { + let ParserMatchClass = SIMDImmType10Operand; + let PrintMethod = "printSIMDType10Operand"; +} + + +//--- +// System management +//--- + +// Base encoding for system instruction operands. +let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in +class BaseSystemI<bit L, dag oops, dag iops, string asm, string operands, + list<dag> pattern = []> + : I<oops, iops, asm, operands, "", pattern> { + let Inst{31-22} = 0b1101010100; + let Inst{21} = L; +} + +// System instructions which do not have an Rt register. +class SimpleSystemI<bit L, dag iops, string asm, string operands, + list<dag> pattern = []> + : BaseSystemI<L, (outs), iops, asm, operands, pattern> { + let Inst{4-0} = 0b11111; +} + +// System instructions which have an Rt register. +class RtSystemI<bit L, dag oops, dag iops, string asm, string operands> + : BaseSystemI<L, oops, iops, asm, operands>, + Sched<[WriteSys]> { + bits<5> Rt; + let Inst{4-0} = Rt; +} + +// Hint instructions that take both a CRm and a 3-bit immediate. +// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot +// model patterns with sufficiently fine granularity +let mayStore = 1, mayLoad = 1, hasSideEffects = 1 in + class HintI<string mnemonic> + : SimpleSystemI<0, (ins imm0_127:$imm), mnemonic#"\t$imm", "", + [(int_aarch64_hint imm0_127:$imm)]>, + Sched<[WriteHint]> { + bits <7> imm; + let Inst{20-12} = 0b000110010; + let Inst{11-5} = imm; + } + +// System instructions taking a single literal operand which encodes into +// CRm. op2 differentiates the opcodes. +def BarrierAsmOperand : AsmOperandClass { + let Name = "Barrier"; + let ParserMethod = "tryParseBarrierOperand"; +} +def barrier_op : Operand<i32> { + let PrintMethod = "printBarrierOption"; + let ParserMatchClass = BarrierAsmOperand; +} +class CRmSystemI<Operand crmtype, bits<3> opc, string asm, + list<dag> pattern = []> + : SimpleSystemI<0, (ins crmtype:$CRm), asm, "\t$CRm", pattern>, + Sched<[WriteBarrier]> { + bits<4> CRm; + let Inst{20-12} = 0b000110011; + let Inst{11-8} = CRm; + let Inst{7-5} = opc; +} + +class SystemNoOperands<bits<3> op2, string asm, list<dag> pattern = []> + : SimpleSystemI<0, (ins), asm, "", pattern>, + Sched<[]> { + bits<4> CRm; + let CRm = 0b0011; + let Inst{31-12} = 0b11010101000000110010; + let Inst{11-8} = CRm; + let Inst{7-5} = op2; + let Inst{4-0} = 0b11111; +} + +// MRS/MSR system instructions. These have different operand classes because +// a different subset of registers can be accessed through each instruction. +def MRSSystemRegisterOperand : AsmOperandClass { + let Name = "MRSSystemRegister"; + let ParserMethod = "tryParseSysReg"; + let DiagnosticType = "MRS"; +} +// concatenation of op0, op1, CRn, CRm, op2. 16-bit immediate. 
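+// For illustration: the fields are packed as op0:op1:CRn:CRm:op2
+// (2+3+4+4+3 bits); NZCV, i.e. S3_3_C4_C2_0, packs to
+// 0b11_011_0100_0010_000 = 0xDA10.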
+def mrs_sysreg_op : Operand<i32> { + let ParserMatchClass = MRSSystemRegisterOperand; + let DecoderMethod = "DecodeMRSSystemRegister"; + let PrintMethod = "printMRSSystemRegister"; +} + +def MSRSystemRegisterOperand : AsmOperandClass { + let Name = "MSRSystemRegister"; + let ParserMethod = "tryParseSysReg"; + let DiagnosticType = "MSR"; +} +def msr_sysreg_op : Operand<i32> { + let ParserMatchClass = MSRSystemRegisterOperand; + let DecoderMethod = "DecodeMSRSystemRegister"; + let PrintMethod = "printMSRSystemRegister"; +} + +def PSBHintOperand : AsmOperandClass { + let Name = "PSBHint"; + let ParserMethod = "tryParsePSBHint"; +} +def psbhint_op : Operand<i32> { + let ParserMatchClass = PSBHintOperand; + let PrintMethod = "printPSBHintOp"; + let MCOperandPredicate = [{ + // Check, if operand is valid, to fix exhaustive aliasing in disassembly. + // "psb" is an alias to "hint" only for certain values of CRm:Op2 fields. + if (!MCOp.isImm()) + return false; + return AArch64PSBHint::lookupPSBByEncoding(MCOp.getImm()) != nullptr; + }]; +} + +class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins mrs_sysreg_op:$systemreg), + "mrs", "\t$Rt, $systemreg"> { + bits<16> systemreg; + let Inst{20-5} = systemreg; +} + +// FIXME: Some of these def NZCV, others don't. Best way to model that? +// Explicitly modeling each of the system register as a register class +// would do it, but feels like overkill at this point. +class MSRI : RtSystemI<0, (outs), (ins msr_sysreg_op:$systemreg, GPR64:$Rt), + "msr", "\t$systemreg, $Rt"> { + bits<16> systemreg; + let Inst{20-5} = systemreg; +} + +def SystemPStateFieldWithImm0_15Operand : AsmOperandClass { + let Name = "SystemPStateFieldWithImm0_15"; + let ParserMethod = "tryParseSysReg"; +} +def pstatefield4_op : Operand<i32> { + let ParserMatchClass = SystemPStateFieldWithImm0_15Operand; + let PrintMethod = "printSystemPStateField"; +} + +let Defs = [NZCV] in +class MSRpstateImm0_15 + : SimpleSystemI<0, (ins pstatefield4_op:$pstatefield, imm0_15:$imm), + "msr", "\t$pstatefield, $imm">, + Sched<[WriteSys]> { + bits<6> pstatefield; + bits<4> imm; + let Inst{20-19} = 0b00; + let Inst{18-16} = pstatefield{5-3}; + let Inst{15-12} = 0b0100; + let Inst{11-8} = imm; + let Inst{7-5} = pstatefield{2-0}; + + let DecoderMethod = "DecodeSystemPStateInstruction"; + // MSRpstateI aliases with MSRI. When the MSRpstateI decoder method returns + // Fail the decoder should attempt to decode the instruction as MSRI. + let hasCompleteDecoder = 0; +} + +def SystemPStateFieldWithImm0_1Operand : AsmOperandClass { + let Name = "SystemPStateFieldWithImm0_1"; + let ParserMethod = "tryParseSysReg"; +} +def pstatefield1_op : Operand<i32> { + let ParserMatchClass = SystemPStateFieldWithImm0_1Operand; + let PrintMethod = "printSystemPStateField"; +} + +let Defs = [NZCV] in +class MSRpstateImm0_1 + : SimpleSystemI<0, (ins pstatefield1_op:$pstatefield, imm0_1:$imm), + "msr", "\t$pstatefield, $imm">, + Sched<[WriteSys]> { + bits<6> pstatefield; + bit imm; + let Inst{20-19} = 0b00; + let Inst{18-16} = pstatefield{5-3}; + let Inst{15-9} = 0b0100000; + let Inst{8} = imm; + let Inst{7-5} = pstatefield{2-0}; + + let DecoderMethod = "DecodeSystemPStateInstruction"; + // MSRpstateI aliases with MSRI. When the MSRpstateI decoder method returns + // Fail the decoder should attempt to decode the instruction as MSRI. + let hasCompleteDecoder = 0; +} + +// SYS and SYSL generic system instructions. 
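+// For illustration: SYS takes op1, Cn, Cm, op2 and an optional Xt, and most
+// cache/TLB maintenance mnemonics are aliases onto it, e.g. "ic iallu" is
+// "sys #0, c7, c5, #0" and "dc civac, x0" is "sys #3, c7, c14, #1, x0".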
+def SysCRAsmOperand : AsmOperandClass { + let Name = "SysCR"; + let ParserMethod = "tryParseSysCROperand"; +} + +def sys_cr_op : Operand<i32> { + let PrintMethod = "printSysCROperand"; + let ParserMatchClass = SysCRAsmOperand; +} + +class SystemXtI<bit L, string asm> + : RtSystemI<L, (outs), + (ins imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, GPR64:$Rt), + asm, "\t$op1, $Cn, $Cm, $op2, $Rt"> { + bits<3> op1; + bits<4> Cn; + bits<4> Cm; + bits<3> op2; + let Inst{20-19} = 0b01; + let Inst{18-16} = op1; + let Inst{15-12} = Cn; + let Inst{11-8} = Cm; + let Inst{7-5} = op2; +} + +class SystemLXtI<bit L, string asm> + : RtSystemI<L, (outs), + (ins GPR64:$Rt, imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2), + asm, "\t$Rt, $op1, $Cn, $Cm, $op2"> { + bits<3> op1; + bits<4> Cn; + bits<4> Cm; + bits<3> op2; + let Inst{20-19} = 0b01; + let Inst{18-16} = op1; + let Inst{15-12} = Cn; + let Inst{11-8} = Cm; + let Inst{7-5} = op2; +} + + +// Branch (register) instructions: +// +// case opc of +// 0001 blr +// 0000 br +// 0101 dret +// 0100 eret +// 0010 ret +// otherwise UNDEFINED +class BaseBranchReg<bits<4> opc, dag oops, dag iops, string asm, + string operands, list<dag> pattern> + : I<oops, iops, asm, operands, "", pattern>, Sched<[WriteBrReg]> { + let Inst{31-25} = 0b1101011; + let Inst{24-21} = opc; + let Inst{20-16} = 0b11111; + let Inst{15-10} = 0b000000; + let Inst{4-0} = 0b00000; +} + +class BranchReg<bits<4> opc, string asm, list<dag> pattern> + : BaseBranchReg<opc, (outs), (ins GPR64:$Rn), asm, "\t$Rn", pattern> { + bits<5> Rn; + let Inst{9-5} = Rn; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 1, isReturn = 1 in +class SpecialReturn<bits<4> opc, string asm> + : BaseBranchReg<opc, (outs), (ins), asm, "", []> { + let Inst{9-5} = 0b11111; +} + +let mayLoad = 1 in +class RCPCLoad<bits<2> sz, string asm, RegisterClass RC> + : I<(outs RC:$Rt), (ins GPR64sp0:$Rn), asm, "\t$Rt, [$Rn]", "", []>, + Sched<[]> { + bits<5> Rn; + bits<5> Rt; + let Inst{31-30} = sz; + let Inst{29-10} = 0b11100010111111110000; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +class AuthBase<bits<1> M, dag oops, dag iops, string asm, string operands, + list<dag> pattern> + : I<oops, iops, asm, operands, "", pattern>, Sched<[]> { + let Inst{31-25} = 0b1101011; + let Inst{20-11} = 0b1111100001; + let Inst{10} = M; + let Inst{4-0} = 0b11111; +} + +class AuthBranchTwoOperands<bits<1> op, bits<1> M, string asm> + : AuthBase<M, (outs), (ins GPR64:$Rn, GPR64sp:$Rm), asm, "\t$Rn, $Rm", []> { + bits<5> Rn; + bits<5> Rm; + let Inst{24-22} = 0b100; + let Inst{21} = op; + let Inst{9-5} = Rn; + let Inst{4-0} = Rm; +} + +class AuthOneOperand<bits<3> opc, bits<1> M, string asm> + : AuthBase<M, (outs), (ins GPR64:$Rn), asm, "\t$Rn", []> { + bits<5> Rn; + let Inst{24} = 0; + let Inst{23-21} = opc; + let Inst{9-5} = Rn; +} + +class AuthReturn<bits<3> op, bits<1> M, string asm> + : AuthBase<M, (outs), (ins), asm, "", []> { + let Inst{24} = 0; + let Inst{23-21} = op; + let Inst{9-0} = 0b1111111111; +} + +let mayLoad = 1 in +class BaseAuthLoad<bit M, bit W, dag oops, dag iops, string asm, + string operands, string cstr, Operand opr> + : I<oops, iops, asm, operands, cstr, []>, Sched<[]> { + bits<10> offset; + bits<5> Rn; + bits<5> Rt; + let Inst{31-24} = 0b11111000; + let Inst{23} = M; + let Inst{22} = offset{9}; + let Inst{21} = 1; + let Inst{20-12} = offset{8-0}; + let Inst{11} = W; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +multiclass AuthLoad<bit M, string asm, Operand opr> { + def indexed 
: BaseAuthLoad<M, 0, (outs GPR64:$Rt), + (ins GPR64sp:$Rn, opr:$offset), + asm, "\t$Rt, [$Rn, $offset]", "", opr>; + def writeback : BaseAuthLoad<M, 1, (outs GPR64sp:$wback, GPR64:$Rt), + (ins GPR64sp:$Rn, opr:$offset), + asm, "\t$Rt, [$Rn, $offset]!", + "$Rn = $wback,@earlyclobber $wback", opr>; + + def : InstAlias<asm # "\t$Rt, [$Rn]", + (!cast<Instruction>(NAME # "indexed") GPR64:$Rt, GPR64sp:$Rn, 0)>; +} + +//--- +// Conditional branch instruction. +//--- + +// Condition code. +// 4-bit immediate. Pretty-printed as <cc> +def ccode : Operand<i32> { + let PrintMethod = "printCondCode"; + let ParserMatchClass = CondCode; +} +def inv_ccode : Operand<i32> { + // AL and NV are invalid in the aliases which use inv_ccode + let PrintMethod = "printInverseCondCode"; + let ParserMatchClass = CondCode; + let MCOperandPredicate = [{ + return MCOp.isImm() && + MCOp.getImm() != AArch64CC::AL && + MCOp.getImm() != AArch64CC::NV; + }]; +} + +// Conditional branch target. 19-bit immediate. The low two bits of the target +// offset are implied zero and so are not part of the immediate. +def am_brcond : Operand<OtherVT> { + let EncoderMethod = "getCondBranchTargetOpValue"; + let DecoderMethod = "DecodePCRelLabel19"; + let PrintMethod = "printAlignedLabel"; + let ParserMatchClass = PCRelLabel19Operand; + let OperandType = "OPERAND_PCREL"; +} + +class BranchCond : I<(outs), (ins ccode:$cond, am_brcond:$target), + "b", ".$cond\t$target", "", + [(AArch64brcond bb:$target, imm:$cond, NZCV)]>, + Sched<[WriteBr]> { + let isBranch = 1; + let isTerminator = 1; + let Uses = [NZCV]; + + bits<4> cond; + bits<19> target; + let Inst{31-24} = 0b01010100; + let Inst{23-5} = target; + let Inst{4} = 0; + let Inst{3-0} = cond; +} + +//--- +// Compare-and-branch instructions. +//--- +class BaseCmpBranch<RegisterClass regtype, bit op, string asm, SDNode node> + : I<(outs), (ins regtype:$Rt, am_brcond:$target), + asm, "\t$Rt, $target", "", + [(node regtype:$Rt, bb:$target)]>, + Sched<[WriteBr]> { + let isBranch = 1; + let isTerminator = 1; + + bits<5> Rt; + bits<19> target; + let Inst{30-25} = 0b011010; + let Inst{24} = op; + let Inst{23-5} = target; + let Inst{4-0} = Rt; +} + +multiclass CmpBranch<bit op, string asm, SDNode node> { + def W : BaseCmpBranch<GPR32, op, asm, node> { + let Inst{31} = 0; + } + def X : BaseCmpBranch<GPR64, op, asm, node> { + let Inst{31} = 1; + } +} + +//--- +// Test-bit-and-branch instructions. +//--- +// Test-and-branch target. 14-bit sign-extended immediate. The low two bits of +// the target offset are implied zero and so are not part of the immediate. 
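+// For illustration: 14 signed bits scaled by 4 gives TBZ/TBNZ a reach of
+// -32768..+32764 bytes (about +/-32 KiB), versus roughly +/-1 MiB for the
+// 19-bit conditional-branch label above.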
+def am_tbrcond : Operand<OtherVT> { + let EncoderMethod = "getTestBranchTargetOpValue"; + let PrintMethod = "printAlignedLabel"; + let ParserMatchClass = BranchTarget14Operand; + let OperandType = "OPERAND_PCREL"; +} + +// AsmOperand classes to emit (or not) special diagnostics +def TBZImm0_31Operand : AsmOperandClass { + let Name = "TBZImm0_31"; + let PredicateMethod = "isImmInRange<0,31>"; + let RenderMethod = "addImmOperands"; +} +def TBZImm32_63Operand : AsmOperandClass { + let Name = "Imm32_63"; + let PredicateMethod = "isImmInRange<32,63>"; + let DiagnosticType = "InvalidImm0_63"; + let RenderMethod = "addImmOperands"; +} + +class tbz_imm0_31<AsmOperandClass matcher> : Operand<i64>, ImmLeaf<i64, [{ + return (((uint32_t)Imm) < 32); +}]> { + let ParserMatchClass = matcher; +} + +def tbz_imm0_31_diag : tbz_imm0_31<Imm0_31Operand>; +def tbz_imm0_31_nodiag : tbz_imm0_31<TBZImm0_31Operand>; + +def tbz_imm32_63 : Operand<i64>, ImmLeaf<i64, [{ + return (((uint32_t)Imm) > 31) && (((uint32_t)Imm) < 64); +}]> { + let ParserMatchClass = TBZImm32_63Operand; +} + +class BaseTestBranch<RegisterClass regtype, Operand immtype, + bit op, string asm, SDNode node> + : I<(outs), (ins regtype:$Rt, immtype:$bit_off, am_tbrcond:$target), + asm, "\t$Rt, $bit_off, $target", "", + [(node regtype:$Rt, immtype:$bit_off, bb:$target)]>, + Sched<[WriteBr]> { + let isBranch = 1; + let isTerminator = 1; + + bits<5> Rt; + bits<6> bit_off; + bits<14> target; + + let Inst{30-25} = 0b011011; + let Inst{24} = op; + let Inst{23-19} = bit_off{4-0}; + let Inst{18-5} = target; + let Inst{4-0} = Rt; + + let DecoderMethod = "DecodeTestAndBranch"; +} + +multiclass TestBranch<bit op, string asm, SDNode node> { + def W : BaseTestBranch<GPR32, tbz_imm0_31_diag, op, asm, node> { + let Inst{31} = 0; + } + + def X : BaseTestBranch<GPR64, tbz_imm32_63, op, asm, node> { + let Inst{31} = 1; + } + + // Alias X-reg with 0-31 imm to W-Reg. + def : InstAlias<asm # "\t$Rd, $imm, $target", + (!cast<Instruction>(NAME#"W") GPR32as64:$Rd, + tbz_imm0_31_nodiag:$imm, am_tbrcond:$target), 0>; + def : Pat<(node GPR64:$Rn, tbz_imm0_31_diag:$imm, bb:$target), + (!cast<Instruction>(NAME#"W") (EXTRACT_SUBREG GPR64:$Rn, sub_32), + tbz_imm0_31_diag:$imm, bb:$target)>; +} + +//--- +// Unconditional branch (immediate) instructions. +//--- +def am_b_target : Operand<OtherVT> { + let EncoderMethod = "getBranchTargetOpValue"; + let PrintMethod = "printAlignedLabel"; + let ParserMatchClass = BranchTarget26Operand; + let OperandType = "OPERAND_PCREL"; +} +def am_bl_target : Operand<i64> { + let EncoderMethod = "getBranchTargetOpValue"; + let PrintMethod = "printAlignedLabel"; + let ParserMatchClass = BranchTarget26Operand; + let OperandType = "OPERAND_PCREL"; +} + +class BImm<bit op, dag iops, string asm, list<dag> pattern> + : I<(outs), iops, asm, "\t$addr", "", pattern>, Sched<[WriteBr]> { + bits<26> addr; + let Inst{31} = op; + let Inst{30-26} = 0b00101; + let Inst{25-0} = addr; + + let DecoderMethod = "DecodeUnconditionalBranch"; +} + +class BranchImm<bit op, string asm, list<dag> pattern> + : BImm<op, (ins am_b_target:$addr), asm, pattern>; +class CallImm<bit op, string asm, list<dag> pattern> + : BImm<op, (ins am_bl_target:$addr), asm, pattern>; + +//--- +// Basic one-operand data processing instructions. 
+//--- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseOneOperandData<bits<3> opc, RegisterClass regtype, string asm, + SDPatternOperator node> + : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "\t$Rd, $Rn", "", + [(set regtype:$Rd, (node regtype:$Rn))]>, + Sched<[WriteI, ReadI]> { + bits<5> Rd; + bits<5> Rn; + + let Inst{30-13} = 0b101101011000000000; + let Inst{12-10} = opc; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +multiclass OneOperandData<bits<3> opc, string asm, + SDPatternOperator node = null_frag> { + def Wr : BaseOneOperandData<opc, GPR32, asm, node> { + let Inst{31} = 0; + } + + def Xr : BaseOneOperandData<opc, GPR64, asm, node> { + let Inst{31} = 1; + } +} + +class OneWRegData<bits<3> opc, string asm, SDPatternOperator node> + : BaseOneOperandData<opc, GPR32, asm, node> { + let Inst{31} = 0; +} + +class OneXRegData<bits<3> opc, string asm, SDPatternOperator node> + : BaseOneOperandData<opc, GPR64, asm, node> { + let Inst{31} = 1; +} + +class SignAuthOneData<bits<3> opcode_prefix, bits<2> opcode, string asm> + : I<(outs GPR64:$Rd), (ins GPR64sp:$Rn), asm, "\t$Rd, $Rn", "", + []>, + Sched<[WriteI, ReadI]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31-15} = 0b11011010110000010; + let Inst{14-12} = opcode_prefix; + let Inst{11-10} = opcode; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class SignAuthZero<bits<3> opcode_prefix, bits<2> opcode, string asm> + : I<(outs GPR64:$Rd), (ins), asm, "\t$Rd", "", []>, Sched<[]> { + bits<5> Rd; + let Inst{31-15} = 0b11011010110000010; + let Inst{14-12} = opcode_prefix; + let Inst{11-10} = opcode; + let Inst{9-5} = 0b11111; + let Inst{4-0} = Rd; +} + +class SignAuthTwoOperand<bits<4> opc, string asm, + SDPatternOperator OpNode> + : I<(outs GPR64:$Rd), (ins GPR64:$Rn, GPR64sp:$Rm), + asm, "\t$Rd, $Rn, $Rm", "", + [(set GPR64:$Rd, (OpNode GPR64:$Rn, GPR64sp:$Rm))]>, + Sched<[WriteI, ReadI, ReadI]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31-21} = 0b10011010110; + let Inst{20-16} = Rm; + let Inst{15-14} = 0b00; + let Inst{13-10} = opc; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +// Base class for the Armv8.4-A 8 and 16-bit flag manipulation instructions +class BaseFlagManipulation<bit sf, bit sz, dag iops, string asm, string ops> + : I<(outs), iops, asm, ops, "", []>, + Sched<[WriteI, ReadI, ReadI]> { + let Uses = [NZCV]; + bits<5> Rn; + let Inst{31} = sf; + let Inst{30-15} = 0b0111010000000000; + let Inst{14} = sz; + let Inst{13-10} = 0b0010; + let Inst{9-5} = Rn; + let Inst{4-0} = 0b01101; +} + +class FlagRotate<dag iops, string asm, string ops> + : BaseFlagManipulation<0b1, 0b0, iops, asm, ops> { + bits<6> imm; + bits<4> mask; + let Inst{20-15} = imm; + let Inst{13-10} = 0b0001; + let Inst{4} = 0b0; + let Inst{3-0} = mask; +} + +//--- +// Basic two-operand data processing instructions. 
+//--- +class BaseBaseAddSubCarry<bit isSub, RegisterClass regtype, string asm, + list<dag> pattern> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), + asm, "\t$Rd, $Rn, $Rm", "", pattern>, + Sched<[WriteI, ReadI, ReadI]> { + let Uses = [NZCV]; + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{30} = isSub; + let Inst{28-21} = 0b11010000; + let Inst{20-16} = Rm; + let Inst{15-10} = 0; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class BaseAddSubCarry<bit isSub, RegisterClass regtype, string asm, + SDNode OpNode> + : BaseBaseAddSubCarry<isSub, regtype, asm, + [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm, NZCV))]>; + +class BaseAddSubCarrySetFlags<bit isSub, RegisterClass regtype, string asm, + SDNode OpNode> + : BaseBaseAddSubCarry<isSub, regtype, asm, + [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm, NZCV)), + (implicit NZCV)]> { + let Defs = [NZCV]; +} + +multiclass AddSubCarry<bit isSub, string asm, string asm_setflags, + SDNode OpNode, SDNode OpNode_setflags> { + def Wr : BaseAddSubCarry<isSub, GPR32, asm, OpNode> { + let Inst{31} = 0; + let Inst{29} = 0; + } + def Xr : BaseAddSubCarry<isSub, GPR64, asm, OpNode> { + let Inst{31} = 1; + let Inst{29} = 0; + } + + // Sets flags. + def SWr : BaseAddSubCarrySetFlags<isSub, GPR32, asm_setflags, + OpNode_setflags> { + let Inst{31} = 0; + let Inst{29} = 1; + } + def SXr : BaseAddSubCarrySetFlags<isSub, GPR64, asm_setflags, + OpNode_setflags> { + let Inst{31} = 1; + let Inst{29} = 1; + } +} + +class BaseTwoOperand<bits<4> opc, RegisterClass regtype, string asm, + SDPatternOperator OpNode> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), + asm, "\t$Rd, $Rn, $Rm", "", + [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{30-21} = 0b0011010110; + let Inst{20-16} = Rm; + let Inst{15-14} = 0b00; + let Inst{13-10} = opc; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class BaseDiv<bit isSigned, RegisterClass regtype, string asm, + SDPatternOperator OpNode> + : BaseTwoOperand<{0,0,1,?}, regtype, asm, OpNode> { + let Inst{10} = isSigned; +} + +multiclass Div<bit isSigned, string asm, SDPatternOperator OpNode> { + def Wr : BaseDiv<isSigned, GPR32, asm, OpNode>, + Sched<[WriteID32, ReadID, ReadID]> { + let Inst{31} = 0; + } + def Xr : BaseDiv<isSigned, GPR64, asm, OpNode>, + Sched<[WriteID64, ReadID, ReadID]> { + let Inst{31} = 1; + } +} + +class BaseShift<bits<2> shift_type, RegisterClass regtype, string asm, + SDPatternOperator OpNode = null_frag> + : BaseTwoOperand<{1,0,?,?}, regtype, asm, OpNode>, + Sched<[WriteIS, ReadI]> { + let Inst{11-10} = shift_type; +} + +multiclass Shift<bits<2> shift_type, string asm, SDNode OpNode> { + def Wr : BaseShift<shift_type, GPR32, asm> { + let Inst{31} = 0; + } + + def Xr : BaseShift<shift_type, GPR64, asm, OpNode> { + let Inst{31} = 1; + } + + def : Pat<(i32 (OpNode GPR32:$Rn, i64:$Rm)), + (!cast<Instruction>(NAME # "Wr") GPR32:$Rn, + (EXTRACT_SUBREG i64:$Rm, sub_32))>; + + def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (zext GPR32:$Rm)))), + (!cast<Instruction>(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>; + + def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (anyext GPR32:$Rm)))), + (!cast<Instruction>(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>; + + def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (sext GPR32:$Rm)))), + (!cast<Instruction>(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>; +} + +class ShiftAlias<string asm, Instruction inst, RegisterClass regtype> + : InstAlias<asm#"\t$dst, $src1, $src2", + (inst regtype:$dst, regtype:$src1, regtype:$src2), 0>; + +class 
BaseMulAccum<bit isSub, bits<3> opc, RegisterClass multype, + RegisterClass addtype, string asm, + list<dag> pattern> + : I<(outs addtype:$Rd), (ins multype:$Rn, multype:$Rm, addtype:$Ra), + asm, "\t$Rd, $Rn, $Rm, $Ra", "", pattern> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<5> Ra; + let Inst{30-24} = 0b0011011; + let Inst{23-21} = opc; + let Inst{20-16} = Rm; + let Inst{15} = isSub; + let Inst{14-10} = Ra; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass MulAccum<bit isSub, string asm, SDNode AccNode> { + // MADD/MSUB generation is decided by MachineCombiner.cpp + def Wrrr : BaseMulAccum<isSub, 0b000, GPR32, GPR32, asm, + [/*(set GPR32:$Rd, (AccNode GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm)))*/]>, + Sched<[WriteIM32, ReadIM, ReadIM, ReadIMA]> { + let Inst{31} = 0; + } + + def Xrrr : BaseMulAccum<isSub, 0b000, GPR64, GPR64, asm, + [/*(set GPR64:$Rd, (AccNode GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm)))*/]>, + Sched<[WriteIM64, ReadIM, ReadIM, ReadIMA]> { + let Inst{31} = 1; + } +} + +class WideMulAccum<bit isSub, bits<3> opc, string asm, + SDNode AccNode, SDNode ExtNode> + : BaseMulAccum<isSub, opc, GPR32, GPR64, asm, + [(set GPR64:$Rd, (AccNode GPR64:$Ra, + (mul (ExtNode GPR32:$Rn), (ExtNode GPR32:$Rm))))]>, + Sched<[WriteIM32, ReadIM, ReadIM, ReadIMA]> { + let Inst{31} = 1; +} + +class MulHi<bits<3> opc, string asm, SDNode OpNode> + : I<(outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm), + asm, "\t$Rd, $Rn, $Rm", "", + [(set GPR64:$Rd, (OpNode GPR64:$Rn, GPR64:$Rm))]>, + Sched<[WriteIM64, ReadIM, ReadIM]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31-24} = 0b10011011; + let Inst{23-21} = opc; + let Inst{20-16} = Rm; + let Inst{15} = 0; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; + + // The Ra field of SMULH and UMULH is unused: it should be assembled as 31 + // (i.e. all bits 1) but is ignored by the processor. + let PostEncoderMethod = "fixMulHigh"; +} + +class MulAccumWAlias<string asm, Instruction inst> + : InstAlias<asm#"\t$dst, $src1, $src2", + (inst GPR32:$dst, GPR32:$src1, GPR32:$src2, WZR)>; +class MulAccumXAlias<string asm, Instruction inst> + : InstAlias<asm#"\t$dst, $src1, $src2", + (inst GPR64:$dst, GPR64:$src1, GPR64:$src2, XZR)>; +class WideMulAccumAlias<string asm, Instruction inst> + : InstAlias<asm#"\t$dst, $src1, $src2", + (inst GPR64:$dst, GPR32:$src1, GPR32:$src2, XZR)>; + +class BaseCRC32<bit sf, bits<2> sz, bit C, RegisterClass StreamReg, + SDPatternOperator OpNode, string asm> + : I<(outs GPR32:$Rd), (ins GPR32:$Rn, StreamReg:$Rm), + asm, "\t$Rd, $Rn, $Rm", "", + [(set GPR32:$Rd, (OpNode GPR32:$Rn, StreamReg:$Rm))]>, + Sched<[WriteISReg, ReadI, ReadISReg]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + + let Inst{31} = sf; + let Inst{30-21} = 0b0011010110; + let Inst{20-16} = Rm; + let Inst{15-13} = 0b010; + let Inst{12} = C; + let Inst{11-10} = sz; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; + let Predicates = [HasCRC]; +} + +//--- +// Address generation. +//--- + +class ADRI<bit page, string asm, Operand adr, list<dag> pattern> + : I<(outs GPR64:$Xd), (ins adr:$label), asm, "\t$Xd, $label", "", + pattern>, + Sched<[WriteI]> { + bits<5> Xd; + bits<21> label; + let Inst{31} = page; + let Inst{30-29} = label{1-0}; + let Inst{28-24} = 0b10000; + let Inst{23-5} = label{20-2}; + let Inst{4-0} = Xd; + + let DecoderMethod = "DecodeAdrInstruction"; +} + +//--- +// Move immediate. 
+//--- + +def movimm32_imm : Operand<i32> { + let ParserMatchClass = Imm0_65535Operand; + let EncoderMethod = "getMoveWideImmOpValue"; + let PrintMethod = "printImm"; +} +def movimm32_shift : Operand<i32> { + let PrintMethod = "printShifter"; + let ParserMatchClass = MovImm32ShifterOperand; +} +def movimm64_shift : Operand<i32> { + let PrintMethod = "printShifter"; + let ParserMatchClass = MovImm64ShifterOperand; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseMoveImmediate<bits<2> opc, RegisterClass regtype, Operand shifter, + string asm> + : I<(outs regtype:$Rd), (ins movimm32_imm:$imm, shifter:$shift), + asm, "\t$Rd, $imm$shift", "", []>, + Sched<[WriteImm]> { + bits<5> Rd; + bits<16> imm; + bits<6> shift; + let Inst{30-29} = opc; + let Inst{28-23} = 0b100101; + let Inst{22-21} = shift{5-4}; + let Inst{20-5} = imm; + let Inst{4-0} = Rd; + + let DecoderMethod = "DecodeMoveImmInstruction"; +} + +multiclass MoveImmediate<bits<2> opc, string asm> { + def Wi : BaseMoveImmediate<opc, GPR32, movimm32_shift, asm> { + let Inst{31} = 0; + } + + def Xi : BaseMoveImmediate<opc, GPR64, movimm64_shift, asm> { + let Inst{31} = 1; + } +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseInsertImmediate<bits<2> opc, RegisterClass regtype, Operand shifter, + string asm> + : I<(outs regtype:$Rd), + (ins regtype:$src, movimm32_imm:$imm, shifter:$shift), + asm, "\t$Rd, $imm$shift", "$src = $Rd", []>, + Sched<[WriteI, ReadI]> { + bits<5> Rd; + bits<16> imm; + bits<6> shift; + let Inst{30-29} = opc; + let Inst{28-23} = 0b100101; + let Inst{22-21} = shift{5-4}; + let Inst{20-5} = imm; + let Inst{4-0} = Rd; + + let DecoderMethod = "DecodeMoveImmInstruction"; +} + +multiclass InsertImmediate<bits<2> opc, string asm> { + def Wi : BaseInsertImmediate<opc, GPR32, movimm32_shift, asm> { + let Inst{31} = 0; + } + + def Xi : BaseInsertImmediate<opc, GPR64, movimm64_shift, asm> { + let Inst{31} = 1; + } +} + +//--- +// Add/Subtract +//--- + +class BaseAddSubImm<bit isSub, bit setFlags, RegisterClass dstRegtype, + RegisterClass srcRegtype, addsub_shifted_imm immtype, + string asm, SDPatternOperator OpNode> + : I<(outs dstRegtype:$Rd), (ins srcRegtype:$Rn, immtype:$imm), + asm, "\t$Rd, $Rn, $imm", "", + [(set dstRegtype:$Rd, (OpNode srcRegtype:$Rn, immtype:$imm))]>, + Sched<[WriteI, ReadI]> { + bits<5> Rd; + bits<5> Rn; + bits<14> imm; + let Inst{30} = isSub; + let Inst{29} = setFlags; + let Inst{28-24} = 0b10001; + let Inst{23-22} = imm{13-12}; // '00' => lsl #0, '01' => lsl #12 + let Inst{21-10} = imm{11-0}; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; + let DecoderMethod = "DecodeBaseAddSubImm"; +} + +class BaseAddSubRegPseudo<RegisterClass regtype, + SDPatternOperator OpNode> + : Pseudo<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), + [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]>, + Sched<[WriteI, ReadI, ReadI]>; + +class BaseAddSubSReg<bit isSub, bit setFlags, RegisterClass regtype, + arith_shifted_reg shifted_regtype, string asm, + SDPatternOperator OpNode> + : I<(outs regtype:$Rd), (ins regtype:$Rn, shifted_regtype:$Rm), + asm, "\t$Rd, $Rn, $Rm", "", + [(set regtype:$Rd, (OpNode regtype:$Rn, shifted_regtype:$Rm))]>, + Sched<[WriteISReg, ReadI, ReadISReg]> { + // The operands are in order to match the 'addr' MI operands, so we + // don't need an encoder method and by-name matching. Just use the default + // in-order handling. Since we're using by-order, make sure the names + // do not match. 
+ bits<5> dst; + bits<5> src1; + bits<5> src2; + bits<8> shift; + let Inst{30} = isSub; + let Inst{29} = setFlags; + let Inst{28-24} = 0b01011; + let Inst{23-22} = shift{7-6}; + let Inst{21} = 0; + let Inst{20-16} = src2; + let Inst{15-10} = shift{5-0}; + let Inst{9-5} = src1; + let Inst{4-0} = dst; + + let DecoderMethod = "DecodeThreeAddrSRegInstruction"; +} + +class BaseAddSubEReg<bit isSub, bit setFlags, RegisterClass dstRegtype, + RegisterClass src1Regtype, Operand src2Regtype, + string asm, SDPatternOperator OpNode> + : I<(outs dstRegtype:$R1), + (ins src1Regtype:$R2, src2Regtype:$R3), + asm, "\t$R1, $R2, $R3", "", + [(set dstRegtype:$R1, (OpNode src1Regtype:$R2, src2Regtype:$R3))]>, + Sched<[WriteIEReg, ReadI, ReadIEReg]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<6> ext; + let Inst{30} = isSub; + let Inst{29} = setFlags; + let Inst{28-24} = 0b01011; + let Inst{23-21} = 0b001; + let Inst{20-16} = Rm; + let Inst{15-13} = ext{5-3}; + let Inst{12-10} = ext{2-0}; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; + + let DecoderMethod = "DecodeAddSubERegInstruction"; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseAddSubEReg64<bit isSub, bit setFlags, RegisterClass dstRegtype, + RegisterClass src1Regtype, RegisterClass src2Regtype, + Operand ext_op, string asm> + : I<(outs dstRegtype:$Rd), + (ins src1Regtype:$Rn, src2Regtype:$Rm, ext_op:$ext), + asm, "\t$Rd, $Rn, $Rm$ext", "", []>, + Sched<[WriteIEReg, ReadI, ReadIEReg]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<6> ext; + let Inst{30} = isSub; + let Inst{29} = setFlags; + let Inst{28-24} = 0b01011; + let Inst{23-21} = 0b001; + let Inst{20-16} = Rm; + let Inst{15} = ext{5}; + let Inst{12-10} = ext{2-0}; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; + + let DecoderMethod = "DecodeAddSubERegInstruction"; +} + +// Aliases for register+register add/subtract. +class AddSubRegAlias<string asm, Instruction inst, RegisterClass dstRegtype, + RegisterClass src1Regtype, RegisterClass src2Regtype, + int shiftExt> + : InstAlias<asm#"\t$dst, $src1, $src2", + (inst dstRegtype:$dst, src1Regtype:$src1, src2Regtype:$src2, + shiftExt)>; + +multiclass AddSub<bit isSub, string mnemonic, string alias, + SDPatternOperator OpNode = null_frag> { + let hasSideEffects = 0, isReMaterializable = 1, isAsCheapAsAMove = 1 in { + // Add/Subtract immediate + // Increase the weight of the immediate variant to try to match it before + // the extended register variant. + // We used to match the register variant before the immediate when the + // register argument could be implicitly zero-extended. 
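+  // For example, (add GPR64:$Rn, 4) should be selected as the Xri
+  // (immediate) variant below rather than via the extended-register form.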
+ let AddedComplexity = 6 in + def Wri : BaseAddSubImm<isSub, 0, GPR32sp, GPR32sp, addsub_shifted_imm32, + mnemonic, OpNode> { + let Inst{31} = 0; + } + let AddedComplexity = 6 in + def Xri : BaseAddSubImm<isSub, 0, GPR64sp, GPR64sp, addsub_shifted_imm64, + mnemonic, OpNode> { + let Inst{31} = 1; + } + + // Add/Subtract register - Only used for CodeGen + def Wrr : BaseAddSubRegPseudo<GPR32, OpNode>; + def Xrr : BaseAddSubRegPseudo<GPR64, OpNode>; + + // Add/Subtract shifted register + def Wrs : BaseAddSubSReg<isSub, 0, GPR32, arith_shifted_reg32, mnemonic, + OpNode> { + let Inst{31} = 0; + } + def Xrs : BaseAddSubSReg<isSub, 0, GPR64, arith_shifted_reg64, mnemonic, + OpNode> { + let Inst{31} = 1; + } + } + + // Add/Subtract extended register + let AddedComplexity = 1, hasSideEffects = 0 in { + def Wrx : BaseAddSubEReg<isSub, 0, GPR32sp, GPR32sp, + arith_extended_reg32<i32>, mnemonic, OpNode> { + let Inst{31} = 0; + } + def Xrx : BaseAddSubEReg<isSub, 0, GPR64sp, GPR64sp, + arith_extended_reg32to64<i64>, mnemonic, OpNode> { + let Inst{31} = 1; + } + } + + def Xrx64 : BaseAddSubEReg64<isSub, 0, GPR64sp, GPR64sp, GPR64, + arith_extendlsl64, mnemonic> { + // UXTX and SXTX only. + let Inst{14-13} = 0b11; + let Inst{31} = 1; + } + + // add Rd, Rb, -imm -> sub Rd, Rn, imm + def : InstSubst<alias#"\t$Rd, $Rn, $imm", + (!cast<Instruction>(NAME # "Wri") GPR32sp:$Rd, GPR32sp:$Rn, + addsub_shifted_imm32_neg:$imm), 0>; + def : InstSubst<alias#"\t$Rd, $Rn, $imm", + (!cast<Instruction>(NAME # "Xri") GPR64sp:$Rd, GPR64sp:$Rn, + addsub_shifted_imm64_neg:$imm), 0>; + + // Register/register aliases with no shift when SP is not used. + def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrs"), + GPR32, GPR32, GPR32, 0>; + def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Xrs"), + GPR64, GPR64, GPR64, 0>; + + // Register/register aliases with no shift when either the destination or + // first source register is SP. 
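+  // For example, "add sp, x0, x1" and "add x0, sp, x1" are only encodable as
+  // the extended-register form with a UXTX #0 extend, since the plain
+  // shifted-register encoding cannot name SP.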
+ def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrx"), + GPR32sponly, GPR32sp, GPR32, 16>; // UXTW #0 + def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrx"), + GPR32sp, GPR32sponly, GPR32, 16>; // UXTW #0 + def : AddSubRegAlias<mnemonic, + !cast<Instruction>(NAME#"Xrx64"), + GPR64sponly, GPR64sp, GPR64, 24>; // UXTX #0 + def : AddSubRegAlias<mnemonic, + !cast<Instruction>(NAME#"Xrx64"), + GPR64sp, GPR64sponly, GPR64, 24>; // UXTX #0 +} + +multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp, + string alias, string cmpAlias> { + let isCompare = 1, Defs = [NZCV] in { + // Add/Subtract immediate + def Wri : BaseAddSubImm<isSub, 1, GPR32, GPR32sp, addsub_shifted_imm32, + mnemonic, OpNode> { + let Inst{31} = 0; + } + def Xri : BaseAddSubImm<isSub, 1, GPR64, GPR64sp, addsub_shifted_imm64, + mnemonic, OpNode> { + let Inst{31} = 1; + } + + // Add/Subtract register + def Wrr : BaseAddSubRegPseudo<GPR32, OpNode>; + def Xrr : BaseAddSubRegPseudo<GPR64, OpNode>; + + // Add/Subtract shifted register + def Wrs : BaseAddSubSReg<isSub, 1, GPR32, arith_shifted_reg32, mnemonic, + OpNode> { + let Inst{31} = 0; + } + def Xrs : BaseAddSubSReg<isSub, 1, GPR64, arith_shifted_reg64, mnemonic, + OpNode> { + let Inst{31} = 1; + } + + // Add/Subtract extended register + let AddedComplexity = 1 in { + def Wrx : BaseAddSubEReg<isSub, 1, GPR32, GPR32sp, + arith_extended_reg32<i32>, mnemonic, OpNode> { + let Inst{31} = 0; + } + def Xrx : BaseAddSubEReg<isSub, 1, GPR64, GPR64sp, + arith_extended_reg32<i64>, mnemonic, OpNode> { + let Inst{31} = 1; + } + } + + def Xrx64 : BaseAddSubEReg64<isSub, 1, GPR64, GPR64sp, GPR64, + arith_extendlsl64, mnemonic> { + // UXTX and SXTX only. + let Inst{14-13} = 0b11; + let Inst{31} = 1; + } + } // Defs = [NZCV] + + // Support negative immediates, e.g. adds Rd, Rn, -imm -> subs Rd, Rn, imm + def : InstSubst<alias#"\t$Rd, $Rn, $imm", + (!cast<Instruction>(NAME # "Wri") GPR32:$Rd, GPR32sp:$Rn, + addsub_shifted_imm32_neg:$imm), 0>; + def : InstSubst<alias#"\t$Rd, $Rn, $imm", + (!cast<Instruction>(NAME # "Xri") GPR64:$Rd, GPR64sp:$Rn, + addsub_shifted_imm64_neg:$imm), 0>; + + // Compare aliases + def : InstAlias<cmp#"\t$src, $imm", (!cast<Instruction>(NAME#"Wri") + WZR, GPR32sp:$src, addsub_shifted_imm32:$imm), 5>; + def : InstAlias<cmp#"\t$src, $imm", (!cast<Instruction>(NAME#"Xri") + XZR, GPR64sp:$src, addsub_shifted_imm64:$imm), 5>; + def : InstAlias<cmp#"\t$src1, $src2$sh", (!cast<Instruction>(NAME#"Wrx") + WZR, GPR32sp:$src1, GPR32:$src2, arith_extend:$sh), 4>; + def : InstAlias<cmp#"\t$src1, $src2$sh", (!cast<Instruction>(NAME#"Xrx") + XZR, GPR64sp:$src1, GPR32:$src2, arith_extend:$sh), 4>; + def : InstAlias<cmp#"\t$src1, $src2$sh", (!cast<Instruction>(NAME#"Xrx64") + XZR, GPR64sp:$src1, GPR64:$src2, arith_extendlsl64:$sh), 4>; + def : InstAlias<cmp#"\t$src1, $src2$sh", (!cast<Instruction>(NAME#"Wrs") + WZR, GPR32:$src1, GPR32:$src2, arith_shift32:$sh), 4>; + def : InstAlias<cmp#"\t$src1, $src2$sh", (!cast<Instruction>(NAME#"Xrs") + XZR, GPR64:$src1, GPR64:$src2, arith_shift64:$sh), 4>; + + // Support negative immediates, e.g. 
cmp Rn, -imm -> cmn Rn, imm + def : InstSubst<cmpAlias#"\t$src, $imm", (!cast<Instruction>(NAME#"Wri") + WZR, GPR32sp:$src, addsub_shifted_imm32_neg:$imm), 0>; + def : InstSubst<cmpAlias#"\t$src, $imm", (!cast<Instruction>(NAME#"Xri") + XZR, GPR64sp:$src, addsub_shifted_imm64_neg:$imm), 0>; + + // Compare shorthands + def : InstAlias<cmp#"\t$src1, $src2", (!cast<Instruction>(NAME#"Wrs") + WZR, GPR32:$src1, GPR32:$src2, 0), 5>; + def : InstAlias<cmp#"\t$src1, $src2", (!cast<Instruction>(NAME#"Xrs") + XZR, GPR64:$src1, GPR64:$src2, 0), 5>; + def : InstAlias<cmp#"\t$src1, $src2", (!cast<Instruction>(NAME#"Wrx") + WZR, GPR32sponly:$src1, GPR32:$src2, 16), 5>; + def : InstAlias<cmp#"\t$src1, $src2", (!cast<Instruction>(NAME#"Xrx64") + XZR, GPR64sponly:$src1, GPR64:$src2, 24), 5>; + + // Register/register aliases with no shift when SP is not used. + def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrs"), + GPR32, GPR32, GPR32, 0>; + def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Xrs"), + GPR64, GPR64, GPR64, 0>; + + // Register/register aliases with no shift when the first source register + // is SP. + def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrx"), + GPR32, GPR32sponly, GPR32, 16>; // UXTW #0 + def : AddSubRegAlias<mnemonic, + !cast<Instruction>(NAME#"Xrx64"), + GPR64, GPR64sponly, GPR64, 24>; // UXTX #0 +} + +//--- +// Extract +//--- +def SDTA64EXTR : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisPtrTy<3>]>; +def AArch64Extr : SDNode<"AArch64ISD::EXTR", SDTA64EXTR>; + +class BaseExtractImm<RegisterClass regtype, Operand imm_type, string asm, + list<dag> patterns> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, imm_type:$imm), + asm, "\t$Rd, $Rn, $Rm, $imm", "", patterns>, + Sched<[WriteExtr, ReadExtrHi]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<6> imm; + + let Inst{30-23} = 0b00100111; + let Inst{21} = 0; + let Inst{20-16} = Rm; + let Inst{15-10} = imm; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass ExtractImm<string asm> { + def Wrri : BaseExtractImm<GPR32, imm0_31, asm, + [(set GPR32:$Rd, + (AArch64Extr GPR32:$Rn, GPR32:$Rm, imm0_31:$imm))]> { + let Inst{31} = 0; + let Inst{22} = 0; + // imm<5> must be zero. + let imm{5} = 0; + } + def Xrri : BaseExtractImm<GPR64, imm0_63, asm, + [(set GPR64:$Rd, + (AArch64Extr GPR64:$Rn, GPR64:$Rm, imm0_63:$imm))]> { + + let Inst{31} = 1; + let Inst{22} = 1; + } +} + +//--- +// Bitfield +//--- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseBitfieldImm<bits<2> opc, + RegisterClass regtype, Operand imm_type, string asm> + : I<(outs regtype:$Rd), (ins regtype:$Rn, imm_type:$immr, imm_type:$imms), + asm, "\t$Rd, $Rn, $immr, $imms", "", []>, + Sched<[WriteIS, ReadI]> { + bits<5> Rd; + bits<5> Rn; + bits<6> immr; + bits<6> imms; + + let Inst{30-29} = opc; + let Inst{28-23} = 0b100110; + let Inst{21-16} = immr; + let Inst{15-10} = imms; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass BitfieldImm<bits<2> opc, string asm> { + def Wri : BaseBitfieldImm<opc, GPR32, imm0_31, asm> { + let Inst{31} = 0; + let Inst{22} = 0; + // imms<5> and immr<5> must be zero, else ReservedValue(). 
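+    // (Inst{21} is immr<5> and Inst{15} is imms<5>; the 32-bit form can only
+    // name bit positions 0-31, so the top bit of each field must be clear.)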
+ let Inst{21} = 0; + let Inst{15} = 0; + } + def Xri : BaseBitfieldImm<opc, GPR64, imm0_63, asm> { + let Inst{31} = 1; + let Inst{22} = 1; + } +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseBitfieldImmWith2RegArgs<bits<2> opc, + RegisterClass regtype, Operand imm_type, string asm> + : I<(outs regtype:$Rd), (ins regtype:$src, regtype:$Rn, imm_type:$immr, + imm_type:$imms), + asm, "\t$Rd, $Rn, $immr, $imms", "$src = $Rd", []>, + Sched<[WriteIS, ReadI]> { + bits<5> Rd; + bits<5> Rn; + bits<6> immr; + bits<6> imms; + + let Inst{30-29} = opc; + let Inst{28-23} = 0b100110; + let Inst{21-16} = immr; + let Inst{15-10} = imms; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass BitfieldImmWith2RegArgs<bits<2> opc, string asm> { + def Wri : BaseBitfieldImmWith2RegArgs<opc, GPR32, imm0_31, asm> { + let Inst{31} = 0; + let Inst{22} = 0; + // imms<5> and immr<5> must be zero, else ReservedValue(). + let Inst{21} = 0; + let Inst{15} = 0; + } + def Xri : BaseBitfieldImmWith2RegArgs<opc, GPR64, imm0_63, asm> { + let Inst{31} = 1; + let Inst{22} = 1; + } +} + +//--- +// Logical +//--- + +// Logical (immediate) +class BaseLogicalImm<bits<2> opc, RegisterClass dregtype, + RegisterClass sregtype, Operand imm_type, string asm, + list<dag> pattern> + : I<(outs dregtype:$Rd), (ins sregtype:$Rn, imm_type:$imm), + asm, "\t$Rd, $Rn, $imm", "", pattern>, + Sched<[WriteI, ReadI]> { + bits<5> Rd; + bits<5> Rn; + bits<13> imm; + let Inst{30-29} = opc; + let Inst{28-23} = 0b100100; + let Inst{22} = imm{12}; + let Inst{21-16} = imm{11-6}; + let Inst{15-10} = imm{5-0}; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; + + let DecoderMethod = "DecodeLogicalImmInstruction"; +} + +// Logical (shifted register) +class BaseLogicalSReg<bits<2> opc, bit N, RegisterClass regtype, + logical_shifted_reg shifted_regtype, string asm, + list<dag> pattern> + : I<(outs regtype:$Rd), (ins regtype:$Rn, shifted_regtype:$Rm), + asm, "\t$Rd, $Rn, $Rm", "", pattern>, + Sched<[WriteISReg, ReadI, ReadISReg]> { + // The operands are in order to match the 'addr' MI operands, so we + // don't need an encoder method and by-name matching. Just use the default + // in-order handling. Since we're using by-order, make sure the names + // do not match. + bits<5> dst; + bits<5> src1; + bits<5> src2; + bits<8> shift; + let Inst{30-29} = opc; + let Inst{28-24} = 0b01010; + let Inst{23-22} = shift{7-6}; + let Inst{21} = N; + let Inst{20-16} = src2; + let Inst{15-10} = shift{5-0}; + let Inst{9-5} = src1; + let Inst{4-0} = dst; + + let DecoderMethod = "DecodeThreeAddrSRegInstruction"; +} + +// Aliases for register+register logical instructions. +class LogicalRegAlias<string asm, Instruction inst, RegisterClass regtype> + : InstAlias<asm#"\t$dst, $src1, $src2", + (inst regtype:$dst, regtype:$src1, regtype:$src2, 0)>; + +multiclass LogicalImm<bits<2> opc, string mnemonic, SDNode OpNode, + string Alias> { + let AddedComplexity = 6, isReMaterializable = 1, isAsCheapAsAMove = 1 in + def Wri : BaseLogicalImm<opc, GPR32sp, GPR32, logical_imm32, mnemonic, + [(set GPR32sp:$Rd, (OpNode GPR32:$Rn, + logical_imm32:$imm))]> { + let Inst{31} = 0; + let Inst{22} = 0; // 64-bit version has an additional bit of immediate. 
+ } + let AddedComplexity = 6, isReMaterializable = 1, isAsCheapAsAMove = 1 in + def Xri : BaseLogicalImm<opc, GPR64sp, GPR64, logical_imm64, mnemonic, + [(set GPR64sp:$Rd, (OpNode GPR64:$Rn, + logical_imm64:$imm))]> { + let Inst{31} = 1; + } + + def : InstSubst<Alias # "\t$Rd, $Rn, $imm", + (!cast<Instruction>(NAME # "Wri") GPR32sp:$Rd, GPR32:$Rn, + logical_imm32_not:$imm), 0>; + def : InstSubst<Alias # "\t$Rd, $Rn, $imm", + (!cast<Instruction>(NAME # "Xri") GPR64sp:$Rd, GPR64:$Rn, + logical_imm64_not:$imm), 0>; +} + +multiclass LogicalImmS<bits<2> opc, string mnemonic, SDNode OpNode, + string Alias> { + let isCompare = 1, Defs = [NZCV] in { + def Wri : BaseLogicalImm<opc, GPR32, GPR32, logical_imm32, mnemonic, + [(set GPR32:$Rd, (OpNode GPR32:$Rn, logical_imm32:$imm))]> { + let Inst{31} = 0; + let Inst{22} = 0; // 64-bit version has an additional bit of immediate. + } + def Xri : BaseLogicalImm<opc, GPR64, GPR64, logical_imm64, mnemonic, + [(set GPR64:$Rd, (OpNode GPR64:$Rn, logical_imm64:$imm))]> { + let Inst{31} = 1; + } + } // end Defs = [NZCV] + + def : InstSubst<Alias # "\t$Rd, $Rn, $imm", + (!cast<Instruction>(NAME # "Wri") GPR32:$Rd, GPR32:$Rn, + logical_imm32_not:$imm), 0>; + def : InstSubst<Alias # "\t$Rd, $Rn, $imm", + (!cast<Instruction>(NAME # "Xri") GPR64:$Rd, GPR64:$Rn, + logical_imm64_not:$imm), 0>; +} + +class BaseLogicalRegPseudo<RegisterClass regtype, SDPatternOperator OpNode> + : Pseudo<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), + [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]>, + Sched<[WriteI, ReadI, ReadI]>; + +// Split from LogicalImm as not all instructions have both. +multiclass LogicalReg<bits<2> opc, bit N, string mnemonic, + SDPatternOperator OpNode> { + let isReMaterializable = 1, isAsCheapAsAMove = 1 in { + def Wrr : BaseLogicalRegPseudo<GPR32, OpNode>; + def Xrr : BaseLogicalRegPseudo<GPR64, OpNode>; + } + + def Wrs : BaseLogicalSReg<opc, N, GPR32, logical_shifted_reg32, mnemonic, + [(set GPR32:$Rd, (OpNode GPR32:$Rn, + logical_shifted_reg32:$Rm))]> { + let Inst{31} = 0; + } + def Xrs : BaseLogicalSReg<opc, N, GPR64, logical_shifted_reg64, mnemonic, + [(set GPR64:$Rd, (OpNode GPR64:$Rn, + logical_shifted_reg64:$Rm))]> { + let Inst{31} = 1; + } + + def : LogicalRegAlias<mnemonic, + !cast<Instruction>(NAME#"Wrs"), GPR32>; + def : LogicalRegAlias<mnemonic, + !cast<Instruction>(NAME#"Xrs"), GPR64>; +} + +// Split from LogicalReg to allow setting NZCV Defs +multiclass LogicalRegS<bits<2> opc, bit N, string mnemonic, + SDPatternOperator OpNode = null_frag> { + let Defs = [NZCV], mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { + def Wrr : BaseLogicalRegPseudo<GPR32, OpNode>; + def Xrr : BaseLogicalRegPseudo<GPR64, OpNode>; + + def Wrs : BaseLogicalSReg<opc, N, GPR32, logical_shifted_reg32, mnemonic, + [(set GPR32:$Rd, (OpNode GPR32:$Rn, logical_shifted_reg32:$Rm))]> { + let Inst{31} = 0; + } + def Xrs : BaseLogicalSReg<opc, N, GPR64, logical_shifted_reg64, mnemonic, + [(set GPR64:$Rd, (OpNode GPR64:$Rn, logical_shifted_reg64:$Rm))]> { + let Inst{31} = 1; + } + } // Defs = [NZCV] + + def : LogicalRegAlias<mnemonic, + !cast<Instruction>(NAME#"Wrs"), GPR32>; + def : LogicalRegAlias<mnemonic, + !cast<Instruction>(NAME#"Xrs"), GPR64>; +} + +//--- +// Conditionally set flags +//--- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseCondComparisonImm<bit op, RegisterClass regtype, ImmLeaf immtype, + string mnemonic, SDNode OpNode> + : I<(outs), (ins regtype:$Rn, immtype:$imm, imm32_0_15:$nzcv, ccode:$cond), + mnemonic, "\t$Rn, $imm, 
$nzcv, $cond", "", + [(set NZCV, (OpNode regtype:$Rn, immtype:$imm, (i32 imm:$nzcv), + (i32 imm:$cond), NZCV))]>, + Sched<[WriteI, ReadI]> { + let Uses = [NZCV]; + let Defs = [NZCV]; + + bits<5> Rn; + bits<5> imm; + bits<4> nzcv; + bits<4> cond; + + let Inst{30} = op; + let Inst{29-21} = 0b111010010; + let Inst{20-16} = imm; + let Inst{15-12} = cond; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4} = 0b0; + let Inst{3-0} = nzcv; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseCondComparisonReg<bit op, RegisterClass regtype, string mnemonic, + SDNode OpNode> + : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond), + mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "", + [(set NZCV, (OpNode regtype:$Rn, regtype:$Rm, (i32 imm:$nzcv), + (i32 imm:$cond), NZCV))]>, + Sched<[WriteI, ReadI, ReadI]> { + let Uses = [NZCV]; + let Defs = [NZCV]; + + bits<5> Rn; + bits<5> Rm; + bits<4> nzcv; + bits<4> cond; + + let Inst{30} = op; + let Inst{29-21} = 0b111010010; + let Inst{20-16} = Rm; + let Inst{15-12} = cond; + let Inst{11-10} = 0b00; + let Inst{9-5} = Rn; + let Inst{4} = 0b0; + let Inst{3-0} = nzcv; +} + +multiclass CondComparison<bit op, string mnemonic, SDNode OpNode> { + // immediate operand variants + def Wi : BaseCondComparisonImm<op, GPR32, imm32_0_31, mnemonic, OpNode> { + let Inst{31} = 0; + } + def Xi : BaseCondComparisonImm<op, GPR64, imm0_31, mnemonic, OpNode> { + let Inst{31} = 1; + } + // register operand variants + def Wr : BaseCondComparisonReg<op, GPR32, mnemonic, OpNode> { + let Inst{31} = 0; + } + def Xr : BaseCondComparisonReg<op, GPR64, mnemonic, OpNode> { + let Inst{31} = 1; + } +} + +//--- +// Conditional select +//--- + +class BaseCondSelect<bit op, bits<2> op2, RegisterClass regtype, string asm> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond), + asm, "\t$Rd, $Rn, $Rm, $cond", "", + [(set regtype:$Rd, + (AArch64csel regtype:$Rn, regtype:$Rm, (i32 imm:$cond), NZCV))]>, + Sched<[WriteI, ReadI, ReadI]> { + let Uses = [NZCV]; + + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<4> cond; + + let Inst{30} = op; + let Inst{29-21} = 0b011010100; + let Inst{20-16} = Rm; + let Inst{15-12} = cond; + let Inst{11-10} = op2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass CondSelect<bit op, bits<2> op2, string asm> { + def Wr : BaseCondSelect<op, op2, GPR32, asm> { + let Inst{31} = 0; + } + def Xr : BaseCondSelect<op, op2, GPR64, asm> { + let Inst{31} = 1; + } +} + +class BaseCondSelectOp<bit op, bits<2> op2, RegisterClass regtype, string asm, + PatFrag frag> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond), + asm, "\t$Rd, $Rn, $Rm, $cond", "", + [(set regtype:$Rd, + (AArch64csel regtype:$Rn, (frag regtype:$Rm), + (i32 imm:$cond), NZCV))]>, + Sched<[WriteI, ReadI, ReadI]> { + let Uses = [NZCV]; + + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<4> cond; + + let Inst{30} = op; + let Inst{29-21} = 0b011010100; + let Inst{20-16} = Rm; + let Inst{15-12} = cond; + let Inst{11-10} = op2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +def inv_cond_XFORM : SDNodeXForm<imm, [{ + AArch64CC::CondCode CC = static_cast<AArch64CC::CondCode>(N->getZExtValue()); + return CurDAG->getTargetConstant(AArch64CC::getInvertedCondCode(CC), SDLoc(N), + MVT::i32); +}]>; + +multiclass CondSelectOp<bit op, bits<2> op2, string asm, PatFrag frag> { + def Wr : BaseCondSelectOp<op, op2, GPR32, asm, frag> { + let Inst{31} = 0; + } + def Xr : BaseCondSelectOp<op, op2, GPR64, asm, frag> { + let Inst{31} = 1; + } + + def : 
Pat<(AArch64csel (frag GPR32:$Rm), GPR32:$Rn, (i32 imm:$cond), NZCV), + (!cast<Instruction>(NAME # Wr) GPR32:$Rn, GPR32:$Rm, + (inv_cond_XFORM imm:$cond))>; + + def : Pat<(AArch64csel (frag GPR64:$Rm), GPR64:$Rn, (i32 imm:$cond), NZCV), + (!cast<Instruction>(NAME # Xr) GPR64:$Rn, GPR64:$Rm, + (inv_cond_XFORM imm:$cond))>; +} + +//--- +// Special Mask Value +//--- +def maski8_or_more : Operand<i32>, + ImmLeaf<i32, [{ return (Imm & 0xff) == 0xff; }]> { +} +def maski16_or_more : Operand<i32>, + ImmLeaf<i32, [{ return (Imm & 0xffff) == 0xffff; }]> { +} + + +//--- +// Load/store +//--- + +// (unsigned immediate) +// Indexed for 8-bit registers. offset is in range [0,4095]. +def am_indexed8 : ComplexPattern<i64, 2, "SelectAddrModeIndexed8", []>; +def am_indexed16 : ComplexPattern<i64, 2, "SelectAddrModeIndexed16", []>; +def am_indexed32 : ComplexPattern<i64, 2, "SelectAddrModeIndexed32", []>; +def am_indexed64 : ComplexPattern<i64, 2, "SelectAddrModeIndexed64", []>; +def am_indexed128 : ComplexPattern<i64, 2, "SelectAddrModeIndexed128", []>; + +def gi_am_indexed8 : + GIComplexOperandMatcher<s64, "selectAddrModeIndexed<8>">, + GIComplexPatternEquiv<am_indexed8>; +def gi_am_indexed16 : + GIComplexOperandMatcher<s64, "selectAddrModeIndexed<16>">, + GIComplexPatternEquiv<am_indexed16>; +def gi_am_indexed32 : + GIComplexOperandMatcher<s64, "selectAddrModeIndexed<32>">, + GIComplexPatternEquiv<am_indexed32>; +def gi_am_indexed64 : + GIComplexOperandMatcher<s64, "selectAddrModeIndexed<64>">, + GIComplexPatternEquiv<am_indexed64>; +def gi_am_indexed128 : + GIComplexOperandMatcher<s64, "selectAddrModeIndexed<128>">, + GIComplexPatternEquiv<am_indexed128>; + +class UImm12OffsetOperand<int Scale> : AsmOperandClass { + let Name = "UImm12Offset" # Scale; + let RenderMethod = "addUImm12OffsetOperands<" # Scale # ">"; + let PredicateMethod = "isUImm12Offset<" # Scale # ">"; + let DiagnosticType = "InvalidMemoryIndexed" # Scale; +} + +def UImm12OffsetScale1Operand : UImm12OffsetOperand<1>; +def UImm12OffsetScale2Operand : UImm12OffsetOperand<2>; +def UImm12OffsetScale4Operand : UImm12OffsetOperand<4>; +def UImm12OffsetScale8Operand : UImm12OffsetOperand<8>; +def UImm12OffsetScale16Operand : UImm12OffsetOperand<16>; + +class uimm12_scaled<int Scale> : Operand<i64> { + let ParserMatchClass + = !cast<AsmOperandClass>("UImm12OffsetScale" # Scale # "Operand"); + let EncoderMethod + = "getLdStUImm12OpValue<AArch64::fixup_aarch64_ldst_imm12_scale" # Scale # ">"; + let PrintMethod = "printUImm12Offset<" # Scale # ">"; +} + +def uimm12s1 : uimm12_scaled<1>; +def uimm12s2 : uimm12_scaled<2>; +def uimm12s4 : uimm12_scaled<4>; +def uimm12s8 : uimm12_scaled<8>; +def uimm12s16 : uimm12_scaled<16>; + +class BaseLoadStoreUI<bits<2> sz, bit V, bits<2> opc, dag oops, dag iops, + string asm, list<dag> pattern> + : I<oops, iops, asm, "\t$Rt, [$Rn, $offset]", "", pattern> { + bits<5> Rt; + + bits<5> Rn; + bits<12> offset; + + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0b01; + let Inst{23-22} = opc; + let Inst{21-10} = offset; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let DecoderMethod = "DecodeUnsignedLdStInstruction"; +} + +multiclass LoadUI<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype, + Operand indextype, string asm, list<dag> pattern> { + let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in + def ui : BaseLoadStoreUI<sz, V, opc, (outs regtype:$Rt), + (ins GPR64sp:$Rn, indextype:$offset), + asm, pattern>, + Sched<[WriteLD]>; + + def : 
InstAlias<asm # "\t$Rt, [$Rn]",
+                  (!cast<Instruction>(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>;
+}
+
+multiclass StoreUI<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
+                   Operand indextype, string asm, list<dag> pattern> {
+  let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
+  def ui : BaseLoadStoreUI<sz, V, opc, (outs),
+                           (ins regtype:$Rt, GPR64sp:$Rn, indextype:$offset),
+                           asm, pattern>,
+           Sched<[WriteST]>;
+
+  def : InstAlias<asm # "\t$Rt, [$Rn]",
+                  (!cast<Instruction>(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>;
+}
+
+// Same as StoreUI, but takes a RegisterOperand. This is used by GlobalISel to
+// substitute zero-registers automatically.
+//
+// TODO: Roll out zero-register substitution to GPR32/GPR64 and fold this back
+// into StoreUI.
+multiclass StoreUIz<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
+                    Operand indextype, string asm, list<dag> pattern> {
+  let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
+  def ui : BaseLoadStoreUI<sz, V, opc, (outs),
+                           (ins regtype:$Rt, GPR64sp:$Rn, indextype:$offset),
+                           asm, pattern>,
+           Sched<[WriteST]>;
+
+  def : InstAlias<asm # "\t$Rt, [$Rn]",
+                  (!cast<Instruction>(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>;
+}
+
+def PrefetchOperand : AsmOperandClass {
+  let Name = "Prefetch";
+  let ParserMethod = "tryParsePrefetch";
+}
+def prfop : Operand<i32> {
+  let PrintMethod = "printPrefetchOp";
+  let ParserMatchClass = PrefetchOperand;
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
+class PrefetchUI<bits<2> sz, bit V, bits<2> opc, string asm, list<dag> pat>
+    : BaseLoadStoreUI<sz, V, opc,
+                      (outs), (ins prfop:$Rt, GPR64sp:$Rn, uimm12s8:$offset),
+                      asm, pat>,
+      Sched<[WriteLD]>;
+
+//---
+// Load literal
+//---
+
+// Load literal address: 19-bit immediate. The low two bits of the target
+// offset are implied zero and so are not part of the immediate.
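+// For example, "ldr x0, label" can reach labels within +/-1MiB of the
+// instruction: the 19-bit field holds the word offset, i.e. (label - PC) >> 2.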
+def am_ldrlit : Operand<iPTR> { + let EncoderMethod = "getLoadLiteralOpValue"; + let DecoderMethod = "DecodePCRelLabel19"; + let PrintMethod = "printAlignedLabel"; + let ParserMatchClass = PCRelLabel19Operand; + let OperandType = "OPERAND_PCREL"; +} + +let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in +class LoadLiteral<bits<2> opc, bit V, RegisterOperand regtype, string asm> + : I<(outs regtype:$Rt), (ins am_ldrlit:$label), + asm, "\t$Rt, $label", "", []>, + Sched<[WriteLD]> { + bits<5> Rt; + bits<19> label; + let Inst{31-30} = opc; + let Inst{29-27} = 0b011; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-5} = label; + let Inst{4-0} = Rt; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in +class PrefetchLiteral<bits<2> opc, bit V, string asm, list<dag> pat> + : I<(outs), (ins prfop:$Rt, am_ldrlit:$label), + asm, "\t$Rt, $label", "", pat>, + Sched<[WriteLD]> { + bits<5> Rt; + bits<19> label; + let Inst{31-30} = opc; + let Inst{29-27} = 0b011; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-5} = label; + let Inst{4-0} = Rt; +} + +//--- +// Load/store register offset +//--- + +def ro_Xindexed8 : ComplexPattern<i64, 4, "SelectAddrModeXRO<8>", []>; +def ro_Xindexed16 : ComplexPattern<i64, 4, "SelectAddrModeXRO<16>", []>; +def ro_Xindexed32 : ComplexPattern<i64, 4, "SelectAddrModeXRO<32>", []>; +def ro_Xindexed64 : ComplexPattern<i64, 4, "SelectAddrModeXRO<64>", []>; +def ro_Xindexed128 : ComplexPattern<i64, 4, "SelectAddrModeXRO<128>", []>; + +def ro_Windexed8 : ComplexPattern<i64, 4, "SelectAddrModeWRO<8>", []>; +def ro_Windexed16 : ComplexPattern<i64, 4, "SelectAddrModeWRO<16>", []>; +def ro_Windexed32 : ComplexPattern<i64, 4, "SelectAddrModeWRO<32>", []>; +def ro_Windexed64 : ComplexPattern<i64, 4, "SelectAddrModeWRO<64>", []>; +def ro_Windexed128 : ComplexPattern<i64, 4, "SelectAddrModeWRO<128>", []>; + +class MemExtendOperand<string Reg, int Width> : AsmOperandClass { + let Name = "Mem" # Reg # "Extend" # Width; + let PredicateMethod = "isMem" # Reg # "Extend<" # Width # ">"; + let RenderMethod = "addMemExtendOperands"; + let DiagnosticType = "InvalidMemory" # Reg # "Extend" # Width; +} + +def MemWExtend8Operand : MemExtendOperand<"W", 8> { + // The address "[x0, x1, lsl #0]" actually maps to the variant which performs + // the trivial shift. + let RenderMethod = "addMemExtend8Operands"; +} +def MemWExtend16Operand : MemExtendOperand<"W", 16>; +def MemWExtend32Operand : MemExtendOperand<"W", 32>; +def MemWExtend64Operand : MemExtendOperand<"W", 64>; +def MemWExtend128Operand : MemExtendOperand<"W", 128>; + +def MemXExtend8Operand : MemExtendOperand<"X", 8> { + // The address "[x0, x1, lsl #0]" actually maps to the variant which performs + // the trivial shift. 
+ let RenderMethod = "addMemExtend8Operands"; +} +def MemXExtend16Operand : MemExtendOperand<"X", 16>; +def MemXExtend32Operand : MemExtendOperand<"X", 32>; +def MemXExtend64Operand : MemExtendOperand<"X", 64>; +def MemXExtend128Operand : MemExtendOperand<"X", 128>; + +class ro_extend<AsmOperandClass ParserClass, string Reg, int Width> + : Operand<i32> { + let ParserMatchClass = ParserClass; + let PrintMethod = "printMemExtend<'" # Reg # "', " # Width # ">"; + let DecoderMethod = "DecodeMemExtend"; + let EncoderMethod = "getMemExtendOpValue"; + let MIOperandInfo = (ops i32imm:$signed, i32imm:$doshift); +} + +def ro_Wextend8 : ro_extend<MemWExtend8Operand, "w", 8>; +def ro_Wextend16 : ro_extend<MemWExtend16Operand, "w", 16>; +def ro_Wextend32 : ro_extend<MemWExtend32Operand, "w", 32>; +def ro_Wextend64 : ro_extend<MemWExtend64Operand, "w", 64>; +def ro_Wextend128 : ro_extend<MemWExtend128Operand, "w", 128>; + +def ro_Xextend8 : ro_extend<MemXExtend8Operand, "x", 8>; +def ro_Xextend16 : ro_extend<MemXExtend16Operand, "x", 16>; +def ro_Xextend32 : ro_extend<MemXExtend32Operand, "x", 32>; +def ro_Xextend64 : ro_extend<MemXExtend64Operand, "x", 64>; +def ro_Xextend128 : ro_extend<MemXExtend128Operand, "x", 128>; + +class ROAddrMode<ComplexPattern windex, ComplexPattern xindex, + Operand wextend, Operand xextend> { + // CodeGen-level pattern covering the entire addressing mode. + ComplexPattern Wpat = windex; + ComplexPattern Xpat = xindex; + + // Asm-level Operand covering the valid "uxtw #3" style syntax. + Operand Wext = wextend; + Operand Xext = xextend; +} + +def ro8 : ROAddrMode<ro_Windexed8, ro_Xindexed8, ro_Wextend8, ro_Xextend8>; +def ro16 : ROAddrMode<ro_Windexed16, ro_Xindexed16, ro_Wextend16, ro_Xextend16>; +def ro32 : ROAddrMode<ro_Windexed32, ro_Xindexed32, ro_Wextend32, ro_Xextend32>; +def ro64 : ROAddrMode<ro_Windexed64, ro_Xindexed64, ro_Wextend64, ro_Xextend64>; +def ro128 : ROAddrMode<ro_Windexed128, ro_Xindexed128, ro_Wextend128, + ro_Xextend128>; + +class LoadStore8RO<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, dag ins, dag outs, list<dag> pat> + : I<ins, outs, asm, "\t$Rt, [$Rn, $Rm, $extend]", "", pat> { + bits<5> Rt; + bits<5> Rn; + bits<5> Rm; + bits<2> extend; + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15} = extend{1}; // sign extend Rm? + let Inst{14} = 1; + let Inst{12} = extend{0}; // do shift? 
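+  // For the byte forms the shift amount is always 0, so extend{0} only
+  // records whether an explicit "#0" amount was written, e.g.
+  // "ldrb w0, [x1, x2]" vs. "ldrb w0, [x1, x2, lsl #0]".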
+ let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +class ROInstAlias<string asm, RegisterOperand regtype, Instruction INST> + : InstAlias<asm # "\t$Rt, [$Rn, $Rm]", + (INST regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, 0, 0)>; + +multiclass Load8RO<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, ValueType Ty, SDPatternOperator loadop> { + let AddedComplexity = 10 in + def roW : LoadStore8RO<sz, V, opc, regtype, asm, + (outs regtype:$Rt), + (ins GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$extend), + [(set (Ty regtype:$Rt), + (loadop (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend8:$extend)))]>, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10 in + def roX : LoadStore8RO<sz, V, opc, regtype, asm, + (outs regtype:$Rt), + (ins GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$extend), + [(set (Ty regtype:$Rt), + (loadop (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend8:$extend)))]>, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>; +} + +multiclass Store8RO<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, ValueType Ty, SDPatternOperator storeop> { + let AddedComplexity = 10 in + def roW : LoadStore8RO<sz, V, opc, regtype, asm, (outs), + (ins regtype:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$extend), + [(storeop (Ty regtype:$Rt), + (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend8:$extend))]>, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10 in + def roX : LoadStore8RO<sz, V, opc, regtype, asm, (outs), + (ins regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$extend), + [(storeop (Ty regtype:$Rt), + (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend8:$extend))]>, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>; +} + +class LoadStore16RO<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, dag ins, dag outs, list<dag> pat> + : I<ins, outs, asm, "\t$Rt, [$Rn, $Rm, $extend]", "", pat> { + bits<5> Rt; + bits<5> Rn; + bits<5> Rm; + bits<2> extend; + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15} = extend{1}; // sign extend Rm? + let Inst{14} = 1; + let Inst{12} = extend{0}; // do shift? 
+ let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +multiclass Load16RO<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, ValueType Ty, SDPatternOperator loadop> { + let AddedComplexity = 10 in + def roW : LoadStore16RO<sz, V, opc, regtype, asm, (outs regtype:$Rt), + (ins GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend), + [(set (Ty regtype:$Rt), + (loadop (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend16:$extend)))]>, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10 in + def roX : LoadStore16RO<sz, V, opc, regtype, asm, (outs regtype:$Rt), + (ins GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend), + [(set (Ty regtype:$Rt), + (loadop (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend)))]>, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>; +} + +multiclass Store16RO<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, ValueType Ty, SDPatternOperator storeop> { + let AddedComplexity = 10 in + def roW : LoadStore16RO<sz, V, opc, regtype, asm, (outs), + (ins regtype:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend), + [(storeop (Ty regtype:$Rt), + (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend16:$extend))]>, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10 in + def roX : LoadStore16RO<sz, V, opc, regtype, asm, (outs), + (ins regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend), + [(storeop (Ty regtype:$Rt), + (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend))]>, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>; +} + +class LoadStore32RO<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, dag ins, dag outs, list<dag> pat> + : I<ins, outs, asm, "\t$Rt, [$Rn, $Rm, $extend]", "", pat> { + bits<5> Rt; + bits<5> Rn; + bits<5> Rm; + bits<2> extend; + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15} = extend{1}; // sign extend Rm? + let Inst{14} = 1; + let Inst{12} = extend{0}; // do shift? 
+ let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +multiclass Load32RO<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, ValueType Ty, SDPatternOperator loadop> { + let AddedComplexity = 10 in + def roW : LoadStore32RO<sz, V, opc, regtype, asm, (outs regtype:$Rt), + (ins GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend), + [(set (Ty regtype:$Rt), + (loadop (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend32:$extend)))]>, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10 in + def roX : LoadStore32RO<sz, V, opc, regtype, asm, (outs regtype:$Rt), + (ins GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend), + [(set (Ty regtype:$Rt), + (loadop (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend32:$extend)))]>, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>; +} + +multiclass Store32RO<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, ValueType Ty, SDPatternOperator storeop> { + let AddedComplexity = 10 in + def roW : LoadStore32RO<sz, V, opc, regtype, asm, (outs), + (ins regtype:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend), + [(storeop (Ty regtype:$Rt), + (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend32:$extend))]>, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10 in + def roX : LoadStore32RO<sz, V, opc, regtype, asm, (outs), + (ins regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend), + [(storeop (Ty regtype:$Rt), + (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend32:$extend))]>, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>; +} + +class LoadStore64RO<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, dag ins, dag outs, list<dag> pat> + : I<ins, outs, asm, "\t$Rt, [$Rn, $Rm, $extend]", "", pat> { + bits<5> Rt; + bits<5> Rn; + bits<5> Rm; + bits<2> extend; + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15} = extend{1}; // sign extend Rm? + let Inst{14} = 1; + let Inst{12} = extend{0}; // do shift? 
+ let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +multiclass Load64RO<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, ValueType Ty, SDPatternOperator loadop> { + let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in + def roW : LoadStore64RO<sz, V, opc, regtype, asm, (outs regtype:$Rt), + (ins GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend), + [(set (Ty regtype:$Rt), + (loadop (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend64:$extend)))]>, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in + def roX : LoadStore64RO<sz, V, opc, regtype, asm, (outs regtype:$Rt), + (ins GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend), + [(set (Ty regtype:$Rt), + (loadop (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend64:$extend)))]>, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>; +} + +multiclass Store64RO<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, ValueType Ty, SDPatternOperator storeop> { + let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in + def roW : LoadStore64RO<sz, V, opc, regtype, asm, (outs), + (ins regtype:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend), + [(storeop (Ty regtype:$Rt), + (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend64:$extend))]>, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in + def roX : LoadStore64RO<sz, V, opc, regtype, asm, (outs), + (ins regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend), + [(storeop (Ty regtype:$Rt), + (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend64:$extend))]>, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>; +} + +class LoadStore128RO<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, dag ins, dag outs, list<dag> pat> + : I<ins, outs, asm, "\t$Rt, [$Rn, $Rm, $extend]", "", pat> { + bits<5> Rt; + bits<5> Rn; + bits<5> Rm; + bits<2> extend; + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15} = extend{1}; // sign extend Rm? + let Inst{14} = 1; + let Inst{12} = extend{0}; // do shift? 
+ let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +multiclass Load128RO<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, ValueType Ty, SDPatternOperator loadop> { + let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in + def roW : LoadStore128RO<sz, V, opc, regtype, asm, (outs regtype:$Rt), + (ins GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend), + [(set (Ty regtype:$Rt), + (loadop (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend128:$extend)))]>, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in + def roX : LoadStore128RO<sz, V, opc, regtype, asm, (outs regtype:$Rt), + (ins GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend), + [(set (Ty regtype:$Rt), + (loadop (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend128:$extend)))]>, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>; +} + +multiclass Store128RO<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, ValueType Ty, SDPatternOperator storeop> { + let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in + def roW : LoadStore128RO<sz, V, opc, regtype, asm, (outs), + (ins regtype:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend), + []>, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in + def roX : LoadStore128RO<sz, V, opc, regtype, asm, (outs), + (ins regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend), + []>, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in +class BasePrefetchRO<bits<2> sz, bit V, bits<2> opc, dag outs, dag ins, + string asm, list<dag> pat> + : I<outs, ins, asm, "\t$Rt, [$Rn, $Rm, $extend]", "", pat>, + Sched<[WriteLD]> { + bits<5> Rt; + bits<5> Rn; + bits<5> Rm; + bits<2> extend; + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15} = extend{1}; // sign extend Rm? + let Inst{14} = 1; + let Inst{12} = extend{0}; // do shift? 
+ let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +multiclass PrefetchRO<bits<2> sz, bit V, bits<2> opc, string asm> { + def roW : BasePrefetchRO<sz, V, opc, (outs), + (ins prfop:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend), + asm, [(AArch64Prefetch imm:$Rt, + (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend64:$extend))]> { + let Inst{13} = 0b0; + } + + def roX : BasePrefetchRO<sz, V, opc, (outs), + (ins prfop:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend), + asm, [(AArch64Prefetch imm:$Rt, + (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend64:$extend))]> { + let Inst{13} = 0b1; + } + + def : InstAlias<"prfm $Rt, [$Rn, $Rm]", + (!cast<Instruction>(NAME # "roX") prfop:$Rt, + GPR64sp:$Rn, GPR64:$Rm, 0, 0)>; +} + +//--- +// Load/store unscaled immediate +//--- + +def am_unscaled8 : ComplexPattern<i64, 2, "SelectAddrModeUnscaled8", []>; +def am_unscaled16 : ComplexPattern<i64, 2, "SelectAddrModeUnscaled16", []>; +def am_unscaled32 : ComplexPattern<i64, 2, "SelectAddrModeUnscaled32", []>; +def am_unscaled64 : ComplexPattern<i64, 2, "SelectAddrModeUnscaled64", []>; +def am_unscaled128 :ComplexPattern<i64, 2, "SelectAddrModeUnscaled128", []>; + +def gi_am_unscaled8 : + GIComplexOperandMatcher<s64, "selectAddrModeUnscaled8">, + GIComplexPatternEquiv<am_unscaled8>; +def gi_am_unscaled16 : + GIComplexOperandMatcher<s64, "selectAddrModeUnscaled16">, + GIComplexPatternEquiv<am_unscaled16>; +def gi_am_unscaled32 : + GIComplexOperandMatcher<s64, "selectAddrModeUnscaled32">, + GIComplexPatternEquiv<am_unscaled32>; +def gi_am_unscaled64 : + GIComplexOperandMatcher<s64, "selectAddrModeUnscaled64">, + GIComplexPatternEquiv<am_unscaled64>; +def gi_am_unscaled128 : + GIComplexOperandMatcher<s64, "selectAddrModeUnscaled128">, + GIComplexPatternEquiv<am_unscaled128>; + + +class BaseLoadStoreUnscale<bits<2> sz, bit V, bits<2> opc, dag oops, dag iops, + string asm, list<dag> pattern> + : I<oops, iops, asm, "\t$Rt, [$Rn, $offset]", "", pattern> { + bits<5> Rt; + bits<5> Rn; + bits<9> offset; + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 0; + let Inst{20-12} = offset; + let Inst{11-10} = 0b00; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let DecoderMethod = "DecodeSignedLdStInstruction"; +} + +// Armv8.4 LDAPR & STLR with Immediate Offset instruction +multiclass BaseLoadUnscaleV84<string asm, bits<2> sz, bits<2> opc, + RegisterOperand regtype > { + def i : BaseLoadStoreUnscale<sz, 0, opc, (outs regtype:$Rt), + (ins GPR64sp:$Rn, simm9:$offset), asm, []>, + Sched<[WriteST]> { + let Inst{29} = 0; + let Inst{24} = 1; + } + def : InstAlias<asm # "\t$Rt, [$Rn]", + (!cast<Instruction>(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; +} + +multiclass BaseStoreUnscaleV84<string asm, bits<2> sz, bits<2> opc, + RegisterOperand regtype > { + def i : BaseLoadStoreUnscale<sz, 0, opc, (outs), + (ins regtype:$Rt, GPR64sp:$Rn, simm9:$offset), + asm, []>, + Sched<[WriteST]> { + let Inst{29} = 0; + let Inst{24} = 1; + } + def : InstAlias<asm # "\t$Rt, [$Rn]", + (!cast<Instruction>(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; +} + +multiclass LoadUnscaled<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, list<dag> pattern> { + let AddedComplexity = 1 in // try this before LoadUI + def i : BaseLoadStoreUnscale<sz, V, opc, (outs regtype:$Rt), + (ins GPR64sp:$Rn, simm9:$offset), asm, pattern>, + Sched<[WriteLD]>; + + def : InstAlias<asm # "\t$Rt, [$Rn]", + (!cast<Instruction>(NAME # "i") 
regtype:$Rt, GPR64sp:$Rn, 0)>; +} + +multiclass StoreUnscaled<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, list<dag> pattern> { + let AddedComplexity = 1 in // try this before StoreUI + def i : BaseLoadStoreUnscale<sz, V, opc, (outs), + (ins regtype:$Rt, GPR64sp:$Rn, simm9:$offset), + asm, pattern>, + Sched<[WriteST]>; + + def : InstAlias<asm # "\t$Rt, [$Rn]", + (!cast<Instruction>(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; +} + +multiclass PrefetchUnscaled<bits<2> sz, bit V, bits<2> opc, string asm, + list<dag> pat> { + let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in + def i : BaseLoadStoreUnscale<sz, V, opc, (outs), + (ins prfop:$Rt, GPR64sp:$Rn, simm9:$offset), + asm, pat>, + Sched<[WriteLD]>; + + def : InstAlias<asm # "\t$Rt, [$Rn]", + (!cast<Instruction>(NAME # "i") prfop:$Rt, GPR64sp:$Rn, 0)>; +} + +//--- +// Load/store unscaled immediate, unprivileged +//--- + +class BaseLoadStoreUnprivileged<bits<2> sz, bit V, bits<2> opc, + dag oops, dag iops, string asm> + : I<oops, iops, asm, "\t$Rt, [$Rn, $offset]", "", []> { + bits<5> Rt; + bits<5> Rn; + bits<9> offset; + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 0; + let Inst{20-12} = offset; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let DecoderMethod = "DecodeSignedLdStInstruction"; +} + +multiclass LoadUnprivileged<bits<2> sz, bit V, bits<2> opc, + RegisterClass regtype, string asm> { + let mayStore = 0, mayLoad = 1, hasSideEffects = 0 in + def i : BaseLoadStoreUnprivileged<sz, V, opc, (outs regtype:$Rt), + (ins GPR64sp:$Rn, simm9:$offset), asm>, + Sched<[WriteLD]>; + + def : InstAlias<asm # "\t$Rt, [$Rn]", + (!cast<Instruction>(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; +} + +multiclass StoreUnprivileged<bits<2> sz, bit V, bits<2> opc, + RegisterClass regtype, string asm> { + let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in + def i : BaseLoadStoreUnprivileged<sz, V, opc, (outs), + (ins regtype:$Rt, GPR64sp:$Rn, simm9:$offset), + asm>, + Sched<[WriteST]>; + + def : InstAlias<asm # "\t$Rt, [$Rn]", + (!cast<Instruction>(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; +} + +//--- +// Load/store pre-indexed +//--- + +class BaseLoadStorePreIdx<bits<2> sz, bit V, bits<2> opc, dag oops, dag iops, + string asm, string cstr, list<dag> pat> + : I<oops, iops, asm, "\t$Rt, [$Rn, $offset]!", cstr, pat> { + bits<5> Rt; + bits<5> Rn; + bits<9> offset; + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0; + let Inst{23-22} = opc; + let Inst{21} = 0; + let Inst{20-12} = offset; + let Inst{11-10} = 0b11; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let DecoderMethod = "DecodeSignedLdStInstruction"; +} + +let hasSideEffects = 0 in { +let mayStore = 0, mayLoad = 1 in +class LoadPreIdx<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm> + : BaseLoadStorePreIdx<sz, V, opc, + (outs GPR64sp:$wback, regtype:$Rt), + (ins GPR64sp:$Rn, simm9:$offset), asm, + "$Rn = $wback,@earlyclobber $wback", []>, + Sched<[WriteLD, WriteAdr]>; + +let mayStore = 1, mayLoad = 0 in +class StorePreIdx<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, SDPatternOperator storeop, ValueType Ty> + : BaseLoadStorePreIdx<sz, V, opc, + (outs GPR64sp:$wback), + (ins regtype:$Rt, GPR64sp:$Rn, simm9:$offset), + asm, "$Rn = $wback,@earlyclobber $wback", + [(set GPR64sp:$wback, + (storeop (Ty regtype:$Rt), GPR64sp:$Rn, simm9:$offset))]>, + Sched<[WriteAdr, 
WriteST]>; +} // hasSideEffects = 0 + +//--- +// Load/store post-indexed +//--- + +class BaseLoadStorePostIdx<bits<2> sz, bit V, bits<2> opc, dag oops, dag iops, + string asm, string cstr, list<dag> pat> + : I<oops, iops, asm, "\t$Rt, [$Rn], $offset", cstr, pat> { + bits<5> Rt; + bits<5> Rn; + bits<9> offset; + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 0b0; + let Inst{20-12} = offset; + let Inst{11-10} = 0b01; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let DecoderMethod = "DecodeSignedLdStInstruction"; +} + +let hasSideEffects = 0 in { +let mayStore = 0, mayLoad = 1 in +class LoadPostIdx<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm> + : BaseLoadStorePostIdx<sz, V, opc, + (outs GPR64sp:$wback, regtype:$Rt), + (ins GPR64sp:$Rn, simm9:$offset), + asm, "$Rn = $wback,@earlyclobber $wback", []>, + Sched<[WriteLD, WriteAdr]>; + +let mayStore = 1, mayLoad = 0 in +class StorePostIdx<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype, + string asm, SDPatternOperator storeop, ValueType Ty> + : BaseLoadStorePostIdx<sz, V, opc, + (outs GPR64sp:$wback), + (ins regtype:$Rt, GPR64sp:$Rn, simm9:$offset), + asm, "$Rn = $wback,@earlyclobber $wback", + [(set GPR64sp:$wback, + (storeop (Ty regtype:$Rt), GPR64sp:$Rn, simm9:$offset))]>, + Sched<[WriteAdr, WriteST]>; +} // hasSideEffects = 0 + + +//--- +// Load/store pair +//--- + +// (indexed, offset) + +class BaseLoadStorePairOffset<bits<2> opc, bit V, bit L, dag oops, dag iops, + string asm> + : I<oops, iops, asm, "\t$Rt, $Rt2, [$Rn, $offset]", "", []> { + bits<5> Rt; + bits<5> Rt2; + bits<5> Rn; + bits<7> offset; + let Inst{31-30} = opc; + let Inst{29-27} = 0b101; + let Inst{26} = V; + let Inst{25-23} = 0b010; + let Inst{22} = L; + let Inst{21-15} = offset; + let Inst{14-10} = Rt2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let DecoderMethod = "DecodePairLdStInstruction"; +} + +multiclass LoadPairOffset<bits<2> opc, bit V, RegisterOperand regtype, + Operand indextype, string asm> { + let hasSideEffects = 0, mayStore = 0, mayLoad = 1 in + def i : BaseLoadStorePairOffset<opc, V, 1, + (outs regtype:$Rt, regtype:$Rt2), + (ins GPR64sp:$Rn, indextype:$offset), asm>, + Sched<[WriteLD, WriteLDHi]>; + + def : InstAlias<asm # "\t$Rt, $Rt2, [$Rn]", + (!cast<Instruction>(NAME # "i") regtype:$Rt, regtype:$Rt2, + GPR64sp:$Rn, 0)>; +} + + +multiclass StorePairOffset<bits<2> opc, bit V, RegisterOperand regtype, + Operand indextype, string asm> { + let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in + def i : BaseLoadStorePairOffset<opc, V, 0, (outs), + (ins regtype:$Rt, regtype:$Rt2, + GPR64sp:$Rn, indextype:$offset), + asm>, + Sched<[WriteSTP]>; + + def : InstAlias<asm # "\t$Rt, $Rt2, [$Rn]", + (!cast<Instruction>(NAME # "i") regtype:$Rt, regtype:$Rt2, + GPR64sp:$Rn, 0)>; +} + +// (pre-indexed) +class BaseLoadStorePairPreIdx<bits<2> opc, bit V, bit L, dag oops, dag iops, + string asm> + : I<oops, iops, asm, "\t$Rt, $Rt2, [$Rn, $offset]!", "$Rn = $wback,@earlyclobber $wback", []> { + bits<5> Rt; + bits<5> Rt2; + bits<5> Rn; + bits<7> offset; + let Inst{31-30} = opc; + let Inst{29-27} = 0b101; + let Inst{26} = V; + let Inst{25-23} = 0b011; + let Inst{22} = L; + let Inst{21-15} = offset; + let Inst{14-10} = Rt2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let DecoderMethod = "DecodePairLdStInstruction"; +} + +let hasSideEffects = 0 in { +let mayStore = 0, mayLoad = 1 in +class LoadPairPreIdx<bits<2> opc, bit V, RegisterOperand regtype, + 
Operand indextype, string asm> + : BaseLoadStorePairPreIdx<opc, V, 1, + (outs GPR64sp:$wback, regtype:$Rt, regtype:$Rt2), + (ins GPR64sp:$Rn, indextype:$offset), asm>, + Sched<[WriteLD, WriteLDHi, WriteAdr]>; + +let mayStore = 1, mayLoad = 0 in +class StorePairPreIdx<bits<2> opc, bit V, RegisterOperand regtype, + Operand indextype, string asm> + : BaseLoadStorePairPreIdx<opc, V, 0, (outs GPR64sp:$wback), + (ins regtype:$Rt, regtype:$Rt2, + GPR64sp:$Rn, indextype:$offset), + asm>, + Sched<[WriteAdr, WriteSTP]>; +} // hasSideEffects = 0 + +// (post-indexed) + +class BaseLoadStorePairPostIdx<bits<2> opc, bit V, bit L, dag oops, dag iops, + string asm> + : I<oops, iops, asm, "\t$Rt, $Rt2, [$Rn], $offset", "$Rn = $wback,@earlyclobber $wback", []> { + bits<5> Rt; + bits<5> Rt2; + bits<5> Rn; + bits<7> offset; + let Inst{31-30} = opc; + let Inst{29-27} = 0b101; + let Inst{26} = V; + let Inst{25-23} = 0b001; + let Inst{22} = L; + let Inst{21-15} = offset; + let Inst{14-10} = Rt2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let DecoderMethod = "DecodePairLdStInstruction"; +} + +let hasSideEffects = 0 in { +let mayStore = 0, mayLoad = 1 in +class LoadPairPostIdx<bits<2> opc, bit V, RegisterOperand regtype, + Operand idxtype, string asm> + : BaseLoadStorePairPostIdx<opc, V, 1, + (outs GPR64sp:$wback, regtype:$Rt, regtype:$Rt2), + (ins GPR64sp:$Rn, idxtype:$offset), asm>, + Sched<[WriteLD, WriteLDHi, WriteAdr]>; + +let mayStore = 1, mayLoad = 0 in +class StorePairPostIdx<bits<2> opc, bit V, RegisterOperand regtype, + Operand idxtype, string asm> + : BaseLoadStorePairPostIdx<opc, V, 0, (outs GPR64sp:$wback), + (ins regtype:$Rt, regtype:$Rt2, + GPR64sp:$Rn, idxtype:$offset), + asm>, + Sched<[WriteAdr, WriteSTP]>; +} // hasSideEffects = 0 + +// (no-allocate) + +class BaseLoadStorePairNoAlloc<bits<2> opc, bit V, bit L, dag oops, dag iops, + string asm> + : I<oops, iops, asm, "\t$Rt, $Rt2, [$Rn, $offset]", "", []> { + bits<5> Rt; + bits<5> Rt2; + bits<5> Rn; + bits<7> offset; + let Inst{31-30} = opc; + let Inst{29-27} = 0b101; + let Inst{26} = V; + let Inst{25-23} = 0b000; + let Inst{22} = L; + let Inst{21-15} = offset; + let Inst{14-10} = Rt2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let DecoderMethod = "DecodePairLdStInstruction"; +} + +multiclass LoadPairNoAlloc<bits<2> opc, bit V, RegisterClass regtype, + Operand indextype, string asm> { + let hasSideEffects = 0, mayStore = 0, mayLoad = 1 in + def i : BaseLoadStorePairNoAlloc<opc, V, 1, + (outs regtype:$Rt, regtype:$Rt2), + (ins GPR64sp:$Rn, indextype:$offset), asm>, + Sched<[WriteLD, WriteLDHi]>; + + + def : InstAlias<asm # "\t$Rt, $Rt2, [$Rn]", + (!cast<Instruction>(NAME # "i") regtype:$Rt, regtype:$Rt2, + GPR64sp:$Rn, 0)>; +} + +multiclass StorePairNoAlloc<bits<2> opc, bit V, RegisterClass regtype, + Operand indextype, string asm> { + let hasSideEffects = 0, mayStore = 1, mayLoad = 0 in + def i : BaseLoadStorePairNoAlloc<opc, V, 0, (outs), + (ins regtype:$Rt, regtype:$Rt2, + GPR64sp:$Rn, indextype:$offset), + asm>, + Sched<[WriteSTP]>; + + def : InstAlias<asm # "\t$Rt, $Rt2, [$Rn]", + (!cast<Instruction>(NAME # "i") regtype:$Rt, regtype:$Rt2, + GPR64sp:$Rn, 0)>; +} + +//--- +// Load/store exclusive +//--- + +// True exclusive operations write to and/or read from the system's exclusive +// monitors, which as far as a compiler is concerned can be modelled as a +// random shared memory address. Hence LoadExclusive mayStore. 
+// +// Since these instructions have the undefined register bits set to 1 in +// their canonical form, we need a post encoder method to set those bits +// to 1 when encoding these instructions. We do this using the +// fixLoadStoreExclusive function. This function has template parameters: +// +// fixLoadStoreExclusive<int hasRs, int hasRt2> +// +// hasRs indicates that the instruction uses the Rs field, so we won't set +// it to 1 (and the same for Rt2). We don't need template parameters for +// the other register fields since Rt and Rn are always used. +// +let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in +class BaseLoadStoreExclusive<bits<2> sz, bit o2, bit L, bit o1, bit o0, + dag oops, dag iops, string asm, string operands> + : I<oops, iops, asm, operands, "", []> { + let Inst{31-30} = sz; + let Inst{29-24} = 0b001000; + let Inst{23} = o2; + let Inst{22} = L; + let Inst{21} = o1; + let Inst{15} = o0; + + let DecoderMethod = "DecodeExclusiveLdStInstruction"; +} + +// Neither Rs nor Rt2 operands. +class LoadStoreExclusiveSimple<bits<2> sz, bit o2, bit L, bit o1, bit o0, + dag oops, dag iops, string asm, string operands> + : BaseLoadStoreExclusive<sz, o2, L, o1, o0, oops, iops, asm, operands> { + bits<5> Rt; + bits<5> Rn; + let Inst{20-16} = 0b11111; + let Unpredictable{20-16} = 0b11111; + let Inst{14-10} = 0b11111; + let Unpredictable{14-10} = 0b11111; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let PostEncoderMethod = "fixLoadStoreExclusive<0,0>"; +} + +// Simple load acquires don't set the exclusive monitor +let mayLoad = 1, mayStore = 0 in +class LoadAcquire<bits<2> sz, bit o2, bit L, bit o1, bit o0, + RegisterClass regtype, string asm> + : LoadStoreExclusiveSimple<sz, o2, L, o1, o0, (outs regtype:$Rt), + (ins GPR64sp0:$Rn), asm, "\t$Rt, [$Rn]">, + Sched<[WriteLD]>; + +class LoadExclusive<bits<2> sz, bit o2, bit L, bit o1, bit o0, + RegisterClass regtype, string asm> + : LoadStoreExclusiveSimple<sz, o2, L, o1, o0, (outs regtype:$Rt), + (ins GPR64sp0:$Rn), asm, "\t$Rt, [$Rn]">, + Sched<[WriteLD]>; + +class LoadExclusivePair<bits<2> sz, bit o2, bit L, bit o1, bit o0, + RegisterClass regtype, string asm> + : BaseLoadStoreExclusive<sz, o2, L, o1, o0, + (outs regtype:$Rt, regtype:$Rt2), + (ins GPR64sp0:$Rn), asm, + "\t$Rt, $Rt2, [$Rn]">, + Sched<[WriteLD, WriteLDHi]> { + bits<5> Rt; + bits<5> Rt2; + bits<5> Rn; + let Inst{14-10} = Rt2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let PostEncoderMethod = "fixLoadStoreExclusive<0,1>"; +} + +// Simple store release operations do not check the exclusive monitor. 
+let mayLoad = 0, mayStore = 1 in +class StoreRelease<bits<2> sz, bit o2, bit L, bit o1, bit o0, + RegisterClass regtype, string asm> + : LoadStoreExclusiveSimple<sz, o2, L, o1, o0, (outs), + (ins regtype:$Rt, GPR64sp0:$Rn), + asm, "\t$Rt, [$Rn]">, + Sched<[WriteST]>; + +let mayLoad = 1, mayStore = 1 in +class StoreExclusive<bits<2> sz, bit o2, bit L, bit o1, bit o0, + RegisterClass regtype, string asm> + : BaseLoadStoreExclusive<sz, o2, L, o1, o0, (outs GPR32:$Ws), + (ins regtype:$Rt, GPR64sp0:$Rn), + asm, "\t$Ws, $Rt, [$Rn]">, + Sched<[WriteSTX]> { + bits<5> Ws; + bits<5> Rt; + bits<5> Rn; + let Inst{20-16} = Ws; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let Constraints = "@earlyclobber $Ws"; + let PostEncoderMethod = "fixLoadStoreExclusive<1,0>"; +} + +class StoreExclusivePair<bits<2> sz, bit o2, bit L, bit o1, bit o0, + RegisterClass regtype, string asm> + : BaseLoadStoreExclusive<sz, o2, L, o1, o0, + (outs GPR32:$Ws), + (ins regtype:$Rt, regtype:$Rt2, GPR64sp0:$Rn), + asm, "\t$Ws, $Rt, $Rt2, [$Rn]">, + Sched<[WriteSTX]> { + bits<5> Ws; + bits<5> Rt; + bits<5> Rt2; + bits<5> Rn; + let Inst{20-16} = Ws; + let Inst{14-10} = Rt2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let Constraints = "@earlyclobber $Ws"; +} + +//--- +// Exception generation +//--- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in +class ExceptionGeneration<bits<3> op1, bits<2> ll, string asm> + : I<(outs), (ins imm0_65535:$imm), asm, "\t$imm", "", []>, + Sched<[WriteSys]> { + bits<16> imm; + let Inst{31-24} = 0b11010100; + let Inst{23-21} = op1; + let Inst{20-5} = imm; + let Inst{4-2} = 0b000; + let Inst{1-0} = ll; +} + +let Predicates = [HasFPARMv8] in { + +//--- +// Floating point to integer conversion +//--- + +class BaseFPToIntegerUnscaled<bits<2> type, bits<2> rmode, bits<3> opcode, + RegisterClass srcType, RegisterClass dstType, + string asm, list<dag> pattern> + : I<(outs dstType:$Rd), (ins srcType:$Rn), + asm, "\t$Rd, $Rn", "", pattern>, + Sched<[WriteFCvt]> { + bits<5> Rd; + bits<5> Rn; + let Inst{30-29} = 0b00; + let Inst{28-24} = 0b11110; + let Inst{23-22} = type; + let Inst{21} = 1; + let Inst{20-19} = rmode; + let Inst{18-16} = opcode; + let Inst{15-10} = 0; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseFPToInteger<bits<2> type, bits<2> rmode, bits<3> opcode, + RegisterClass srcType, RegisterClass dstType, + Operand immType, string asm, list<dag> pattern> + : I<(outs dstType:$Rd), (ins srcType:$Rn, immType:$scale), + asm, "\t$Rd, $Rn, $scale", "", pattern>, + Sched<[WriteFCvt]> { + bits<5> Rd; + bits<5> Rn; + bits<6> scale; + let Inst{30-29} = 0b00; + let Inst{28-24} = 0b11110; + let Inst{23-22} = type; + let Inst{21} = 0; + let Inst{20-19} = rmode; + let Inst{18-16} = opcode; + let Inst{15-10} = scale; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass FPToIntegerUnscaled<bits<2> rmode, bits<3> opcode, string asm, + SDPatternOperator OpN> { + // Unscaled half-precision to 32-bit + def UWHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, GPR32, asm, + [(set GPR32:$Rd, (OpN FPR16:$Rn))]> { + let Inst{31} = 0; // 32-bit GPR flag + let Predicates = [HasFullFP16]; + } + + // Unscaled half-precision to 64-bit + def UXHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, GPR64, asm, + [(set GPR64:$Rd, (OpN FPR16:$Rn))]> { + let Inst{31} = 1; // 64-bit GPR flag + let Predicates = [HasFullFP16]; + } + + // Unscaled single-precision to 32-bit + def UWSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, 
GPR32, asm, + [(set GPR32:$Rd, (OpN FPR32:$Rn))]> { + let Inst{31} = 0; // 32-bit GPR flag + } + + // Unscaled single-precision to 64-bit + def UXSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, GPR64, asm, + [(set GPR64:$Rd, (OpN FPR32:$Rn))]> { + let Inst{31} = 1; // 64-bit GPR flag + } + + // Unscaled double-precision to 32-bit + def UWDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, GPR32, asm, + [(set GPR32:$Rd, (OpN (f64 FPR64:$Rn)))]> { + let Inst{31} = 0; // 32-bit GPR flag + } + + // Unscaled double-precision to 64-bit + def UXDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, GPR64, asm, + [(set GPR64:$Rd, (OpN (f64 FPR64:$Rn)))]> { + let Inst{31} = 1; // 64-bit GPR flag + } +} + +multiclass FPToIntegerScaled<bits<2> rmode, bits<3> opcode, string asm, + SDPatternOperator OpN> { + // Scaled half-precision to 32-bit + def SWHri : BaseFPToInteger<0b11, rmode, opcode, FPR16, GPR32, + fixedpoint_f16_i32, asm, + [(set GPR32:$Rd, (OpN (fmul FPR16:$Rn, + fixedpoint_f16_i32:$scale)))]> { + let Inst{31} = 0; // 32-bit GPR flag + let scale{5} = 1; + let Predicates = [HasFullFP16]; + } + + // Scaled half-precision to 64-bit + def SXHri : BaseFPToInteger<0b11, rmode, opcode, FPR16, GPR64, + fixedpoint_f16_i64, asm, + [(set GPR64:$Rd, (OpN (fmul FPR16:$Rn, + fixedpoint_f16_i64:$scale)))]> { + let Inst{31} = 1; // 64-bit GPR flag + let Predicates = [HasFullFP16]; + } + + // Scaled single-precision to 32-bit + def SWSri : BaseFPToInteger<0b00, rmode, opcode, FPR32, GPR32, + fixedpoint_f32_i32, asm, + [(set GPR32:$Rd, (OpN (fmul FPR32:$Rn, + fixedpoint_f32_i32:$scale)))]> { + let Inst{31} = 0; // 32-bit GPR flag + let scale{5} = 1; + } + + // Scaled single-precision to 64-bit + def SXSri : BaseFPToInteger<0b00, rmode, opcode, FPR32, GPR64, + fixedpoint_f32_i64, asm, + [(set GPR64:$Rd, (OpN (fmul FPR32:$Rn, + fixedpoint_f32_i64:$scale)))]> { + let Inst{31} = 1; // 64-bit GPR flag + } + + // Scaled double-precision to 32-bit + def SWDri : BaseFPToInteger<0b01, rmode, opcode, FPR64, GPR32, + fixedpoint_f64_i32, asm, + [(set GPR32:$Rd, (OpN (fmul FPR64:$Rn, + fixedpoint_f64_i32:$scale)))]> { + let Inst{31} = 0; // 32-bit GPR flag + let scale{5} = 1; + } + + // Scaled double-precision to 64-bit + def SXDri : BaseFPToInteger<0b01, rmode, opcode, FPR64, GPR64, + fixedpoint_f64_i64, asm, + [(set GPR64:$Rd, (OpN (fmul FPR64:$Rn, + fixedpoint_f64_i64:$scale)))]> { + let Inst{31} = 1; // 64-bit GPR flag + } +} + +//--- +// Integer to floating point conversion +//--- + +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseIntegerToFP<bit isUnsigned, + RegisterClass srcType, RegisterClass dstType, + Operand immType, string asm, list<dag> pattern> + : I<(outs dstType:$Rd), (ins srcType:$Rn, immType:$scale), + asm, "\t$Rd, $Rn, $scale", "", pattern>, + Sched<[WriteFCvt]> { + bits<5> Rd; + bits<5> Rn; + bits<6> scale; + let Inst{30-24} = 0b0011110; + let Inst{21-17} = 0b00001; + let Inst{16} = isUnsigned; + let Inst{15-10} = scale; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class BaseIntegerToFPUnscaled<bit isUnsigned, + RegisterClass srcType, RegisterClass dstType, + ValueType dvt, string asm, SDNode node> + : I<(outs dstType:$Rd), (ins srcType:$Rn), + asm, "\t$Rd, $Rn", "", [(set (dvt dstType:$Rd), (node srcType:$Rn))]>, + Sched<[WriteFCvt]> { + bits<5> Rd; + bits<5> Rn; + bits<6> scale; + let Inst{30-24} = 0b0011110; + let Inst{21-17} = 0b10001; + let Inst{16} = isUnsigned; + let Inst{15-10} = 0b000000; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass 
IntegerToFP<bit isUnsigned, string asm, SDNode node> { + // Unscaled + def UWHri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR16, f16, asm, node> { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; + } + + def UWSri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR32, f32, asm, node> { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag + } + + def UWDri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR64, f64, asm, node> { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag + } + + def UXHri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR16, f16, asm, node> { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; + } + + def UXSri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR32, f32, asm, node> { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag + } + + def UXDri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR64, f64, asm, node> { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag + } + + // Scaled + def SWHri: BaseIntegerToFP<isUnsigned, GPR32, FPR16, fixedpoint_f16_i32, asm, + [(set FPR16:$Rd, + (fdiv (node GPR32:$Rn), + fixedpoint_f16_i32:$scale))]> { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let scale{5} = 1; + let Predicates = [HasFullFP16]; + } + + def SWSri: BaseIntegerToFP<isUnsigned, GPR32, FPR32, fixedpoint_f32_i32, asm, + [(set FPR32:$Rd, + (fdiv (node GPR32:$Rn), + fixedpoint_f32_i32:$scale))]> { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag + let scale{5} = 1; + } + + def SWDri: BaseIntegerToFP<isUnsigned, GPR32, FPR64, fixedpoint_f64_i32, asm, + [(set FPR64:$Rd, + (fdiv (node GPR32:$Rn), + fixedpoint_f64_i32:$scale))]> { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag + let scale{5} = 1; + } + + def SXHri: BaseIntegerToFP<isUnsigned, GPR64, FPR16, fixedpoint_f16_i64, asm, + [(set FPR16:$Rd, + (fdiv (node GPR64:$Rn), + fixedpoint_f16_i64:$scale))]> { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; + } + + def SXSri: BaseIntegerToFP<isUnsigned, GPR64, FPR32, fixedpoint_f32_i64, asm, + [(set FPR32:$Rd, + (fdiv (node GPR64:$Rn), + fixedpoint_f32_i64:$scale))]> { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag + } + + def SXDri: BaseIntegerToFP<isUnsigned, GPR64, FPR64, fixedpoint_f64_i64, asm, + [(set FPR64:$Rd, + (fdiv (node GPR64:$Rn), + fixedpoint_f64_i64:$scale))]> { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag + } +} + +//--- +// Unscaled integer <-> floating point conversion (i.e. FMOV) +//--- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseUnscaledConversion<bits<2> rmode, bits<3> opcode, + RegisterClass srcType, RegisterClass dstType, + string asm> + : I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd, $Rn", "", + // We use COPY_TO_REGCLASS for these bitconvert operations. + // copyPhysReg() expands the resultant COPY instructions after + // regalloc is done. This gives greater freedom for the allocator + // and related passes (coalescing, copy propagation, et. al.) to + // be more effective. 
+ [/*(set (dvt dstType:$Rd), (bitconvert (svt srcType:$Rn)))*/]>, + Sched<[WriteFCopy]> { + bits<5> Rd; + bits<5> Rn; + let Inst{30-24} = 0b0011110; + let Inst{21} = 1; + let Inst{20-19} = rmode; + let Inst{18-16} = opcode; + let Inst{15-10} = 0b000000; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseUnscaledConversionToHigh<bits<2> rmode, bits<3> opcode, + RegisterClass srcType, RegisterOperand dstType, string asm, + string kind> + : I<(outs dstType:$Rd), (ins srcType:$Rn, VectorIndex1:$idx), asm, + "{\t$Rd"#kind#"$idx, $Rn|"#kind#"\t$Rd$idx, $Rn}", "", []>, + Sched<[WriteFCopy]> { + bits<5> Rd; + bits<5> Rn; + let Inst{30-23} = 0b00111101; + let Inst{21} = 1; + let Inst{20-19} = rmode; + let Inst{18-16} = opcode; + let Inst{15-10} = 0b000000; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; + + let DecoderMethod = "DecodeFMOVLaneInstruction"; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseUnscaledConversionFromHigh<bits<2> rmode, bits<3> opcode, + RegisterOperand srcType, RegisterClass dstType, string asm, + string kind> + : I<(outs dstType:$Rd), (ins srcType:$Rn, VectorIndex1:$idx), asm, + "{\t$Rd, $Rn"#kind#"$idx|"#kind#"\t$Rd, $Rn$idx}", "", []>, + Sched<[WriteFCopy]> { + bits<5> Rd; + bits<5> Rn; + let Inst{30-23} = 0b00111101; + let Inst{21} = 1; + let Inst{20-19} = rmode; + let Inst{18-16} = opcode; + let Inst{15-10} = 0b000000; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; + + let DecoderMethod = "DecodeFMOVLaneInstruction"; +} + + +multiclass UnscaledConversion<string asm> { + def WHr : BaseUnscaledConversion<0b00, 0b111, GPR32, FPR16, asm> { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; + } + + def XHr : BaseUnscaledConversion<0b00, 0b111, GPR64, FPR16, asm> { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; + } + + def WSr : BaseUnscaledConversion<0b00, 0b111, GPR32, FPR32, asm> { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag + } + + def XDr : BaseUnscaledConversion<0b00, 0b111, GPR64, FPR64, asm> { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag + } + + def HWr : BaseUnscaledConversion<0b00, 0b110, FPR16, GPR32, asm> { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; + } + + def HXr : BaseUnscaledConversion<0b00, 0b110, FPR16, GPR64, asm> { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; + } + + def SWr : BaseUnscaledConversion<0b00, 0b110, FPR32, GPR32, asm> { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag + } + + def DXr : BaseUnscaledConversion<0b00, 0b110, FPR64, GPR64, asm> { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag + } + + def XDHighr : BaseUnscaledConversionToHigh<0b01, 0b111, GPR64, V128, + asm, ".d"> { + let Inst{31} = 1; + let Inst{22} = 0; + } + + def DXHighr : BaseUnscaledConversionFromHigh<0b01, 0b110, V128, GPR64, + asm, ".d"> { + let Inst{31} = 1; + let Inst{22} = 0; + } +} + +//--- +// Floating point conversion +//--- + +class BaseFPConversion<bits<2> type, bits<2> opcode, RegisterClass dstType, + RegisterClass srcType, string asm, list<dag> pattern> + : I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd, $Rn", "", pattern>, + Sched<[WriteFCvt]> { + 
bits<5> Rd; + bits<5> Rn; + let Inst{31-24} = 0b00011110; + let Inst{23-22} = type; + let Inst{21-17} = 0b10001; + let Inst{16-15} = opcode; + let Inst{14-10} = 0b10000; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass FPConversion<string asm> { + // Double-precision to Half-precision + def HDr : BaseFPConversion<0b01, 0b11, FPR16, FPR64, asm, + [(set FPR16:$Rd, (fpround FPR64:$Rn))]>; + + // Double-precision to Single-precision + def SDr : BaseFPConversion<0b01, 0b00, FPR32, FPR64, asm, + [(set FPR32:$Rd, (fpround FPR64:$Rn))]>; + + // Half-precision to Double-precision + def DHr : BaseFPConversion<0b11, 0b01, FPR64, FPR16, asm, + [(set FPR64:$Rd, (fpextend FPR16:$Rn))]>; + + // Half-precision to Single-precision + def SHr : BaseFPConversion<0b11, 0b00, FPR32, FPR16, asm, + [(set FPR32:$Rd, (fpextend FPR16:$Rn))]>; + + // Single-precision to Double-precision + def DSr : BaseFPConversion<0b00, 0b01, FPR64, FPR32, asm, + [(set FPR64:$Rd, (fpextend FPR32:$Rn))]>; + + // Single-precision to Half-precision + def HSr : BaseFPConversion<0b00, 0b11, FPR16, FPR32, asm, + [(set FPR16:$Rd, (fpround FPR32:$Rn))]>; +} + +//--- +// Single operand floating point data processing +//--- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSingleOperandFPData<bits<4> opcode, RegisterClass regtype, + ValueType vt, string asm, SDPatternOperator node> + : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "\t$Rd, $Rn", "", + [(set (vt regtype:$Rd), (node (vt regtype:$Rn)))]>, + Sched<[WriteF]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31-24} = 0b00011110; + let Inst{21-19} = 0b100; + let Inst{18-15} = opcode; + let Inst{14-10} = 0b10000; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SingleOperandFPData<bits<4> opcode, string asm, + SDPatternOperator node = null_frag> { + def Hr : BaseSingleOperandFPData<opcode, FPR16, f16, asm, node> { + let Inst{23-22} = 0b11; // 16-bit size flag + let Predicates = [HasFullFP16]; + } + + def Sr : BaseSingleOperandFPData<opcode, FPR32, f32, asm, node> { + let Inst{23-22} = 0b00; // 32-bit size flag + } + + def Dr : BaseSingleOperandFPData<opcode, FPR64, f64, asm, node> { + let Inst{23-22} = 0b01; // 64-bit size flag + } +} + +//--- +// Two operand floating point data processing +//--- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseTwoOperandFPData<bits<4> opcode, RegisterClass regtype, + string asm, list<dag> pat> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), + asm, "\t$Rd, $Rn, $Rm", "", pat>, + Sched<[WriteF]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31-24} = 0b00011110; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass TwoOperandFPData<bits<4> opcode, string asm, + SDPatternOperator node = null_frag> { + def Hrr : BaseTwoOperandFPData<opcode, FPR16, asm, + [(set (f16 FPR16:$Rd), + (node (f16 FPR16:$Rn), (f16 FPR16:$Rm)))]> { + let Inst{23-22} = 0b11; // 16-bit size flag + let Predicates = [HasFullFP16]; + } + + def Srr : BaseTwoOperandFPData<opcode, FPR32, asm, + [(set (f32 FPR32:$Rd), + (node (f32 FPR32:$Rn), (f32 FPR32:$Rm)))]> { + let Inst{23-22} = 0b00; // 32-bit size flag + } + + def Drr : BaseTwoOperandFPData<opcode, FPR64, asm, + [(set (f64 FPR64:$Rd), + (node (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]> { + let Inst{23-22} = 0b01; // 64-bit size flag + } +} + +multiclass TwoOperandFPDataNeg<bits<4> opcode, string asm, SDNode node> { + def Hrr : BaseTwoOperandFPData<opcode, FPR16, asm, + 
[(set FPR16:$Rd, (fneg (node FPR16:$Rn, (f16 FPR16:$Rm))))]> { + let Inst{23-22} = 0b11; // 16-bit size flag + let Predicates = [HasFullFP16]; + } + + def Srr : BaseTwoOperandFPData<opcode, FPR32, asm, + [(set FPR32:$Rd, (fneg (node FPR32:$Rn, (f32 FPR32:$Rm))))]> { + let Inst{23-22} = 0b00; // 32-bit size flag + } + + def Drr : BaseTwoOperandFPData<opcode, FPR64, asm, + [(set FPR64:$Rd, (fneg (node FPR64:$Rn, (f64 FPR64:$Rm))))]> { + let Inst{23-22} = 0b01; // 64-bit size flag + } +} + + +//--- +// Three operand floating point data processing +//--- + +class BaseThreeOperandFPData<bit isNegated, bit isSub, + RegisterClass regtype, string asm, list<dag> pat> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, regtype: $Ra), + asm, "\t$Rd, $Rn, $Rm, $Ra", "", pat>, + Sched<[WriteFMul]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<5> Ra; + let Inst{31-24} = 0b00011111; + let Inst{21} = isNegated; + let Inst{20-16} = Rm; + let Inst{15} = isSub; + let Inst{14-10} = Ra; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass ThreeOperandFPData<bit isNegated, bit isSub,string asm, + SDPatternOperator node> { + def Hrrr : BaseThreeOperandFPData<isNegated, isSub, FPR16, asm, + [(set FPR16:$Rd, + (node (f16 FPR16:$Rn), (f16 FPR16:$Rm), (f16 FPR16:$Ra)))]> { + let Inst{23-22} = 0b11; // 16-bit size flag + let Predicates = [HasFullFP16]; + } + + def Srrr : BaseThreeOperandFPData<isNegated, isSub, FPR32, asm, + [(set FPR32:$Rd, + (node (f32 FPR32:$Rn), (f32 FPR32:$Rm), (f32 FPR32:$Ra)))]> { + let Inst{23-22} = 0b00; // 32-bit size flag + } + + def Drrr : BaseThreeOperandFPData<isNegated, isSub, FPR64, asm, + [(set FPR64:$Rd, + (node (f64 FPR64:$Rn), (f64 FPR64:$Rm), (f64 FPR64:$Ra)))]> { + let Inst{23-22} = 0b01; // 64-bit size flag + } +} + +//--- +// Floating point data comparisons +//--- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseOneOperandFPComparison<bit signalAllNans, + RegisterClass regtype, string asm, + list<dag> pat> + : I<(outs), (ins regtype:$Rn), asm, "\t$Rn, #0.0", "", pat>, + Sched<[WriteFCmp]> { + bits<5> Rn; + let Inst{31-24} = 0b00011110; + let Inst{21} = 1; + + let Inst{15-10} = 0b001000; + let Inst{9-5} = Rn; + let Inst{4} = signalAllNans; + let Inst{3-0} = 0b1000; + + // Rm should be 0b00000 canonically, but we need to accept any value. 
+ let PostEncoderMethod = "fixOneOperandFPComparison"; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseTwoOperandFPComparison<bit signalAllNans, RegisterClass regtype, + string asm, list<dag> pat> + : I<(outs), (ins regtype:$Rn, regtype:$Rm), asm, "\t$Rn, $Rm", "", pat>, + Sched<[WriteFCmp]> { + bits<5> Rm; + bits<5> Rn; + let Inst{31-24} = 0b00011110; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15-10} = 0b001000; + let Inst{9-5} = Rn; + let Inst{4} = signalAllNans; + let Inst{3-0} = 0b0000; +} + +multiclass FPComparison<bit signalAllNans, string asm, + SDPatternOperator OpNode = null_frag> { + let Defs = [NZCV] in { + def Hrr : BaseTwoOperandFPComparison<signalAllNans, FPR16, asm, + [(OpNode FPR16:$Rn, (f16 FPR16:$Rm)), (implicit NZCV)]> { + let Inst{23-22} = 0b11; + let Predicates = [HasFullFP16]; + } + + def Hri : BaseOneOperandFPComparison<signalAllNans, FPR16, asm, + [(OpNode (f16 FPR16:$Rn), fpimm0), (implicit NZCV)]> { + let Inst{23-22} = 0b11; + let Predicates = [HasFullFP16]; + } + + def Srr : BaseTwoOperandFPComparison<signalAllNans, FPR32, asm, + [(OpNode FPR32:$Rn, (f32 FPR32:$Rm)), (implicit NZCV)]> { + let Inst{23-22} = 0b00; + } + + def Sri : BaseOneOperandFPComparison<signalAllNans, FPR32, asm, + [(OpNode (f32 FPR32:$Rn), fpimm0), (implicit NZCV)]> { + let Inst{23-22} = 0b00; + } + + def Drr : BaseTwoOperandFPComparison<signalAllNans, FPR64, asm, + [(OpNode FPR64:$Rn, (f64 FPR64:$Rm)), (implicit NZCV)]> { + let Inst{23-22} = 0b01; + } + + def Dri : BaseOneOperandFPComparison<signalAllNans, FPR64, asm, + [(OpNode (f64 FPR64:$Rn), fpimm0), (implicit NZCV)]> { + let Inst{23-22} = 0b01; + } + } // Defs = [NZCV] +} + +//--- +// Floating point conditional comparisons +//--- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseFPCondComparison<bit signalAllNans, RegisterClass regtype, + string mnemonic, list<dag> pat> + : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond), + mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "", pat>, + Sched<[WriteFCmp]> { + let Uses = [NZCV]; + let Defs = [NZCV]; + + bits<5> Rn; + bits<5> Rm; + bits<4> nzcv; + bits<4> cond; + + let Inst{31-24} = 0b00011110; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15-12} = cond; + let Inst{11-10} = 0b01; + let Inst{9-5} = Rn; + let Inst{4} = signalAllNans; + let Inst{3-0} = nzcv; +} + +multiclass FPCondComparison<bit signalAllNans, string mnemonic, + SDPatternOperator OpNode = null_frag> { + def Hrr : BaseFPCondComparison<signalAllNans, FPR16, mnemonic, + [(set NZCV, (OpNode (f16 FPR16:$Rn), (f16 FPR16:$Rm), (i32 imm:$nzcv), + (i32 imm:$cond), NZCV))]> { + let Inst{23-22} = 0b11; + let Predicates = [HasFullFP16]; + } + + def Srr : BaseFPCondComparison<signalAllNans, FPR32, mnemonic, + [(set NZCV, (OpNode (f32 FPR32:$Rn), (f32 FPR32:$Rm), (i32 imm:$nzcv), + (i32 imm:$cond), NZCV))]> { + let Inst{23-22} = 0b00; + } + + def Drr : BaseFPCondComparison<signalAllNans, FPR64, mnemonic, + [(set NZCV, (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm), (i32 imm:$nzcv), + (i32 imm:$cond), NZCV))]> { + let Inst{23-22} = 0b01; + } +} + +//--- +// Floating point conditional select +//--- + +class BaseFPCondSelect<RegisterClass regtype, ValueType vt, string asm> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond), + asm, "\t$Rd, $Rn, $Rm, $cond", "", + [(set regtype:$Rd, + (AArch64csel (vt regtype:$Rn), regtype:$Rm, + (i32 imm:$cond), NZCV))]>, + Sched<[WriteF]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<4> cond; + + let Inst{31-24} = 
0b00011110; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15-12} = cond; + let Inst{11-10} = 0b11; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass FPCondSelect<string asm> { + let Uses = [NZCV] in { + def Hrrr : BaseFPCondSelect<FPR16, f16, asm> { + let Inst{23-22} = 0b11; + let Predicates = [HasFullFP16]; + } + + def Srrr : BaseFPCondSelect<FPR32, f32, asm> { + let Inst{23-22} = 0b00; + } + + def Drrr : BaseFPCondSelect<FPR64, f64, asm> { + let Inst{23-22} = 0b01; + } + } // Uses = [NZCV] +} + +//--- +// Floating move immediate +//--- + +class BaseFPMoveImmediate<RegisterClass regtype, Operand fpimmtype, string asm> + : I<(outs regtype:$Rd), (ins fpimmtype:$imm), asm, "\t$Rd, $imm", "", + [(set regtype:$Rd, fpimmtype:$imm)]>, + Sched<[WriteFImm]> { + bits<5> Rd; + bits<8> imm; + let Inst{31-24} = 0b00011110; + let Inst{21} = 1; + let Inst{20-13} = imm; + let Inst{12-5} = 0b10000000; + let Inst{4-0} = Rd; +} + +multiclass FPMoveImmediate<string asm> { + def Hi : BaseFPMoveImmediate<FPR16, fpimm16, asm> { + let Inst{23-22} = 0b11; + let Predicates = [HasFullFP16]; + } + + def Si : BaseFPMoveImmediate<FPR32, fpimm32, asm> { + let Inst{23-22} = 0b00; + } + + def Di : BaseFPMoveImmediate<FPR64, fpimm64, asm> { + let Inst{23-22} = 0b01; + } +} +} // end of 'let Predicates = [HasFPARMv8]' + +//---------------------------------------------------------------------------- +// AdvSIMD +//---------------------------------------------------------------------------- + +let Predicates = [HasNEON] in { + +//---------------------------------------------------------------------------- +// AdvSIMD three register vector instructions +//---------------------------------------------------------------------------- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDThreeSameVector<bit Q, bit U, bits<3> size, bits<5> opcode, + RegisterOperand regtype, string asm, string kind, + list<dag> pattern> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm, + "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # + "|" # kind # "\t$Rd, $Rn, $Rm|}", "", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-21} = size; + let Inst{20-16} = Rm; + let Inst{15-11} = opcode; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<3> size, bits<5> opcode, + RegisterOperand regtype, string asm, string kind, + list<dag> pattern> + : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), asm, + "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # + "|" # kind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-21} = size; + let Inst{20-16} = Rm; + let Inst{15-11} = opcode; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class BaseSIMDThreeSameVectorDot<bit Q, bit U, string asm, string kind1, + string kind2, RegisterOperand RegType, + ValueType AccumType, ValueType InputType, + SDPatternOperator OpNode> : + BaseSIMDThreeSameVectorTied<Q, U, 0b100, 0b10010, RegType, asm, kind1, + [(set (AccumType RegType:$dst), + (OpNode (AccumType RegType:$Rd), + (InputType RegType:$Rn), + (InputType RegType:$Rm)))]> { + let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", 
$Rn" # kind2 # ", $Rm" # kind2 # "}"); +} + +multiclass SIMDThreeSameVectorDot<bit U, string asm, SDPatternOperator OpNode> { + def v8i8 : BaseSIMDThreeSameVectorDot<0, U, asm, ".2s", ".8b", V64, + v2i32, v8i8, OpNode>; + def v16i8 : BaseSIMDThreeSameVectorDot<1, U, asm, ".4s", ".16b", V128, + v4i32, v16i8, OpNode>; +} + +// All operand sizes distinguished in the encoding. +multiclass SIMDThreeSameVector<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode> { + def v8i8 : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64, + asm, ".8b", + [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; + def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128, + asm, ".16b", + [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>; + def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64, + asm, ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; + def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128, + asm, ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; + def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64, + asm, ".2s", + [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; + def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128, + asm, ".4s", + [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; + def v2i64 : BaseSIMDThreeSameVector<1, U, 0b111, opc, V128, + asm, ".2d", + [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (v2i64 V128:$Rm)))]>; +} + +// As above, but D sized elements unsupported. +multiclass SIMDThreeSameVectorBHS<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode> { + def v8i8 : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64, + asm, ".8b", + [(set V64:$Rd, (v8i8 (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm))))]>; + def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128, + asm, ".16b", + [(set V128:$Rd, (v16i8 (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm))))]>; + def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64, + asm, ".4h", + [(set V64:$Rd, (v4i16 (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm))))]>; + def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128, + asm, ".8h", + [(set V128:$Rd, (v8i16 (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm))))]>; + def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64, + asm, ".2s", + [(set V64:$Rd, (v2i32 (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm))))]>; + def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128, + asm, ".4s", + [(set V128:$Rd, (v4i32 (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm))))]>; +} + +multiclass SIMDThreeSameVectorBHSTied<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode> { + def v8i8 : BaseSIMDThreeSameVectorTied<0, U, 0b001, opc, V64, + asm, ".8b", + [(set (v8i8 V64:$dst), + (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; + def v16i8 : BaseSIMDThreeSameVectorTied<1, U, 0b001, opc, V128, + asm, ".16b", + [(set (v16i8 V128:$dst), + (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>; + def v4i16 : BaseSIMDThreeSameVectorTied<0, U, 0b011, opc, V64, + asm, ".4h", + [(set (v4i16 V64:$dst), + (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; + def v8i16 : BaseSIMDThreeSameVectorTied<1, U, 0b011, opc, V128, + asm, ".8h", + [(set (v8i16 V128:$dst), + (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; + def v2i32 : BaseSIMDThreeSameVectorTied<0, U, 0b101, opc, V64, + asm, ".2s", + [(set (v2i32 V64:$dst), + (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; + def v4i32 : BaseSIMDThreeSameVectorTied<1, U, 
0b101, opc, V128, + asm, ".4s", + [(set (v4i32 V128:$dst), + (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; +} + +// As above, but only B sized elements supported. +multiclass SIMDThreeSameVectorB<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode> { + def v8i8 : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64, + asm, ".8b", + [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; + def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128, + asm, ".16b", + [(set (v16i8 V128:$Rd), + (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>; +} + +// As above, but only floating point elements supported. +multiclass SIMDThreeSameVectorFP<bit U, bit S, bits<3> opc, + string asm, SDPatternOperator OpNode> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDThreeSameVector<0, U, {S,0b10}, {0b00,opc}, V64, + asm, ".4h", + [(set (v4f16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>; + def v8f16 : BaseSIMDThreeSameVector<1, U, {S,0b10}, {0b00,opc}, V128, + asm, ".8h", + [(set (v8f16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0b01}, {0b11,opc}, V64, + asm, ".2s", + [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>; + def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0b01}, {0b11,opc}, V128, + asm, ".4s", + [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>; + def v2f64 : BaseSIMDThreeSameVector<1, U, {S,0b11}, {0b11,opc}, V128, + asm, ".2d", + [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; +} + +multiclass SIMDThreeSameVectorFPCmp<bit U, bit S, bits<3> opc, + string asm, + SDPatternOperator OpNode> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDThreeSameVector<0, U, {S,0b10}, {0b00,opc}, V64, + asm, ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>; + def v8f16 : BaseSIMDThreeSameVector<1, U, {S,0b10}, {0b00,opc}, V128, + asm, ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0b01}, {0b11,opc}, V64, + asm, ".2s", + [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>; + def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0b01}, {0b11,opc}, V128, + asm, ".4s", + [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>; + def v2f64 : BaseSIMDThreeSameVector<1, U, {S,0b11}, {0b11,opc}, V128, + asm, ".2d", + [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; +} + +multiclass SIMDThreeSameVectorFPTied<bit U, bit S, bits<3> opc, + string asm, SDPatternOperator OpNode> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDThreeSameVectorTied<0, U, {S,0b10}, {0b00,opc}, V64, + asm, ".4h", + [(set (v4f16 V64:$dst), + (OpNode (v4f16 V64:$Rd), (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>; + def v8f16 : BaseSIMDThreeSameVectorTied<1, U, {S,0b10}, {0b00,opc}, V128, + asm, ".8h", + [(set (v8f16 V128:$dst), + (OpNode (v8f16 V128:$Rd), (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDThreeSameVectorTied<0, U, {S,0b01}, {0b11,opc}, V64, + asm, ".2s", + [(set (v2f32 V64:$dst), + (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>; + def v4f32 : BaseSIMDThreeSameVectorTied<1, U, {S,0b01}, {0b11,opc}, V128, + asm, ".4s", + [(set (v4f32 V128:$dst), + (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>; + def 
v2f64 : BaseSIMDThreeSameVectorTied<1, U, {S,0b11}, {0b11,opc}, V128, + asm, ".2d", + [(set (v2f64 V128:$dst), + (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; +} + +// As above, but D and B sized elements unsupported. +multiclass SIMDThreeSameVectorHS<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode> { + def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64, + asm, ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; + def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128, + asm, ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; + def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64, + asm, ".2s", + [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; + def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128, + asm, ".4s", + [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; +} + +// Logical three vector ops share opcode bits, and only use B sized elements. +multiclass SIMDLogicalThreeVector<bit U, bits<2> size, string asm, + SDPatternOperator OpNode = null_frag> { + def v8i8 : BaseSIMDThreeSameVector<0, U, {size,1}, 0b00011, V64, + asm, ".8b", + [(set (v8i8 V64:$Rd), (OpNode V64:$Rn, V64:$Rm))]>; + def v16i8 : BaseSIMDThreeSameVector<1, U, {size,1}, 0b00011, V128, + asm, ".16b", + [(set (v16i8 V128:$Rd), (OpNode V128:$Rn, V128:$Rm))]>; + + def : Pat<(v4i16 (OpNode V64:$LHS, V64:$RHS)), + (!cast<Instruction>(NAME#"v8i8") V64:$LHS, V64:$RHS)>; + def : Pat<(v2i32 (OpNode V64:$LHS, V64:$RHS)), + (!cast<Instruction>(NAME#"v8i8") V64:$LHS, V64:$RHS)>; + def : Pat<(v1i64 (OpNode V64:$LHS, V64:$RHS)), + (!cast<Instruction>(NAME#"v8i8") V64:$LHS, V64:$RHS)>; + + def : Pat<(v8i16 (OpNode V128:$LHS, V128:$RHS)), + (!cast<Instruction>(NAME#"v16i8") V128:$LHS, V128:$RHS)>; + def : Pat<(v4i32 (OpNode V128:$LHS, V128:$RHS)), + (!cast<Instruction>(NAME#"v16i8") V128:$LHS, V128:$RHS)>; + def : Pat<(v2i64 (OpNode V128:$LHS, V128:$RHS)), + (!cast<Instruction>(NAME#"v16i8") V128:$LHS, V128:$RHS)>; +} + +multiclass SIMDLogicalThreeVectorTied<bit U, bits<2> size, + string asm, SDPatternOperator OpNode> { + def v8i8 : BaseSIMDThreeSameVectorTied<0, U, {size,1}, 0b00011, V64, + asm, ".8b", + [(set (v8i8 V64:$dst), + (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; + def v16i8 : BaseSIMDThreeSameVectorTied<1, U, {size,1}, 0b00011, V128, + asm, ".16b", + [(set (v16i8 V128:$dst), + (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), + (v16i8 V128:$Rm)))]>; + + def : Pat<(v4i16 (OpNode (v4i16 V64:$LHS), (v4i16 V64:$MHS), + (v4i16 V64:$RHS))), + (!cast<Instruction>(NAME#"v8i8") + V64:$LHS, V64:$MHS, V64:$RHS)>; + def : Pat<(v2i32 (OpNode (v2i32 V64:$LHS), (v2i32 V64:$MHS), + (v2i32 V64:$RHS))), + (!cast<Instruction>(NAME#"v8i8") + V64:$LHS, V64:$MHS, V64:$RHS)>; + def : Pat<(v1i64 (OpNode (v1i64 V64:$LHS), (v1i64 V64:$MHS), + (v1i64 V64:$RHS))), + (!cast<Instruction>(NAME#"v8i8") + V64:$LHS, V64:$MHS, V64:$RHS)>; + + def : Pat<(v8i16 (OpNode (v8i16 V128:$LHS), (v8i16 V128:$MHS), + (v8i16 V128:$RHS))), + (!cast<Instruction>(NAME#"v16i8") + V128:$LHS, V128:$MHS, V128:$RHS)>; + def : Pat<(v4i32 (OpNode (v4i32 V128:$LHS), (v4i32 V128:$MHS), + (v4i32 V128:$RHS))), + (!cast<Instruction>(NAME#"v16i8") + V128:$LHS, V128:$MHS, V128:$RHS)>; + def : Pat<(v2i64 (OpNode (v2i64 V128:$LHS), (v2i64 V128:$MHS), + (v2i64 V128:$RHS))), + (!cast<Instruction>(NAME#"v16i8") + V128:$LHS, V128:$MHS, V128:$RHS)>; +} + + +//---------------------------------------------------------------------------- 
+// AdvSIMD two register vector instructions. +//---------------------------------------------------------------------------- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDTwoSameVector<bit Q, bit U, bits<2> size, bits<5> opcode, + bits<2> size2, RegisterOperand regtype, string asm, + string dstkind, string srckind, list<dag> pattern> + : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, + "{\t$Rd" # dstkind # ", $Rn" # srckind # + "|" # dstkind # "\t$Rd, $Rn}", "", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size; + let Inst{21} = 0b1; + let Inst{20-19} = size2; + let Inst{18-17} = 0b00; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDTwoSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode, + bits<2> size2, RegisterOperand regtype, + string asm, string dstkind, string srckind, + list<dag> pattern> + : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn), asm, + "{\t$Rd" # dstkind # ", $Rn" # srckind # + "|" # dstkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size; + let Inst{21} = 0b1; + let Inst{20-19} = size2; + let Inst{18-17} = 0b00; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +// Supports B, H, and S element sizes. +multiclass SIMDTwoVectorBHS<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode> { + def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64, + asm, ".8b", ".8b", + [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; + def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128, + asm, ".16b", ".16b", + [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; + def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64, + asm, ".4h", ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; + def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128, + asm, ".8h", ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; + def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64, + asm, ".2s", ".2s", + [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; + def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128, + asm, ".4s", ".4s", + [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; +} + +class BaseSIMDVectorLShiftLongBySize<bit Q, bits<2> size, + RegisterOperand regtype, string asm, string dstkind, + string srckind, string amount> + : I<(outs V128:$Rd), (ins regtype:$Rn), asm, + "{\t$Rd" # dstkind # ", $Rn" # srckind # ", #" # amount # + "|" # dstkind # "\t$Rd, $Rn, #" # amount # "}", "", []>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29-24} = 0b101110; + let Inst{23-22} = size; + let Inst{21-10} = 0b100001001110; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SIMDVectorLShiftLongBySizeBHS { + let hasSideEffects = 0 in { + def v8i8 : BaseSIMDVectorLShiftLongBySize<0, 0b00, V64, + "shll", ".8h", ".8b", "8">; + def v16i8 : BaseSIMDVectorLShiftLongBySize<1, 0b00, V128, + "shll2", ".8h", ".16b", "8">; + def v4i16 : BaseSIMDVectorLShiftLongBySize<0, 0b01, V64, + "shll", ".4s", ".4h", "16">; + def v8i16 : BaseSIMDVectorLShiftLongBySize<1, 0b01, V128, + "shll2", ".4s", ".8h", "16">; + def 
v2i32 : BaseSIMDVectorLShiftLongBySize<0, 0b10, V64, + "shll", ".2d", ".2s", "32">; + def v4i32 : BaseSIMDVectorLShiftLongBySize<1, 0b10, V128, + "shll2", ".2d", ".4s", "32">; + } +} + +// Supports all element sizes. +multiclass SIMDLongTwoVector<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode> { + def v8i8_v4i16 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64, + asm, ".4h", ".8b", + [(set (v4i16 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; + def v16i8_v8i16 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128, + asm, ".8h", ".16b", + [(set (v8i16 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; + def v4i16_v2i32 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64, + asm, ".2s", ".4h", + [(set (v2i32 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; + def v8i16_v4i32 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128, + asm, ".4s", ".8h", + [(set (v4i32 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; + def v2i32_v1i64 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64, + asm, ".1d", ".2s", + [(set (v1i64 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; + def v4i32_v2i64 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128, + asm, ".2d", ".4s", + [(set (v2i64 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; +} + +multiclass SIMDLongTwoVectorTied<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode> { + def v8i8_v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, 0b00, V64, + asm, ".4h", ".8b", + [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd), + (v8i8 V64:$Rn)))]>; + def v16i8_v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, 0b00, V128, + asm, ".8h", ".16b", + [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), + (v16i8 V128:$Rn)))]>; + def v4i16_v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, 0b00, V64, + asm, ".2s", ".4h", + [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), + (v4i16 V64:$Rn)))]>; + def v8i16_v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, 0b00, V128, + asm, ".4s", ".8h", + [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), + (v8i16 V128:$Rn)))]>; + def v2i32_v1i64 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, 0b00, V64, + asm, ".1d", ".2s", + [(set (v1i64 V64:$dst), (OpNode (v1i64 V64:$Rd), + (v2i32 V64:$Rn)))]>; + def v4i32_v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, 0b00, V128, + asm, ".2d", ".4s", + [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), + (v4i32 V128:$Rn)))]>; +} + +// Supports all element sizes, except 1xD. 
+multiclass SIMDTwoVectorBHSDTied<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode> { + def v8i8 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, 0b00, V64, + asm, ".8b", ".8b", + [(set (v8i8 V64:$dst), (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn)))]>; + def v16i8 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, 0b00, V128, + asm, ".16b", ".16b", + [(set (v16i8 V128:$dst), (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn)))]>; + def v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, 0b00, V64, + asm, ".4h", ".4h", + [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn)))]>; + def v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, 0b00, V128, + asm, ".8h", ".8h", + [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn)))]>; + def v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, 0b00, V64, + asm, ".2s", ".2s", + [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn)))]>; + def v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, 0b00, V128, + asm, ".4s", ".4s", + [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn)))]>; + def v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b11, opc, 0b00, V128, + asm, ".2d", ".2d", + [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn)))]>; +} + +multiclass SIMDTwoVectorBHSD<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode = null_frag> { + def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64, + asm, ".8b", ".8b", + [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; + def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128, + asm, ".16b", ".16b", + [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; + def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64, + asm, ".4h", ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; + def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128, + asm, ".8h", ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; + def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64, + asm, ".2s", ".2s", + [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; + def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128, + asm, ".4s", ".4s", + [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; + def v2i64 : BaseSIMDTwoSameVector<1, U, 0b11, opc, 0b00, V128, + asm, ".2d", ".2d", + [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>; +} + + +// Supports only B element sizes. +multiclass SIMDTwoVectorB<bit U, bits<2> size, bits<5> opc, string asm, + SDPatternOperator OpNode> { + def v8i8 : BaseSIMDTwoSameVector<0, U, size, opc, 0b00, V64, + asm, ".8b", ".8b", + [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; + def v16i8 : BaseSIMDTwoSameVector<1, U, size, opc, 0b00, V128, + asm, ".16b", ".16b", + [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; + +} + +// Supports only B and H element sizes. +multiclass SIMDTwoVectorBH<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode> { + def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64, + asm, ".8b", ".8b", + [(set (v8i8 V64:$Rd), (OpNode V64:$Rn))]>; + def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128, + asm, ".16b", ".16b", + [(set (v16i8 V128:$Rd), (OpNode V128:$Rn))]>; + def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64, + asm, ".4h", ".4h", + [(set (v4i16 V64:$Rd), (OpNode V64:$Rn))]>; + def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128, + asm, ".8h", ".8h", + [(set (v8i16 V128:$Rd), (OpNode V128:$Rn))]>; +} + +// Supports only S and D element sizes, uses high bit of the size field +// as an extra opcode bit. 
+multiclass SIMDTwoVectorFP<bit U, bit S, bits<5> opc, string asm, + SDPatternOperator OpNode> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64, + asm, ".4h", ".4h", + [(set (v4f16 V64:$Rd), (OpNode (v4f16 V64:$Rn)))]>; + def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128, + asm, ".8h", ".8h", + [(set (v8f16 V128:$Rd), (OpNode (v8f16 V128:$Rn)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64, + asm, ".2s", ".2s", + [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>; + def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128, + asm, ".4s", ".4s", + [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>; + def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128, + asm, ".2d", ".2d", + [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>; +} + +// Supports only S element size. +multiclass SIMDTwoVectorS<bit U, bit S, bits<5> opc, string asm, + SDPatternOperator OpNode> { + def v2i32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64, + asm, ".2s", ".2s", + [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; + def v4i32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128, + asm, ".4s", ".4s", + [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; +} + + +multiclass SIMDTwoVectorFPToInt<bit U, bit S, bits<5> opc, string asm, + SDPatternOperator OpNode> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64, + asm, ".4h", ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn)))]>; + def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128, + asm, ".8h", ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64, + asm, ".2s", ".2s", + [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>; + def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128, + asm, ".4s", ".4s", + [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>; + def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128, + asm, ".2d", ".2d", + [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>; +} + +multiclass SIMDTwoVectorIntToFP<bit U, bit S, bits<5> opc, string asm, + SDPatternOperator OpNode> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64, + asm, ".4h", ".4h", + [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; + def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128, + asm, ".8h", ".8h", + [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64, + asm, ".2s", ".2s", + [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; + def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128, + asm, ".4s", ".4s", + [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; + def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128, + asm, ".2d", ".2d", + [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>; +} + + +class BaseSIMDMixedTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode, + RegisterOperand inreg, RegisterOperand outreg, + string asm, string outkind, string inkind, + list<dag> pattern> + : I<(outs outreg:$Rd), (ins inreg:$Rn), asm, + "{\t$Rd" # outkind # ", $Rn" # inkind # + "|" # outkind # "\t$Rd, $Rn}", "", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let 
Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size; + let Inst{21-17} = 0b10000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class BaseSIMDMixedTwoVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode, + RegisterOperand inreg, RegisterOperand outreg, + string asm, string outkind, string inkind, + list<dag> pattern> + : I<(outs outreg:$dst), (ins outreg:$Rd, inreg:$Rn), asm, + "{\t$Rd" # outkind # ", $Rn" # inkind # + "|" # outkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size; + let Inst{21-17} = 0b10000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SIMDMixedTwoVector<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode> { + def v8i8 : BaseSIMDMixedTwoVector<0, U, 0b00, opc, V128, V64, + asm, ".8b", ".8h", + [(set (v8i8 V64:$Rd), (OpNode (v8i16 V128:$Rn)))]>; + def v16i8 : BaseSIMDMixedTwoVectorTied<1, U, 0b00, opc, V128, V128, + asm#"2", ".16b", ".8h", []>; + def v4i16 : BaseSIMDMixedTwoVector<0, U, 0b01, opc, V128, V64, + asm, ".4h", ".4s", + [(set (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn)))]>; + def v8i16 : BaseSIMDMixedTwoVectorTied<1, U, 0b01, opc, V128, V128, + asm#"2", ".8h", ".4s", []>; + def v2i32 : BaseSIMDMixedTwoVector<0, U, 0b10, opc, V128, V64, + asm, ".2s", ".2d", + [(set (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn)))]>; + def v4i32 : BaseSIMDMixedTwoVectorTied<1, U, 0b10, opc, V128, V128, + asm#"2", ".4s", ".2d", []>; + + def : Pat<(concat_vectors (v8i8 V64:$Rd), (OpNode (v8i16 V128:$Rn))), + (!cast<Instruction>(NAME # "v16i8") + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; + def : Pat<(concat_vectors (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn))), + (!cast<Instruction>(NAME # "v8i16") + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; + def : Pat<(concat_vectors (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn))), + (!cast<Instruction>(NAME # "v4i32") + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; +} + +class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<2> size2, + bits<5> opcode, RegisterOperand regtype, string asm, + string kind, string zero, ValueType dty, + ValueType sty, SDNode OpNode> + : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, + "{\t$Rd" # kind # ", $Rn" # kind # ", #" # zero # + "|" # kind # "\t$Rd, $Rn, #" # zero # "}", "", + [(set (dty regtype:$Rd), (OpNode (sty regtype:$Rn)))]>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size; + let Inst{21} = 0b1; + let Inst{20-19} = size2; + let Inst{18-17} = 0b00; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +// Comparisons support all element sizes, except 1xD. 
+multiclass SIMDCmpTwoVector<bit U, bits<5> opc, string asm, + SDNode OpNode> { + def v8i8rz : BaseSIMDCmpTwoVector<0, U, 0b00, 0b00, opc, V64, + asm, ".8b", "0", + v8i8, v8i8, OpNode>; + def v16i8rz : BaseSIMDCmpTwoVector<1, U, 0b00, 0b00, opc, V128, + asm, ".16b", "0", + v16i8, v16i8, OpNode>; + def v4i16rz : BaseSIMDCmpTwoVector<0, U, 0b01, 0b00, opc, V64, + asm, ".4h", "0", + v4i16, v4i16, OpNode>; + def v8i16rz : BaseSIMDCmpTwoVector<1, U, 0b01, 0b00, opc, V128, + asm, ".8h", "0", + v8i16, v8i16, OpNode>; + def v2i32rz : BaseSIMDCmpTwoVector<0, U, 0b10, 0b00, opc, V64, + asm, ".2s", "0", + v2i32, v2i32, OpNode>; + def v4i32rz : BaseSIMDCmpTwoVector<1, U, 0b10, 0b00, opc, V128, + asm, ".4s", "0", + v4i32, v4i32, OpNode>; + def v2i64rz : BaseSIMDCmpTwoVector<1, U, 0b11, 0b00, opc, V128, + asm, ".2d", "0", + v2i64, v2i64, OpNode>; +} + +// FP Comparisons support only S and D element sizes (and H for v8.2a). +multiclass SIMDFPCmpTwoVector<bit U, bit S, bits<5> opc, + string asm, SDNode OpNode> { + + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16rz : BaseSIMDCmpTwoVector<0, U, {S,1}, 0b11, opc, V64, + asm, ".4h", "0.0", + v4i16, v4f16, OpNode>; + def v8i16rz : BaseSIMDCmpTwoVector<1, U, {S,1}, 0b11, opc, V128, + asm, ".8h", "0.0", + v8i16, v8f16, OpNode>; + } // Predicates = [HasNEON, HasFullFP16] + def v2i32rz : BaseSIMDCmpTwoVector<0, U, {S,0}, 0b00, opc, V64, + asm, ".2s", "0.0", + v2i32, v2f32, OpNode>; + def v4i32rz : BaseSIMDCmpTwoVector<1, U, {S,0}, 0b00, opc, V128, + asm, ".4s", "0.0", + v4i32, v4f32, OpNode>; + def v2i64rz : BaseSIMDCmpTwoVector<1, U, {S,1}, 0b00, opc, V128, + asm, ".2d", "0.0", + v2i64, v2f64, OpNode>; + + let Predicates = [HasNEON, HasFullFP16] in { + def : InstAlias<asm # "\t$Vd.4h, $Vn.4h, #0", + (!cast<Instruction>(NAME # v4i16rz) V64:$Vd, V64:$Vn), 0>; + def : InstAlias<asm # "\t$Vd.8h, $Vn.8h, #0", + (!cast<Instruction>(NAME # v8i16rz) V128:$Vd, V128:$Vn), 0>; + } + def : InstAlias<asm # "\t$Vd.2s, $Vn.2s, #0", + (!cast<Instruction>(NAME # v2i32rz) V64:$Vd, V64:$Vn), 0>; + def : InstAlias<asm # "\t$Vd.4s, $Vn.4s, #0", + (!cast<Instruction>(NAME # v4i32rz) V128:$Vd, V128:$Vn), 0>; + def : InstAlias<asm # "\t$Vd.2d, $Vn.2d, #0", + (!cast<Instruction>(NAME # v2i64rz) V128:$Vd, V128:$Vn), 0>; + let Predicates = [HasNEON, HasFullFP16] in { + def : InstAlias<asm # ".4h\t$Vd, $Vn, #0", + (!cast<Instruction>(NAME # v4i16rz) V64:$Vd, V64:$Vn), 0>; + def : InstAlias<asm # ".8h\t$Vd, $Vn, #0", + (!cast<Instruction>(NAME # v8i16rz) V128:$Vd, V128:$Vn), 0>; + } + def : InstAlias<asm # ".2s\t$Vd, $Vn, #0", + (!cast<Instruction>(NAME # v2i32rz) V64:$Vd, V64:$Vn), 0>; + def : InstAlias<asm # ".4s\t$Vd, $Vn, #0", + (!cast<Instruction>(NAME # v4i32rz) V128:$Vd, V128:$Vn), 0>; + def : InstAlias<asm # ".2d\t$Vd, $Vn, #0", + (!cast<Instruction>(NAME # v2i64rz) V128:$Vd, V128:$Vn), 0>; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDFPCvtTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode, + RegisterOperand outtype, RegisterOperand intype, + string asm, string VdTy, string VnTy, + list<dag> pattern> + : I<(outs outtype:$Rd), (ins intype:$Rn), asm, + !strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size; + let Inst{21-17} = 0b10000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class BaseSIMDFPCvtTwoVectorTied<bit Q, bit U, 
bits<2> size, bits<5> opcode, + RegisterOperand outtype, RegisterOperand intype, + string asm, string VdTy, string VnTy, + list<dag> pattern> + : I<(outs outtype:$dst), (ins outtype:$Rd, intype:$Rn), asm, + !strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "$Rd = $dst", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size; + let Inst{21-17} = 0b10000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SIMDFPWidenTwoVector<bit U, bit S, bits<5> opc, string asm> { + def v4i16 : BaseSIMDFPCvtTwoVector<0, U, {S,0}, opc, V128, V64, + asm, ".4s", ".4h", []>; + def v8i16 : BaseSIMDFPCvtTwoVector<1, U, {S,0}, opc, V128, V128, + asm#"2", ".4s", ".8h", []>; + def v2i32 : BaseSIMDFPCvtTwoVector<0, U, {S,1}, opc, V128, V64, + asm, ".2d", ".2s", []>; + def v4i32 : BaseSIMDFPCvtTwoVector<1, U, {S,1}, opc, V128, V128, + asm#"2", ".2d", ".4s", []>; +} + +multiclass SIMDFPNarrowTwoVector<bit U, bit S, bits<5> opc, string asm> { + def v4i16 : BaseSIMDFPCvtTwoVector<0, U, {S,0}, opc, V64, V128, + asm, ".4h", ".4s", []>; + def v8i16 : BaseSIMDFPCvtTwoVectorTied<1, U, {S,0}, opc, V128, V128, + asm#"2", ".8h", ".4s", []>; + def v2i32 : BaseSIMDFPCvtTwoVector<0, U, {S,1}, opc, V64, V128, + asm, ".2s", ".2d", []>; + def v4i32 : BaseSIMDFPCvtTwoVectorTied<1, U, {S,1}, opc, V128, V128, + asm#"2", ".4s", ".2d", []>; +} + +multiclass SIMDFPInexactCvtTwoVector<bit U, bit S, bits<5> opc, string asm, + Intrinsic OpNode> { + def v2f32 : BaseSIMDFPCvtTwoVector<0, U, {S,1}, opc, V64, V128, + asm, ".2s", ".2d", + [(set (v2f32 V64:$Rd), (OpNode (v2f64 V128:$Rn)))]>; + def v4f32 : BaseSIMDFPCvtTwoVectorTied<1, U, {S,1}, opc, V128, V128, + asm#"2", ".4s", ".2d", []>; + + def : Pat<(concat_vectors (v2f32 V64:$Rd), (OpNode (v2f64 V128:$Rn))), + (!cast<Instruction>(NAME # "v4f32") + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; +} + +//---------------------------------------------------------------------------- +// AdvSIMD three register different-size vector instructions. 
+//---------------------------------------------------------------------------- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDDifferentThreeVector<bit U, bits<3> size, bits<4> opcode, + RegisterOperand outtype, RegisterOperand intype1, + RegisterOperand intype2, string asm, + string outkind, string inkind1, string inkind2, + list<dag> pattern> + : I<(outs outtype:$Rd), (ins intype1:$Rn, intype2:$Rm), asm, + "{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 # + "|" # outkind # "\t$Rd, $Rn, $Rm}", "", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31} = 0; + let Inst{30} = size{0}; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size{2-1}; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15-12} = opcode; + let Inst{11-10} = 0b00; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDDifferentThreeVectorTied<bit U, bits<3> size, bits<4> opcode, + RegisterOperand outtype, RegisterOperand intype1, + RegisterOperand intype2, string asm, + string outkind, string inkind1, string inkind2, + list<dag> pattern> + : I<(outs outtype:$dst), (ins outtype:$Rd, intype1:$Rn, intype2:$Rm), asm, + "{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 # + "|" # outkind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31} = 0; + let Inst{30} = size{0}; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size{2-1}; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15-12} = opcode; + let Inst{11-10} = 0b00; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +// FIXME: TableGen doesn't know how to deal with expanded types that also +// change the element count (in this case, placing the results in +// the high elements of the result register rather than the low +// elements). Until that's fixed, we can't code-gen those. +multiclass SIMDNarrowThreeVectorBHS<bit U, bits<4> opc, string asm, + Intrinsic IntOp> { + def v8i16_v8i8 : BaseSIMDDifferentThreeVector<U, 0b000, opc, + V64, V128, V128, + asm, ".8b", ".8h", ".8h", + [(set (v8i8 V64:$Rd), (IntOp (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; + def v8i16_v16i8 : BaseSIMDDifferentThreeVectorTied<U, 0b001, opc, + V128, V128, V128, + asm#"2", ".16b", ".8h", ".8h", + []>; + def v4i32_v4i16 : BaseSIMDDifferentThreeVector<U, 0b010, opc, + V64, V128, V128, + asm, ".4h", ".4s", ".4s", + [(set (v4i16 V64:$Rd), (IntOp (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; + def v4i32_v8i16 : BaseSIMDDifferentThreeVectorTied<U, 0b011, opc, + V128, V128, V128, + asm#"2", ".8h", ".4s", ".4s", + []>; + def v2i64_v2i32 : BaseSIMDDifferentThreeVector<U, 0b100, opc, + V64, V128, V128, + asm, ".2s", ".2d", ".2d", + [(set (v2i32 V64:$Rd), (IntOp (v2i64 V128:$Rn), (v2i64 V128:$Rm)))]>; + def v2i64_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b101, opc, + V128, V128, V128, + asm#"2", ".4s", ".2d", ".2d", + []>; + + + // Patterns for the '2' variants involve INSERT_SUBREG, which you can't put in + // a version attached to an instruction. 
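// Explanatory sketch (added commentary, not in the original file): the
// stand-alone patterns below select a DAG of the form
//
//   (concat_vectors (v8i8 V64:$lo), (IntOp (v8i16 V128:$a), (v8i16 V128:$b)))
//
// onto the tied "2" instruction: $lo is first widened to a 128-bit register
// with INSERT_SUBREG/IMPLICIT_DEF so it can act as the tied operand whose
// high half receives the narrowed result.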
+ def : Pat<(concat_vectors (v8i8 V64:$Rd), (IntOp (v8i16 V128:$Rn), + (v8i16 V128:$Rm))), + (!cast<Instruction>(NAME # "v8i16_v16i8") + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), + V128:$Rn, V128:$Rm)>; + def : Pat<(concat_vectors (v4i16 V64:$Rd), (IntOp (v4i32 V128:$Rn), + (v4i32 V128:$Rm))), + (!cast<Instruction>(NAME # "v4i32_v8i16") + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), + V128:$Rn, V128:$Rm)>; + def : Pat<(concat_vectors (v2i32 V64:$Rd), (IntOp (v2i64 V128:$Rn), + (v2i64 V128:$Rm))), + (!cast<Instruction>(NAME # "v2i64_v4i32") + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), + V128:$Rn, V128:$Rm)>; +} + +multiclass SIMDDifferentThreeVectorBD<bit U, bits<4> opc, string asm, + Intrinsic IntOp> { + def v8i8 : BaseSIMDDifferentThreeVector<U, 0b000, opc, + V128, V64, V64, + asm, ".8h", ".8b", ".8b", + [(set (v8i16 V128:$Rd), (IntOp (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; + def v16i8 : BaseSIMDDifferentThreeVector<U, 0b001, opc, + V128, V128, V128, + asm#"2", ".8h", ".16b", ".16b", []>; + let Predicates = [HasAES] in { + def v1i64 : BaseSIMDDifferentThreeVector<U, 0b110, opc, + V128, V64, V64, + asm, ".1q", ".1d", ".1d", []>; + def v2i64 : BaseSIMDDifferentThreeVector<U, 0b111, opc, + V128, V128, V128, + asm#"2", ".1q", ".2d", ".2d", []>; + } + + def : Pat<(v8i16 (IntOp (v8i8 (extract_high_v16i8 V128:$Rn)), + (v8i8 (extract_high_v16i8 V128:$Rm)))), + (!cast<Instruction>(NAME#"v16i8") V128:$Rn, V128:$Rm)>; +} + +multiclass SIMDLongThreeVectorHS<bit U, bits<4> opc, string asm, + SDPatternOperator OpNode> { + def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc, + V128, V64, V64, + asm, ".4s", ".4h", ".4h", + [(set (v4i32 V128:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; + def v8i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b011, opc, + V128, V128, V128, + asm#"2", ".4s", ".8h", ".8h", + [(set (v4i32 V128:$Rd), (OpNode (extract_high_v8i16 V128:$Rn), + (extract_high_v8i16 V128:$Rm)))]>; + def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc, + V128, V64, V64, + asm, ".2d", ".2s", ".2s", + [(set (v2i64 V128:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; + def v4i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b101, opc, + V128, V128, V128, + asm#"2", ".2d", ".4s", ".4s", + [(set (v2i64 V128:$Rd), (OpNode (extract_high_v4i32 V128:$Rn), + (extract_high_v4i32 V128:$Rm)))]>; +} + +multiclass SIMDLongThreeVectorBHSabdl<bit U, bits<4> opc, string asm, + SDPatternOperator OpNode = null_frag> { + def v8i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b000, opc, + V128, V64, V64, + asm, ".8h", ".8b", ".8b", + [(set (v8i16 V128:$Rd), + (zext (v8i8 (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))))]>; + def v16i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b001, opc, + V128, V128, V128, + asm#"2", ".8h", ".16b", ".16b", + [(set (v8i16 V128:$Rd), + (zext (v8i8 (OpNode (extract_high_v16i8 V128:$Rn), + (extract_high_v16i8 V128:$Rm)))))]>; + def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc, + V128, V64, V64, + asm, ".4s", ".4h", ".4h", + [(set (v4i32 V128:$Rd), + (zext (v4i16 (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))))]>; + def v8i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b011, opc, + V128, V128, V128, + asm#"2", ".4s", ".8h", ".8h", + [(set (v4i32 V128:$Rd), + (zext (v4i16 (OpNode (extract_high_v8i16 V128:$Rn), + (extract_high_v8i16 V128:$Rm)))))]>; + def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc, + V128, V64, V64, + asm, ".2d", ".2s", ".2s", + [(set (v2i64 V128:$Rd), + (zext (v2i32 (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))))]>; + def v4i32_v2i64 : 
BaseSIMDDifferentThreeVector<U, 0b101, opc, + V128, V128, V128, + asm#"2", ".2d", ".4s", ".4s", + [(set (v2i64 V128:$Rd), + (zext (v2i32 (OpNode (extract_high_v4i32 V128:$Rn), + (extract_high_v4i32 V128:$Rm)))))]>; +} + +multiclass SIMDLongThreeVectorTiedBHSabal<bit U, bits<4> opc, + string asm, + SDPatternOperator OpNode> { + def v8i8_v8i16 : BaseSIMDDifferentThreeVectorTied<U, 0b000, opc, + V128, V64, V64, + asm, ".8h", ".8b", ".8b", + [(set (v8i16 V128:$dst), + (add (v8i16 V128:$Rd), + (zext (v8i8 (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm))))))]>; + def v16i8_v8i16 : BaseSIMDDifferentThreeVectorTied<U, 0b001, opc, + V128, V128, V128, + asm#"2", ".8h", ".16b", ".16b", + [(set (v8i16 V128:$dst), + (add (v8i16 V128:$Rd), + (zext (v8i8 (OpNode (extract_high_v16i8 V128:$Rn), + (extract_high_v16i8 V128:$Rm))))))]>; + def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b010, opc, + V128, V64, V64, + asm, ".4s", ".4h", ".4h", + [(set (v4i32 V128:$dst), + (add (v4i32 V128:$Rd), + (zext (v4i16 (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm))))))]>; + def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b011, opc, + V128, V128, V128, + asm#"2", ".4s", ".8h", ".8h", + [(set (v4i32 V128:$dst), + (add (v4i32 V128:$Rd), + (zext (v4i16 (OpNode (extract_high_v8i16 V128:$Rn), + (extract_high_v8i16 V128:$Rm))))))]>; + def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b100, opc, + V128, V64, V64, + asm, ".2d", ".2s", ".2s", + [(set (v2i64 V128:$dst), + (add (v2i64 V128:$Rd), + (zext (v2i32 (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm))))))]>; + def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b101, opc, + V128, V128, V128, + asm#"2", ".2d", ".4s", ".4s", + [(set (v2i64 V128:$dst), + (add (v2i64 V128:$Rd), + (zext (v2i32 (OpNode (extract_high_v4i32 V128:$Rn), + (extract_high_v4i32 V128:$Rm))))))]>; +} + +multiclass SIMDLongThreeVectorBHS<bit U, bits<4> opc, string asm, + SDPatternOperator OpNode = null_frag> { + def v8i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b000, opc, + V128, V64, V64, + asm, ".8h", ".8b", ".8b", + [(set (v8i16 V128:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; + def v16i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b001, opc, + V128, V128, V128, + asm#"2", ".8h", ".16b", ".16b", + [(set (v8i16 V128:$Rd), (OpNode (extract_high_v16i8 V128:$Rn), + (extract_high_v16i8 V128:$Rm)))]>; + def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc, + V128, V64, V64, + asm, ".4s", ".4h", ".4h", + [(set (v4i32 V128:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; + def v8i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b011, opc, + V128, V128, V128, + asm#"2", ".4s", ".8h", ".8h", + [(set (v4i32 V128:$Rd), (OpNode (extract_high_v8i16 V128:$Rn), + (extract_high_v8i16 V128:$Rm)))]>; + def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc, + V128, V64, V64, + asm, ".2d", ".2s", ".2s", + [(set (v2i64 V128:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; + def v4i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b101, opc, + V128, V128, V128, + asm#"2", ".2d", ".4s", ".4s", + [(set (v2i64 V128:$Rd), (OpNode (extract_high_v4i32 V128:$Rn), + (extract_high_v4i32 V128:$Rm)))]>; +} + +multiclass SIMDLongThreeVectorTiedBHS<bit U, bits<4> opc, + string asm, + SDPatternOperator OpNode> { + def v8i8_v8i16 : BaseSIMDDifferentThreeVectorTied<U, 0b000, opc, + V128, V64, V64, + asm, ".8h", ".8b", ".8b", + [(set (v8i16 V128:$dst), + (OpNode (v8i16 V128:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; + def v16i8_v8i16 : BaseSIMDDifferentThreeVectorTied<U, 0b001, opc, + V128, V128, V128, + asm#"2", ".8h", ".16b", 
".16b", + [(set (v8i16 V128:$dst), + (OpNode (v8i16 V128:$Rd), + (extract_high_v16i8 V128:$Rn), + (extract_high_v16i8 V128:$Rm)))]>; + def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b010, opc, + V128, V64, V64, + asm, ".4s", ".4h", ".4h", + [(set (v4i32 V128:$dst), + (OpNode (v4i32 V128:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; + def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b011, opc, + V128, V128, V128, + asm#"2", ".4s", ".8h", ".8h", + [(set (v4i32 V128:$dst), + (OpNode (v4i32 V128:$Rd), + (extract_high_v8i16 V128:$Rn), + (extract_high_v8i16 V128:$Rm)))]>; + def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b100, opc, + V128, V64, V64, + asm, ".2d", ".2s", ".2s", + [(set (v2i64 V128:$dst), + (OpNode (v2i64 V128:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; + def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b101, opc, + V128, V128, V128, + asm#"2", ".2d", ".4s", ".4s", + [(set (v2i64 V128:$dst), + (OpNode (v2i64 V128:$Rd), + (extract_high_v4i32 V128:$Rn), + (extract_high_v4i32 V128:$Rm)))]>; +} + +multiclass SIMDLongThreeVectorSQDMLXTiedHS<bit U, bits<4> opc, string asm, + SDPatternOperator Accum> { + def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b010, opc, + V128, V64, V64, + asm, ".4s", ".4h", ".4h", + [(set (v4i32 V128:$dst), + (Accum (v4i32 V128:$Rd), + (v4i32 (int_aarch64_neon_sqdmull (v4i16 V64:$Rn), + (v4i16 V64:$Rm)))))]>; + def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b011, opc, + V128, V128, V128, + asm#"2", ".4s", ".8h", ".8h", + [(set (v4i32 V128:$dst), + (Accum (v4i32 V128:$Rd), + (v4i32 (int_aarch64_neon_sqdmull (extract_high_v8i16 V128:$Rn), + (extract_high_v8i16 V128:$Rm)))))]>; + def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b100, opc, + V128, V64, V64, + asm, ".2d", ".2s", ".2s", + [(set (v2i64 V128:$dst), + (Accum (v2i64 V128:$Rd), + (v2i64 (int_aarch64_neon_sqdmull (v2i32 V64:$Rn), + (v2i32 V64:$Rm)))))]>; + def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b101, opc, + V128, V128, V128, + asm#"2", ".2d", ".4s", ".4s", + [(set (v2i64 V128:$dst), + (Accum (v2i64 V128:$Rd), + (v2i64 (int_aarch64_neon_sqdmull (extract_high_v4i32 V128:$Rn), + (extract_high_v4i32 V128:$Rm)))))]>; +} + +multiclass SIMDWideThreeVectorBHS<bit U, bits<4> opc, string asm, + SDPatternOperator OpNode> { + def v8i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b000, opc, + V128, V128, V64, + asm, ".8h", ".8h", ".8b", + [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i8 V64:$Rm)))]>; + def v16i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b001, opc, + V128, V128, V128, + asm#"2", ".8h", ".8h", ".16b", + [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), + (extract_high_v16i8 V128:$Rm)))]>; + def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc, + V128, V128, V64, + asm, ".4s", ".4s", ".4h", + [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i16 V64:$Rm)))]>; + def v8i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b011, opc, + V128, V128, V128, + asm#"2", ".4s", ".4s", ".8h", + [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), + (extract_high_v8i16 V128:$Rm)))]>; + def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc, + V128, V128, V64, + asm, ".2d", ".2d", ".2s", + [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (v2i32 V64:$Rm)))]>; + def v4i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b101, opc, + V128, V128, V128, + asm#"2", ".2d", ".2d", ".4s", + [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), + (extract_high_v4i32 V128:$Rm)))]>; +} + +//---------------------------------------------------------------------------- +// 
AdvSIMD bitwise extract from vector +//---------------------------------------------------------------------------- + +class BaseSIMDBitwiseExtract<bit size, RegisterOperand regtype, ValueType vty, + string asm, string kind> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, i32imm:$imm), asm, + "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $imm" # + "|" # kind # "\t$Rd, $Rn, $Rm, $imm}", "", + [(set (vty regtype:$Rd), + (AArch64ext regtype:$Rn, regtype:$Rm, (i32 imm:$imm)))]>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<4> imm; + let Inst{31} = 0; + let Inst{30} = size; + let Inst{29-21} = 0b101110000; + let Inst{20-16} = Rm; + let Inst{15} = 0; + let Inst{14-11} = imm; + let Inst{10} = 0; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + + +multiclass SIMDBitwiseExtract<string asm> { + def v8i8 : BaseSIMDBitwiseExtract<0, V64, v8i8, asm, ".8b"> { + let imm{3} = 0; + } + def v16i8 : BaseSIMDBitwiseExtract<1, V128, v16i8, asm, ".16b">; +} + +//---------------------------------------------------------------------------- +// AdvSIMD zip vector +//---------------------------------------------------------------------------- + +class BaseSIMDZipVector<bits<3> size, bits<3> opc, RegisterOperand regtype, + string asm, string kind, SDNode OpNode, ValueType valty> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm, + "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # + "|" # kind # "\t$Rd, $Rn, $Rm}", "", + [(set (valty regtype:$Rd), (OpNode regtype:$Rn, regtype:$Rm))]>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31} = 0; + let Inst{30} = size{0}; + let Inst{29-24} = 0b001110; + let Inst{23-22} = size{2-1}; + let Inst{21} = 0; + let Inst{20-16} = Rm; + let Inst{15} = 0; + let Inst{14-12} = opc; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SIMDZipVector<bits<3>opc, string asm, + SDNode OpNode> { + def v8i8 : BaseSIMDZipVector<0b000, opc, V64, + asm, ".8b", OpNode, v8i8>; + def v16i8 : BaseSIMDZipVector<0b001, opc, V128, + asm, ".16b", OpNode, v16i8>; + def v4i16 : BaseSIMDZipVector<0b010, opc, V64, + asm, ".4h", OpNode, v4i16>; + def v8i16 : BaseSIMDZipVector<0b011, opc, V128, + asm, ".8h", OpNode, v8i16>; + def v2i32 : BaseSIMDZipVector<0b100, opc, V64, + asm, ".2s", OpNode, v2i32>; + def v4i32 : BaseSIMDZipVector<0b101, opc, V128, + asm, ".4s", OpNode, v4i32>; + def v2i64 : BaseSIMDZipVector<0b111, opc, V128, + asm, ".2d", OpNode, v2i64>; + + def : Pat<(v4f16 (OpNode V64:$Rn, V64:$Rm)), + (!cast<Instruction>(NAME#"v4i16") V64:$Rn, V64:$Rm)>; + def : Pat<(v8f16 (OpNode V128:$Rn, V128:$Rm)), + (!cast<Instruction>(NAME#"v8i16") V128:$Rn, V128:$Rm)>; + def : Pat<(v2f32 (OpNode V64:$Rn, V64:$Rm)), + (!cast<Instruction>(NAME#"v2i32") V64:$Rn, V64:$Rm)>; + def : Pat<(v4f32 (OpNode V128:$Rn, V128:$Rm)), + (!cast<Instruction>(NAME#"v4i32") V128:$Rn, V128:$Rm)>; + def : Pat<(v2f64 (OpNode V128:$Rn, V128:$Rm)), + (!cast<Instruction>(NAME#"v2i64") V128:$Rn, V128:$Rm)>; +} + +//---------------------------------------------------------------------------- +// AdvSIMD three register scalar instructions +//---------------------------------------------------------------------------- + +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseSIMDThreeScalar<bit U, bits<3> size, bits<5> opcode, + RegisterClass regtype, string asm, + list<dag> pattern> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm, + "\t$Rd, $Rn, $Rm", "", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + 
bits<5> Rn; + bits<5> Rm; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-24} = 0b11110; + let Inst{23-21} = size; + let Inst{20-16} = Rm; + let Inst{15-11} = opcode; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseSIMDThreeScalarTied<bit U, bits<2> size, bit R, bits<5> opcode, + dag oops, dag iops, string asm, + list<dag> pattern> + : I<oops, iops, asm, "\t$Rd, $Rn, $Rm", "$Rd = $dst", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-24} = 0b11110; + let Inst{23-22} = size; + let Inst{21} = R; + let Inst{20-16} = Rm; + let Inst{15-11} = opcode; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SIMDThreeScalarD<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode> { + def v1i64 : BaseSIMDThreeScalar<U, 0b111, opc, FPR64, asm, + [(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm)))]>; +} + +multiclass SIMDThreeScalarBHSD<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode> { + def v1i64 : BaseSIMDThreeScalar<U, 0b111, opc, FPR64, asm, + [(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm)))]>; + def v1i32 : BaseSIMDThreeScalar<U, 0b101, opc, FPR32, asm, []>; + def v1i16 : BaseSIMDThreeScalar<U, 0b011, opc, FPR16, asm, []>; + def v1i8 : BaseSIMDThreeScalar<U, 0b001, opc, FPR8 , asm, []>; + + def : Pat<(i64 (OpNode (i64 FPR64:$Rn), (i64 FPR64:$Rm))), + (!cast<Instruction>(NAME#"v1i64") FPR64:$Rn, FPR64:$Rm)>; + def : Pat<(i32 (OpNode (i32 FPR32:$Rn), (i32 FPR32:$Rm))), + (!cast<Instruction>(NAME#"v1i32") FPR32:$Rn, FPR32:$Rm)>; +} + +multiclass SIMDThreeScalarHS<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode> { + def v1i32 : BaseSIMDThreeScalar<U, 0b101, opc, FPR32, asm, + [(set FPR32:$Rd, (OpNode FPR32:$Rn, FPR32:$Rm))]>; + def v1i16 : BaseSIMDThreeScalar<U, 0b011, opc, FPR16, asm, []>; +} + +multiclass SIMDThreeScalarHSTied<bit U, bit R, bits<5> opc, string asm, + SDPatternOperator OpNode = null_frag> { + def v1i32: BaseSIMDThreeScalarTied<U, 0b10, R, opc, (outs FPR32:$dst), + (ins FPR32:$Rd, FPR32:$Rn, FPR32:$Rm), + asm, []>; + def v1i16: BaseSIMDThreeScalarTied<U, 0b01, R, opc, (outs FPR16:$dst), + (ins FPR16:$Rd, FPR16:$Rn, FPR16:$Rm), + asm, []>; +} + +multiclass SIMDFPThreeScalar<bit U, bit S, bits<3> opc, string asm, + SDPatternOperator OpNode = null_frag> { + let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { + def #NAME#64 : BaseSIMDThreeScalar<U, {S,0b11}, {0b11,opc}, FPR64, asm, + [(set (f64 FPR64:$Rd), (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]>; + def #NAME#32 : BaseSIMDThreeScalar<U, {S,0b01}, {0b11,opc}, FPR32, asm, + [(set FPR32:$Rd, (OpNode FPR32:$Rn, FPR32:$Rm))]>; + let Predicates = [HasNEON, HasFullFP16] in { + def #NAME#16 : BaseSIMDThreeScalar<U, {S,0b10}, {0b00,opc}, FPR16, asm, + [(set FPR16:$Rd, (OpNode FPR16:$Rn, FPR16:$Rm))]>; + } // Predicates = [HasNEON, HasFullFP16] + } + + def : Pat<(v1f64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), + (!cast<Instruction>(NAME # "64") FPR64:$Rn, FPR64:$Rm)>; +} + +multiclass SIMDThreeScalarFPCmp<bit U, bit S, bits<3> opc, string asm, + SDPatternOperator OpNode = null_frag> { + let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { + def #NAME#64 : BaseSIMDThreeScalar<U, {S,0b11}, {0b11,opc}, FPR64, asm, + [(set (i64 FPR64:$Rd), (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]>; + def #NAME#32 : BaseSIMDThreeScalar<U, {S,0b01}, {0b11,opc}, FPR32, asm, + [(set 
(i32 FPR32:$Rd), (OpNode (f32 FPR32:$Rn), (f32 FPR32:$Rm)))]>; + let Predicates = [HasNEON, HasFullFP16] in { + def #NAME#16 : BaseSIMDThreeScalar<U, {S,0b10}, {0b00,opc}, FPR16, asm, + []>; + } // Predicates = [HasNEON, HasFullFP16] + } + + def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), + (!cast<Instruction>(NAME # "64") FPR64:$Rn, FPR64:$Rm)>; +} + +class BaseSIMDThreeScalarMixed<bit U, bits<2> size, bits<5> opcode, + dag oops, dag iops, string asm, string cstr, list<dag> pat> + : I<oops, iops, asm, + "\t$Rd, $Rn, $Rm", cstr, pat>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-24} = 0b11110; + let Inst{23-22} = size; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15-11} = opcode; + let Inst{10} = 0; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +multiclass SIMDThreeScalarMixedHS<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode = null_frag> { + def i16 : BaseSIMDThreeScalarMixed<U, 0b01, opc, + (outs FPR32:$Rd), + (ins FPR16:$Rn, FPR16:$Rm), asm, "", []>; + def i32 : BaseSIMDThreeScalarMixed<U, 0b10, opc, + (outs FPR64:$Rd), + (ins FPR32:$Rn, FPR32:$Rm), asm, "", + [(set (i64 FPR64:$Rd), (OpNode (i32 FPR32:$Rn), (i32 FPR32:$Rm)))]>; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +multiclass SIMDThreeScalarMixedTiedHS<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode = null_frag> { + def i16 : BaseSIMDThreeScalarMixed<U, 0b01, opc, + (outs FPR32:$dst), + (ins FPR32:$Rd, FPR16:$Rn, FPR16:$Rm), + asm, "$Rd = $dst", []>; + def i32 : BaseSIMDThreeScalarMixed<U, 0b10, opc, + (outs FPR64:$dst), + (ins FPR64:$Rd, FPR32:$Rn, FPR32:$Rm), + asm, "$Rd = $dst", + [(set (i64 FPR64:$dst), + (OpNode (i64 FPR64:$Rd), (i32 FPR32:$Rn), (i32 FPR32:$Rm)))]>; +} + +//---------------------------------------------------------------------------- +// AdvSIMD two register scalar instructions +//---------------------------------------------------------------------------- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDTwoScalar<bit U, bits<2> size, bits<2> size2, bits<5> opcode, + RegisterClass regtype, RegisterClass regtype2, + string asm, list<dag> pat> + : I<(outs regtype:$Rd), (ins regtype2:$Rn), asm, + "\t$Rd, $Rn", "", pat>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-24} = 0b11110; + let Inst{23-22} = size; + let Inst{21} = 0b1; + let Inst{20-19} = size2; + let Inst{18-17} = 0b00; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDTwoScalarTied<bit U, bits<2> size, bits<5> opcode, + RegisterClass regtype, RegisterClass regtype2, + string asm, list<dag> pat> + : I<(outs regtype:$dst), (ins regtype:$Rd, regtype2:$Rn), asm, + "\t$Rd, $Rn", "$Rd = $dst", pat>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-24} = 0b11110; + let Inst{23-22} = size; + let Inst{21-17} = 0b10000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDCmpTwoScalar<bit U, bits<2> size, bits<2> size2, bits<5> opcode, + RegisterClass regtype, string asm, string zero> + : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, + "\t$Rd, $Rn, #" # zero, "", []>, + Sched<[WriteV]> { + 
bits<5> Rd; + bits<5> Rn; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-24} = 0b11110; + let Inst{23-22} = size; + let Inst{21} = 0b1; + let Inst{20-19} = size2; + let Inst{18-17} = 0b00; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class SIMDInexactCvtTwoScalar<bits<5> opcode, string asm> + : I<(outs FPR32:$Rd), (ins FPR64:$Rn), asm, "\t$Rd, $Rn", "", + [(set (f32 FPR32:$Rd), (int_aarch64_sisd_fcvtxn (f64 FPR64:$Rn)))]>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31-17} = 0b011111100110000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SIMDCmpTwoScalarD<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode> { + def v1i64rz : BaseSIMDCmpTwoScalar<U, 0b11, 0b00, opc, FPR64, asm, "0">; + + def : Pat<(v1i64 (OpNode FPR64:$Rn)), + (!cast<Instruction>(NAME # v1i64rz) FPR64:$Rn)>; +} + +multiclass SIMDFPCmpTwoScalar<bit U, bit S, bits<5> opc, string asm, + SDPatternOperator OpNode> { + def v1i64rz : BaseSIMDCmpTwoScalar<U, {S,1}, 0b00, opc, FPR64, asm, "0.0">; + def v1i32rz : BaseSIMDCmpTwoScalar<U, {S,0}, 0b00, opc, FPR32, asm, "0.0">; + let Predicates = [HasNEON, HasFullFP16] in { + def v1i16rz : BaseSIMDCmpTwoScalar<U, {S,1}, 0b11, opc, FPR16, asm, "0.0">; + } + + def : InstAlias<asm # "\t$Rd, $Rn, #0", + (!cast<Instruction>(NAME # v1i64rz) FPR64:$Rd, FPR64:$Rn), 0>; + def : InstAlias<asm # "\t$Rd, $Rn, #0", + (!cast<Instruction>(NAME # v1i32rz) FPR32:$Rd, FPR32:$Rn), 0>; + let Predicates = [HasNEON, HasFullFP16] in { + def : InstAlias<asm # "\t$Rd, $Rn, #0", + (!cast<Instruction>(NAME # v1i16rz) FPR16:$Rd, FPR16:$Rn), 0>; + } + + def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn))), + (!cast<Instruction>(NAME # v1i64rz) FPR64:$Rn)>; +} + +multiclass SIMDTwoScalarD<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode = null_frag> { + def v1i64 : BaseSIMDTwoScalar<U, 0b11, 0b00, opc, FPR64, FPR64, asm, + [(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn)))]>; + + def : Pat<(i64 (OpNode (i64 FPR64:$Rn))), + (!cast<Instruction>(NAME # "v1i64") FPR64:$Rn)>; +} + +multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm> { + def v1i64 : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,[]>; + def v1i32 : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm,[]>; + let Predicates = [HasNEON, HasFullFP16] in { + def v1f16 : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm,[]>; + } +} + +multiclass SIMDFPTwoScalarCVT<bit U, bit S, bits<5> opc, string asm, + SDPatternOperator OpNode> { + def v1i64 : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm, + [(set FPR64:$Rd, (OpNode (f64 FPR64:$Rn)))]>; + def v1i32 : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm, + [(set FPR32:$Rd, (OpNode (f32 FPR32:$Rn)))]>; + let Predicates = [HasNEON, HasFullFP16] in { + def v1i16 : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm, + [(set FPR16:$Rd, (OpNode (f16 FPR16:$Rn)))]>; + } +} + +multiclass SIMDTwoScalarBHSD<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode = null_frag> { + let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { + def v1i64 : BaseSIMDTwoScalar<U, 0b11, 0b00, opc, FPR64, FPR64, asm, + [(set (i64 FPR64:$Rd), (OpNode (i64 FPR64:$Rn)))]>; + def v1i32 : BaseSIMDTwoScalar<U, 0b10, 0b00, opc, FPR32, FPR32, asm, + [(set (i32 FPR32:$Rd), (OpNode (i32 FPR32:$Rn)))]>; + def v1i16 : BaseSIMDTwoScalar<U, 0b01, 0b00, opc, FPR16, FPR16, asm, []>; + def v1i8 : BaseSIMDTwoScalar<U, 0b00, 0b00, 
opc, FPR8 , FPR8 , asm, []>; + } + + def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn))), + (!cast<Instruction>(NAME # v1i64) FPR64:$Rn)>; +} + +multiclass SIMDTwoScalarBHSDTied<bit U, bits<5> opc, string asm, + Intrinsic OpNode> { + let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { + def v1i64 : BaseSIMDTwoScalarTied<U, 0b11, opc, FPR64, FPR64, asm, + [(set (i64 FPR64:$dst), (OpNode (i64 FPR64:$Rd), (i64 FPR64:$Rn)))]>; + def v1i32 : BaseSIMDTwoScalarTied<U, 0b10, opc, FPR32, FPR32, asm, + [(set (i32 FPR32:$dst), (OpNode (i32 FPR32:$Rd), (i32 FPR32:$Rn)))]>; + def v1i16 : BaseSIMDTwoScalarTied<U, 0b01, opc, FPR16, FPR16, asm, []>; + def v1i8 : BaseSIMDTwoScalarTied<U, 0b00, opc, FPR8 , FPR8 , asm, []>; + } + + def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn))), + (!cast<Instruction>(NAME # v1i64) FPR64:$Rd, FPR64:$Rn)>; +} + + + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +multiclass SIMDTwoScalarMixedBHS<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode = null_frag> { + def v1i32 : BaseSIMDTwoScalar<U, 0b10, 0b00, opc, FPR32, FPR64, asm, + [(set (i32 FPR32:$Rd), (OpNode (i64 FPR64:$Rn)))]>; + def v1i16 : BaseSIMDTwoScalar<U, 0b01, 0b00, opc, FPR16, FPR32, asm, []>; + def v1i8 : BaseSIMDTwoScalar<U, 0b00, 0b00, opc, FPR8 , FPR16, asm, []>; +} + +//---------------------------------------------------------------------------- +// AdvSIMD scalar pairwise instructions +//---------------------------------------------------------------------------- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDPairwiseScalar<bit U, bits<2> size, bits<5> opcode, + RegisterOperand regtype, RegisterOperand vectype, + string asm, string kind> + : I<(outs regtype:$Rd), (ins vectype:$Rn), asm, + "{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", []>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-24} = 0b11110; + let Inst{23-22} = size; + let Inst{21-17} = 0b11000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SIMDPairwiseScalarD<bit U, bits<5> opc, string asm> { + def v2i64p : BaseSIMDPairwiseScalar<U, 0b11, opc, FPR64Op, V128, + asm, ".2d">; +} + +multiclass SIMDFPPairwiseScalar<bit S, bits<5> opc, string asm> { + let Predicates = [HasNEON, HasFullFP16] in { + def v2i16p : BaseSIMDPairwiseScalar<0, {S,0}, opc, FPR16Op, V64, + asm, ".2h">; + } + def v2i32p : BaseSIMDPairwiseScalar<1, {S,0}, opc, FPR32Op, V64, + asm, ".2s">; + def v2i64p : BaseSIMDPairwiseScalar<1, {S,1}, opc, FPR64Op, V128, + asm, ".2d">; +} + +//---------------------------------------------------------------------------- +// AdvSIMD across lanes instructions +//---------------------------------------------------------------------------- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDAcrossLanes<bit Q, bit U, bits<2> size, bits<5> opcode, + RegisterClass regtype, RegisterOperand vectype, + string asm, string kind, list<dag> pattern> + : I<(outs regtype:$Rd), (ins vectype:$Rn), asm, + "{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size; + let Inst{21-17} = 0b11000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SIMDAcrossLanesBHS<bit U, bits<5> opcode, + string asm> { + def v8i8v : BaseSIMDAcrossLanes<0, 
U, 0b00, opcode, FPR8, V64, + asm, ".8b", []>; + def v16i8v : BaseSIMDAcrossLanes<1, U, 0b00, opcode, FPR8, V128, + asm, ".16b", []>; + def v4i16v : BaseSIMDAcrossLanes<0, U, 0b01, opcode, FPR16, V64, + asm, ".4h", []>; + def v8i16v : BaseSIMDAcrossLanes<1, U, 0b01, opcode, FPR16, V128, + asm, ".8h", []>; + def v4i32v : BaseSIMDAcrossLanes<1, U, 0b10, opcode, FPR32, V128, + asm, ".4s", []>; +} + +multiclass SIMDAcrossLanesHSD<bit U, bits<5> opcode, string asm> { + def v8i8v : BaseSIMDAcrossLanes<0, U, 0b00, opcode, FPR16, V64, + asm, ".8b", []>; + def v16i8v : BaseSIMDAcrossLanes<1, U, 0b00, opcode, FPR16, V128, + asm, ".16b", []>; + def v4i16v : BaseSIMDAcrossLanes<0, U, 0b01, opcode, FPR32, V64, + asm, ".4h", []>; + def v8i16v : BaseSIMDAcrossLanes<1, U, 0b01, opcode, FPR32, V128, + asm, ".8h", []>; + def v4i32v : BaseSIMDAcrossLanes<1, U, 0b10, opcode, FPR64, V128, + asm, ".4s", []>; +} + +multiclass SIMDFPAcrossLanes<bits<5> opcode, bit sz1, string asm, + Intrinsic intOp> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16v : BaseSIMDAcrossLanes<0, 0, {sz1, 0}, opcode, FPR16, V64, + asm, ".4h", + [(set FPR16:$Rd, (intOp (v4f16 V64:$Rn)))]>; + def v8i16v : BaseSIMDAcrossLanes<1, 0, {sz1, 0}, opcode, FPR16, V128, + asm, ".8h", + [(set FPR16:$Rd, (intOp (v8f16 V128:$Rn)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v4i32v : BaseSIMDAcrossLanes<1, 1, {sz1, 0}, opcode, FPR32, V128, + asm, ".4s", + [(set FPR32:$Rd, (intOp (v4f32 V128:$Rn)))]>; +} + +//---------------------------------------------------------------------------- +// AdvSIMD INS/DUP instructions +//---------------------------------------------------------------------------- + +// FIXME: There has got to be a better way to factor these. ugh. + +class BaseSIMDInsDup<bit Q, bit op, dag outs, dag ins, string asm, + string operands, string constraints, list<dag> pattern> + : I<outs, ins, asm, operands, constraints, pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = op; + let Inst{28-21} = 0b01110000; + let Inst{15} = 0; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class SIMDDupFromMain<bit Q, bits<5> imm5, string size, ValueType vectype, + RegisterOperand vecreg, RegisterClass regtype> + : BaseSIMDInsDup<Q, 0, (outs vecreg:$Rd), (ins regtype:$Rn), "dup", + "{\t$Rd" # size # ", $Rn" # + "|" # size # "\t$Rd, $Rn}", "", + [(set (vectype vecreg:$Rd), (AArch64dup regtype:$Rn))]> { + let Inst{20-16} = imm5; + let Inst{14-11} = 0b0001; +} + +class SIMDDupFromElement<bit Q, string dstkind, string srckind, + ValueType vectype, ValueType insreg, + RegisterOperand vecreg, Operand idxtype, + ValueType elttype, SDNode OpNode> + : BaseSIMDInsDup<Q, 0, (outs vecreg:$Rd), (ins V128:$Rn, idxtype:$idx), "dup", + "{\t$Rd" # dstkind # ", $Rn" # srckind # "$idx" # + "|" # dstkind # "\t$Rd, $Rn$idx}", "", + [(set (vectype vecreg:$Rd), + (OpNode (insreg V128:$Rn), idxtype:$idx))]> { + let Inst{14-11} = 0b0000; +} + +class SIMDDup64FromElement + : SIMDDupFromElement<1, ".2d", ".d", v2i64, v2i64, V128, + VectorIndexD, i64, AArch64duplane64> { + bits<1> idx; + let Inst{20} = idx; + let Inst{19-16} = 0b1000; +} + +class SIMDDup32FromElement<bit Q, string size, ValueType vectype, + RegisterOperand vecreg> + : SIMDDupFromElement<Q, size, ".s", vectype, v4i32, vecreg, + VectorIndexS, i64, AArch64duplane32> { + bits<2> idx; + let Inst{20-19} = idx; + let Inst{18-16} = 0b100; +} + +class SIMDDup16FromElement<bit Q, string size, ValueType vectype, + 
RegisterOperand vecreg> + : SIMDDupFromElement<Q, size, ".h", vectype, v8i16, vecreg, + VectorIndexH, i64, AArch64duplane16> { + bits<3> idx; + let Inst{20-18} = idx; + let Inst{17-16} = 0b10; +} + +class SIMDDup8FromElement<bit Q, string size, ValueType vectype, + RegisterOperand vecreg> + : SIMDDupFromElement<Q, size, ".b", vectype, v16i8, vecreg, + VectorIndexB, i64, AArch64duplane8> { + bits<4> idx; + let Inst{20-17} = idx; + let Inst{16} = 1; +} + +class BaseSIMDMov<bit Q, string size, bits<4> imm4, RegisterClass regtype, + Operand idxtype, string asm, list<dag> pattern> + : BaseSIMDInsDup<Q, 0, (outs regtype:$Rd), (ins V128:$Rn, idxtype:$idx), asm, + "{\t$Rd, $Rn" # size # "$idx" # + "|" # size # "\t$Rd, $Rn$idx}", "", pattern> { + let Inst{14-11} = imm4; +} + +class SIMDSMov<bit Q, string size, RegisterClass regtype, + Operand idxtype> + : BaseSIMDMov<Q, size, 0b0101, regtype, idxtype, "smov", []>; +class SIMDUMov<bit Q, string size, ValueType vectype, RegisterClass regtype, + Operand idxtype> + : BaseSIMDMov<Q, size, 0b0111, regtype, idxtype, "umov", + [(set regtype:$Rd, (vector_extract (vectype V128:$Rn), idxtype:$idx))]>; + +class SIMDMovAlias<string asm, string size, Instruction inst, + RegisterClass regtype, Operand idxtype> + : InstAlias<asm#"{\t$dst, $src"#size#"$idx" # + "|" # size # "\t$dst, $src$idx}", + (inst regtype:$dst, V128:$src, idxtype:$idx)>; + +multiclass SMov { + def vi8to32 : SIMDSMov<0, ".b", GPR32, VectorIndexB> { + bits<4> idx; + let Inst{20-17} = idx; + let Inst{16} = 1; + } + def vi8to64 : SIMDSMov<1, ".b", GPR64, VectorIndexB> { + bits<4> idx; + let Inst{20-17} = idx; + let Inst{16} = 1; + } + def vi16to32 : SIMDSMov<0, ".h", GPR32, VectorIndexH> { + bits<3> idx; + let Inst{20-18} = idx; + let Inst{17-16} = 0b10; + } + def vi16to64 : SIMDSMov<1, ".h", GPR64, VectorIndexH> { + bits<3> idx; + let Inst{20-18} = idx; + let Inst{17-16} = 0b10; + } + def vi32to64 : SIMDSMov<1, ".s", GPR64, VectorIndexS> { + bits<2> idx; + let Inst{20-19} = idx; + let Inst{18-16} = 0b100; + } +} + +multiclass UMov { + def vi8 : SIMDUMov<0, ".b", v16i8, GPR32, VectorIndexB> { + bits<4> idx; + let Inst{20-17} = idx; + let Inst{16} = 1; + } + def vi16 : SIMDUMov<0, ".h", v8i16, GPR32, VectorIndexH> { + bits<3> idx; + let Inst{20-18} = idx; + let Inst{17-16} = 0b10; + } + def vi32 : SIMDUMov<0, ".s", v4i32, GPR32, VectorIndexS> { + bits<2> idx; + let Inst{20-19} = idx; + let Inst{18-16} = 0b100; + } + def vi64 : SIMDUMov<1, ".d", v2i64, GPR64, VectorIndexD> { + bits<1> idx; + let Inst{20} = idx; + let Inst{19-16} = 0b1000; + } + def : SIMDMovAlias<"mov", ".s", + !cast<Instruction>(NAME#"vi32"), + GPR32, VectorIndexS>; + def : SIMDMovAlias<"mov", ".d", + !cast<Instruction>(NAME#"vi64"), + GPR64, VectorIndexD>; +} + +class SIMDInsFromMain<string size, ValueType vectype, + RegisterClass regtype, Operand idxtype> + : BaseSIMDInsDup<1, 0, (outs V128:$dst), + (ins V128:$Rd, idxtype:$idx, regtype:$Rn), "ins", + "{\t$Rd" # size # "$idx, $Rn" # + "|" # size # "\t$Rd$idx, $Rn}", + "$Rd = $dst", + [(set V128:$dst, + (vector_insert (vectype V128:$Rd), regtype:$Rn, idxtype:$idx))]> { + let Inst{14-11} = 0b0011; +} + +class SIMDInsFromElement<string size, ValueType vectype, + ValueType elttype, Operand idxtype> + : BaseSIMDInsDup<1, 1, (outs V128:$dst), + (ins V128:$Rd, idxtype:$idx, V128:$Rn, idxtype:$idx2), "ins", + "{\t$Rd" # size # "$idx, $Rn" # size # "$idx2" # + "|" # size # "\t$Rd$idx, $Rn$idx2}", + "$Rd = $dst", + [(set V128:$dst, + (vector_insert + (vectype V128:$Rd), + (elttype 
(vector_extract (vectype V128:$Rn), idxtype:$idx2)), + idxtype:$idx))]>; + +class SIMDInsMainMovAlias<string size, Instruction inst, + RegisterClass regtype, Operand idxtype> + : InstAlias<"mov" # "{\t$dst" # size # "$idx, $src" # + "|" # size #"\t$dst$idx, $src}", + (inst V128:$dst, idxtype:$idx, regtype:$src)>; +class SIMDInsElementMovAlias<string size, Instruction inst, + Operand idxtype> + : InstAlias<"mov" # "{\t$dst" # size # "$idx, $src" # size # "$idx2" # + # "|" # size #"\t$dst$idx, $src$idx2}", + (inst V128:$dst, idxtype:$idx, V128:$src, idxtype:$idx2)>; + + +multiclass SIMDIns { + def vi8gpr : SIMDInsFromMain<".b", v16i8, GPR32, VectorIndexB> { + bits<4> idx; + let Inst{20-17} = idx; + let Inst{16} = 1; + } + def vi16gpr : SIMDInsFromMain<".h", v8i16, GPR32, VectorIndexH> { + bits<3> idx; + let Inst{20-18} = idx; + let Inst{17-16} = 0b10; + } + def vi32gpr : SIMDInsFromMain<".s", v4i32, GPR32, VectorIndexS> { + bits<2> idx; + let Inst{20-19} = idx; + let Inst{18-16} = 0b100; + } + def vi64gpr : SIMDInsFromMain<".d", v2i64, GPR64, VectorIndexD> { + bits<1> idx; + let Inst{20} = idx; + let Inst{19-16} = 0b1000; + } + + def vi8lane : SIMDInsFromElement<".b", v16i8, i32, VectorIndexB> { + bits<4> idx; + bits<4> idx2; + let Inst{20-17} = idx; + let Inst{16} = 1; + let Inst{14-11} = idx2; + } + def vi16lane : SIMDInsFromElement<".h", v8i16, i32, VectorIndexH> { + bits<3> idx; + bits<3> idx2; + let Inst{20-18} = idx; + let Inst{17-16} = 0b10; + let Inst{14-12} = idx2; + let Inst{11} = {?}; + } + def vi32lane : SIMDInsFromElement<".s", v4i32, i32, VectorIndexS> { + bits<2> idx; + bits<2> idx2; + let Inst{20-19} = idx; + let Inst{18-16} = 0b100; + let Inst{14-13} = idx2; + let Inst{12-11} = {?,?}; + } + def vi64lane : SIMDInsFromElement<".d", v2i64, i64, VectorIndexD> { + bits<1> idx; + bits<1> idx2; + let Inst{20} = idx; + let Inst{19-16} = 0b1000; + let Inst{14} = idx2; + let Inst{13-11} = {?,?,?}; + } + + // For all forms of the INS instruction, the "mov" mnemonic is the + // preferred alias. Why they didn't just call the instruction "mov" in + // the first place is a very good question indeed... 
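// For example (illustrative): "ins v0.s[1], w1" and "mov v0.s[1], w1"
// assemble to the same encoding, and the disassembler prints the "mov" form.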
+ def : SIMDInsMainMovAlias<".b", !cast<Instruction>(NAME#"vi8gpr"), + GPR32, VectorIndexB>; + def : SIMDInsMainMovAlias<".h", !cast<Instruction>(NAME#"vi16gpr"), + GPR32, VectorIndexH>; + def : SIMDInsMainMovAlias<".s", !cast<Instruction>(NAME#"vi32gpr"), + GPR32, VectorIndexS>; + def : SIMDInsMainMovAlias<".d", !cast<Instruction>(NAME#"vi64gpr"), + GPR64, VectorIndexD>; + + def : SIMDInsElementMovAlias<".b", !cast<Instruction>(NAME#"vi8lane"), + VectorIndexB>; + def : SIMDInsElementMovAlias<".h", !cast<Instruction>(NAME#"vi16lane"), + VectorIndexH>; + def : SIMDInsElementMovAlias<".s", !cast<Instruction>(NAME#"vi32lane"), + VectorIndexS>; + def : SIMDInsElementMovAlias<".d", !cast<Instruction>(NAME#"vi64lane"), + VectorIndexD>; +} + +//---------------------------------------------------------------------------- +// AdvSIMD TBL/TBX +//---------------------------------------------------------------------------- + +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseSIMDTableLookup<bit Q, bits<2> len, bit op, RegisterOperand vectype, + RegisterOperand listtype, string asm, string kind> + : I<(outs vectype:$Vd), (ins listtype:$Vn, vectype:$Vm), asm, + "\t$Vd" # kind # ", $Vn, $Vm" # kind, "", []>, + Sched<[WriteV]> { + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29-21} = 0b001110000; + let Inst{20-16} = Vm; + let Inst{15} = 0; + let Inst{14-13} = len; + let Inst{12} = op; + let Inst{11-10} = 0b00; + let Inst{9-5} = Vn; + let Inst{4-0} = Vd; +} + +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseSIMDTableLookupTied<bit Q, bits<2> len, bit op, RegisterOperand vectype, + RegisterOperand listtype, string asm, string kind> + : I<(outs vectype:$dst), (ins vectype:$Vd, listtype:$Vn, vectype:$Vm), asm, + "\t$Vd" # kind # ", $Vn, $Vm" # kind, "$Vd = $dst", []>, + Sched<[WriteV]> { + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29-21} = 0b001110000; + let Inst{20-16} = Vm; + let Inst{15} = 0; + let Inst{14-13} = len; + let Inst{12} = op; + let Inst{11-10} = 0b00; + let Inst{9-5} = Vn; + let Inst{4-0} = Vd; +} + +class SIMDTableLookupAlias<string asm, Instruction inst, + RegisterOperand vectype, RegisterOperand listtype> + : InstAlias<!strconcat(asm, "\t$dst, $lst, $index"), + (inst vectype:$dst, listtype:$lst, vectype:$index), 0>; + +multiclass SIMDTableLookup<bit op, string asm> { + def v8i8One : BaseSIMDTableLookup<0, 0b00, op, V64, VecListOne16b, + asm, ".8b">; + def v8i8Two : BaseSIMDTableLookup<0, 0b01, op, V64, VecListTwo16b, + asm, ".8b">; + def v8i8Three : BaseSIMDTableLookup<0, 0b10, op, V64, VecListThree16b, + asm, ".8b">; + def v8i8Four : BaseSIMDTableLookup<0, 0b11, op, V64, VecListFour16b, + asm, ".8b">; + def v16i8One : BaseSIMDTableLookup<1, 0b00, op, V128, VecListOne16b, + asm, ".16b">; + def v16i8Two : BaseSIMDTableLookup<1, 0b01, op, V128, VecListTwo16b, + asm, ".16b">; + def v16i8Three: BaseSIMDTableLookup<1, 0b10, op, V128, VecListThree16b, + asm, ".16b">; + def v16i8Four : BaseSIMDTableLookup<1, 0b11, op, V128, VecListFour16b, + asm, ".16b">; + + def : SIMDTableLookupAlias<asm # ".8b", + !cast<Instruction>(NAME#"v8i8One"), + V64, VecListOne128>; + def : SIMDTableLookupAlias<asm # ".8b", + !cast<Instruction>(NAME#"v8i8Two"), + V64, VecListTwo128>; + def : SIMDTableLookupAlias<asm # ".8b", + !cast<Instruction>(NAME#"v8i8Three"), + V64, VecListThree128>; + def : SIMDTableLookupAlias<asm # ".8b", + !cast<Instruction>(NAME#"v8i8Four"), + V64, VecListFour128>; + def : 
SIMDTableLookupAlias<asm # ".16b", + !cast<Instruction>(NAME#"v16i8One"), + V128, VecListOne128>; + def : SIMDTableLookupAlias<asm # ".16b", + !cast<Instruction>(NAME#"v16i8Two"), + V128, VecListTwo128>; + def : SIMDTableLookupAlias<asm # ".16b", + !cast<Instruction>(NAME#"v16i8Three"), + V128, VecListThree128>; + def : SIMDTableLookupAlias<asm # ".16b", + !cast<Instruction>(NAME#"v16i8Four"), + V128, VecListFour128>; +} + +multiclass SIMDTableLookupTied<bit op, string asm> { + def v8i8One : BaseSIMDTableLookupTied<0, 0b00, op, V64, VecListOne16b, + asm, ".8b">; + def v8i8Two : BaseSIMDTableLookupTied<0, 0b01, op, V64, VecListTwo16b, + asm, ".8b">; + def v8i8Three : BaseSIMDTableLookupTied<0, 0b10, op, V64, VecListThree16b, + asm, ".8b">; + def v8i8Four : BaseSIMDTableLookupTied<0, 0b11, op, V64, VecListFour16b, + asm, ".8b">; + def v16i8One : BaseSIMDTableLookupTied<1, 0b00, op, V128, VecListOne16b, + asm, ".16b">; + def v16i8Two : BaseSIMDTableLookupTied<1, 0b01, op, V128, VecListTwo16b, + asm, ".16b">; + def v16i8Three: BaseSIMDTableLookupTied<1, 0b10, op, V128, VecListThree16b, + asm, ".16b">; + def v16i8Four : BaseSIMDTableLookupTied<1, 0b11, op, V128, VecListFour16b, + asm, ".16b">; + + def : SIMDTableLookupAlias<asm # ".8b", + !cast<Instruction>(NAME#"v8i8One"), + V64, VecListOne128>; + def : SIMDTableLookupAlias<asm # ".8b", + !cast<Instruction>(NAME#"v8i8Two"), + V64, VecListTwo128>; + def : SIMDTableLookupAlias<asm # ".8b", + !cast<Instruction>(NAME#"v8i8Three"), + V64, VecListThree128>; + def : SIMDTableLookupAlias<asm # ".8b", + !cast<Instruction>(NAME#"v8i8Four"), + V64, VecListFour128>; + def : SIMDTableLookupAlias<asm # ".16b", + !cast<Instruction>(NAME#"v16i8One"), + V128, VecListOne128>; + def : SIMDTableLookupAlias<asm # ".16b", + !cast<Instruction>(NAME#"v16i8Two"), + V128, VecListTwo128>; + def : SIMDTableLookupAlias<asm # ".16b", + !cast<Instruction>(NAME#"v16i8Three"), + V128, VecListThree128>; + def : SIMDTableLookupAlias<asm # ".16b", + !cast<Instruction>(NAME#"v16i8Four"), + V128, VecListFour128>; +} + + +//---------------------------------------------------------------------------- +// AdvSIMD scalar CPY +//---------------------------------------------------------------------------- +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDScalarCPY<RegisterClass regtype, RegisterOperand vectype, + string kind, Operand idxtype> + : I<(outs regtype:$dst), (ins vectype:$src, idxtype:$idx), "mov", + "{\t$dst, $src" # kind # "$idx" # + "|\t$dst, $src$idx}", "", []>, + Sched<[WriteV]> { + bits<5> dst; + bits<5> src; + let Inst{31-21} = 0b01011110000; + let Inst{15-10} = 0b000001; + let Inst{9-5} = src; + let Inst{4-0} = dst; +} + +class SIMDScalarCPYAlias<string asm, string size, Instruction inst, + RegisterClass regtype, RegisterOperand vectype, Operand idxtype> + : InstAlias<asm # "{\t$dst, $src" # size # "$index" # + # "|\t$dst, $src$index}", + (inst regtype:$dst, vectype:$src, idxtype:$index), 0>; + + +multiclass SIMDScalarCPY<string asm> { + def i8 : BaseSIMDScalarCPY<FPR8, V128, ".b", VectorIndexB> { + bits<4> idx; + let Inst{20-17} = idx; + let Inst{16} = 1; + } + def i16 : BaseSIMDScalarCPY<FPR16, V128, ".h", VectorIndexH> { + bits<3> idx; + let Inst{20-18} = idx; + let Inst{17-16} = 0b10; + } + def i32 : BaseSIMDScalarCPY<FPR32, V128, ".s", VectorIndexS> { + bits<2> idx; + let Inst{20-19} = idx; + let Inst{18-16} = 0b100; + } + def i64 : BaseSIMDScalarCPY<FPR64, V128, ".d", VectorIndexD> { + bits<1> idx; + let Inst{20} = idx; + let Inst{19-16} = 
0b1000; + } + + def : Pat<(v1i64 (scalar_to_vector (i64 (vector_extract (v2i64 V128:$src), + VectorIndexD:$idx)))), + (!cast<Instruction>(NAME # i64) V128:$src, VectorIndexD:$idx)>; + + // 'DUP' mnemonic aliases. + def : SIMDScalarCPYAlias<"dup", ".b", + !cast<Instruction>(NAME#"i8"), + FPR8, V128, VectorIndexB>; + def : SIMDScalarCPYAlias<"dup", ".h", + !cast<Instruction>(NAME#"i16"), + FPR16, V128, VectorIndexH>; + def : SIMDScalarCPYAlias<"dup", ".s", + !cast<Instruction>(NAME#"i32"), + FPR32, V128, VectorIndexS>; + def : SIMDScalarCPYAlias<"dup", ".d", + !cast<Instruction>(NAME#"i64"), + FPR64, V128, VectorIndexD>; +} + +//---------------------------------------------------------------------------- +// AdvSIMD modified immediate instructions +//---------------------------------------------------------------------------- + +class BaseSIMDModifiedImm<bit Q, bit op, bit op2, dag oops, dag iops, + string asm, string op_string, + string cstr, list<dag> pattern> + : I<oops, iops, asm, op_string, cstr, pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<8> imm8; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = op; + let Inst{28-19} = 0b0111100000; + let Inst{18-16} = imm8{7-5}; + let Inst{11} = op2; + let Inst{10} = 1; + let Inst{9-5} = imm8{4-0}; + let Inst{4-0} = Rd; +} + +class BaseSIMDModifiedImmVector<bit Q, bit op, bit op2, RegisterOperand vectype, + Operand immtype, dag opt_shift_iop, + string opt_shift, string asm, string kind, + list<dag> pattern> + : BaseSIMDModifiedImm<Q, op, op2, (outs vectype:$Rd), + !con((ins immtype:$imm8), opt_shift_iop), asm, + "{\t$Rd" # kind # ", $imm8" # opt_shift # + "|" # kind # "\t$Rd, $imm8" # opt_shift # "}", + "", pattern> { + let DecoderMethod = "DecodeModImmInstruction"; +} + +class BaseSIMDModifiedImmVectorTied<bit Q, bit op, RegisterOperand vectype, + Operand immtype, dag opt_shift_iop, + string opt_shift, string asm, string kind, + list<dag> pattern> + : BaseSIMDModifiedImm<Q, op, 0, (outs vectype:$dst), + !con((ins vectype:$Rd, immtype:$imm8), opt_shift_iop), + asm, "{\t$Rd" # kind # ", $imm8" # opt_shift # + "|" # kind # "\t$Rd, $imm8" # opt_shift # "}", + "$Rd = $dst", pattern> { + let DecoderMethod = "DecodeModImmTiedInstruction"; +} + +class BaseSIMDModifiedImmVectorShift<bit Q, bit op, bits<2> b15_b12, + RegisterOperand vectype, string asm, + string kind, list<dag> pattern> + : BaseSIMDModifiedImmVector<Q, op, 0, vectype, imm0_255, + (ins logical_vec_shift:$shift), + "$shift", asm, kind, pattern> { + bits<2> shift; + let Inst{15} = b15_b12{1}; + let Inst{14-13} = shift; + let Inst{12} = b15_b12{0}; +} + +class BaseSIMDModifiedImmVectorShiftTied<bit Q, bit op, bits<2> b15_b12, + RegisterOperand vectype, string asm, + string kind, list<dag> pattern> + : BaseSIMDModifiedImmVectorTied<Q, op, vectype, imm0_255, + (ins logical_vec_shift:$shift), + "$shift", asm, kind, pattern> { + bits<2> shift; + let Inst{15} = b15_b12{1}; + let Inst{14-13} = shift; + let Inst{12} = b15_b12{0}; +} + + +class BaseSIMDModifiedImmVectorShiftHalf<bit Q, bit op, bits<2> b15_b12, + RegisterOperand vectype, string asm, + string kind, list<dag> pattern> + : BaseSIMDModifiedImmVector<Q, op, 0, vectype, imm0_255, + (ins logical_vec_hw_shift:$shift), + "$shift", asm, kind, pattern> { + bits<2> shift; + let Inst{15} = b15_b12{1}; + let Inst{14} = 0; + let Inst{13} = shift{0}; + let Inst{12} = b15_b12{0}; +} + +class BaseSIMDModifiedImmVectorShiftHalfTied<bit Q, bit op, bits<2> b15_b12, + RegisterOperand vectype, string asm, + string kind, list<dag> pattern> + : 
BaseSIMDModifiedImmVectorTied<Q, op, vectype, imm0_255, + (ins logical_vec_hw_shift:$shift), + "$shift", asm, kind, pattern> { + bits<2> shift; + let Inst{15} = b15_b12{1}; + let Inst{14} = 0; + let Inst{13} = shift{0}; + let Inst{12} = b15_b12{0}; +} + +multiclass SIMDModifiedImmVectorShift<bit op, bits<2> hw_cmode, bits<2> w_cmode, + string asm> { + def v4i16 : BaseSIMDModifiedImmVectorShiftHalf<0, op, hw_cmode, V64, + asm, ".4h", []>; + def v8i16 : BaseSIMDModifiedImmVectorShiftHalf<1, op, hw_cmode, V128, + asm, ".8h", []>; + + def v2i32 : BaseSIMDModifiedImmVectorShift<0, op, w_cmode, V64, + asm, ".2s", []>; + def v4i32 : BaseSIMDModifiedImmVectorShift<1, op, w_cmode, V128, + asm, ".4s", []>; +} + +multiclass SIMDModifiedImmVectorShiftTied<bit op, bits<2> hw_cmode, + bits<2> w_cmode, string asm, + SDNode OpNode> { + def v4i16 : BaseSIMDModifiedImmVectorShiftHalfTied<0, op, hw_cmode, V64, + asm, ".4h", + [(set (v4i16 V64:$dst), (OpNode V64:$Rd, + imm0_255:$imm8, + (i32 imm:$shift)))]>; + def v8i16 : BaseSIMDModifiedImmVectorShiftHalfTied<1, op, hw_cmode, V128, + asm, ".8h", + [(set (v8i16 V128:$dst), (OpNode V128:$Rd, + imm0_255:$imm8, + (i32 imm:$shift)))]>; + + def v2i32 : BaseSIMDModifiedImmVectorShiftTied<0, op, w_cmode, V64, + asm, ".2s", + [(set (v2i32 V64:$dst), (OpNode V64:$Rd, + imm0_255:$imm8, + (i32 imm:$shift)))]>; + def v4i32 : BaseSIMDModifiedImmVectorShiftTied<1, op, w_cmode, V128, + asm, ".4s", + [(set (v4i32 V128:$dst), (OpNode V128:$Rd, + imm0_255:$imm8, + (i32 imm:$shift)))]>; +} + +class SIMDModifiedImmMoveMSL<bit Q, bit op, bits<4> cmode, + RegisterOperand vectype, string asm, + string kind, list<dag> pattern> + : BaseSIMDModifiedImmVector<Q, op, 0, vectype, imm0_255, + (ins move_vec_shift:$shift), + "$shift", asm, kind, pattern> { + bits<1> shift; + let Inst{15-13} = cmode{3-1}; + let Inst{12} = shift; +} + +class SIMDModifiedImmVectorNoShift<bit Q, bit op, bit op2, bits<4> cmode, + RegisterOperand vectype, + Operand imm_type, string asm, + string kind, list<dag> pattern> + : BaseSIMDModifiedImmVector<Q, op, op2, vectype, imm_type, (ins), "", + asm, kind, pattern> { + let Inst{15-12} = cmode; +} + +class SIMDModifiedImmScalarNoShift<bit Q, bit op, bits<4> cmode, string asm, + list<dag> pattern> + : BaseSIMDModifiedImm<Q, op, 0, (outs FPR64:$Rd), (ins simdimmtype10:$imm8), asm, + "\t$Rd, $imm8", "", pattern> { + let Inst{15-12} = cmode; + let DecoderMethod = "DecodeModImmInstruction"; +} + +//---------------------------------------------------------------------------- +// AdvSIMD indexed element +//---------------------------------------------------------------------------- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDIndexed<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc, + RegisterOperand dst_reg, RegisterOperand lhs_reg, + RegisterOperand rhs_reg, Operand vec_idx, string asm, + string apple_kind, string dst_kind, string lhs_kind, + string rhs_kind, list<dag> pattern> + : I<(outs dst_reg:$Rd), (ins lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx), + asm, + "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" # + "|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28} = Scalar; + let Inst{27-24} = 0b1111; + let Inst{23-22} = size; + // Bit 21 must be set by the derived class. + let Inst{20-16} = Rm; + let Inst{15-12} = opc; + // Bit 11 must be set by the derived class. 
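  // (Sketch of how derived classes below fill these bits: a ".s" lane index
  //  is typically routed as Inst{21} = idx{0} and Inst{11} = idx{1}.)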
+ let Inst{10} = 0; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDIndexedTied<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc, + RegisterOperand dst_reg, RegisterOperand lhs_reg, + RegisterOperand rhs_reg, Operand vec_idx, string asm, + string apple_kind, string dst_kind, string lhs_kind, + string rhs_kind, list<dag> pattern> + : I<(outs dst_reg:$dst), + (ins dst_reg:$Rd, lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx), asm, + "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" # + "|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "$Rd = $dst", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28} = Scalar; + let Inst{27-24} = 0b1111; + let Inst{23-22} = size; + // Bit 21 must be set by the derived class. + let Inst{20-16} = Rm; + let Inst{15-12} = opc; + // Bit 11 must be set by the derived class. + let Inst{10} = 0; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +// ARMv8.2 Index Dot product instructions +class BaseSIMDThreeSameVectorDotIndex<bit Q, bit U, string asm, string dst_kind, + string lhs_kind, string rhs_kind, + RegisterOperand RegType, + ValueType AccumType, ValueType InputType, + SDPatternOperator OpNode> : + BaseSIMDIndexedTied<Q, U, 0b0, 0b10, 0b1110, RegType, RegType, V128, + VectorIndexS, asm, "", dst_kind, lhs_kind, rhs_kind, + [(set (AccumType RegType:$dst), + (AccumType (OpNode (AccumType RegType:$Rd), + (InputType RegType:$Rn), + (InputType (bitconvert (AccumType + (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx)))))))]> { + bits<2> idx; + let Inst{21} = idx{0}; // L + let Inst{11} = idx{1}; // H +} + +multiclass SIMDThreeSameVectorDotIndex<bit U, string asm, + SDPatternOperator OpNode> { + def v8i8 : BaseSIMDThreeSameVectorDotIndex<0, U, asm, ".2s", ".8b", ".4b", V64, + v2i32, v8i8, OpNode>; + def v16i8 : BaseSIMDThreeSameVectorDotIndex<1, U, asm, ".4s", ".16b", ".4b", V128, + v4i32, v16i8, OpNode>; +} + +multiclass SIMDFPIndexed<bit U, bits<4> opc, string asm, + SDPatternOperator OpNode> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b00, opc, + V64, V64, + V128_lo, VectorIndexH, + asm, ".4h", ".4h", ".4h", ".h", + [(set (v4f16 V64:$Rd), + (OpNode (v4f16 V64:$Rn), + (v4f16 (AArch64duplane16 (v8f16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b00, opc, + V128, V128, + V128_lo, VectorIndexH, + asm, ".8h", ".8h", ".8h", ".h", + [(set (v8f16 V128:$Rd), + (OpNode (v8f16 V128:$Rn), + (v8f16 (AArch64duplane16 (v8f16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + } // Predicates = [HasNEON, HasFullFP16] + + def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, + V64, V64, + V128, VectorIndexS, + asm, ".2s", ".2s", ".2s", ".s", + [(set (v2f32 V64:$Rd), + (OpNode (v2f32 V64:$Rn), + (v2f32 (AArch64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, + V128, V128, + V128, VectorIndexS, + asm, ".4s", ".4s", ".4s", ".s", + [(set (v4f32 V128:$Rd), + (OpNode (v4f32 V128:$Rn), + (v4f32 (AArch64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let 
Inst{21} = idx{0}; + } + + def v2i64_indexed : BaseSIMDIndexed<1, U, 0, 0b11, opc, + V128, V128, + V128, VectorIndexD, + asm, ".2d", ".2d", ".2d", ".d", + [(set (v2f64 V128:$Rd), + (OpNode (v2f64 V128:$Rn), + (v2f64 (AArch64duplane64 (v2f64 V128:$Rm), VectorIndexD:$idx))))]> { + bits<1> idx; + let Inst{11} = idx{0}; + let Inst{21} = 0; + } + + let Predicates = [HasNEON, HasFullFP16] in { + def v1i16_indexed : BaseSIMDIndexed<1, U, 1, 0b00, opc, + FPR16Op, FPR16Op, V128_lo, VectorIndexH, + asm, ".h", "", "", ".h", + [(set (f16 FPR16Op:$Rd), + (OpNode (f16 FPR16Op:$Rn), + (f16 (vector_extract (v8f16 V128_lo:$Rm), + VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + } // Predicates = [HasNEON, HasFullFP16] + + def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc, + FPR32Op, FPR32Op, V128, VectorIndexS, + asm, ".s", "", "", ".s", + [(set (f32 FPR32Op:$Rd), + (OpNode (f32 FPR32Op:$Rn), + (f32 (vector_extract (v4f32 V128:$Rm), + VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v1i64_indexed : BaseSIMDIndexed<1, U, 1, 0b11, opc, + FPR64Op, FPR64Op, V128, VectorIndexD, + asm, ".d", "", "", ".d", + [(set (f64 FPR64Op:$Rd), + (OpNode (f64 FPR64Op:$Rn), + (f64 (vector_extract (v2f64 V128:$Rm), + VectorIndexD:$idx))))]> { + bits<1> idx; + let Inst{11} = idx{0}; + let Inst{21} = 0; + } +} + +multiclass SIMDFPIndexedTiedPatterns<string INST, SDPatternOperator OpNode> { + // 2 variants for the .2s version: DUPLANE from 128-bit and DUP scalar. + def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), + (AArch64duplane32 (v4f32 V128:$Rm), + VectorIndexS:$idx))), + (!cast<Instruction>(INST # v2i32_indexed) + V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>; + def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), + (AArch64dup (f32 FPR32Op:$Rm)))), + (!cast<Instruction>(INST # "v2i32_indexed") V64:$Rd, V64:$Rn, + (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; + + + // 2 variants for the .4s version: DUPLANE from 128-bit and DUP scalar. + def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), + (AArch64duplane32 (v4f32 V128:$Rm), + VectorIndexS:$idx))), + (!cast<Instruction>(INST # "v4i32_indexed") + V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; + def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), + (AArch64dup (f32 FPR32Op:$Rm)))), + (!cast<Instruction>(INST # "v4i32_indexed") V128:$Rd, V128:$Rn, + (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; + + // 2 variants for the .2d version: DUPLANE from 128-bit and DUP scalar. 
+ def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), + (AArch64duplane64 (v2f64 V128:$Rm), + VectorIndexD:$idx))), + (!cast<Instruction>(INST # "v2i64_indexed") + V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; + def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), + (AArch64dup (f64 FPR64Op:$Rm)))), + (!cast<Instruction>(INST # "v2i64_indexed") V128:$Rd, V128:$Rn, + (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>; + + // 2 variants for 32-bit scalar version: extract from .2s or from .4s + def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), + (vector_extract (v4f32 V128:$Rm), VectorIndexS:$idx))), + (!cast<Instruction>(INST # "v1i32_indexed") FPR32:$Rd, FPR32:$Rn, + V128:$Rm, VectorIndexS:$idx)>; + def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), + (vector_extract (v2f32 V64:$Rm), VectorIndexS:$idx))), + (!cast<Instruction>(INST # "v1i32_indexed") FPR32:$Rd, FPR32:$Rn, + (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>; + + // 1 variant for 64-bit scalar version: extract from .1d or from .2d + def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn), + (vector_extract (v2f64 V128:$Rm), VectorIndexD:$idx))), + (!cast<Instruction>(INST # "v1i64_indexed") FPR64:$Rd, FPR64:$Rn, + V128:$Rm, VectorIndexD:$idx)>; +} + +multiclass SIMDFPIndexedTied<bit U, bits<4> opc, string asm> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b00, opc, V64, V64, + V128_lo, VectorIndexH, + asm, ".4h", ".4h", ".4h", ".h", []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b00, opc, + V128, V128, + V128_lo, VectorIndexH, + asm, ".8h", ".8h", ".8h", ".h", []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + } // Predicates = [HasNEON, HasFullFP16] + + def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, V64, V64, + V128, VectorIndexS, + asm, ".2s", ".2s", ".2s", ".s", []> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, + V128, V128, + V128, VectorIndexS, + asm, ".4s", ".4s", ".4s", ".s", []> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v2i64_indexed : BaseSIMDIndexedTied<1, U, 0, 0b11, opc, + V128, V128, + V128, VectorIndexD, + asm, ".2d", ".2d", ".2d", ".d", []> { + bits<1> idx; + let Inst{11} = idx{0}; + let Inst{21} = 0; + } + + let Predicates = [HasNEON, HasFullFP16] in { + def v1i16_indexed : BaseSIMDIndexedTied<1, U, 1, 0b00, opc, + FPR16Op, FPR16Op, V128_lo, VectorIndexH, + asm, ".h", "", "", ".h", []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + } // Predicates = [HasNEON, HasFullFP16] + + def v1i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc, + FPR32Op, FPR32Op, V128, VectorIndexS, + asm, ".s", "", "", ".s", []> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v1i64_indexed : BaseSIMDIndexedTied<1, U, 1, 0b11, opc, + FPR64Op, FPR64Op, V128, VectorIndexD, + asm, ".d", "", "", ".d", []> { + bits<1> idx; + let Inst{11} = idx{0}; + let Inst{21} = 0; + } +} + +multiclass SIMDIndexedHS<bit U, bits<4> opc, string asm, + SDPatternOperator OpNode> { + def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, V64, V64, + V128_lo, VectorIndexH, + asm, ".4h", ".4h", ".4h", ".h", + [(set (v4i16 V64:$Rd), + (OpNode (v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 
(v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc, + V128, V128, + V128_lo, VectorIndexH, + asm, ".8h", ".8h", ".8h", ".h", + [(set (v8i16 V128:$Rd), + (OpNode (v8i16 V128:$Rn), + (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, + V64, V64, + V128, VectorIndexS, + asm, ".2s", ".2s", ".2s", ".s", + [(set (v2i32 V64:$Rd), + (OpNode (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, + V128, V128, + V128, VectorIndexS, + asm, ".4s", ".4s", ".4s", ".s", + [(set (v4i32 V128:$Rd), + (OpNode (v4i32 V128:$Rn), + (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v1i16_indexed : BaseSIMDIndexed<1, U, 1, 0b01, opc, + FPR16Op, FPR16Op, V128_lo, VectorIndexH, + asm, ".h", "", "", ".h", []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc, + FPR32Op, FPR32Op, V128, VectorIndexS, + asm, ".s", "", "", ".s", + [(set (i32 FPR32Op:$Rd), + (OpNode FPR32Op:$Rn, + (i32 (vector_extract (v4i32 V128:$Rm), + VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } +} + +multiclass SIMDVectorIndexedHS<bit U, bits<4> opc, string asm, + SDPatternOperator OpNode> { + def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, + V64, V64, + V128_lo, VectorIndexH, + asm, ".4h", ".4h", ".4h", ".h", + [(set (v4i16 V64:$Rd), + (OpNode (v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc, + V128, V128, + V128_lo, VectorIndexH, + asm, ".8h", ".8h", ".8h", ".h", + [(set (v8i16 V128:$Rd), + (OpNode (v8i16 V128:$Rn), + (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, + V64, V64, + V128, VectorIndexS, + asm, ".2s", ".2s", ".2s", ".s", + [(set (v2i32 V64:$Rd), + (OpNode (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, + V128, V128, + V128, VectorIndexS, + asm, ".4s", ".4s", ".4s", ".s", + [(set (v4i32 V128:$Rd), + (OpNode (v4i32 V128:$Rn), + (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } +} + +multiclass SIMDVectorIndexedHSTied<bit U, bits<4> opc, string asm, + SDPatternOperator OpNode> { + def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, V64, V64, + V128_lo, VectorIndexH, + asm, ".4h", ".4h", ".4h", ".h", + [(set (v4i16 V64:$dst), + (OpNode (v4i16 V64:$Rd),(v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let 
Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc, + V128, V128, + V128_lo, VectorIndexH, + asm, ".8h", ".8h", ".8h", ".h", + [(set (v8i16 V128:$dst), + (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), + (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, + V64, V64, + V128, VectorIndexS, + asm, ".2s", ".2s", ".2s", ".s", + [(set (v2i32 V64:$dst), + (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, + V128, V128, + V128, VectorIndexS, + asm, ".4s", ".4s", ".4s", ".s", + [(set (v4i32 V128:$dst), + (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), + (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } +} + +multiclass SIMDIndexedLongSD<bit U, bits<4> opc, string asm, + SDPatternOperator OpNode> { + def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, + V128, V64, + V128_lo, VectorIndexH, + asm, ".4s", ".4s", ".4h", ".h", + [(set (v4i32 V128:$Rd), + (OpNode (v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc, + V128, V128, + V128_lo, VectorIndexH, + asm#"2", ".4s", ".4s", ".8h", ".h", + [(set (v4i32 V128:$Rd), + (OpNode (extract_high_v8i16 V128:$Rn), + (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx))))]> { + + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, + V128, V64, + V128, VectorIndexS, + asm, ".2d", ".2d", ".2s", ".s", + [(set (v2i64 V128:$Rd), + (OpNode (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, + V128, V128, + V128, VectorIndexS, + asm#"2", ".2d", ".2d", ".4s", ".s", + [(set (v2i64 V128:$Rd), + (OpNode (extract_high_v4i32 V128:$Rn), + (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b01, opc, + FPR32Op, FPR16Op, V128_lo, VectorIndexH, + asm, ".h", "", "", ".h", []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v1i64_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc, + FPR64Op, FPR32Op, V128, VectorIndexS, + asm, ".s", "", "", ".s", []> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } +} + +multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm, + SDPatternOperator Accum> { + def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, + V128, V64, + V128_lo, VectorIndexH, + asm, ".4s", ".4s", ".4h", ".h", + [(set (v4i32 V128:$dst), + (Accum (v4i32 V128:$Rd), + (v4i32 (int_aarch64_neon_sqdmull + (v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx))))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + 
let Inst{20} = idx{0}; + } + + // FIXME: it would be nice to use the scalar (v1i32) instruction here, but an + // intermediate EXTRACT_SUBREG would be untyped. + def : Pat<(i32 (Accum (i32 FPR32Op:$Rd), + (i32 (vector_extract (v4i32 + (int_aarch64_neon_sqdmull (v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx)))), + (i64 0))))), + (EXTRACT_SUBREG + (!cast<Instruction>(NAME # v4i16_indexed) + (SUBREG_TO_REG (i32 0), FPR32Op:$Rd, ssub), V64:$Rn, + V128_lo:$Rm, VectorIndexH:$idx), + ssub)>; + + def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc, + V128, V128, + V128_lo, VectorIndexH, + asm#"2", ".4s", ".4s", ".8h", ".h", + [(set (v4i32 V128:$dst), + (Accum (v4i32 V128:$Rd), + (v4i32 (int_aarch64_neon_sqdmull + (extract_high_v8i16 V128:$Rn), + (extract_high_v8i16 + (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx))))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, + V128, V64, + V128, VectorIndexS, + asm, ".2d", ".2d", ".2s", ".s", + [(set (v2i64 V128:$dst), + (Accum (v2i64 V128:$Rd), + (v2i64 (int_aarch64_neon_sqdmull + (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, + V128, V128, + V128, VectorIndexS, + asm#"2", ".2d", ".2d", ".4s", ".s", + [(set (v2i64 V128:$dst), + (Accum (v2i64 V128:$Rd), + (v2i64 (int_aarch64_neon_sqdmull + (extract_high_v4i32 V128:$Rn), + (extract_high_v4i32 + (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v1i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b01, opc, + FPR32Op, FPR16Op, V128_lo, VectorIndexH, + asm, ".h", "", "", ".h", []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + + def v1i64_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc, + FPR64Op, FPR32Op, V128, VectorIndexS, + asm, ".s", "", "", ".s", + [(set (i64 FPR64Op:$dst), + (Accum (i64 FPR64Op:$Rd), + (i64 (int_aarch64_neon_sqdmulls_scalar + (i32 FPR32Op:$Rn), + (i32 (vector_extract (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } +} + +multiclass SIMDVectorIndexedLongSD<bit U, bits<4> opc, string asm, + SDPatternOperator OpNode> { + let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { + def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, + V128, V64, + V128_lo, VectorIndexH, + asm, ".4s", ".4s", ".4h", ".h", + [(set (v4i32 V128:$Rd), + (OpNode (v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc, + V128, V128, + V128_lo, VectorIndexH, + asm#"2", ".4s", ".4s", ".8h", ".h", + [(set (v4i32 V128:$Rd), + (OpNode (extract_high_v8i16 V128:$Rn), + (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx))))]> { + + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, + V128, V64, + V128, VectorIndexS, + asm, ".2d", ".2d", ".2s", ".s", + [(set (v2i64 V128:$Rd), + (OpNode (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), 
VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, + V128, V128, + V128, VectorIndexS, + asm#"2", ".2d", ".2d", ".4s", ".s", + [(set (v2i64 V128:$Rd), + (OpNode (extract_high_v4i32 V128:$Rn), + (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + } +} + +multiclass SIMDVectorIndexedLongSDTied<bit U, bits<4> opc, string asm, + SDPatternOperator OpNode> { + let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { + def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, + V128, V64, + V128_lo, VectorIndexH, + asm, ".4s", ".4s", ".4h", ".h", + [(set (v4i32 V128:$dst), + (OpNode (v4i32 V128:$Rd), (v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc, + V128, V128, + V128_lo, VectorIndexH, + asm#"2", ".4s", ".4s", ".8h", ".h", + [(set (v4i32 V128:$dst), + (OpNode (v4i32 V128:$Rd), + (extract_high_v8i16 V128:$Rn), + (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, + V128, V64, + V128, VectorIndexS, + asm, ".2d", ".2d", ".2s", ".s", + [(set (v2i64 V128:$dst), + (OpNode (v2i64 V128:$Rd), (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, + V128, V128, + V128, VectorIndexS, + asm#"2", ".2d", ".2d", ".4s", ".s", + [(set (v2i64 V128:$dst), + (OpNode (v2i64 V128:$Rd), + (extract_high_v4i32 V128:$Rn), + (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + } +} + +//---------------------------------------------------------------------------- +// AdvSIMD scalar shift by immediate +//---------------------------------------------------------------------------- + +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseSIMDScalarShift<bit U, bits<5> opc, bits<7> fixed_imm, + RegisterClass regtype1, RegisterClass regtype2, + Operand immtype, string asm, list<dag> pattern> + : I<(outs regtype1:$Rd), (ins regtype2:$Rn, immtype:$imm), + asm, "\t$Rd, $Rn, $imm", "", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<7> imm; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-23} = 0b111110; + let Inst{22-16} = fixed_imm; + let Inst{15-11} = opc; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseSIMDScalarShiftTied<bit U, bits<5> opc, bits<7> fixed_imm, + RegisterClass regtype1, RegisterClass regtype2, + Operand immtype, string asm, list<dag> pattern> + : I<(outs regtype1:$dst), (ins regtype1:$Rd, regtype2:$Rn, immtype:$imm), + asm, "\t$Rd, $Rn, $imm", "$Rd = $dst", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<7> imm; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-23} = 0b111110; + let Inst{22-16} = fixed_imm; + let Inst{15-11} = opc; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + + +multiclass 
SIMDFPScalarRShift<bit U, bits<5> opc, string asm> { + let Predicates = [HasNEON, HasFullFP16] in { + def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?}, + FPR16, FPR16, vecshiftR16, asm, []> { + let Inst{19-16} = imm{3-0}; + } + } // Predicates = [HasNEON, HasFullFP16] + def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?}, + FPR32, FPR32, vecshiftR32, asm, []> { + let Inst{20-16} = imm{4-0}; + } + def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?}, + FPR64, FPR64, vecshiftR64, asm, []> { + let Inst{21-16} = imm{5-0}; + } +} + +multiclass SIMDScalarRShiftD<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode> { + def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?}, + FPR64, FPR64, vecshiftR64, asm, + [(set (i64 FPR64:$Rd), + (OpNode (i64 FPR64:$Rn), (i32 vecshiftR64:$imm)))]> { + let Inst{21-16} = imm{5-0}; + } + + def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn), (i32 vecshiftR64:$imm))), + (!cast<Instruction>(NAME # "d") FPR64:$Rn, vecshiftR64:$imm)>; +} + +multiclass SIMDScalarRShiftDTied<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode = null_frag> { + def d : BaseSIMDScalarShiftTied<U, opc, {1,?,?,?,?,?,?}, + FPR64, FPR64, vecshiftR64, asm, + [(set (i64 FPR64:$dst), (OpNode (i64 FPR64:$Rd), (i64 FPR64:$Rn), + (i32 vecshiftR64:$imm)))]> { + let Inst{21-16} = imm{5-0}; + } + + def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), + (i32 vecshiftR64:$imm))), + (!cast<Instruction>(NAME # "d") FPR64:$Rd, FPR64:$Rn, + vecshiftR64:$imm)>; +} + +multiclass SIMDScalarLShiftD<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode> { + def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?}, + FPR64, FPR64, vecshiftL64, asm, + [(set (v1i64 FPR64:$Rd), + (OpNode (v1i64 FPR64:$Rn), (i32 vecshiftL64:$imm)))]> { + let Inst{21-16} = imm{5-0}; + } +} + +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +multiclass SIMDScalarLShiftDTied<bit U, bits<5> opc, string asm> { + def d : BaseSIMDScalarShiftTied<U, opc, {1,?,?,?,?,?,?}, + FPR64, FPR64, vecshiftL64, asm, []> { + let Inst{21-16} = imm{5-0}; + } +} + +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +multiclass SIMDScalarRShiftBHS<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode = null_frag> { + def b : BaseSIMDScalarShift<U, opc, {0,0,0,1,?,?,?}, + FPR8, FPR16, vecshiftR8, asm, []> { + let Inst{18-16} = imm{2-0}; + } + + def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?}, + FPR16, FPR32, vecshiftR16, asm, []> { + let Inst{19-16} = imm{3-0}; + } + + def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?}, + FPR32, FPR64, vecshiftR32, asm, + [(set (i32 FPR32:$Rd), (OpNode (i64 FPR64:$Rn), vecshiftR32:$imm))]> { + let Inst{20-16} = imm{4-0}; + } +} + +multiclass SIMDScalarLShiftBHSD<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode> { + def b : BaseSIMDScalarShift<U, opc, {0,0,0,1,?,?,?}, + FPR8, FPR8, vecshiftL8, asm, []> { + let Inst{18-16} = imm{2-0}; + } + + def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?}, + FPR16, FPR16, vecshiftL16, asm, []> { + let Inst{19-16} = imm{3-0}; + } + + def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?}, + FPR32, FPR32, vecshiftL32, asm, + [(set (i32 FPR32:$Rd), (OpNode (i32 FPR32:$Rn), (i32 vecshiftL32:$imm)))]> { + let Inst{20-16} = imm{4-0}; + } + + def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?}, + FPR64, FPR64, vecshiftL64, asm, + [(set (i64 FPR64:$Rd), (OpNode (i64 FPR64:$Rn), (i32 vecshiftL64:$imm)))]> { + let Inst{21-16} = imm{5-0}; + } + + def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn), (i32 vecshiftL64:$imm))), + (!cast<Instruction>(NAME # "d") FPR64:$Rn, 
vecshiftL64:$imm)>; +} + +multiclass SIMDScalarRShiftBHSD<bit U, bits<5> opc, string asm> { + def b : BaseSIMDScalarShift<U, opc, {0,0,0,1,?,?,?}, + FPR8, FPR8, vecshiftR8, asm, []> { + let Inst{18-16} = imm{2-0}; + } + + def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?}, + FPR16, FPR16, vecshiftR16, asm, []> { + let Inst{19-16} = imm{3-0}; + } + + def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?}, + FPR32, FPR32, vecshiftR32, asm, []> { + let Inst{20-16} = imm{4-0}; + } + + def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?}, + FPR64, FPR64, vecshiftR64, asm, []> { + let Inst{21-16} = imm{5-0}; + } +} + +//---------------------------------------------------------------------------- +// AdvSIMD vector x indexed element +//---------------------------------------------------------------------------- + +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseSIMDVectorShift<bit Q, bit U, bits<5> opc, bits<7> fixed_imm, + RegisterOperand dst_reg, RegisterOperand src_reg, + Operand immtype, + string asm, string dst_kind, string src_kind, + list<dag> pattern> + : I<(outs dst_reg:$Rd), (ins src_reg:$Rn, immtype:$imm), + asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" # + "|" # dst_kind # "\t$Rd, $Rn, $imm}", "", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-23} = 0b011110; + let Inst{22-16} = fixed_imm; + let Inst{15-11} = opc; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseSIMDVectorShiftTied<bit Q, bit U, bits<5> opc, bits<7> fixed_imm, + RegisterOperand vectype1, RegisterOperand vectype2, + Operand immtype, + string asm, string dst_kind, string src_kind, + list<dag> pattern> + : I<(outs vectype1:$dst), (ins vectype1:$Rd, vectype2:$Rn, immtype:$imm), + asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" # + "|" # dst_kind # "\t$Rd, $Rn, $imm}", "$Rd = $dst", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-23} = 0b011110; + let Inst{22-16} = fixed_imm; + let Inst{15-11} = opc; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm, + Intrinsic OpNode> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, + V64, V64, vecshiftR16, + asm, ".4h", ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (i32 imm:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, + V128, V128, vecshiftR16, + asm, ".8h", ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (i32 imm:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + } // Predicates = [HasNEON, HasFullFP16] + def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, + V64, V64, vecshiftR32, + asm, ".2s", ".2s", + [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (i32 imm:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, + V128, V128, vecshiftR32, + asm, ".4s", ".4s", + [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (i32 imm:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, + V128, V128, vecshiftR64, + asm, ".2d", ".2d", + [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (i32 imm:$imm)))]> { + 
bits<6> imm; + let Inst{21-16} = imm; + } +} + +multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm, + Intrinsic OpNode> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, + V64, V64, vecshiftR16, + asm, ".4h", ".4h", + [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (i32 imm:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, + V128, V128, vecshiftR16, + asm, ".8h", ".8h", + [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (i32 imm:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + } // Predicates = [HasNEON, HasFullFP16] + + def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, + V64, V64, vecshiftR32, + asm, ".2s", ".2s", + [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (i32 imm:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, + V128, V128, vecshiftR32, + asm, ".4s", ".4s", + [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (i32 imm:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, + V128, V128, vecshiftR64, + asm, ".2d", ".2d", + [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (i32 imm:$imm)))]> { + bits<6> imm; + let Inst{21-16} = imm; + } +} + +multiclass SIMDVectorRShiftNarrowBHS<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode> { + def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?}, + V64, V128, vecshiftR16Narrow, + asm, ".8b", ".8h", + [(set (v8i8 V64:$Rd), (OpNode (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v16i8_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,0,1,?,?,?}, + V128, V128, vecshiftR16Narrow, + asm#"2", ".16b", ".8h", []> { + bits<3> imm; + let Inst{18-16} = imm; + let hasSideEffects = 0; + } + + def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, + V64, V128, vecshiftR32Narrow, + asm, ".4h", ".4s", + [(set (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v8i16_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,1,?,?,?,?}, + V128, V128, vecshiftR32Narrow, + asm#"2", ".8h", ".4s", []> { + bits<4> imm; + let Inst{19-16} = imm; + let hasSideEffects = 0; + } + + def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, + V64, V128, vecshiftR64Narrow, + asm, ".2s", ".2d", + [(set (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v4i32_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,1,?,?,?,?,?}, + V128, V128, vecshiftR64Narrow, + asm#"2", ".4s", ".2d", []> { + bits<5> imm; + let Inst{20-16} = imm; + let hasSideEffects = 0; + } + + // TableGen doesn't like patterns w/ INSERT_SUBREG on the instructions + // themselves, so put them here instead. + + // Patterns involving what's effectively an insert high and a normal + // intrinsic, represented by CONCAT_VECTORS. 
+ def : Pat<(concat_vectors (v8i8 V64:$Rd),(OpNode (v8i16 V128:$Rn), + vecshiftR16Narrow:$imm)), + (!cast<Instruction>(NAME # "v16i8_shift") + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), + V128:$Rn, vecshiftR16Narrow:$imm)>; + def : Pat<(concat_vectors (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn), + vecshiftR32Narrow:$imm)), + (!cast<Instruction>(NAME # "v8i16_shift") + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), + V128:$Rn, vecshiftR32Narrow:$imm)>; + def : Pat<(concat_vectors (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn), + vecshiftR64Narrow:$imm)), + (!cast<Instruction>(NAME # "v4i32_shift") + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), + V128:$Rn, vecshiftR64Narrow:$imm)>; +} + +multiclass SIMDVectorLShiftBHSD<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode> { + def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?}, + V64, V64, vecshiftL8, + asm, ".8b", ".8b", + [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), + (i32 vecshiftL8:$imm)))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v16i8_shift : BaseSIMDVectorShift<1, U, opc, {0,0,0,1,?,?,?}, + V128, V128, vecshiftL8, + asm, ".16b", ".16b", + [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), + (i32 vecshiftL8:$imm)))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, + V64, V64, vecshiftL16, + asm, ".4h", ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), + (i32 vecshiftL16:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, + V128, V128, vecshiftL16, + asm, ".8h", ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), + (i32 vecshiftL16:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, + V64, V64, vecshiftL32, + asm, ".2s", ".2s", + [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), + (i32 vecshiftL32:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, + V128, V128, vecshiftL32, + asm, ".4s", ".4s", + [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), + (i32 vecshiftL32:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, + V128, V128, vecshiftL64, + asm, ".2d", ".2d", + [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), + (i32 vecshiftL64:$imm)))]> { + bits<6> imm; + let Inst{21-16} = imm; + } +} + +multiclass SIMDVectorRShiftBHSD<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode> { + def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?}, + V64, V64, vecshiftR8, + asm, ".8b", ".8b", + [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), + (i32 vecshiftR8:$imm)))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v16i8_shift : BaseSIMDVectorShift<1, U, opc, {0,0,0,1,?,?,?}, + V128, V128, vecshiftR8, + asm, ".16b", ".16b", + [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), + (i32 vecshiftR8:$imm)))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, + V64, V64, vecshiftR16, + asm, ".4h", ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), + (i32 vecshiftR16:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, + V128, V128, vecshiftR16, + asm, ".8h", ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), + (i32 vecshiftR16:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + 
def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, + V64, V64, vecshiftR32, + asm, ".2s", ".2s", + [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), + (i32 vecshiftR32:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, + V128, V128, vecshiftR32, + asm, ".4s", ".4s", + [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), + (i32 vecshiftR32:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, + V128, V128, vecshiftR64, + asm, ".2d", ".2d", + [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), + (i32 vecshiftR64:$imm)))]> { + bits<6> imm; + let Inst{21-16} = imm; + } +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +multiclass SIMDVectorRShiftBHSDTied<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode = null_frag> { + def v8i8_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,0,1,?,?,?}, + V64, V64, vecshiftR8, asm, ".8b", ".8b", + [(set (v8i8 V64:$dst), + (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), + (i32 vecshiftR8:$imm)))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v16i8_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,0,1,?,?,?}, + V128, V128, vecshiftR8, asm, ".16b", ".16b", + [(set (v16i8 V128:$dst), + (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), + (i32 vecshiftR8:$imm)))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v4i16_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,1,?,?,?,?}, + V64, V64, vecshiftR16, asm, ".4h", ".4h", + [(set (v4i16 V64:$dst), + (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn), + (i32 vecshiftR16:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v8i16_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,1,?,?,?,?}, + V128, V128, vecshiftR16, asm, ".8h", ".8h", + [(set (v8i16 V128:$dst), + (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), + (i32 vecshiftR16:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v2i32_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,1,?,?,?,?,?}, + V64, V64, vecshiftR32, asm, ".2s", ".2s", + [(set (v2i32 V64:$dst), + (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), + (i32 vecshiftR32:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v4i32_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,1,?,?,?,?,?}, + V128, V128, vecshiftR32, asm, ".4s", ".4s", + [(set (v4i32 V128:$dst), + (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), + (i32 vecshiftR32:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v2i64_shift : BaseSIMDVectorShiftTied<1, U, opc, {1,?,?,?,?,?,?}, + V128, V128, vecshiftR64, + asm, ".2d", ".2d", [(set (v2i64 V128:$dst), + (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn), + (i32 vecshiftR64:$imm)))]> { + bits<6> imm; + let Inst{21-16} = imm; + } +} + +multiclass SIMDVectorLShiftBHSDTied<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode = null_frag> { + def v8i8_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,0,1,?,?,?}, + V64, V64, vecshiftL8, + asm, ".8b", ".8b", + [(set (v8i8 V64:$dst), + (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), + (i32 vecshiftL8:$imm)))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v16i8_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,0,1,?,?,?}, + V128, V128, vecshiftL8, + asm, ".16b", ".16b", + [(set (v16i8 V128:$dst), + (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), + (i32 vecshiftL8:$imm)))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v4i16_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,1,?,?,?,?}, + V64, V64, vecshiftL16, + asm, ".4h", ".4h", + [(set (v4i16 V64:$dst), + (OpNode 
(v4i16 V64:$Rd), (v4i16 V64:$Rn), + (i32 vecshiftL16:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v8i16_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,1,?,?,?,?}, + V128, V128, vecshiftL16, + asm, ".8h", ".8h", + [(set (v8i16 V128:$dst), + (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), + (i32 vecshiftL16:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v2i32_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,1,?,?,?,?,?}, + V64, V64, vecshiftL32, + asm, ".2s", ".2s", + [(set (v2i32 V64:$dst), + (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), + (i32 vecshiftL32:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v4i32_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,1,?,?,?,?,?}, + V128, V128, vecshiftL32, + asm, ".4s", ".4s", + [(set (v4i32 V128:$dst), + (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), + (i32 vecshiftL32:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v2i64_shift : BaseSIMDVectorShiftTied<1, U, opc, {1,?,?,?,?,?,?}, + V128, V128, vecshiftL64, + asm, ".2d", ".2d", + [(set (v2i64 V128:$dst), + (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn), + (i32 vecshiftL64:$imm)))]> { + bits<6> imm; + let Inst{21-16} = imm; + } +} + +multiclass SIMDVectorLShiftLongBHSD<bit U, bits<5> opc, string asm, + SDPatternOperator OpNode> { + def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?}, + V128, V64, vecshiftL8, asm, ".8h", ".8b", + [(set (v8i16 V128:$Rd), (OpNode (v8i8 V64:$Rn), vecshiftL8:$imm))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v16i8_shift : BaseSIMDVectorShift<1, U, opc, {0,0,0,1,?,?,?}, + V128, V128, vecshiftL8, + asm#"2", ".8h", ".16b", + [(set (v8i16 V128:$Rd), + (OpNode (extract_high_v16i8 V128:$Rn), vecshiftL8:$imm))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, + V128, V64, vecshiftL16, asm, ".4s", ".4h", + [(set (v4i32 V128:$Rd), (OpNode (v4i16 V64:$Rn), vecshiftL16:$imm))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, + V128, V128, vecshiftL16, + asm#"2", ".4s", ".8h", + [(set (v4i32 V128:$Rd), + (OpNode (extract_high_v8i16 V128:$Rn), vecshiftL16:$imm))]> { + + bits<4> imm; + let Inst{19-16} = imm; + } + + def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, + V128, V64, vecshiftL32, asm, ".2d", ".2s", + [(set (v2i64 V128:$Rd), (OpNode (v2i32 V64:$Rn), vecshiftL32:$imm))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, + V128, V128, vecshiftL32, + asm#"2", ".2d", ".4s", + [(set (v2i64 V128:$Rd), + (OpNode (extract_high_v4i32 V128:$Rn), vecshiftL32:$imm))]> { + bits<5> imm; + let Inst{20-16} = imm; + } +} + + +//--- +// Vector load/store +//--- +// SIMD ldX/stX no-index memory references don't allow the optional +// ", #0" constant and handle post-indexing explicitly, so we use +// a more specialized parse method for them. Otherwise, it's the same as +// the general GPR64sp handling. 
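+// Illustrative sketch, not part of the upstream file: the concrete operand
+// spellings below are assumptions taken from the alias examples further down,
+// shown only to make the parsing rule above concrete.
+//   ld1 { v0.8b, v1.8b }, [x1]         // accepted: plain no-index reference
+//   ld1 { v0.8b, v1.8b }, [x1, #0]     // rejected: the optional ", #0" form is not parsed here
+//   ld1 { v0.8b, v1.8b }, [x1], #16    // accepted: immediate post-index
+//   ld1 { v0.8b, v1.8b }, [x1], x2     // accepted: register post-index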
+ +class BaseSIMDLdSt<bit Q, bit L, bits<4> opcode, bits<2> size, + string asm, dag oops, dag iops, list<dag> pattern> + : I<oops, iops, asm, "\t$Vt, [$Rn]", "", pattern> { + bits<5> Vt; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29-23} = 0b0011000; + let Inst{22} = L; + let Inst{21-16} = 0b000000; + let Inst{15-12} = opcode; + let Inst{11-10} = size; + let Inst{9-5} = Rn; + let Inst{4-0} = Vt; +} + +class BaseSIMDLdStPost<bit Q, bit L, bits<4> opcode, bits<2> size, + string asm, dag oops, dag iops> + : I<oops, iops, asm, "\t$Vt, [$Rn], $Xm", "$Rn = $wback", []> { + bits<5> Vt; + bits<5> Rn; + bits<5> Xm; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29-23} = 0b0011001; + let Inst{22} = L; + let Inst{21} = 0; + let Inst{20-16} = Xm; + let Inst{15-12} = opcode; + let Inst{11-10} = size; + let Inst{9-5} = Rn; + let Inst{4-0} = Vt; +} + +// The immediate form of AdvSIMD post-indexed addressing is encoded with +// register post-index addressing from the zero register. +multiclass SIMDLdStAliases<string BaseName, string asm, string layout, string Count, + int Offset, int Size> { + // E.g. "ld1 { v0.8b, v1.8b }, [x1], #16" + // "ld1\t$Vt, [$Rn], #16" + // may get mapped to + // (LD1Twov8b_POST VecListTwo8b:$Vt, GPR64sp:$Rn, XZR) + def : InstAlias<asm # "\t$Vt, [$Rn], #" # Offset, + (!cast<Instruction>(BaseName # Count # "v" # layout # "_POST") + GPR64sp:$Rn, + !cast<RegisterOperand>("VecList" # Count # layout):$Vt, + XZR), 1>; + + // E.g. "ld1.8b { v0, v1 }, [x1], #16" + // "ld1.8b\t$Vt, [$Rn], #16" + // may get mapped to + // (LD1Twov8b_POST VecListTwo64:$Vt, GPR64sp:$Rn, XZR) + def : InstAlias<asm # "." # layout # "\t$Vt, [$Rn], #" # Offset, + (!cast<Instruction>(BaseName # Count # "v" # layout # "_POST") + GPR64sp:$Rn, + !cast<RegisterOperand>("VecList" # Count # Size):$Vt, + XZR), 0>; + + // E.g. "ld1.8b { v0, v1 }, [x1]" + // "ld1\t$Vt, [$Rn]" + // may get mapped to + // (LD1Twov8b VecListTwo64:$Vt, GPR64sp:$Rn) + def : InstAlias<asm # "." # layout # "\t$Vt, [$Rn]", + (!cast<Instruction>(BaseName # Count # "v" # layout) + !cast<RegisterOperand>("VecList" # Count # Size):$Vt, + GPR64sp:$Rn), 0>; + + // E.g. "ld1.8b { v0, v1 }, [x1], x2" + // "ld1\t$Vt, [$Rn], $Xm" + // may get mapped to + // (LD1Twov8b_POST VecListTwo64:$Vt, GPR64sp:$Rn, GPR64pi8:$Xm) + def : InstAlias<asm # "." 
# layout # "\t$Vt, [$Rn], $Xm", + (!cast<Instruction>(BaseName # Count # "v" # layout # "_POST") + GPR64sp:$Rn, + !cast<RegisterOperand>("VecList" # Count # Size):$Vt, + !cast<RegisterOperand>("GPR64pi" # Offset):$Xm), 0>; +} + +multiclass BaseSIMDLdN<string BaseName, string Count, string asm, string veclist, + int Offset128, int Offset64, bits<4> opcode> { + let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in { + def v16b: BaseSIMDLdSt<1, 1, opcode, 0b00, asm, + (outs !cast<RegisterOperand>(veclist # "16b"):$Vt), + (ins GPR64sp:$Rn), []>; + def v8h : BaseSIMDLdSt<1, 1, opcode, 0b01, asm, + (outs !cast<RegisterOperand>(veclist # "8h"):$Vt), + (ins GPR64sp:$Rn), []>; + def v4s : BaseSIMDLdSt<1, 1, opcode, 0b10, asm, + (outs !cast<RegisterOperand>(veclist # "4s"):$Vt), + (ins GPR64sp:$Rn), []>; + def v2d : BaseSIMDLdSt<1, 1, opcode, 0b11, asm, + (outs !cast<RegisterOperand>(veclist # "2d"):$Vt), + (ins GPR64sp:$Rn), []>; + def v8b : BaseSIMDLdSt<0, 1, opcode, 0b00, asm, + (outs !cast<RegisterOperand>(veclist # "8b"):$Vt), + (ins GPR64sp:$Rn), []>; + def v4h : BaseSIMDLdSt<0, 1, opcode, 0b01, asm, + (outs !cast<RegisterOperand>(veclist # "4h"):$Vt), + (ins GPR64sp:$Rn), []>; + def v2s : BaseSIMDLdSt<0, 1, opcode, 0b10, asm, + (outs !cast<RegisterOperand>(veclist # "2s"):$Vt), + (ins GPR64sp:$Rn), []>; + + + def v16b_POST: BaseSIMDLdStPost<1, 1, opcode, 0b00, asm, + (outs GPR64sp:$wback, + !cast<RegisterOperand>(veclist # "16b"):$Vt), + (ins GPR64sp:$Rn, + !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>; + def v8h_POST : BaseSIMDLdStPost<1, 1, opcode, 0b01, asm, + (outs GPR64sp:$wback, + !cast<RegisterOperand>(veclist # "8h"):$Vt), + (ins GPR64sp:$Rn, + !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>; + def v4s_POST : BaseSIMDLdStPost<1, 1, opcode, 0b10, asm, + (outs GPR64sp:$wback, + !cast<RegisterOperand>(veclist # "4s"):$Vt), + (ins GPR64sp:$Rn, + !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>; + def v2d_POST : BaseSIMDLdStPost<1, 1, opcode, 0b11, asm, + (outs GPR64sp:$wback, + !cast<RegisterOperand>(veclist # "2d"):$Vt), + (ins GPR64sp:$Rn, + !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>; + def v8b_POST : BaseSIMDLdStPost<0, 1, opcode, 0b00, asm, + (outs GPR64sp:$wback, + !cast<RegisterOperand>(veclist # "8b"):$Vt), + (ins GPR64sp:$Rn, + !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>; + def v4h_POST : BaseSIMDLdStPost<0, 1, opcode, 0b01, asm, + (outs GPR64sp:$wback, + !cast<RegisterOperand>(veclist # "4h"):$Vt), + (ins GPR64sp:$Rn, + !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>; + def v2s_POST : BaseSIMDLdStPost<0, 1, opcode, 0b10, asm, + (outs GPR64sp:$wback, + !cast<RegisterOperand>(veclist # "2s"):$Vt), + (ins GPR64sp:$Rn, + !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>; + } + + defm : SIMDLdStAliases<BaseName, asm, "16b", Count, Offset128, 128>; + defm : SIMDLdStAliases<BaseName, asm, "8h", Count, Offset128, 128>; + defm : SIMDLdStAliases<BaseName, asm, "4s", Count, Offset128, 128>; + defm : SIMDLdStAliases<BaseName, asm, "2d", Count, Offset128, 128>; + defm : SIMDLdStAliases<BaseName, asm, "8b", Count, Offset64, 64>; + defm : SIMDLdStAliases<BaseName, asm, "4h", Count, Offset64, 64>; + defm : SIMDLdStAliases<BaseName, asm, "2s", Count, Offset64, 64>; +} + +// Only ld1/st1 has a v1d version. 
+multiclass BaseSIMDStN<string BaseName, string Count, string asm, string veclist, + int Offset128, int Offset64, bits<4> opcode> { + let hasSideEffects = 0, mayStore = 1, mayLoad = 0 in { + def v16b : BaseSIMDLdSt<1, 0, opcode, 0b00, asm, (outs), + (ins !cast<RegisterOperand>(veclist # "16b"):$Vt, + GPR64sp:$Rn), []>; + def v8h : BaseSIMDLdSt<1, 0, opcode, 0b01, asm, (outs), + (ins !cast<RegisterOperand>(veclist # "8h"):$Vt, + GPR64sp:$Rn), []>; + def v4s : BaseSIMDLdSt<1, 0, opcode, 0b10, asm, (outs), + (ins !cast<RegisterOperand>(veclist # "4s"):$Vt, + GPR64sp:$Rn), []>; + def v2d : BaseSIMDLdSt<1, 0, opcode, 0b11, asm, (outs), + (ins !cast<RegisterOperand>(veclist # "2d"):$Vt, + GPR64sp:$Rn), []>; + def v8b : BaseSIMDLdSt<0, 0, opcode, 0b00, asm, (outs), + (ins !cast<RegisterOperand>(veclist # "8b"):$Vt, + GPR64sp:$Rn), []>; + def v4h : BaseSIMDLdSt<0, 0, opcode, 0b01, asm, (outs), + (ins !cast<RegisterOperand>(veclist # "4h"):$Vt, + GPR64sp:$Rn), []>; + def v2s : BaseSIMDLdSt<0, 0, opcode, 0b10, asm, (outs), + (ins !cast<RegisterOperand>(veclist # "2s"):$Vt, + GPR64sp:$Rn), []>; + + def v16b_POST : BaseSIMDLdStPost<1, 0, opcode, 0b00, asm, + (outs GPR64sp:$wback), + (ins !cast<RegisterOperand>(veclist # "16b"):$Vt, + GPR64sp:$Rn, + !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>; + def v8h_POST : BaseSIMDLdStPost<1, 0, opcode, 0b01, asm, + (outs GPR64sp:$wback), + (ins !cast<RegisterOperand>(veclist # "8h"):$Vt, + GPR64sp:$Rn, + !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>; + def v4s_POST : BaseSIMDLdStPost<1, 0, opcode, 0b10, asm, + (outs GPR64sp:$wback), + (ins !cast<RegisterOperand>(veclist # "4s"):$Vt, + GPR64sp:$Rn, + !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>; + def v2d_POST : BaseSIMDLdStPost<1, 0, opcode, 0b11, asm, + (outs GPR64sp:$wback), + (ins !cast<RegisterOperand>(veclist # "2d"):$Vt, + GPR64sp:$Rn, + !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>; + def v8b_POST : BaseSIMDLdStPost<0, 0, opcode, 0b00, asm, + (outs GPR64sp:$wback), + (ins !cast<RegisterOperand>(veclist # "8b"):$Vt, + GPR64sp:$Rn, + !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>; + def v4h_POST : BaseSIMDLdStPost<0, 0, opcode, 0b01, asm, + (outs GPR64sp:$wback), + (ins !cast<RegisterOperand>(veclist # "4h"):$Vt, + GPR64sp:$Rn, + !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>; + def v2s_POST : BaseSIMDLdStPost<0, 0, opcode, 0b10, asm, + (outs GPR64sp:$wback), + (ins !cast<RegisterOperand>(veclist # "2s"):$Vt, + GPR64sp:$Rn, + !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>; + } + + defm : SIMDLdStAliases<BaseName, asm, "16b", Count, Offset128, 128>; + defm : SIMDLdStAliases<BaseName, asm, "8h", Count, Offset128, 128>; + defm : SIMDLdStAliases<BaseName, asm, "4s", Count, Offset128, 128>; + defm : SIMDLdStAliases<BaseName, asm, "2d", Count, Offset128, 128>; + defm : SIMDLdStAliases<BaseName, asm, "8b", Count, Offset64, 64>; + defm : SIMDLdStAliases<BaseName, asm, "4h", Count, Offset64, 64>; + defm : SIMDLdStAliases<BaseName, asm, "2s", Count, Offset64, 64>; +} + +multiclass BaseSIMDLd1<string BaseName, string Count, string asm, string veclist, + int Offset128, int Offset64, bits<4> opcode> + : BaseSIMDLdN<BaseName, Count, asm, veclist, Offset128, Offset64, opcode> { + + // LD1 instructions have extra "1d" variants. 
+ let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in { + def v1d : BaseSIMDLdSt<0, 1, opcode, 0b11, asm, + (outs !cast<RegisterOperand>(veclist # "1d"):$Vt), + (ins GPR64sp:$Rn), []>; + + def v1d_POST : BaseSIMDLdStPost<0, 1, opcode, 0b11, asm, + (outs GPR64sp:$wback, + !cast<RegisterOperand>(veclist # "1d"):$Vt), + (ins GPR64sp:$Rn, + !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>; + } + + defm : SIMDLdStAliases<BaseName, asm, "1d", Count, Offset64, 64>; +} + +multiclass BaseSIMDSt1<string BaseName, string Count, string asm, string veclist, + int Offset128, int Offset64, bits<4> opcode> + : BaseSIMDStN<BaseName, Count, asm, veclist, Offset128, Offset64, opcode> { + + // ST1 instructions have extra "1d" variants. + let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in { + def v1d : BaseSIMDLdSt<0, 0, opcode, 0b11, asm, (outs), + (ins !cast<RegisterOperand>(veclist # "1d"):$Vt, + GPR64sp:$Rn), []>; + + def v1d_POST : BaseSIMDLdStPost<0, 0, opcode, 0b11, asm, + (outs GPR64sp:$wback), + (ins !cast<RegisterOperand>(veclist # "1d"):$Vt, + GPR64sp:$Rn, + !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>; + } + + defm : SIMDLdStAliases<BaseName, asm, "1d", Count, Offset64, 64>; +} + +multiclass SIMDLd1Multiple<string asm> { + defm One : BaseSIMDLd1<NAME, "One", asm, "VecListOne", 16, 8, 0b0111>; + defm Two : BaseSIMDLd1<NAME, "Two", asm, "VecListTwo", 32, 16, 0b1010>; + defm Three : BaseSIMDLd1<NAME, "Three", asm, "VecListThree", 48, 24, 0b0110>; + defm Four : BaseSIMDLd1<NAME, "Four", asm, "VecListFour", 64, 32, 0b0010>; +} + +multiclass SIMDSt1Multiple<string asm> { + defm One : BaseSIMDSt1<NAME, "One", asm, "VecListOne", 16, 8, 0b0111>; + defm Two : BaseSIMDSt1<NAME, "Two", asm, "VecListTwo", 32, 16, 0b1010>; + defm Three : BaseSIMDSt1<NAME, "Three", asm, "VecListThree", 48, 24, 0b0110>; + defm Four : BaseSIMDSt1<NAME, "Four", asm, "VecListFour", 64, 32, 0b0010>; +} + +multiclass SIMDLd2Multiple<string asm> { + defm Two : BaseSIMDLdN<NAME, "Two", asm, "VecListTwo", 32, 16, 0b1000>; +} + +multiclass SIMDSt2Multiple<string asm> { + defm Two : BaseSIMDStN<NAME, "Two", asm, "VecListTwo", 32, 16, 0b1000>; +} + +multiclass SIMDLd3Multiple<string asm> { + defm Three : BaseSIMDLdN<NAME, "Three", asm, "VecListThree", 48, 24, 0b0100>; +} + +multiclass SIMDSt3Multiple<string asm> { + defm Three : BaseSIMDStN<NAME, "Three", asm, "VecListThree", 48, 24, 0b0100>; +} + +multiclass SIMDLd4Multiple<string asm> { + defm Four : BaseSIMDLdN<NAME, "Four", asm, "VecListFour", 64, 32, 0b0000>; +} + +multiclass SIMDSt4Multiple<string asm> { + defm Four : BaseSIMDStN<NAME, "Four", asm, "VecListFour", 64, 32, 0b0000>; +} + +//--- +// AdvSIMD Load/store single-element +//--- + +class BaseSIMDLdStSingle<bit L, bit R, bits<3> opcode, + string asm, string operands, string cst, + dag oops, dag iops, list<dag> pattern> + : I<oops, iops, asm, operands, cst, pattern> { + bits<5> Vt; + bits<5> Rn; + let Inst{31} = 0; + let Inst{29-24} = 0b001101; + let Inst{22} = L; + let Inst{21} = R; + let Inst{15-13} = opcode; + let Inst{9-5} = Rn; + let Inst{4-0} = Vt; +} + +class BaseSIMDLdStSingleTied<bit L, bit R, bits<3> opcode, + string asm, string operands, string cst, + dag oops, dag iops, list<dag> pattern> + : I<oops, iops, asm, operands, "$Vt = $dst," # cst, pattern> { + bits<5> Vt; + bits<5> Rn; + let Inst{31} = 0; + let Inst{29-24} = 0b001101; + let Inst{22} = L; + let Inst{21} = R; + let Inst{15-13} = opcode; + let Inst{9-5} = Rn; + let Inst{4-0} = Vt; +} + + +let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in +class 
BaseSIMDLdR<bit Q, bit R, bits<3> opcode, bit S, bits<2> size, string asm, + DAGOperand listtype> + : BaseSIMDLdStSingle<1, R, opcode, asm, "\t$Vt, [$Rn]", "", + (outs listtype:$Vt), (ins GPR64sp:$Rn), + []> { + let Inst{30} = Q; + let Inst{23} = 0; + let Inst{20-16} = 0b00000; + let Inst{12} = S; + let Inst{11-10} = size; +} +let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDLdRPost<bit Q, bit R, bits<3> opcode, bit S, bits<2> size, + string asm, DAGOperand listtype, DAGOperand GPR64pi> + : BaseSIMDLdStSingle<1, R, opcode, asm, "\t$Vt, [$Rn], $Xm", + "$Rn = $wback", + (outs GPR64sp:$wback, listtype:$Vt), + (ins GPR64sp:$Rn, GPR64pi:$Xm), []> { + bits<5> Xm; + let Inst{30} = Q; + let Inst{23} = 1; + let Inst{20-16} = Xm; + let Inst{12} = S; + let Inst{11-10} = size; +} + +multiclass SIMDLdrAliases<string BaseName, string asm, string layout, string Count, + int Offset, int Size> { + // E.g. "ld1r { v0.8b }, [x1], #1" + // "ld1r.8b\t$Vt, [$Rn], #1" + // may get mapped to + // (LD1Rv8b_POST VecListOne8b:$Vt, GPR64sp:$Rn, XZR) + def : InstAlias<asm # "\t$Vt, [$Rn], #" # Offset, + (!cast<Instruction>(BaseName # "v" # layout # "_POST") + GPR64sp:$Rn, + !cast<RegisterOperand>("VecList" # Count # layout):$Vt, + XZR), 1>; + + // E.g. "ld1r.8b { v0 }, [x1], #1" + // "ld1r.8b\t$Vt, [$Rn], #1" + // may get mapped to + // (LD1Rv8b_POST VecListOne64:$Vt, GPR64sp:$Rn, XZR) + def : InstAlias<asm # "." # layout # "\t$Vt, [$Rn], #" # Offset, + (!cast<Instruction>(BaseName # "v" # layout # "_POST") + GPR64sp:$Rn, + !cast<RegisterOperand>("VecList" # Count # Size):$Vt, + XZR), 0>; + + // E.g. "ld1r.8b { v0 }, [x1]" + // "ld1r.8b\t$Vt, [$Rn]" + // may get mapped to + // (LD1Rv8b VecListOne64:$Vt, GPR64sp:$Rn) + def : InstAlias<asm # "." # layout # "\t$Vt, [$Rn]", + (!cast<Instruction>(BaseName # "v" # layout) + !cast<RegisterOperand>("VecList" # Count # Size):$Vt, + GPR64sp:$Rn), 0>; + + // E.g. "ld1r.8b { v0 }, [x1], x2" + // "ld1r.8b\t$Vt, [$Rn], $Xm" + // may get mapped to + // (LD1Rv8b_POST VecListOne64:$Vt, GPR64sp:$Rn, GPR64pi1:$Xm) + def : InstAlias<asm # "." 
# layout # "\t$Vt, [$Rn], $Xm", + (!cast<Instruction>(BaseName # "v" # layout # "_POST") + GPR64sp:$Rn, + !cast<RegisterOperand>("VecList" # Count # Size):$Vt, + !cast<RegisterOperand>("GPR64pi" # Offset):$Xm), 0>; +} + +multiclass SIMDLdR<bit R, bits<3> opcode, bit S, string asm, string Count, + int Offset1, int Offset2, int Offset4, int Offset8> { + def v8b : BaseSIMDLdR<0, R, opcode, S, 0b00, asm, + !cast<DAGOperand>("VecList" # Count # "8b")>; + def v16b: BaseSIMDLdR<1, R, opcode, S, 0b00, asm, + !cast<DAGOperand>("VecList" # Count #"16b")>; + def v4h : BaseSIMDLdR<0, R, opcode, S, 0b01, asm, + !cast<DAGOperand>("VecList" # Count #"4h")>; + def v8h : BaseSIMDLdR<1, R, opcode, S, 0b01, asm, + !cast<DAGOperand>("VecList" # Count #"8h")>; + def v2s : BaseSIMDLdR<0, R, opcode, S, 0b10, asm, + !cast<DAGOperand>("VecList" # Count #"2s")>; + def v4s : BaseSIMDLdR<1, R, opcode, S, 0b10, asm, + !cast<DAGOperand>("VecList" # Count #"4s")>; + def v1d : BaseSIMDLdR<0, R, opcode, S, 0b11, asm, + !cast<DAGOperand>("VecList" # Count #"1d")>; + def v2d : BaseSIMDLdR<1, R, opcode, S, 0b11, asm, + !cast<DAGOperand>("VecList" # Count #"2d")>; + + def v8b_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b00, asm, + !cast<DAGOperand>("VecList" # Count # "8b"), + !cast<DAGOperand>("GPR64pi" # Offset1)>; + def v16b_POST: BaseSIMDLdRPost<1, R, opcode, S, 0b00, asm, + !cast<DAGOperand>("VecList" # Count # "16b"), + !cast<DAGOperand>("GPR64pi" # Offset1)>; + def v4h_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b01, asm, + !cast<DAGOperand>("VecList" # Count # "4h"), + !cast<DAGOperand>("GPR64pi" # Offset2)>; + def v8h_POST : BaseSIMDLdRPost<1, R, opcode, S, 0b01, asm, + !cast<DAGOperand>("VecList" # Count # "8h"), + !cast<DAGOperand>("GPR64pi" # Offset2)>; + def v2s_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b10, asm, + !cast<DAGOperand>("VecList" # Count # "2s"), + !cast<DAGOperand>("GPR64pi" # Offset4)>; + def v4s_POST : BaseSIMDLdRPost<1, R, opcode, S, 0b10, asm, + !cast<DAGOperand>("VecList" # Count # "4s"), + !cast<DAGOperand>("GPR64pi" # Offset4)>; + def v1d_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b11, asm, + !cast<DAGOperand>("VecList" # Count # "1d"), + !cast<DAGOperand>("GPR64pi" # Offset8)>; + def v2d_POST : BaseSIMDLdRPost<1, R, opcode, S, 0b11, asm, + !cast<DAGOperand>("VecList" # Count # "2d"), + !cast<DAGOperand>("GPR64pi" # Offset8)>; + + defm : SIMDLdrAliases<NAME, asm, "8b", Count, Offset1, 64>; + defm : SIMDLdrAliases<NAME, asm, "16b", Count, Offset1, 128>; + defm : SIMDLdrAliases<NAME, asm, "4h", Count, Offset2, 64>; + defm : SIMDLdrAliases<NAME, asm, "8h", Count, Offset2, 128>; + defm : SIMDLdrAliases<NAME, asm, "2s", Count, Offset4, 64>; + defm : SIMDLdrAliases<NAME, asm, "4s", Count, Offset4, 128>; + defm : SIMDLdrAliases<NAME, asm, "1d", Count, Offset8, 64>; + defm : SIMDLdrAliases<NAME, asm, "2d", Count, Offset8, 128>; +} + +class SIMDLdStSingleB<bit L, bit R, bits<3> opcode, string asm, + dag oops, dag iops, list<dag> pattern> + : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, [$Rn]", "", oops, iops, + pattern> { + // idx encoded in Q:S:size fields. + bits<4> idx; + let Inst{30} = idx{3}; + let Inst{23} = 0; + let Inst{20-16} = 0b00000; + let Inst{12} = idx{2}; + let Inst{11-10} = idx{1-0}; +} +class SIMDLdStSingleBTied<bit L, bit R, bits<3> opcode, string asm, + dag oops, dag iops, list<dag> pattern> + : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, [$Rn]", "", + oops, iops, pattern> { + // idx encoded in Q:S:size fields. 
+ bits<4> idx; + let Inst{30} = idx{3}; + let Inst{23} = 0; + let Inst{20-16} = 0b00000; + let Inst{12} = idx{2}; + let Inst{11-10} = idx{1-0}; +} +class SIMDLdStSingleBPost<bit L, bit R, bits<3> opcode, string asm, + dag oops, dag iops> + : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, [$Rn], $Xm", + "$Rn = $wback", oops, iops, []> { + // idx encoded in Q:S:size fields. + bits<4> idx; + bits<5> Xm; + let Inst{30} = idx{3}; + let Inst{23} = 1; + let Inst{20-16} = Xm; + let Inst{12} = idx{2}; + let Inst{11-10} = idx{1-0}; +} +class SIMDLdStSingleBTiedPost<bit L, bit R, bits<3> opcode, string asm, + dag oops, dag iops> + : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, [$Rn], $Xm", + "$Rn = $wback", oops, iops, []> { + // idx encoded in Q:S:size fields. + bits<4> idx; + bits<5> Xm; + let Inst{30} = idx{3}; + let Inst{23} = 1; + let Inst{20-16} = Xm; + let Inst{12} = idx{2}; + let Inst{11-10} = idx{1-0}; +} + +class SIMDLdStSingleH<bit L, bit R, bits<3> opcode, bit size, string asm, + dag oops, dag iops, list<dag> pattern> + : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, [$Rn]", "", oops, iops, + pattern> { + // idx encoded in Q:S:size<1> fields. + bits<3> idx; + let Inst{30} = idx{2}; + let Inst{23} = 0; + let Inst{20-16} = 0b00000; + let Inst{12} = idx{1}; + let Inst{11} = idx{0}; + let Inst{10} = size; +} +class SIMDLdStSingleHTied<bit L, bit R, bits<3> opcode, bit size, string asm, + dag oops, dag iops, list<dag> pattern> + : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, [$Rn]", "", + oops, iops, pattern> { + // idx encoded in Q:S:size<1> fields. + bits<3> idx; + let Inst{30} = idx{2}; + let Inst{23} = 0; + let Inst{20-16} = 0b00000; + let Inst{12} = idx{1}; + let Inst{11} = idx{0}; + let Inst{10} = size; +} + +class SIMDLdStSingleHPost<bit L, bit R, bits<3> opcode, bit size, string asm, + dag oops, dag iops> + : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, [$Rn], $Xm", + "$Rn = $wback", oops, iops, []> { + // idx encoded in Q:S:size<1> fields. + bits<3> idx; + bits<5> Xm; + let Inst{30} = idx{2}; + let Inst{23} = 1; + let Inst{20-16} = Xm; + let Inst{12} = idx{1}; + let Inst{11} = idx{0}; + let Inst{10} = size; +} +class SIMDLdStSingleHTiedPost<bit L, bit R, bits<3> opcode, bit size, string asm, + dag oops, dag iops> + : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, [$Rn], $Xm", + "$Rn = $wback", oops, iops, []> { + // idx encoded in Q:S:size<1> fields. + bits<3> idx; + bits<5> Xm; + let Inst{30} = idx{2}; + let Inst{23} = 1; + let Inst{20-16} = Xm; + let Inst{12} = idx{1}; + let Inst{11} = idx{0}; + let Inst{10} = size; +} +class SIMDLdStSingleS<bit L, bit R, bits<3> opcode, bits<2> size, string asm, + dag oops, dag iops, list<dag> pattern> + : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, [$Rn]", "", oops, iops, + pattern> { + // idx encoded in Q:S fields. + bits<2> idx; + let Inst{30} = idx{1}; + let Inst{23} = 0; + let Inst{20-16} = 0b00000; + let Inst{12} = idx{0}; + let Inst{11-10} = size; +} +class SIMDLdStSingleSTied<bit L, bit R, bits<3> opcode, bits<2> size, string asm, + dag oops, dag iops, list<dag> pattern> + : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, [$Rn]", "", + oops, iops, pattern> { + // idx encoded in Q:S fields. 
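+ // (Illustrative note, derived from the field assignments below: a 32-bit
+ // lane index of 3 = 0b11 would be encoded as Q = 1, S = 1; the size field
+ // itself is fixed by the class parameter.)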
+ bits<2> idx; + let Inst{30} = idx{1}; + let Inst{23} = 0; + let Inst{20-16} = 0b00000; + let Inst{12} = idx{0}; + let Inst{11-10} = size; +} +class SIMDLdStSingleSPost<bit L, bit R, bits<3> opcode, bits<2> size, + string asm, dag oops, dag iops> + : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, [$Rn], $Xm", + "$Rn = $wback", oops, iops, []> { + // idx encoded in Q:S fields. + bits<2> idx; + bits<5> Xm; + let Inst{30} = idx{1}; + let Inst{23} = 1; + let Inst{20-16} = Xm; + let Inst{12} = idx{0}; + let Inst{11-10} = size; +} +class SIMDLdStSingleSTiedPost<bit L, bit R, bits<3> opcode, bits<2> size, + string asm, dag oops, dag iops> + : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, [$Rn], $Xm", + "$Rn = $wback", oops, iops, []> { + // idx encoded in Q:S fields. + bits<2> idx; + bits<5> Xm; + let Inst{30} = idx{1}; + let Inst{23} = 1; + let Inst{20-16} = Xm; + let Inst{12} = idx{0}; + let Inst{11-10} = size; +} +class SIMDLdStSingleD<bit L, bit R, bits<3> opcode, bits<2> size, string asm, + dag oops, dag iops, list<dag> pattern> + : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, [$Rn]", "", oops, iops, + pattern> { + // idx encoded in Q field. + bits<1> idx; + let Inst{30} = idx; + let Inst{23} = 0; + let Inst{20-16} = 0b00000; + let Inst{12} = 0; + let Inst{11-10} = size; +} +class SIMDLdStSingleDTied<bit L, bit R, bits<3> opcode, bits<2> size, string asm, + dag oops, dag iops, list<dag> pattern> + : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, [$Rn]", "", + oops, iops, pattern> { + // idx encoded in Q field. + bits<1> idx; + let Inst{30} = idx; + let Inst{23} = 0; + let Inst{20-16} = 0b00000; + let Inst{12} = 0; + let Inst{11-10} = size; +} +class SIMDLdStSingleDPost<bit L, bit R, bits<3> opcode, bits<2> size, + string asm, dag oops, dag iops> + : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, [$Rn], $Xm", + "$Rn = $wback", oops, iops, []> { + // idx encoded in Q field. + bits<1> idx; + bits<5> Xm; + let Inst{30} = idx; + let Inst{23} = 1; + let Inst{20-16} = Xm; + let Inst{12} = 0; + let Inst{11-10} = size; +} +class SIMDLdStSingleDTiedPost<bit L, bit R, bits<3> opcode, bits<2> size, + string asm, dag oops, dag iops> + : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, [$Rn], $Xm", + "$Rn = $wback", oops, iops, []> { + // idx encoded in Q field. 
+ bits<1> idx; + bits<5> Xm; + let Inst{30} = idx; + let Inst{23} = 1; + let Inst{20-16} = Xm; + let Inst{12} = 0; + let Inst{11-10} = size; +} + +let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in +multiclass SIMDLdSingleBTied<bit R, bits<3> opcode, string asm, + RegisterOperand listtype, + RegisterOperand GPR64pi> { + def i8 : SIMDLdStSingleBTied<1, R, opcode, asm, + (outs listtype:$dst), + (ins listtype:$Vt, VectorIndexB:$idx, + GPR64sp:$Rn), []>; + + def i8_POST : SIMDLdStSingleBTiedPost<1, R, opcode, asm, + (outs GPR64sp:$wback, listtype:$dst), + (ins listtype:$Vt, VectorIndexB:$idx, + GPR64sp:$Rn, GPR64pi:$Xm)>; +} +let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in +multiclass SIMDLdSingleHTied<bit R, bits<3> opcode, bit size, string asm, + RegisterOperand listtype, + RegisterOperand GPR64pi> { + def i16 : SIMDLdStSingleHTied<1, R, opcode, size, asm, + (outs listtype:$dst), + (ins listtype:$Vt, VectorIndexH:$idx, + GPR64sp:$Rn), []>; + + def i16_POST : SIMDLdStSingleHTiedPost<1, R, opcode, size, asm, + (outs GPR64sp:$wback, listtype:$dst), + (ins listtype:$Vt, VectorIndexH:$idx, + GPR64sp:$Rn, GPR64pi:$Xm)>; +} +let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in +multiclass SIMDLdSingleSTied<bit R, bits<3> opcode, bits<2> size,string asm, + RegisterOperand listtype, + RegisterOperand GPR64pi> { + def i32 : SIMDLdStSingleSTied<1, R, opcode, size, asm, + (outs listtype:$dst), + (ins listtype:$Vt, VectorIndexS:$idx, + GPR64sp:$Rn), []>; + + def i32_POST : SIMDLdStSingleSTiedPost<1, R, opcode, size, asm, + (outs GPR64sp:$wback, listtype:$dst), + (ins listtype:$Vt, VectorIndexS:$idx, + GPR64sp:$Rn, GPR64pi:$Xm)>; +} +let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in +multiclass SIMDLdSingleDTied<bit R, bits<3> opcode, bits<2> size, string asm, + RegisterOperand listtype, RegisterOperand GPR64pi> { + def i64 : SIMDLdStSingleDTied<1, R, opcode, size, asm, + (outs listtype:$dst), + (ins listtype:$Vt, VectorIndexD:$idx, + GPR64sp:$Rn), []>; + + def i64_POST : SIMDLdStSingleDTiedPost<1, R, opcode, size, asm, + (outs GPR64sp:$wback, listtype:$dst), + (ins listtype:$Vt, VectorIndexD:$idx, + GPR64sp:$Rn, GPR64pi:$Xm)>; +} +let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in +multiclass SIMDStSingleB<bit R, bits<3> opcode, string asm, + RegisterOperand listtype, RegisterOperand GPR64pi> { + def i8 : SIMDLdStSingleB<0, R, opcode, asm, + (outs), (ins listtype:$Vt, VectorIndexB:$idx, + GPR64sp:$Rn), []>; + + def i8_POST : SIMDLdStSingleBPost<0, R, opcode, asm, + (outs GPR64sp:$wback), + (ins listtype:$Vt, VectorIndexB:$idx, + GPR64sp:$Rn, GPR64pi:$Xm)>; +} +let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in +multiclass SIMDStSingleH<bit R, bits<3> opcode, bit size, string asm, + RegisterOperand listtype, RegisterOperand GPR64pi> { + def i16 : SIMDLdStSingleH<0, R, opcode, size, asm, + (outs), (ins listtype:$Vt, VectorIndexH:$idx, + GPR64sp:$Rn), []>; + + def i16_POST : SIMDLdStSingleHPost<0, R, opcode, size, asm, + (outs GPR64sp:$wback), + (ins listtype:$Vt, VectorIndexH:$idx, + GPR64sp:$Rn, GPR64pi:$Xm)>; +} +let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in +multiclass SIMDStSingleS<bit R, bits<3> opcode, bits<2> size,string asm, + RegisterOperand listtype, RegisterOperand GPR64pi> { + def i32 : SIMDLdStSingleS<0, R, opcode, size, asm, + (outs), (ins listtype:$Vt, VectorIndexS:$idx, + GPR64sp:$Rn), []>; + + def i32_POST : SIMDLdStSingleSPost<0, R, opcode, size, asm, + (outs GPR64sp:$wback), + (ins listtype:$Vt, VectorIndexS:$idx, + GPR64sp:$Rn, GPR64pi:$Xm)>; +} +let mayLoad = 0, 
mayStore = 1, hasSideEffects = 0 in +multiclass SIMDStSingleD<bit R, bits<3> opcode, bits<2> size, string asm, + RegisterOperand listtype, RegisterOperand GPR64pi> { + def i64 : SIMDLdStSingleD<0, R, opcode, size, asm, + (outs), (ins listtype:$Vt, VectorIndexD:$idx, + GPR64sp:$Rn), []>; + + def i64_POST : SIMDLdStSingleDPost<0, R, opcode, size, asm, + (outs GPR64sp:$wback), + (ins listtype:$Vt, VectorIndexD:$idx, + GPR64sp:$Rn, GPR64pi:$Xm)>; +} + +multiclass SIMDLdStSingleAliases<string asm, string layout, string Type, + string Count, int Offset, Operand idxtype> { + // E.g. "ld1 { v0.8b }[0], [x1], #1" + // "ld1\t$Vt, [$Rn], #1" + // may get mapped to + // (LD1Rv8b_POST VecListOne8b:$Vt, GPR64sp:$Rn, XZR) + def : InstAlias<asm # "\t$Vt$idx, [$Rn], #" # Offset, + (!cast<Instruction>(NAME # Type # "_POST") + GPR64sp:$Rn, + !cast<RegisterOperand>("VecList" # Count # layout):$Vt, + idxtype:$idx, XZR), 1>; + + // E.g. "ld1.8b { v0 }[0], [x1], #1" + // "ld1.8b\t$Vt, [$Rn], #1" + // may get mapped to + // (LD1Rv8b_POST VecListOne64:$Vt, GPR64sp:$Rn, XZR) + def : InstAlias<asm # "." # layout # "\t$Vt$idx, [$Rn], #" # Offset, + (!cast<Instruction>(NAME # Type # "_POST") + GPR64sp:$Rn, + !cast<RegisterOperand>("VecList" # Count # "128"):$Vt, + idxtype:$idx, XZR), 0>; + + // E.g. "ld1.8b { v0 }[0], [x1]" + // "ld1.8b\t$Vt, [$Rn]" + // may get mapped to + // (LD1Rv8b VecListOne64:$Vt, GPR64sp:$Rn) + def : InstAlias<asm # "." # layout # "\t$Vt$idx, [$Rn]", + (!cast<Instruction>(NAME # Type) + !cast<RegisterOperand>("VecList" # Count # "128"):$Vt, + idxtype:$idx, GPR64sp:$Rn), 0>; + + // E.g. "ld1.8b { v0 }[0], [x1], x2" + // "ld1.8b\t$Vt, [$Rn], $Xm" + // may get mapped to + // (LD1Rv8b_POST VecListOne64:$Vt, GPR64sp:$Rn, GPR64pi1:$Xm) + def : InstAlias<asm # "." 
# layout # "\t$Vt$idx, [$Rn], $Xm", + (!cast<Instruction>(NAME # Type # "_POST") + GPR64sp:$Rn, + !cast<RegisterOperand>("VecList" # Count # "128"):$Vt, + idxtype:$idx, + !cast<RegisterOperand>("GPR64pi" # Offset):$Xm), 0>; +} + +multiclass SIMDLdSt1SingleAliases<string asm> { + defm "" : SIMDLdStSingleAliases<asm, "b", "i8", "One", 1, VectorIndexB>; + defm "" : SIMDLdStSingleAliases<asm, "h", "i16", "One", 2, VectorIndexH>; + defm "" : SIMDLdStSingleAliases<asm, "s", "i32", "One", 4, VectorIndexS>; + defm "" : SIMDLdStSingleAliases<asm, "d", "i64", "One", 8, VectorIndexD>; +} + +multiclass SIMDLdSt2SingleAliases<string asm> { + defm "" : SIMDLdStSingleAliases<asm, "b", "i8", "Two", 2, VectorIndexB>; + defm "" : SIMDLdStSingleAliases<asm, "h", "i16", "Two", 4, VectorIndexH>; + defm "" : SIMDLdStSingleAliases<asm, "s", "i32", "Two", 8, VectorIndexS>; + defm "" : SIMDLdStSingleAliases<asm, "d", "i64", "Two", 16, VectorIndexD>; +} + +multiclass SIMDLdSt3SingleAliases<string asm> { + defm "" : SIMDLdStSingleAliases<asm, "b", "i8", "Three", 3, VectorIndexB>; + defm "" : SIMDLdStSingleAliases<asm, "h", "i16", "Three", 6, VectorIndexH>; + defm "" : SIMDLdStSingleAliases<asm, "s", "i32", "Three", 12, VectorIndexS>; + defm "" : SIMDLdStSingleAliases<asm, "d", "i64", "Three", 24, VectorIndexD>; +} + +multiclass SIMDLdSt4SingleAliases<string asm> { + defm "" : SIMDLdStSingleAliases<asm, "b", "i8", "Four", 4, VectorIndexB>; + defm "" : SIMDLdStSingleAliases<asm, "h", "i16", "Four", 8, VectorIndexH>; + defm "" : SIMDLdStSingleAliases<asm, "s", "i32", "Four", 16, VectorIndexS>; + defm "" : SIMDLdStSingleAliases<asm, "d", "i64", "Four", 32, VectorIndexD>; +} +} // end of 'let Predicates = [HasNEON]' + +//---------------------------------------------------------------------------- +// AdvSIMD v8.1 Rounding Double Multiply Add/Subtract +//---------------------------------------------------------------------------- + +let Predicates = [HasNEON, HasRDM] in { + +class BaseSIMDThreeSameVectorTiedR0<bit Q, bit U, bits<2> size, bits<5> opcode, + RegisterOperand regtype, string asm, + string kind, list<dag> pattern> + : BaseSIMDThreeSameVectorTied<Q, U, {size,0}, opcode, regtype, asm, kind, + pattern> { +} +multiclass SIMDThreeSameVectorSQRDMLxHTiedHS<bit U, bits<5> opc, string asm, + SDPatternOperator Accum> { + def v4i16 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b01, opc, V64, asm, ".4h", + [(set (v4i16 V64:$dst), + (Accum (v4i16 V64:$Rd), + (v4i16 (int_aarch64_neon_sqrdmulh (v4i16 V64:$Rn), + (v4i16 V64:$Rm)))))]>; + def v8i16 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b01, opc, V128, asm, ".8h", + [(set (v8i16 V128:$dst), + (Accum (v8i16 V128:$Rd), + (v8i16 (int_aarch64_neon_sqrdmulh (v8i16 V128:$Rn), + (v8i16 V128:$Rm)))))]>; + def v2i32 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b10, opc, V64, asm, ".2s", + [(set (v2i32 V64:$dst), + (Accum (v2i32 V64:$Rd), + (v2i32 (int_aarch64_neon_sqrdmulh (v2i32 V64:$Rn), + (v2i32 V64:$Rm)))))]>; + def v4i32 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b10, opc, V128, asm, ".4s", + [(set (v4i32 V128:$dst), + (Accum (v4i32 V128:$Rd), + (v4i32 (int_aarch64_neon_sqrdmulh (v4i32 V128:$Rn), + (v4i32 V128:$Rm)))))]>; +} + +multiclass SIMDIndexedSQRDMLxHSDTied<bit U, bits<4> opc, string asm, + SDPatternOperator Accum> { + def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, + V64, V64, V128_lo, VectorIndexH, + asm, ".4h", ".4h", ".4h", ".h", + [(set (v4i16 V64:$dst), + (Accum (v4i16 V64:$Rd), + (v4i16 (int_aarch64_neon_sqrdmulh + (v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 
V128_lo:$Rm), + VectorIndexH:$idx))))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc, + V128, V128, V128_lo, VectorIndexH, + asm, ".8h", ".8h", ".8h", ".h", + [(set (v8i16 V128:$dst), + (Accum (v8i16 V128:$Rd), + (v8i16 (int_aarch64_neon_sqrdmulh + (v8i16 V128:$Rn), + (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx))))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, + V64, V64, V128, VectorIndexS, + asm, ".2s", ".2s", ".2s", ".s", + [(set (v2i32 V64:$dst), + (Accum (v2i32 V64:$Rd), + (v2i32 (int_aarch64_neon_sqrdmulh + (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + // FIXME: it would be nice to use the scalar (v1i32) instruction here, but + // an intermediate EXTRACT_SUBREG would be untyped. + // FIXME: direct EXTRACT_SUBREG from v2i32 to i32 is illegal, that's why we + // got it lowered here as (i32 vector_extract (v4i32 insert_subvector(..))) + def : Pat<(i32 (Accum (i32 FPR32Op:$Rd), + (i32 (vector_extract + (v4i32 (insert_subvector + (undef), + (v2i32 (int_aarch64_neon_sqrdmulh + (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 + (v4i32 V128:$Rm), + VectorIndexS:$idx)))), + (i32 0))), + (i64 0))))), + (EXTRACT_SUBREG + (v2i32 (!cast<Instruction>(NAME # v2i32_indexed) + (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), + FPR32Op:$Rd, + ssub)), + V64:$Rn, + V128:$Rm, + VectorIndexS:$idx)), + ssub)>; + + def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, + V128, V128, V128, VectorIndexS, + asm, ".4s", ".4s", ".4s", ".s", + [(set (v4i32 V128:$dst), + (Accum (v4i32 V128:$Rd), + (v4i32 (int_aarch64_neon_sqrdmulh + (v4i32 V128:$Rn), + (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + // FIXME: it would be nice to use the scalar (v1i32) instruction here, but + // an intermediate EXTRACT_SUBREG would be untyped. 
+ def : Pat<(i32 (Accum (i32 FPR32Op:$Rd), + (i32 (vector_extract + (v4i32 (int_aarch64_neon_sqrdmulh + (v4i32 V128:$Rn), + (v4i32 (AArch64duplane32 + (v4i32 V128:$Rm), + VectorIndexS:$idx)))), + (i64 0))))), + (EXTRACT_SUBREG + (v4i32 (!cast<Instruction>(NAME # v4i32_indexed) + (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), + FPR32Op:$Rd, + ssub)), + V128:$Rn, + V128:$Rm, + VectorIndexS:$idx)), + ssub)>; + + def i16_indexed : BaseSIMDIndexedTied<1, U, 1, 0b01, opc, + FPR16Op, FPR16Op, V128_lo, + VectorIndexH, asm, ".h", "", "", ".h", + []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc, + FPR32Op, FPR32Op, V128, VectorIndexS, + asm, ".s", "", "", ".s", + [(set (i32 FPR32Op:$dst), + (Accum (i32 FPR32Op:$Rd), + (i32 (int_aarch64_neon_sqrdmulh + (i32 FPR32Op:$Rn), + (i32 (vector_extract (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } +} +} // let Predicates = [HasNeon, HasRDM] + +//---------------------------------------------------------------------------- +// ARMv8.3 Complex ADD/MLA instructions +//---------------------------------------------------------------------------- + +class ComplexRotationOperand<int Angle, int Remainder, string Type> + : AsmOperandClass { + let PredicateMethod = "isComplexRotation<" # Angle # ", " # Remainder # ">"; + let DiagnosticType = "InvalidComplexRotation" # Type; + let Name = "ComplexRotation" # Type; +} +def complexrotateop : Operand<i32> { + let ParserMatchClass = ComplexRotationOperand<90, 0, "Even">; + let PrintMethod = "printComplexRotationOp<90, 0>"; +} +def complexrotateopodd : Operand<i32> { + let ParserMatchClass = ComplexRotationOperand<180, 90, "Odd">; + let PrintMethod = "printComplexRotationOp<180, 90>"; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDThreeSameVectorComplex<bit Q, bit U, bits<2> size, bits<3> opcode, + RegisterOperand regtype, Operand rottype, + string asm, string kind, list<dag> pattern> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, rottype:$rot), asm, + "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $rot" + "|" # kind # "\t$Rd, $Rn, $Rm, $rot}", "", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<1> rot; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size; + let Inst{21} = 0; + let Inst{20-16} = Rm; + let Inst{15-13} = opcode; + // Non-tied version (FCADD) only has one rotation bit + let Inst{12} = rot; + let Inst{11} = 0; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SIMDThreeSameVectorComplexHSD<bit U, bits<3> opcode, Operand rottype, + string asm, SDPatternOperator OpNode>{ + let Predicates = [HasV8_3a, HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDThreeSameVectorComplex<0, U, 0b01, opcode, V64, rottype, + asm, ".4h", + [(set (v4f16 V64:$dst), (OpNode (v4f16 V64:$Rd), + (v4f16 V64:$Rn), + (v4f16 V64:$Rm), + (rottype i32:$rot)))]>; + + def v8f16 : BaseSIMDThreeSameVectorComplex<1, U, 0b01, opcode, V128, rottype, + asm, ".8h", + [(set (v8f16 V128:$dst), (OpNode (v8f16 V128:$Rd), + (v8f16 V128:$Rn), + (v8f16 V128:$Rm), + (rottype i32:$rot)))]>; + } + + let Predicates = [HasV8_3a, HasNEON] in { + def v2f32 : BaseSIMDThreeSameVectorComplex<0, U, 0b10, opcode, V64, rottype, + asm, ".2s", + [(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd), + (v2f32 V64:$Rn), + (v2f32 V64:$Rm), + 
(rottype i32:$rot)))]>; + + def v4f32 : BaseSIMDThreeSameVectorComplex<1, U, 0b10, opcode, V128, rottype, + asm, ".4s", + [(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd), + (v4f32 V128:$Rn), + (v4f32 V128:$Rm), + (rottype i32:$rot)))]>; + + def v2f64 : BaseSIMDThreeSameVectorComplex<1, U, 0b11, opcode, V128, rottype, + asm, ".2d", + [(set (v2f64 V128:$dst), (OpNode (v2f64 V128:$Rd), + (v2f64 V128:$Rn), + (v2f64 V128:$Rm), + (rottype i32:$rot)))]>; + } +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDThreeSameVectorTiedComplex<bit Q, bit U, bits<2> size, + bits<3> opcode, + RegisterOperand regtype, + Operand rottype, string asm, + string kind, list<dag> pattern> + : I<(outs regtype:$dst), + (ins regtype:$Rd, regtype:$Rn, regtype:$Rm, rottype:$rot), asm, + "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $rot" + "|" # kind # "\t$Rd, $Rn, $Rm, $rot}", "$Rd = $dst", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<2> rot; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size; + let Inst{21} = 0; + let Inst{20-16} = Rm; + let Inst{15-13} = opcode; + let Inst{12-11} = rot; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SIMDThreeSameVectorTiedComplexHSD<bit U, bits<3> opcode, + Operand rottype, string asm, + SDPatternOperator OpNode> { + let Predicates = [HasV8_3a, HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDThreeSameVectorTiedComplex<0, U, 0b01, opcode, V64, + rottype, asm, ".4h", + [(set (v4f16 V64:$dst), (OpNode (v4f16 V64:$Rd), + (v4f16 V64:$Rn), + (v4f16 V64:$Rm), + (rottype i32:$rot)))]>; + + def v8f16 : BaseSIMDThreeSameVectorTiedComplex<1, U, 0b01, opcode, V128, + rottype, asm, ".8h", + [(set (v8f16 V128:$dst), (OpNode (v8f16 V128:$Rd), + (v8f16 V128:$Rn), + (v8f16 V128:$Rm), + (rottype i32:$rot)))]>; + } + + let Predicates = [HasV8_3a, HasNEON] in { + def v2f32 : BaseSIMDThreeSameVectorTiedComplex<0, U, 0b10, opcode, V64, + rottype, asm, ".2s", + [(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd), + (v2f32 V64:$Rn), + (v2f32 V64:$Rm), + (rottype i32:$rot)))]>; + + def v4f32 : BaseSIMDThreeSameVectorTiedComplex<1, U, 0b10, opcode, V128, + rottype, asm, ".4s", + [(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd), + (v4f32 V128:$Rn), + (v4f32 V128:$Rm), + (rottype i32:$rot)))]>; + + def v2f64 : BaseSIMDThreeSameVectorTiedComplex<1, U, 0b11, opcode, V128, + rottype, asm, ".2d", + [(set (v2f64 V128:$dst), (OpNode (v2f64 V128:$Rd), + (v2f64 V128:$Rn), + (v2f64 V128:$Rm), + (rottype i32:$rot)))]>; + } +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDIndexedTiedComplex<bit Q, bit U, bit Scalar, bits<2> size, + bit opc1, bit opc2, RegisterOperand dst_reg, + RegisterOperand lhs_reg, + RegisterOperand rhs_reg, Operand vec_idx, + Operand rottype, string asm, string apple_kind, + string dst_kind, string lhs_kind, + string rhs_kind, list<dag> pattern> + : I<(outs dst_reg:$dst), + (ins dst_reg:$Rd, lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx, rottype:$rot), + asm, + "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # + "$idx, $rot" # "|" # apple_kind # + "\t$Rd, $Rn, $Rm$idx, $rot}", "$Rd = $dst", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<2> rot; + + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28} = Scalar; + let Inst{27-24} = 0b1111; + let Inst{23-22} = size; + // Bit 21 must be set by the derived class. 
+ let Inst{20-16} = Rm; + let Inst{15} = opc1; + let Inst{14-13} = rot; + let Inst{12} = opc2; + // Bit 11 must be set by the derived class. + let Inst{10} = 0; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +// The complex instructions index by pairs of elements, so the VectorIndexes +// don't match the lane types, and the index bits are different to the other +// classes. +multiclass SIMDIndexedTiedComplexHSD<bit U, bit opc1, bit opc2, Operand rottype, + string asm, SDPatternOperator OpNode> { + let Predicates = [HasV8_3a,HasNEON,HasFullFP16] in { + def v4f16_indexed : BaseSIMDIndexedTiedComplex<0, 1, 0, 0b01, opc1, opc2, V64, + V64, V128, VectorIndexD, rottype, asm, ".4h", ".4h", + ".4h", ".h", []> { + bits<1> idx; + let Inst{11} = 0; + let Inst{21} = idx{0}; + } + + def v8f16_indexed : BaseSIMDIndexedTiedComplex<1, 1, 0, 0b01, opc1, opc2, + V128, V128, V128, VectorIndexS, rottype, asm, ".8h", + ".8h", ".8h", ".h", []> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + } // Predicates = [HasV8_3a,HasNEON,HasFullFP16] + + let Predicates = [HasV8_3a,HasNEON] in { + def v4f32_indexed : BaseSIMDIndexedTiedComplex<1, 1, 0, 0b10, opc1, opc2, + V128, V128, V128, VectorIndexD, rottype, asm, ".4s", + ".4s", ".4s", ".s", []> { + bits<1> idx; + let Inst{11} = idx{0}; + let Inst{21} = 0; + } + } // Predicates = [HasV8_3a,HasNEON] +} + +//---------------------------------------------------------------------------- +// Crypto extensions +//---------------------------------------------------------------------------- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class AESBase<bits<4> opc, string asm, dag outs, dag ins, string cstr, + list<dag> pat> + : I<outs, ins, asm, "{\t$Rd.16b, $Rn.16b|.16b\t$Rd, $Rn}", cstr, pat>, + Sched<[WriteV]>{ + bits<5> Rd; + bits<5> Rn; + let Inst{31-16} = 0b0100111000101000; + let Inst{15-12} = opc; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class AESInst<bits<4> opc, string asm, Intrinsic OpNode> + : AESBase<opc, asm, (outs V128:$Rd), (ins V128:$Rn), "", + [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; + +class AESTiedInst<bits<4> opc, string asm, Intrinsic OpNode> + : AESBase<opc, asm, (outs V128:$dst), (ins V128:$Rd, V128:$Rn), + "$Rd = $dst", + [(set (v16i8 V128:$dst), + (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn)))]>; + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class SHA3OpTiedInst<bits<3> opc, string asm, string dst_lhs_kind, + dag oops, dag iops, list<dag> pat> + : I<oops, iops, asm, + "{\t$Rd" # dst_lhs_kind # ", $Rn" # dst_lhs_kind # ", $Rm.4s" # + "|.4s\t$Rd, $Rn, $Rm}", "$Rd = $dst", pat>, + Sched<[WriteV]>{ + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31-21} = 0b01011110000; + let Inst{20-16} = Rm; + let Inst{15} = 0; + let Inst{14-12} = opc; + let Inst{11-10} = 0b00; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class SHATiedInstQSV<bits<3> opc, string asm, Intrinsic OpNode> + : SHA3OpTiedInst<opc, asm, "", (outs FPR128:$dst), + (ins FPR128:$Rd, FPR32:$Rn, V128:$Rm), + [(set (v4i32 FPR128:$dst), + (OpNode (v4i32 FPR128:$Rd), (i32 FPR32:$Rn), + (v4i32 V128:$Rm)))]>; + +class SHATiedInstVVV<bits<3> opc, string asm, Intrinsic OpNode> + : SHA3OpTiedInst<opc, asm, ".4s", (outs V128:$dst), + (ins V128:$Rd, V128:$Rn, V128:$Rm), + [(set (v4i32 V128:$dst), + (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), + (v4i32 V128:$Rm)))]>; + +class SHATiedInstQQV<bits<3> opc, string asm, Intrinsic OpNode> + : SHA3OpTiedInst<opc, asm, "", (outs FPR128:$dst), + (ins FPR128:$Rd, 
FPR128:$Rn, V128:$Rm), + [(set (v4i32 FPR128:$dst), + (OpNode (v4i32 FPR128:$Rd), (v4i32 FPR128:$Rn), + (v4i32 V128:$Rm)))]>; + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class SHA2OpInst<bits<4> opc, string asm, string kind, + string cstr, dag oops, dag iops, + list<dag> pat> + : I<oops, iops, asm, "{\t$Rd" # kind # ", $Rn" # kind # + "|" # kind # "\t$Rd, $Rn}", cstr, pat>, + Sched<[WriteV]>{ + bits<5> Rd; + bits<5> Rn; + let Inst{31-16} = 0b0101111000101000; + let Inst{15-12} = opc; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class SHATiedInstVV<bits<4> opc, string asm, Intrinsic OpNode> + : SHA2OpInst<opc, asm, ".4s", "$Rd = $dst", (outs V128:$dst), + (ins V128:$Rd, V128:$Rn), + [(set (v4i32 V128:$dst), + (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn)))]>; + +class SHAInstSS<bits<4> opc, string asm, Intrinsic OpNode> + : SHA2OpInst<opc, asm, "", "", (outs FPR32:$Rd), (ins FPR32:$Rn), + [(set (i32 FPR32:$Rd), (OpNode (i32 FPR32:$Rn)))]>; + +// Armv8.2-A Crypto extensions +class BaseCryptoV82<dag oops, dag iops, string asm, string asmops, string cst, + list<dag> pattern> + : I <oops, iops, asm, asmops, cst, pattern>, Sched<[WriteV]> { + bits<5> Vd; + bits<5> Vn; + let Inst{31-25} = 0b1100111; + let Inst{9-5} = Vn; + let Inst{4-0} = Vd; +} + +class CryptoRRTied<bits<1>op0, bits<2>op1, string asm, string asmops> + : BaseCryptoV82<(outs V128:$Vd), (ins V128:$Vn, V128:$Vm), asm, asmops, + "$Vm = $Vd", []> { + let Inst{31-25} = 0b1100111; + let Inst{24-21} = 0b0110; + let Inst{20-15} = 0b000001; + let Inst{14} = op0; + let Inst{13-12} = 0b00; + let Inst{11-10} = op1; +} +class CryptoRRTied_2D<bits<1>op0, bits<2>op1, string asm> + : CryptoRRTied<op0, op1, asm, "{\t$Vd.2d, $Vn.2d}">; +class CryptoRRTied_4S<bits<1>op0, bits<2>op1, string asm> + : CryptoRRTied<op0, op1, asm, "{\t$Vd.4s, $Vn.4s}">; + +class CryptoRRR<bits<1> op0, bits<2>op1, dag oops, dag iops, string asm, + string asmops, string cst> + : BaseCryptoV82<oops, iops, asm , asmops, cst, []> { + bits<5> Vm; + let Inst{24-21} = 0b0011; + let Inst{20-16} = Vm; + let Inst{15} = 0b1; + let Inst{14} = op0; + let Inst{13-12} = 0b00; + let Inst{11-10} = op1; +} +class CryptoRRR_2D<bits<1> op0, bits<2>op1, string asm> + : CryptoRRR<op0, op1, (outs V128:$Vd), (ins V128:$Vn, V128:$Vm), asm, + "{\t$Vd.2d, $Vn.2d, $Vm.2d}", "">; +class CryptoRRRTied_2D<bits<1> op0, bits<2>op1, string asm> + : CryptoRRR<op0, op1, (outs V128:$Vdst), (ins V128:$Vd, V128:$Vn, V128:$Vm), asm, + "{\t$Vd.2d, $Vn.2d, $Vm.2d}", "$Vd = $Vdst">; +class CryptoRRR_4S<bits<1> op0, bits<2>op1, string asm> + : CryptoRRR<op0, op1, (outs V128:$Vd), (ins V128:$Vn, V128:$Vm), asm, + "{\t$Vd.4s, $Vn.4s, $Vm.4s}", "">; +class CryptoRRRTied_4S<bits<1> op0, bits<2>op1, string asm> + : CryptoRRR<op0, op1, (outs V128:$Vdst), (ins V128:$Vd, V128:$Vn, V128:$Vm), asm, + "{\t$Vd.4s, $Vn.4s, $Vm.4s}", "$Vd = $Vdst">; +class CryptoRRRTied<bits<1> op0, bits<2>op1, string asm> + : CryptoRRR<op0, op1, (outs FPR128:$Vdst), (ins FPR128:$Vd, FPR128:$Vn, V128:$Vm), + asm, "{\t$Vd, $Vn, $Vm.2d}", "$Vd = $Vdst">; + +class CryptoRRRR<bits<2>op0, string asm, string asmops> + : BaseCryptoV82<(outs V128:$Vd), (ins V128:$Vn, V128:$Vm, V128:$Va), asm, + asmops, "", []> { + bits<5> Vm; + bits<5> Va; + let Inst{24-23} = 0b00; + let Inst{22-21} = op0; + let Inst{20-16} = Vm; + let Inst{15} = 0b0; + let Inst{14-10} = Va; +} +class CryptoRRRR_16B<bits<2>op0, string asm> + : CryptoRRRR<op0, asm, "{\t$Vd.16b, $Vn.16b, $Vm.16b, $Va.16b}"> { +} +class CryptoRRRR_4S<bits<2>op0, 
string asm> + : CryptoRRRR<op0, asm, "{\t$Vd.4s, $Vn.4s, $Vm.4s, $Va.4s}"> { +} + +class CryptoRRRi6<string asm> + : BaseCryptoV82<(outs V128:$Vd), (ins V128:$Vn, V128:$Vm, uimm6:$imm), asm, + "{\t$Vd.2d, $Vn.2d, $Vm.2d, $imm}", "", []> { + bits<6> imm; + bits<5> Vm; + let Inst{24-21} = 0b0100; + let Inst{20-16} = Vm; + let Inst{15-10} = imm; + let Inst{9-5} = Vn; + let Inst{4-0} = Vd; +} + +class CryptoRRRi2Tied<bits<1>op0, bits<2>op1, string asm> + : BaseCryptoV82<(outs V128:$Vdst), + (ins V128:$Vd, V128:$Vn, V128:$Vm, VectorIndexS:$imm), + asm, "{\t$Vd.4s, $Vn.4s, $Vm.s$imm}", "$Vd = $Vdst", []> { + bits<2> imm; + bits<5> Vm; + let Inst{24-21} = 0b0010; + let Inst{20-16} = Vm; + let Inst{15} = 0b1; + let Inst{14} = op0; + let Inst{13-12} = imm; + let Inst{11-10} = op1; +} + +//---------------------------------------------------------------------------- +// v8.1 atomic instructions extension: +// * CAS +// * CASP +// * SWP +// * LDOPregister<OP>, and aliases STOPregister<OP> + +// Instruction encodings: +// +// 31 30|29 24|23|22|21|20 16|15|14 10|9 5|4 0 +// CAS SZ |001000|1 |A |1 |Rs |R |11111 |Rn |Rt +// CASP 0|SZ|001000|0 |A |1 |Rs |R |11111 |Rn |Rt +// SWP SZ |111000|A |R |1 |Rs |1 |OPC|00|Rn |Rt +// LD SZ |111000|A |R |1 |Rs |0 |OPC|00|Rn |Rt +// ST SZ |111000|A |R |1 |Rs |0 |OPC|00|Rn |11111 + +// Instruction syntax: +// +// CAS{<order>}[<size>] <Ws>, <Wt>, [<Xn|SP>] +// CAS{<order>} <Xs>, <Xt>, [<Xn|SP>] +// CASP{<order>} <Ws>, <W(s+1)>, <Wt>, <W(t+1)>, [<Xn|SP>] +// CASP{<order>} <Xs>, <X(s+1)>, <Xt>, <X(t+1)>, [<Xn|SP>] +// SWP{<order>}[<size>] <Ws>, <Wt>, [<Xn|SP>] +// SWP{<order>} <Xs>, <Xt>, [<Xn|SP>] +// LD<OP>{<order>}[<size>] <Ws>, <Wt>, [<Xn|SP>] +// LD<OP>{<order>} <Xs>, <Xt>, [<Xn|SP>] +// ST<OP>{<order>}[<size>] <Ws>, [<Xn|SP>] +// ST<OP>{<order>} <Xs>, [<Xn|SP>] + +let Predicates = [HasLSE], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in +class BaseCASEncoding<dag oops, dag iops, string asm, string operands, + string cstr, list<dag> pattern> + : I<oops, iops, asm, operands, cstr, pattern> { + bits<2> Sz; + bit NP; + bit Acq; + bit Rel; + bits<5> Rs; + bits<5> Rn; + bits<5> Rt; + let Inst{31-30} = Sz; + let Inst{29-24} = 0b001000; + let Inst{23} = NP; + let Inst{22} = Acq; + let Inst{21} = 0b1; + let Inst{20-16} = Rs; + let Inst{15} = Rel; + let Inst{14-10} = 0b11111; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + let Predicates = [HasLSE]; +} + +class BaseCAS<string order, string size, RegisterClass RC> + : BaseCASEncoding<(outs RC:$out),(ins RC:$Rs, RC:$Rt, GPR64sp:$Rn), + "cas" # order # size, "\t$Rs, $Rt, [$Rn]", + "$out = $Rs",[]>, + Sched<[WriteAtomic]> { + let NP = 1; +} + +multiclass CompareAndSwap<bits<1> Acq, bits<1> Rel, string order> { + let Sz = 0b00, Acq = Acq, Rel = Rel in def B : BaseCAS<order, "b", GPR32>; + let Sz = 0b01, Acq = Acq, Rel = Rel in def H : BaseCAS<order, "h", GPR32>; + let Sz = 0b10, Acq = Acq, Rel = Rel in def W : BaseCAS<order, "", GPR32>; + let Sz = 0b11, Acq = Acq, Rel = Rel in def X : BaseCAS<order, "", GPR64>; +} + +class BaseCASP<string order, string size, RegisterOperand RC> + : BaseCASEncoding<(outs RC:$out),(ins RC:$Rs, RC:$Rt, GPR64sp:$Rn), + "casp" # order # size, "\t$Rs, $Rt, [$Rn]", + "$out = $Rs",[]>, + Sched<[WriteAtomic]> { + let NP = 0; +} + +multiclass CompareAndSwapPair<bits<1> Acq, bits<1> Rel, string order> { + let Sz = 0b00, Acq = Acq, Rel = Rel in + def W : BaseCASP<order, "", WSeqPairClassOperand>; + let Sz = 0b01, Acq = Acq, Rel = Rel in + def X : BaseCASP<order, "", XSeqPairClassOperand>; +} + +let Predicates 
= [HasLSE] in +class BaseSWP<string order, string size, RegisterClass RC> + : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "swp" # order # size, + "\t$Rs, $Rt, [$Rn]","",[]>, + Sched<[WriteAtomic]> { + bits<2> Sz; + bit Acq; + bit Rel; + bits<5> Rs; + bits<3> opc = 0b000; + bits<5> Rn; + bits<5> Rt; + let Inst{31-30} = Sz; + let Inst{29-24} = 0b111000; + let Inst{23} = Acq; + let Inst{22} = Rel; + let Inst{21} = 0b1; + let Inst{20-16} = Rs; + let Inst{15} = 0b1; + let Inst{14-12} = opc; + let Inst{11-10} = 0b00; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + let Predicates = [HasLSE]; +} + +multiclass Swap<bits<1> Acq, bits<1> Rel, string order> { + let Sz = 0b00, Acq = Acq, Rel = Rel in def B : BaseSWP<order, "b", GPR32>; + let Sz = 0b01, Acq = Acq, Rel = Rel in def H : BaseSWP<order, "h", GPR32>; + let Sz = 0b10, Acq = Acq, Rel = Rel in def W : BaseSWP<order, "", GPR32>; + let Sz = 0b11, Acq = Acq, Rel = Rel in def X : BaseSWP<order, "", GPR64>; +} + +let Predicates = [HasLSE], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in +class BaseLDOPregister<string op, string order, string size, RegisterClass RC> + : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "ld" # op # order # size, + "\t$Rs, $Rt, [$Rn]","",[]>, + Sched<[WriteAtomic]> { + bits<2> Sz; + bit Acq; + bit Rel; + bits<5> Rs; + bits<3> opc; + bits<5> Rn; + bits<5> Rt; + let Inst{31-30} = Sz; + let Inst{29-24} = 0b111000; + let Inst{23} = Acq; + let Inst{22} = Rel; + let Inst{21} = 0b1; + let Inst{20-16} = Rs; + let Inst{15} = 0b0; + let Inst{14-12} = opc; + let Inst{11-10} = 0b00; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + let Predicates = [HasLSE]; +} + +multiclass LDOPregister<bits<3> opc, string op, bits<1> Acq, bits<1> Rel, + string order> { + let Sz = 0b00, Acq = Acq, Rel = Rel, opc = opc in + def B : BaseLDOPregister<op, order, "b", GPR32>; + let Sz = 0b01, Acq = Acq, Rel = Rel, opc = opc in + def H : BaseLDOPregister<op, order, "h", GPR32>; + let Sz = 0b10, Acq = Acq, Rel = Rel, opc = opc in + def W : BaseLDOPregister<op, order, "", GPR32>; + let Sz = 0b11, Acq = Acq, Rel = Rel, opc = opc in + def X : BaseLDOPregister<op, order, "", GPR64>; +} + +// Differing SrcRHS and DstRHS allow you to cover CLR & SUB by giving a more +// complex DAG for DstRHS. 
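+// For illustration only (a sketch; the real instantiations live elsewhere,
+// e.g. in AArch64InstrAtomics.td, and may differ): LDCLR clears the bits
+// that are set in Rs, so an atomic AND can be covered by materializing the
+// complement of the operand first, roughly
+//   defm : LDOPregister_patterns_mod<"LDCLR", "atomic_load_and", "ORN">;
+// i.e. SrcRHS stays (i32 GPR32:$Rm) while DstRHS becomes
+// (i32 (ORNWrr WZR, GPR32:$Rm)).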
+let Predicates = [HasLSE] in +multiclass LDOPregister_patterns_ord_dag<string inst, string suffix, string op, + string size, dag SrcRHS, dag DstRHS> { + def : Pat<(!cast<PatFrag>(op#"_"#size#"_monotonic") GPR64sp:$Rn, SrcRHS), + (!cast<Instruction>(inst # suffix) DstRHS, GPR64sp:$Rn)>; + def : Pat<(!cast<PatFrag>(op#"_"#size#"_acquire") GPR64sp:$Rn, SrcRHS), + (!cast<Instruction>(inst # "A" # suffix) DstRHS, GPR64sp:$Rn)>; + def : Pat<(!cast<PatFrag>(op#"_"#size#"_release") GPR64sp:$Rn, SrcRHS), + (!cast<Instruction>(inst # "L" # suffix) DstRHS, GPR64sp:$Rn)>; + def : Pat<(!cast<PatFrag>(op#"_"#size#"_acq_rel") GPR64sp:$Rn, SrcRHS), + (!cast<Instruction>(inst # "AL" # suffix) DstRHS, GPR64sp:$Rn)>; + def : Pat<(!cast<PatFrag>(op#"_"#size#"_seq_cst") GPR64sp:$Rn, SrcRHS), + (!cast<Instruction>(inst # "AL" # suffix) DstRHS, GPR64sp:$Rn)>; +} + +multiclass LDOPregister_patterns_ord<string inst, string suffix, string op, + string size, dag RHS> { + defm : LDOPregister_patterns_ord_dag<inst, suffix, op, size, RHS, RHS>; +} + +multiclass LDOPregister_patterns_ord_mod<string inst, string suffix, string op, + string size, dag LHS, dag RHS> { + defm : LDOPregister_patterns_ord_dag<inst, suffix, op, size, LHS, RHS>; +} + +multiclass LDOPregister_patterns<string inst, string op> { + defm : LDOPregister_patterns_ord<inst, "X", op, "64", (i64 GPR64:$Rm)>; + defm : LDOPregister_patterns_ord<inst, "W", op, "32", (i32 GPR32:$Rm)>; + defm : LDOPregister_patterns_ord<inst, "H", op, "16", (i32 GPR32:$Rm)>; + defm : LDOPregister_patterns_ord<inst, "B", op, "8", (i32 GPR32:$Rm)>; +} + +multiclass LDOPregister_patterns_mod<string inst, string op, string mod> { + defm : LDOPregister_patterns_ord_mod<inst, "X", op, "64", + (i64 GPR64:$Rm), + (i64 (!cast<Instruction>(mod#Xrr) XZR, GPR64:$Rm))>; + defm : LDOPregister_patterns_ord_mod<inst, "W", op, "32", + (i32 GPR32:$Rm), + (i32 (!cast<Instruction>(mod#Wrr) WZR, GPR32:$Rm))>; + defm : LDOPregister_patterns_ord_mod<inst, "H", op, "16", + (i32 GPR32:$Rm), + (i32 (!cast<Instruction>(mod#Wrr) WZR, GPR32:$Rm))>; + defm : LDOPregister_patterns_ord_mod<inst, "B", op, "8", + (i32 GPR32:$Rm), + (i32 (!cast<Instruction>(mod#Wrr) WZR, GPR32:$Rm))>; +} + +let Predicates = [HasLSE] in +multiclass CASregister_patterns_ord_dag<string inst, string suffix, string op, + string size, dag OLD, dag NEW> { + def : Pat<(!cast<PatFrag>(op#"_"#size#"_monotonic") GPR64sp:$Rn, OLD, NEW), + (!cast<Instruction>(inst # suffix) OLD, NEW, GPR64sp:$Rn)>; + def : Pat<(!cast<PatFrag>(op#"_"#size#"_acquire") GPR64sp:$Rn, OLD, NEW), + (!cast<Instruction>(inst # "A" # suffix) OLD, NEW, GPR64sp:$Rn)>; + def : Pat<(!cast<PatFrag>(op#"_"#size#"_release") GPR64sp:$Rn, OLD, NEW), + (!cast<Instruction>(inst # "L" # suffix) OLD, NEW, GPR64sp:$Rn)>; + def : Pat<(!cast<PatFrag>(op#"_"#size#"_acq_rel") GPR64sp:$Rn, OLD, NEW), + (!cast<Instruction>(inst # "AL" # suffix) OLD, NEW, GPR64sp:$Rn)>; + def : Pat<(!cast<PatFrag>(op#"_"#size#"_seq_cst") GPR64sp:$Rn, OLD, NEW), + (!cast<Instruction>(inst # "AL" # suffix) OLD, NEW, GPR64sp:$Rn)>; +} + +multiclass CASregister_patterns_ord<string inst, string suffix, string op, + string size, dag OLD, dag NEW> { + defm : CASregister_patterns_ord_dag<inst, suffix, op, size, OLD, NEW>; +} + +multiclass CASregister_patterns<string inst, string op> { + defm : CASregister_patterns_ord<inst, "X", op, "64", + (i64 GPR64:$Rold), (i64 GPR64:$Rnew)>; + defm : CASregister_patterns_ord<inst, "W", op, "32", + (i32 GPR32:$Rold), (i32 GPR32:$Rnew)>; + defm : 
CASregister_patterns_ord<inst, "H", op, "16", + (i32 GPR32:$Rold), (i32 GPR32:$Rnew)>; + defm : CASregister_patterns_ord<inst, "B", op, "8", + (i32 GPR32:$Rold), (i32 GPR32:$Rnew)>; +} + +let Predicates = [HasLSE] in +class BaseSTOPregister<string asm, RegisterClass OP, Register Reg, + Instruction inst> : + InstAlias<asm # "\t$Rs, [$Rn]", (inst Reg, OP:$Rs, GPR64sp:$Rn)>; + +multiclass STOPregister<string asm, string instr> { + def : BaseSTOPregister<asm # "lb", GPR32, WZR, + !cast<Instruction>(instr # "LB")>; + def : BaseSTOPregister<asm # "lh", GPR32, WZR, + !cast<Instruction>(instr # "LH")>; + def : BaseSTOPregister<asm # "l", GPR32, WZR, + !cast<Instruction>(instr # "LW")>; + def : BaseSTOPregister<asm # "l", GPR64, XZR, + !cast<Instruction>(instr # "LX")>; + def : BaseSTOPregister<asm # "b", GPR32, WZR, + !cast<Instruction>(instr # "B")>; + def : BaseSTOPregister<asm # "h", GPR32, WZR, + !cast<Instruction>(instr # "H")>; + def : BaseSTOPregister<asm, GPR32, WZR, + !cast<Instruction>(instr # "W")>; + def : BaseSTOPregister<asm, GPR64, XZR, + !cast<Instruction>(instr # "X")>; +} + +//---------------------------------------------------------------------------- +// Allow the size specifier tokens to be upper case, not just lower. +def : TokenAlias<".4B", ".4b">; // Add dot product +def : TokenAlias<".8B", ".8b">; +def : TokenAlias<".4H", ".4h">; +def : TokenAlias<".2S", ".2s">; +def : TokenAlias<".1D", ".1d">; +def : TokenAlias<".16B", ".16b">; +def : TokenAlias<".8H", ".8h">; +def : TokenAlias<".4S", ".4s">; +def : TokenAlias<".2D", ".2d">; +def : TokenAlias<".1Q", ".1q">; +def : TokenAlias<".2H", ".2h">; +def : TokenAlias<".B", ".b">; +def : TokenAlias<".H", ".h">; +def : TokenAlias<".S", ".s">; +def : TokenAlias<".D", ".d">; +def : TokenAlias<".Q", ".q">; diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64InstrInfo.td b/capstone/suite/synctools/tablegen/AArch64/AArch64InstrInfo.td new file mode 100644 index 000000000..d6b8bb5d8 --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64InstrInfo.td @@ -0,0 +1,6494 @@ +//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// AArch64 Instruction definitions. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ARM Instruction Predicate Definitions. 
+// +def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">, + AssemblerPredicate<"HasV8_1aOps", "armv8.1a">; +def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">, + AssemblerPredicate<"HasV8_2aOps", "armv8.2a">; +def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">, + AssemblerPredicate<"HasV8_3aOps", "armv8.3a">; +def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">, + AssemblerPredicate<"HasV8_4aOps", "armv8.4a">; +def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">, + AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">; +def HasNEON : Predicate<"Subtarget->hasNEON()">, + AssemblerPredicate<"FeatureNEON", "neon">; +def HasCrypto : Predicate<"Subtarget->hasCrypto()">, + AssemblerPredicate<"FeatureCrypto", "crypto">; +def HasSM4 : Predicate<"Subtarget->hasSM4()">, + AssemblerPredicate<"FeatureSM4", "sm4">; +def HasSHA3 : Predicate<"Subtarget->hasSHA3()">, + AssemblerPredicate<"FeatureSHA3", "sha3">; +def HasSHA2 : Predicate<"Subtarget->hasSHA2()">, + AssemblerPredicate<"FeatureSHA2", "sha2">; +def HasAES : Predicate<"Subtarget->hasAES()">, + AssemblerPredicate<"FeatureAES", "aes">; +def HasDotProd : Predicate<"Subtarget->hasDotProd()">, + AssemblerPredicate<"FeatureDotProd", "dotprod">; +def HasCRC : Predicate<"Subtarget->hasCRC()">, + AssemblerPredicate<"FeatureCRC", "crc">; +def HasLSE : Predicate<"Subtarget->hasLSE()">, + AssemblerPredicate<"FeatureLSE", "lse">; +def HasRAS : Predicate<"Subtarget->hasRAS()">, + AssemblerPredicate<"FeatureRAS", "ras">; +def HasRDM : Predicate<"Subtarget->hasRDM()">, + AssemblerPredicate<"FeatureRDM", "rdm">; +def HasPerfMon : Predicate<"Subtarget->hasPerfMon()">; +def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">, + AssemblerPredicate<"FeatureFullFP16", "fullfp16">; +def HasSPE : Predicate<"Subtarget->hasSPE()">, + AssemblerPredicate<"FeatureSPE", "spe">; +def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">, + AssemblerPredicate<"FeatureFuseAES", + "fuse-aes">; +def HasSVE : Predicate<"Subtarget->hasSVE()">, + AssemblerPredicate<"FeatureSVE", "sve">; +def HasRCPC : Predicate<"Subtarget->hasRCPC()">, + AssemblerPredicate<"FeatureRCPC", "rcpc">; + +def IsLE : Predicate<"Subtarget->isLittleEndian()">; +def IsBE : Predicate<"!Subtarget->isLittleEndian()">; +def UseAlternateSExtLoadCVTF32 + : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">; + +def UseNegativeImmediates + : Predicate<"false">, AssemblerPredicate<"!FeatureNoNegativeImmediates", + "NegativeImmediates">; + + +//===----------------------------------------------------------------------===// +// AArch64-specific DAG Nodes. 
+// + +// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS +def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2, + [SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisInt<0>, SDTCisVT<1, i32>]>; + +// SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS +def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3, + [SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisInt<0>, + SDTCisVT<3, i32>]>; + +// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS +def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, + [SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisInt<0>, + SDTCisVT<1, i32>, + SDTCisVT<4, i32>]>; + +def SDT_AArch64Brcond : SDTypeProfile<0, 3, + [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>]>; +def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>; +def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, + SDTCisVT<2, OtherVT>]>; + + +def SDT_AArch64CSel : SDTypeProfile<1, 4, + [SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisInt<3>, + SDTCisVT<4, i32>]>; +def SDT_AArch64CCMP : SDTypeProfile<1, 5, + [SDTCisVT<0, i32>, + SDTCisInt<1>, + SDTCisSameAs<1, 2>, + SDTCisInt<3>, + SDTCisInt<4>, + SDTCisVT<5, i32>]>; +def SDT_AArch64FCCMP : SDTypeProfile<1, 5, + [SDTCisVT<0, i32>, + SDTCisFP<1>, + SDTCisSameAs<1, 2>, + SDTCisInt<3>, + SDTCisInt<4>, + SDTCisVT<5, i32>]>; +def SDT_AArch64FCmp : SDTypeProfile<0, 2, + [SDTCisFP<0>, + SDTCisSameAs<0, 1>]>; +def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>; +def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>; +def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>]>; +def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>; +def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>; +def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisInt<2>, SDTCisInt<3>]>; +def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; +def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>, SDTCisInt<3>]>; +def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>; + +def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; +def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>; +def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>; +def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>; +def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>, + SDTCisSameAs<0,3>]>; +def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>; +def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>; + +def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>; + +def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, + SDTCisPtrTy<1>]>; + +// Generates the general dynamic sequences, i.e. +// adrp x0, :tlsdesc:var +// ldr x1, [x0, #:tlsdesc_lo12:var] +// add x0, x0, #:tlsdesc_lo12:var +// .tlsdesccall var +// blr x1 + +// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here) +// number of operands (the variable) +def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0,1, + [SDTCisPtrTy<0>]>; + +def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4, + [SDTCisVT<0, i64>, SDTCisVT<1, i32>, + SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>, + SDTCisSameAs<1, 4>]>; + + +// Node definitions. 
+def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>; +def AArch64addlow : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>; +def AArch64LOADgot : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>; +def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START", + SDCallSeqStart<[ SDTCisVT<0, i32>, + SDTCisVT<1, i32> ]>, + [SDNPHasChain, SDNPOutGlue]>; +def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END", + SDCallSeqEnd<[ SDTCisVT<0, i32>, + SDTCisVT<1, i32> ]>, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +def AArch64call : SDNode<"AArch64ISD::CALL", + SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; +def AArch64brcond : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond, + [SDNPHasChain]>; +def AArch64cbz : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz, + [SDNPHasChain]>; +def AArch64cbnz : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz, + [SDNPHasChain]>; +def AArch64tbz : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz, + [SDNPHasChain]>; +def AArch64tbnz : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz, + [SDNPHasChain]>; + + +def AArch64csel : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>; +def AArch64csinv : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>; +def AArch64csneg : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>; +def AArch64csinc : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>; +def AArch64retflag : SDNode<"AArch64ISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def AArch64adc : SDNode<"AArch64ISD::ADC", SDTBinaryArithWithFlagsIn >; +def AArch64sbc : SDNode<"AArch64ISD::SBC", SDTBinaryArithWithFlagsIn>; +def AArch64add_flag : SDNode<"AArch64ISD::ADDS", SDTBinaryArithWithFlagsOut, + [SDNPCommutative]>; +def AArch64sub_flag : SDNode<"AArch64ISD::SUBS", SDTBinaryArithWithFlagsOut>; +def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut, + [SDNPCommutative]>; +def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>; +def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>; + +def AArch64ccmp : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>; +def AArch64ccmn : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>; +def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>; + +def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>; + +def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>; + +def AArch64dup : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>; +def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>; +def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>; +def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>; +def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>; + +def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>; +def AArch64zip2 : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>; +def AArch64uzp1 : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>; +def AArch64uzp2 : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>; +def AArch64trn1 : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>; +def AArch64trn2 : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>; + +def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>; +def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>; +def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>; +def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>; +def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>; +def AArch64movi : 
SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>; +def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>; + +def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>; +def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>; +def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>; +def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>; + +def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>; +def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>; +def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>; +def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>; +def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>; +def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>; +def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>; +def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>; + +def AArch64not: SDNode<"AArch64ISD::NOT", SDT_AArch64unvec>; +def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>; +def AArch64bsl: SDNode<"AArch64ISD::BSL", SDT_AArch64trivec>; + +def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>; +def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>; +def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>; +def AArch64cmhi: SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>; +def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>; + +def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>; +def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>; +def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>; + +def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>; +def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>; +def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>; +def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>; +def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>; +def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS), + (AArch64not (AArch64cmeqz (and node:$LHS, node:$RHS)))>; + +def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>; +def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>; +def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>; +def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>; +def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>; + +def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>; +def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>; + +def AArch64neg : SDNode<"AArch64ISD::NEG", SDT_AArch64unvec>; + +def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH, + [SDNPHasChain, SDNPSideEffect]>; + +def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>; +def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>; + +def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ", + SDT_AArch64TLSDescCallSeq, + [SDNPInGlue, SDNPOutGlue, SDNPHasChain, + SDNPVariadic]>; + + +def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge", + SDT_AArch64WrapperLarge>; + +def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>; + +def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, + SDTCisSameAs<1, 2>]>; +def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull>; +def AArch64umull : SDNode<"AArch64ISD::UMULL", 
SDT_AArch64mull>; + +def AArch64frecpe : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>; +def AArch64frecps : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>; +def AArch64frsqrte : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>; +def AArch64frsqrts : SDNode<"AArch64ISD::FRSQRTS", SDTFPBinOp>; + +def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>; +def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>; +def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>; +def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>; +def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>; +def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>; + +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// + +// AArch64 Instruction Predicate Definitions. +// We could compute these on a per-module basis but doing so requires accessing +// the Function object through the <Target>Subtarget and objections were raised +// to that (see post-commit review comments for r301750). +let RecomputePerFunction = 1 in { + def ForCodeSize : Predicate<"MF->getFunction().optForSize()">; + def NotForCodeSize : Predicate<"!MF->getFunction().optForSize()">; + // Avoid generating STRQro if it is slow, unless we're optimizing for code size. + def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || MF->getFunction().optForSize()">; +} + +include "AArch64InstrFormats.td" +include "SVEInstrFormats.td" + +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Miscellaneous instructions. +//===----------------------------------------------------------------------===// + +let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in { +// We set Sched to empty list because we expect these instructions to simply get +// removed in most cases. +def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), + [(AArch64callseq_start timm:$amt1, timm:$amt2)]>, + Sched<[]>; +def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), + [(AArch64callseq_end timm:$amt1, timm:$amt2)]>, + Sched<[]>; +} // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 + +let isReMaterializable = 1, isCodeGenOnly = 1 in { +// FIXME: The following pseudo instructions are only needed because remat +// cannot handle multiple instructions. When that changes, they can be +// removed, along with the AArch64Wrapper node. + +let AddedComplexity = 10 in +def LOADgot : Pseudo<(outs GPR64:$dst), (ins i64imm:$addr), + [(set GPR64:$dst, (AArch64LOADgot tglobaladdr:$addr))]>, + Sched<[WriteLDAdr]>; + +// The MOVaddr instruction should match only when the add is not folded +// into a load or store address. 
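+// For illustration: MOVaddr is normally expanded later into an ADRP/ADD pair,
+// roughly
+//   adrp x0, sym
+//   add  x0, x0, :lo12:sym
+// whereas an address that does get folded keeps the low 12 bits in the
+// load/store immediate operand instead.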
+def MOVaddr + : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), + [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi), + tglobaladdr:$low))]>, + Sched<[WriteAdrAdr]>; +def MOVaddrJT + : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), + [(set GPR64:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi), + tjumptable:$low))]>, + Sched<[WriteAdrAdr]>; +def MOVaddrCP + : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), + [(set GPR64:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi), + tconstpool:$low))]>, + Sched<[WriteAdrAdr]>; +def MOVaddrBA + : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), + [(set GPR64:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi), + tblockaddress:$low))]>, + Sched<[WriteAdrAdr]>; +def MOVaddrTLS + : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), + [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi), + tglobaltlsaddr:$low))]>, + Sched<[WriteAdrAdr]>; +def MOVaddrEXT + : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), + [(set GPR64:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi), + texternalsym:$low))]>, + Sched<[WriteAdrAdr]>; +// Normally AArch64addlow either gets folded into a following ldr/str, +// or together with an adrp into MOVaddr above. For cases with TLS, it +// might appear without either of them, so allow lowering it into a plain +// add. +def ADDlowTLS + : Pseudo<(outs GPR64:$dst), (ins GPR64:$src, i64imm:$low), + [(set GPR64:$dst, (AArch64addlow GPR64:$src, + tglobaltlsaddr:$low))]>, + Sched<[WriteAdr]>; + +} // isReMaterializable, isCodeGenOnly + +def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr), + (LOADgot tglobaltlsaddr:$addr)>; + +def : Pat<(AArch64LOADgot texternalsym:$addr), + (LOADgot texternalsym:$addr)>; + +def : Pat<(AArch64LOADgot tconstpool:$addr), + (LOADgot tconstpool:$addr)>; + +//===----------------------------------------------------------------------===// +// System instructions. +//===----------------------------------------------------------------------===// + +def HINT : HintI<"hint">; +def : InstAlias<"nop", (HINT 0b000)>; +def : InstAlias<"yield",(HINT 0b001)>; +def : InstAlias<"wfe", (HINT 0b010)>; +def : InstAlias<"wfi", (HINT 0b011)>; +def : InstAlias<"sev", (HINT 0b100)>; +def : InstAlias<"sevl", (HINT 0b101)>; +def : InstAlias<"esb", (HINT 0b10000)>, Requires<[HasRAS]>; +def : InstAlias<"csdb", (HINT 20)>; + +// v8.2a Statistical Profiling extension +def : InstAlias<"psb $op", (HINT psbhint_op:$op)>, Requires<[HasSPE]>; + +// As far as LLVM is concerned this writes to the system's exclusive monitors. +let mayLoad = 1, mayStore = 1 in +def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">; + +// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot +// model patterns with sufficiently fine granularity. +let mayLoad = ?, mayStore = ? 
in { +def DMB : CRmSystemI<barrier_op, 0b101, "dmb", + [(int_aarch64_dmb (i32 imm32_0_15:$CRm))]>; + +def DSB : CRmSystemI<barrier_op, 0b100, "dsb", + [(int_aarch64_dsb (i32 imm32_0_15:$CRm))]>; + +def ISB : CRmSystemI<barrier_op, 0b110, "isb", + [(int_aarch64_isb (i32 imm32_0_15:$CRm))]>; + +def TSB : CRmSystemI<barrier_op, 0b010, "tsb", []> { + let CRm = 0b0010; + let Inst{12} = 0; + let Predicates = [HasV8_4a]; +} +} + +// ARMv8.2 Dot Product +let Predicates = [HasDotProd] in { +defm SDOT : SIMDThreeSameVectorDot<0, "sdot", int_aarch64_neon_sdot>; +defm UDOT : SIMDThreeSameVectorDot<1, "udot", int_aarch64_neon_udot>; +defm SDOTlane : SIMDThreeSameVectorDotIndex<0, "sdot", int_aarch64_neon_sdot>; +defm UDOTlane : SIMDThreeSameVectorDotIndex<1, "udot", int_aarch64_neon_udot>; +} + +// Armv8.2-A Crypto extensions +let Predicates = [HasSHA3] in { +def SHA512H : CryptoRRRTied<0b0, 0b00, "sha512h">; +def SHA512H2 : CryptoRRRTied<0b0, 0b01, "sha512h2">; +def SHA512SU0 : CryptoRRTied_2D<0b0, 0b00, "sha512su0">; +def SHA512SU1 : CryptoRRRTied_2D<0b0, 0b10, "sha512su1">; +def RAX1 : CryptoRRR_2D<0b0,0b11, "rax1">; +def EOR3 : CryptoRRRR_16B<0b00, "eor3">; +def BCAX : CryptoRRRR_16B<0b01, "bcax">; +def XAR : CryptoRRRi6<"xar">; +} // HasSHA3 + +let Predicates = [HasSM4] in { +def SM3TT1A : CryptoRRRi2Tied<0b0, 0b00, "sm3tt1a">; +def SM3TT1B : CryptoRRRi2Tied<0b0, 0b01, "sm3tt1b">; +def SM3TT2A : CryptoRRRi2Tied<0b0, 0b10, "sm3tt2a">; +def SM3TT2B : CryptoRRRi2Tied<0b0, 0b11, "sm3tt2b">; +def SM3SS1 : CryptoRRRR_4S<0b10, "sm3ss1">; +def SM3PARTW1 : CryptoRRRTied_4S<0b1, 0b00, "sm3partw1">; +def SM3PARTW2 : CryptoRRRTied_4S<0b1, 0b01, "sm3partw2">; +def SM4ENCKEY : CryptoRRR_4S<0b1, 0b10, "sm4ekey">; +def SM4E : CryptoRRTied_4S<0b0, 0b01, "sm4e">; +} // HasSM4 + +let Predicates = [HasRCPC] in { + // v8.3 Release Consistent Processor Consistent support, optional in v8.2. + def LDAPRB : RCPCLoad<0b00, "ldaprb", GPR32>; + def LDAPRH : RCPCLoad<0b01, "ldaprh", GPR32>; + def LDAPRW : RCPCLoad<0b10, "ldapr", GPR32>; + def LDAPRX : RCPCLoad<0b11, "ldapr", GPR64>; +} + +// v8.3a complex add and multiply-accumulate. No predicate here, that is done +// inside the multiclass as the FP16 versions need different predicates. 
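+// For illustration, the rotation operand used by these definitions follows the
+// usual Armv8.3-A complex-number forms, e.g.
+//   fcmla v0.4s, v1.4s, v2.4s, #90
+//   fcadd v0.4s, v1.4s, v2.4s, #270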
+defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop, + "fcmla", null_frag>; +defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd, + "fcadd", null_frag>; +defm FCMLA : SIMDIndexedTiedComplexHSD<1, 0, 1, complexrotateop, "fcmla", + null_frag>; + +// v8.3a Pointer Authentication +// These instructions inhabit part of the hint space and so can be used for +// armv8 targets +let Uses = [LR], Defs = [LR] in { + def PACIAZ : SystemNoOperands<0b000, "paciaz">; + def PACIBZ : SystemNoOperands<0b010, "pacibz">; + def AUTIAZ : SystemNoOperands<0b100, "autiaz">; + def AUTIBZ : SystemNoOperands<0b110, "autibz">; +} +let Uses = [LR, SP], Defs = [LR] in { + def PACIASP : SystemNoOperands<0b001, "paciasp">; + def PACIBSP : SystemNoOperands<0b011, "pacibsp">; + def AUTIASP : SystemNoOperands<0b101, "autiasp">; + def AUTIBSP : SystemNoOperands<0b111, "autibsp">; +} +let Uses = [X16, X17], Defs = [X17], CRm = 0b0001 in { + def PACIA1716 : SystemNoOperands<0b000, "pacia1716">; + def PACIB1716 : SystemNoOperands<0b010, "pacib1716">; + def AUTIA1716 : SystemNoOperands<0b100, "autia1716">; + def AUTIB1716 : SystemNoOperands<0b110, "autib1716">; +} + +let Uses = [LR], Defs = [LR], CRm = 0b0000 in { + def XPACLRI : SystemNoOperands<0b111, "xpaclri">; +} + +// These pointer authentication instructions require armv8.3a +let Predicates = [HasV8_3a] in { + multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm> { + def IA : SignAuthOneData<prefix, 0b00, !strconcat(asm, "ia")>; + def IB : SignAuthOneData<prefix, 0b01, !strconcat(asm, "ib")>; + def DA : SignAuthOneData<prefix, 0b10, !strconcat(asm, "da")>; + def DB : SignAuthOneData<prefix, 0b11, !strconcat(asm, "db")>; + def IZA : SignAuthZero<prefix_z, 0b00, !strconcat(asm, "iza")>; + def DZA : SignAuthZero<prefix_z, 0b10, !strconcat(asm, "dza")>; + def IZB : SignAuthZero<prefix_z, 0b01, !strconcat(asm, "izb")>; + def DZB : SignAuthZero<prefix_z, 0b11, !strconcat(asm, "dzb")>; + } + + defm PAC : SignAuth<0b000, 0b010, "pac">; + defm AUT : SignAuth<0b001, 0b011, "aut">; + + def XPACI : SignAuthZero<0b100, 0b00, "xpaci">; + def XPACD : SignAuthZero<0b100, 0b01, "xpacd">; + def PACGA : SignAuthTwoOperand<0b1100, "pacga", null_frag>; + + // Combined Instructions + def BRAA : AuthBranchTwoOperands<0, 0, "braa">; + def BRAB : AuthBranchTwoOperands<0, 1, "brab">; + def BLRAA : AuthBranchTwoOperands<1, 0, "blraa">; + def BLRAB : AuthBranchTwoOperands<1, 1, "blrab">; + + def BRAAZ : AuthOneOperand<0b000, 0, "braaz">; + def BRABZ : AuthOneOperand<0b000, 1, "brabz">; + def BLRAAZ : AuthOneOperand<0b001, 0, "blraaz">; + def BLRABZ : AuthOneOperand<0b001, 1, "blrabz">; + + let isReturn = 1, isTerminator = 1, isBarrier = 1 in { + def RETAA : AuthReturn<0b010, 0, "retaa">; + def RETAB : AuthReturn<0b010, 1, "retab">; + def ERETAA : AuthReturn<0b100, 0, "eretaa">; + def ERETAB : AuthReturn<0b100, 1, "eretab">; + } + + defm LDRAA : AuthLoad<0, "ldraa", simm10Scaled>; + defm LDRAB : AuthLoad<1, "ldrab", simm10Scaled>; + + // v8.3a floating point conversion for javascript + let Predicates = [HasV8_3a, HasFPARMv8] in + def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32, + "fjcvtzs", []> { + let Inst{31} = 0; + } + +} // HasV8_3a + +// v8.4 Flag manipulation instructions +let Predicates = [HasV8_4a] in { +def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> { + let Inst{20-5} = 0b0000001000000000; +} +def SETF8 : BaseFlagManipulation<0, 0, (ins GPR32:$Rn), "setf8", "{\t$Rn}">; +def SETF16 :
BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">; +def RMIF : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif", + "{\t$Rn, $imm, $mask}">; +} // HasV8_4a + +def : InstAlias<"clrex", (CLREX 0xf)>; +def : InstAlias<"isb", (ISB 0xf)>; + +def MRS : MRSI; +def MSR : MSRI; +def MSRpstateImm1 : MSRpstateImm0_1; +def MSRpstateImm4 : MSRpstateImm0_15; + +// The thread pointer (on Linux, at least, where this has been implemented) is +// TPIDR_EL0. +def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins), + [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>; + +// The cycle counter PMC register is PMCCNTR_EL0. +let Predicates = [HasPerfMon] in +def : Pat<(readcyclecounter), (MRS 0xdce8)>; + +// FPCR register +def : Pat<(i64 (int_aarch64_get_fpcr)), (MRS 0xda20)>; + +// Generic system instructions +def SYSxt : SystemXtI<0, "sys">; +def SYSLxt : SystemLXtI<1, "sysl">; + +def : InstAlias<"sys $op1, $Cn, $Cm, $op2", + (SYSxt imm0_7:$op1, sys_cr_op:$Cn, + sys_cr_op:$Cm, imm0_7:$op2, XZR)>; + +//===----------------------------------------------------------------------===// +// Move immediate instructions. +//===----------------------------------------------------------------------===// + +defm MOVK : InsertImmediate<0b11, "movk">; +defm MOVN : MoveImmediate<0b00, "movn">; + +let PostEncoderMethod = "fixMOVZ" in +defm MOVZ : MoveImmediate<0b10, "movz">; + +// First group of aliases covers an implicit "lsl #0". +def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, imm0_65535:$imm, 0), 0>; +def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, imm0_65535:$imm, 0), 0>; +def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, imm0_65535:$imm, 0)>; +def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, imm0_65535:$imm, 0)>; +def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, imm0_65535:$imm, 0)>; +def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, imm0_65535:$imm, 0)>; + +// Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax. +def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>; +def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>; +def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>; +def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>; + +def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>; +def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>; +def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>; +def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>; + +def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g3:$sym, 48), 0>; +def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g2:$sym, 32), 0>; +def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g1:$sym, 16), 0>; +def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g0:$sym, 0), 0>; + +def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>; +def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>; + +def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>; +def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>; + +def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g1:$sym, 16), 0>; +def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g0:$sym, 0), 0>; + +// Final group of aliases covers true "mov $Rd, $imm" cases. 
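+// For example, "mov w0, #0x10000" can be encoded as "movz w0, #1, lsl #16"
+// and "mov x0, #-2" as "movn x0, #1"; the multiclass below generates the
+// operand classes and aliases that let the parser pick a suitable MOVZ/MOVN
+// form.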
+multiclass movw_mov_alias<string basename,Instruction INST, RegisterClass GPR, + int width, int shift> { + def _asmoperand : AsmOperandClass { + let Name = basename # width # "_lsl" # shift # "MovAlias"; + let PredicateMethod = "is" # basename # "MovAlias<" # width # ", " + # shift # ">"; + let RenderMethod = "add" # basename # "MovAliasOperands<" # shift # ">"; + } + + def _movimm : Operand<i32> { + let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_asmoperand"); + } + + def : InstAlias<"mov $Rd, $imm", + (INST GPR:$Rd, !cast<Operand>(NAME # "_movimm"):$imm, shift)>; +} + +defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 0>; +defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>; + +defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 0>; +defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>; +defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>; +defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>; + +defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 0>; +defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>; + +defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 0>; +defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>; +defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>; +defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>; + +let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1, + isAsCheapAsAMove = 1 in { +// FIXME: The following pseudo instructions are only needed because remat +// cannot handle multiple instructions. When that changes, we can select +// directly to the real instructions and get rid of these pseudos. + +def MOVi32imm + : Pseudo<(outs GPR32:$dst), (ins i32imm:$src), + [(set GPR32:$dst, imm:$src)]>, + Sched<[WriteImm]>; +def MOVi64imm + : Pseudo<(outs GPR64:$dst), (ins i64imm:$src), + [(set GPR64:$dst, imm:$src)]>, + Sched<[WriteImm]>; +} // isReMaterializable, isCodeGenOnly + +// If possible, we want to use MOVi32imm even for 64-bit moves. This gives the +// eventual expansion code fewer bits to worry about getting right. Marshalling +// the types is a little tricky though: +def i64imm_32bit : ImmLeaf<i64, [{ + return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm); +}]>; + +def s64imm_32bit : ImmLeaf<i64, [{ + int64_t Imm64 = static_cast<int64_t>(Imm); + return Imm64 >= std::numeric_limits<int32_t>::min() && + Imm64 <= std::numeric_limits<int32_t>::max(); +}]>; + +def trunc_imm : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32); +}]>; + +def gi_trunc_imm : GICustomOperandRenderer<"renderTruncImm">, + GISDNodeXFormEquiv<trunc_imm>; + +def : Pat<(i64 i64imm_32bit:$src), + (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>; + +// Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model). +def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{ +return CurDAG->getTargetConstant( + N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32); +}]>; + +def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{ +return CurDAG->getTargetConstant( + N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64); +}]>; + + +def : Pat<(f32 fpimm:$in), + (COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>; +def : Pat<(f64 fpimm:$in), + (COPY_TO_REGCLASS (MOVi64imm (bitcast_fpimm_to_i64 f64:$in)), FPR64)>; + + +// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK +// sequences. 
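+// Each pattern below simply fills in the four 16-bit chunks of the address:
+// one MOVZ for bits 0-15 followed by MOVKs at shifts 16, 32 and 48.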
+def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2, + tglobaladdr:$g1, tglobaladdr:$g0), + (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g0, 0), + tglobaladdr:$g1, 16), + tglobaladdr:$g2, 32), + tglobaladdr:$g3, 48)>; + +def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2, + tblockaddress:$g1, tblockaddress:$g0), + (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g0, 0), + tblockaddress:$g1, 16), + tblockaddress:$g2, 32), + tblockaddress:$g3, 48)>; + +def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2, + tconstpool:$g1, tconstpool:$g0), + (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g0, 0), + tconstpool:$g1, 16), + tconstpool:$g2, 32), + tconstpool:$g3, 48)>; + +def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2, + tjumptable:$g1, tjumptable:$g0), + (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g0, 0), + tjumptable:$g1, 16), + tjumptable:$g2, 32), + tjumptable:$g3, 48)>; + + +//===----------------------------------------------------------------------===// +// Arithmetic instructions. +//===----------------------------------------------------------------------===// + +// Add/subtract with carry. +defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>; +defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>; + +def : InstAlias<"ngc $dst, $src", (SBCWr GPR32:$dst, WZR, GPR32:$src)>; +def : InstAlias<"ngc $dst, $src", (SBCXr GPR64:$dst, XZR, GPR64:$src)>; +def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>; +def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>; + +// Add/subtract +defm ADD : AddSub<0, "add", "sub", add>; +defm SUB : AddSub<1, "sub", "add">; + +def : InstAlias<"mov $dst, $src", + (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>; +def : InstAlias<"mov $dst, $src", + (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>; +def : InstAlias<"mov $dst, $src", + (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>; +def : InstAlias<"mov $dst, $src", + (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>; + +defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">; +defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">; + +// Use SUBS instead of SUB to enable CSE between SUBS and SUB. +def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm), + (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>; +def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm), + (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>; +def : Pat<(sub GPR32:$Rn, GPR32:$Rm), + (SUBSWrr GPR32:$Rn, GPR32:$Rm)>; +def : Pat<(sub GPR64:$Rn, GPR64:$Rm), + (SUBSXrr GPR64:$Rn, GPR64:$Rm)>; +def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm), + (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>; +def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm), + (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>; +let AddedComplexity = 1 in { +def : Pat<(sub GPR32sp:$R2, arith_extended_reg32<i32>:$R3), + (SUBSWrx GPR32sp:$R2, arith_extended_reg32<i32>:$R3)>; +def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3), + (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3)>; +} + +// Because of the immediate format for add/sub-imm instructions, the +// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). +// These patterns capture that transformation. 
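+// For instance, "add w0, w0, #-5" has no valid encoding, so the patterns below
+// select the equivalent subtract of #5 instead (and vice versa for a subtract
+// of a negative immediate).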
+let AddedComplexity = 1 in { +def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm), + (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; +def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm), + (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; +def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm), + (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; +def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm), + (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; +} + +// Because of the immediate format for add/sub-imm instructions, the +// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). +// These patterns capture that transformation. +let AddedComplexity = 1 in { +def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), + (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; +def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), + (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; +def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), + (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; +def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), + (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; +} + +def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>; +def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>; +def : InstAlias<"neg $dst, $src$shift", + (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>; +def : InstAlias<"neg $dst, $src$shift", + (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>; + +def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>; +def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>; +def : InstAlias<"negs $dst, $src$shift", + (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>; +def : InstAlias<"negs $dst, $src$shift", + (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>; + + +// Unsigned/Signed divide +defm UDIV : Div<0, "udiv", udiv>; +defm SDIV : Div<1, "sdiv", sdiv>; + +def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr GPR32:$Rn, GPR32:$Rm)>; +def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr GPR64:$Rn, GPR64:$Rm)>; +def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr GPR32:$Rn, GPR32:$Rm)>; +def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr GPR64:$Rn, GPR64:$Rm)>; + +// Variable shift +defm ASRV : Shift<0b10, "asr", sra>; +defm LSLV : Shift<0b00, "lsl", shl>; +defm LSRV : Shift<0b01, "lsr", srl>; +defm RORV : Shift<0b11, "ror", rotr>; + +def : ShiftAlias<"asrv", ASRVWr, GPR32>; +def : ShiftAlias<"asrv", ASRVXr, GPR64>; +def : ShiftAlias<"lslv", LSLVWr, GPR32>; +def : ShiftAlias<"lslv", LSLVXr, GPR64>; +def : ShiftAlias<"lsrv", LSRVWr, GPR32>; +def : ShiftAlias<"lsrv", LSRVXr, GPR64>; +def : ShiftAlias<"rorv", RORVWr, GPR32>; +def : ShiftAlias<"rorv", RORVXr, GPR64>; + +// Multiply-add +let AddedComplexity = 5 in { +defm MADD : MulAccum<0, "madd", add>; +defm MSUB : MulAccum<1, "msub", sub>; + +def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)), + (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; +def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)), + (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; + +def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))), + (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; +def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))), + (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; +def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)), + (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; +def : Pat<(i64 (mul (ineg GPR64:$Rn), 
GPR64:$Rm)), + (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; +} // AddedComplexity = 5 + +let AddedComplexity = 5 in { +def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>; +def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>; +def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>; +def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>; + +def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))), + (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; +def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))), + (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; + +def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))), + (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; +def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))), + (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; + +def : Pat<(i64 (mul (sext GPR32:$Rn), (s64imm_32bit:$C))), + (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; +def : Pat<(i64 (mul (zext GPR32:$Rn), (i64imm_32bit:$C))), + (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; +def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C))), + (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), + (MOVi32imm (trunc_imm imm:$C)), XZR)>; + +def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))), + (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; +def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))), + (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; +def : Pat<(i64 (ineg (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)))), + (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), + (MOVi32imm (trunc_imm imm:$C)), XZR)>; + +def : Pat<(i64 (add (mul (sext GPR32:$Rn), (s64imm_32bit:$C)), GPR64:$Ra)), + (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; +def : Pat<(i64 (add (mul (zext GPR32:$Rn), (i64imm_32bit:$C)), GPR64:$Ra)), + (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; +def : Pat<(i64 (add (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)), + GPR64:$Ra)), + (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), + (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; + +def : Pat<(i64 (sub GPR64:$Ra, (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))), + (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; +def : Pat<(i64 (sub GPR64:$Ra, (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))), + (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; +def : Pat<(i64 (sub GPR64:$Ra, (mul (sext_inreg GPR64:$Rn, i32), + (s64imm_32bit:$C)))), + (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), + (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; +} // AddedComplexity = 5 + +def : MulAccumWAlias<"mul", MADDWrrr>; +def : MulAccumXAlias<"mul", MADDXrrr>; +def : MulAccumWAlias<"mneg", MSUBWrrr>; +def : MulAccumXAlias<"mneg", MSUBXrrr>; +def : WideMulAccumAlias<"smull", SMADDLrrr>; +def : WideMulAccumAlias<"smnegl", SMSUBLrrr>; +def : WideMulAccumAlias<"umull", UMADDLrrr>; +def : WideMulAccumAlias<"umnegl", UMSUBLrrr>; + +// Multiply-high +def SMULHrr : MulHi<0b010, "smulh", mulhs>; +def UMULHrr : MulHi<0b110, "umulh", mulhu>; + +// CRC32 +def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">; +def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">; +def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">; +def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">; + +def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">; +def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, 
"crc32ch">; +def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">; +def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">; + +// v8.1 atomic CAS +defm CAS : CompareAndSwap<0, 0, "">; +defm CASA : CompareAndSwap<1, 0, "a">; +defm CASL : CompareAndSwap<0, 1, "l">; +defm CASAL : CompareAndSwap<1, 1, "al">; + +// v8.1 atomic CASP +defm CASP : CompareAndSwapPair<0, 0, "">; +defm CASPA : CompareAndSwapPair<1, 0, "a">; +defm CASPL : CompareAndSwapPair<0, 1, "l">; +defm CASPAL : CompareAndSwapPair<1, 1, "al">; + +// v8.1 atomic SWP +defm SWP : Swap<0, 0, "">; +defm SWPA : Swap<1, 0, "a">; +defm SWPL : Swap<0, 1, "l">; +defm SWPAL : Swap<1, 1, "al">; + +// v8.1 atomic LD<OP>(register). Performs load and then ST<OP>(register) +defm LDADD : LDOPregister<0b000, "add", 0, 0, "">; +defm LDADDA : LDOPregister<0b000, "add", 1, 0, "a">; +defm LDADDL : LDOPregister<0b000, "add", 0, 1, "l">; +defm LDADDAL : LDOPregister<0b000, "add", 1, 1, "al">; + +defm LDCLR : LDOPregister<0b001, "clr", 0, 0, "">; +defm LDCLRA : LDOPregister<0b001, "clr", 1, 0, "a">; +defm LDCLRL : LDOPregister<0b001, "clr", 0, 1, "l">; +defm LDCLRAL : LDOPregister<0b001, "clr", 1, 1, "al">; + +defm LDEOR : LDOPregister<0b010, "eor", 0, 0, "">; +defm LDEORA : LDOPregister<0b010, "eor", 1, 0, "a">; +defm LDEORL : LDOPregister<0b010, "eor", 0, 1, "l">; +defm LDEORAL : LDOPregister<0b010, "eor", 1, 1, "al">; + +defm LDSET : LDOPregister<0b011, "set", 0, 0, "">; +defm LDSETA : LDOPregister<0b011, "set", 1, 0, "a">; +defm LDSETL : LDOPregister<0b011, "set", 0, 1, "l">; +defm LDSETAL : LDOPregister<0b011, "set", 1, 1, "al">; + +defm LDSMAX : LDOPregister<0b100, "smax", 0, 0, "">; +defm LDSMAXA : LDOPregister<0b100, "smax", 1, 0, "a">; +defm LDSMAXL : LDOPregister<0b100, "smax", 0, 1, "l">; +defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">; + +defm LDSMIN : LDOPregister<0b101, "smin", 0, 0, "">; +defm LDSMINA : LDOPregister<0b101, "smin", 1, 0, "a">; +defm LDSMINL : LDOPregister<0b101, "smin", 0, 1, "l">; +defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">; + +defm LDUMAX : LDOPregister<0b110, "umax", 0, 0, "">; +defm LDUMAXA : LDOPregister<0b110, "umax", 1, 0, "a">; +defm LDUMAXL : LDOPregister<0b110, "umax", 0, 1, "l">; +defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">; + +defm LDUMIN : LDOPregister<0b111, "umin", 0, 0, "">; +defm LDUMINA : LDOPregister<0b111, "umin", 1, 0, "a">; +defm LDUMINL : LDOPregister<0b111, "umin", 0, 1, "l">; +defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">; + +// v8.1 atomic ST<OP>(register) as aliases to "LD<OP>(register) when Rt=xZR" +defm : STOPregister<"stadd","LDADD">; // STADDx +defm : STOPregister<"stclr","LDCLR">; // STCLRx +defm : STOPregister<"steor","LDEOR">; // STEORx +defm : STOPregister<"stset","LDSET">; // STSETx +defm : STOPregister<"stsmax","LDSMAX">;// STSMAXx +defm : STOPregister<"stsmin","LDSMIN">;// STSMINx +defm : STOPregister<"stumax","LDUMAX">;// STUMAXx +defm : STOPregister<"stumin","LDUMIN">;// STUMINx + +//===----------------------------------------------------------------------===// +// Logical instructions. +//===----------------------------------------------------------------------===// + +// (immediate) +defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">; +defm AND : LogicalImm<0b00, "and", and, "bic">; +defm EOR : LogicalImm<0b10, "eor", xor, "eon">; +defm ORR : LogicalImm<0b01, "orr", or, "orn">; + +// FIXME: these aliases *are* canonical sometimes (when movz can't be +// used). 
Actually, it seems to be working right now, but putting logical_immXX +// here is a bit dodgy on the AsmParser side too. +def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR, + logical_imm32:$imm), 0>; +def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR, + logical_imm64:$imm), 0>; + + +// (register) +defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>; +defm BICS : LogicalRegS<0b11, 1, "bics", + BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>; +defm AND : LogicalReg<0b00, 0, "and", and>; +defm BIC : LogicalReg<0b00, 1, "bic", + BinOpFrag<(and node:$LHS, (not node:$RHS))>>; +defm EON : LogicalReg<0b10, 1, "eon", + BinOpFrag<(not (xor node:$LHS, node:$RHS))>>; +defm EOR : LogicalReg<0b10, 0, "eor", xor>; +defm ORN : LogicalReg<0b01, 1, "orn", + BinOpFrag<(or node:$LHS, (not node:$RHS))>>; +defm ORR : LogicalReg<0b01, 0, "orr", or>; + +def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>; +def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>; + +def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>; +def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>; + +def : InstAlias<"mvn $Wd, $Wm$sh", + (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>; +def : InstAlias<"mvn $Xd, $Xm$sh", + (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>; + +def : InstAlias<"tst $src1, $src2", + (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>; +def : InstAlias<"tst $src1, $src2", + (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>; + +def : InstAlias<"tst $src1, $src2", + (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>; +def : InstAlias<"tst $src1, $src2", + (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>; + +def : InstAlias<"tst $src1, $src2$sh", + (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>; +def : InstAlias<"tst $src1, $src2$sh", + (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>; + + +def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>; +def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>; + + +//===----------------------------------------------------------------------===// +// One operand data processing instructions. +//===----------------------------------------------------------------------===// + +defm CLS : OneOperandData<0b101, "cls">; +defm CLZ : OneOperandData<0b100, "clz", ctlz>; +defm RBIT : OneOperandData<0b000, "rbit", bitreverse>; + +def REV16Wr : OneWRegData<0b001, "rev16", + UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>; +def REV16Xr : OneXRegData<0b001, "rev16", null_frag>; + +def : Pat<(cttz GPR32:$Rn), + (CLZWr (RBITWr GPR32:$Rn))>; +def : Pat<(cttz GPR64:$Rn), + (CLZXr (RBITXr GPR64:$Rn))>; +def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)), + (i32 1))), + (CLSWr GPR32:$Rn)>; +def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)), + (i64 1))), + (CLSXr GPR64:$Rn)>; + +// Unlike the other one operand instructions, the instructions with the "rev" +// mnemonic do *not* just differ in the size bit, but actually use different +// opcode bits for the different sizes. +def REVWr : OneWRegData<0b010, "rev", bswap>; +def REVXr : OneXRegData<0b011, "rev", bswap>; +def REV32Xr : OneXRegData<0b010, "rev32", + UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>; + +def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>; + +// The bswap commutes with the rotr so we want a pattern for both possible +// orders.
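+// That is, (rotr (bswap x), 16) and (bswap (rotr x, 16)) compute the same
+// value, so REV16 (and REV32 for the 64-bit case) should be matched for
+// either form.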
+def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>; +def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>; + +//===----------------------------------------------------------------------===// +// Bitfield immediate extraction instruction. +//===----------------------------------------------------------------------===// +let hasSideEffects = 0 in +defm EXTR : ExtractImm<"extr">; +def : InstAlias<"ror $dst, $src, $shift", + (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>; +def : InstAlias<"ror $dst, $src, $shift", + (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>; + +def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)), + (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>; +def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)), + (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>; + +//===----------------------------------------------------------------------===// +// Other bitfield immediate instructions. +//===----------------------------------------------------------------------===// +let hasSideEffects = 0 in { +defm BFM : BitfieldImmWith2RegArgs<0b01, "bfm">; +defm SBFM : BitfieldImm<0b00, "sbfm">; +defm UBFM : BitfieldImm<0b10, "ubfm">; +} + +def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{ + uint64_t enc = (32 - N->getZExtValue()) & 0x1f; + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); +}]>; + +def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{ + uint64_t enc = 31 - N->getZExtValue(); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); +}]>; + +// min(7, 31 - shift_amt) +def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{ + uint64_t enc = 31 - N->getZExtValue(); + enc = enc > 7 ? 7 : enc; + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); +}]>; + +// min(15, 31 - shift_amt) +def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{ + uint64_t enc = 31 - N->getZExtValue(); + enc = enc > 15 ? 15 : enc; + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); +}]>; + +def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{ + uint64_t enc = (64 - N->getZExtValue()) & 0x3f; + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); +}]>; + +def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{ + uint64_t enc = 63 - N->getZExtValue(); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); +}]>; + +// min(7, 63 - shift_amt) +def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{ + uint64_t enc = 63 - N->getZExtValue(); + enc = enc > 7 ? 7 : enc; + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); +}]>; + +// min(15, 63 - shift_amt) +def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{ + uint64_t enc = 63 - N->getZExtValue(); + enc = enc > 15 ? 15 : enc; + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); +}]>; + +// min(31, 63 - shift_amt) +def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{ + uint64_t enc = 63 - N->getZExtValue(); + enc = enc > 31 ? 
31 : enc; + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); +}]>; + +def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)), + (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), + (i64 (i32shift_b imm0_31:$imm)))>; +def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)), + (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), + (i64 (i64shift_b imm0_63:$imm)))>; + +let AddedComplexity = 10 in { +def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)), + (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>; +def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)), + (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>; +} + +def : InstAlias<"asr $dst, $src, $shift", + (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>; +def : InstAlias<"asr $dst, $src, $shift", + (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>; +def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>; +def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>; +def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>; +def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; +def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; + +def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)), + (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>; +def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)), + (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>; + +def : InstAlias<"lsr $dst, $src, $shift", + (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>; +def : InstAlias<"lsr $dst, $src, $shift", + (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>; +def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>; +def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>; +def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>; +def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; +def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; + +//===----------------------------------------------------------------------===// +// Conditional comparison instructions. +//===----------------------------------------------------------------------===// +defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>; +defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>; + +//===----------------------------------------------------------------------===// +// Conditional select instructions. 
+//===----------------------------------------------------------------------===// +defm CSEL : CondSelect<0, 0b00, "csel">; + +def inc : PatFrag<(ops node:$in), (add node:$in, 1)>; +defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>; +defm CSINV : CondSelectOp<1, 0b00, "csinv", not>; +defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>; + +def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), + (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; +def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), + (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; +def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), + (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; +def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), + (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; +def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), + (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; +def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), + (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; + +def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV), + (CSINCWr WZR, WZR, (i32 imm:$cc))>; +def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV), + (CSINCXr XZR, XZR, (i32 imm:$cc))>; +def : Pat<(AArch64csel GPR32:$tval, (i32 1), (i32 imm:$cc), NZCV), + (CSINCWr GPR32:$tval, WZR, (i32 imm:$cc))>; +def : Pat<(AArch64csel GPR64:$tval, (i64 1), (i32 imm:$cc), NZCV), + (CSINCXr GPR64:$tval, XZR, (i32 imm:$cc))>; +def : Pat<(AArch64csel (i32 1), GPR32:$fval, (i32 imm:$cc), NZCV), + (CSINCWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>; +def : Pat<(AArch64csel (i64 1), GPR64:$fval, (i32 imm:$cc), NZCV), + (CSINCXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>; +def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV), + (CSINVWr WZR, WZR, (i32 imm:$cc))>; +def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV), + (CSINVXr XZR, XZR, (i32 imm:$cc))>; +def : Pat<(AArch64csel GPR32:$tval, (i32 -1), (i32 imm:$cc), NZCV), + (CSINVWr GPR32:$tval, WZR, (i32 imm:$cc))>; +def : Pat<(AArch64csel GPR64:$tval, (i64 -1), (i32 imm:$cc), NZCV), + (CSINVXr GPR64:$tval, XZR, (i32 imm:$cc))>; +def : Pat<(AArch64csel (i32 -1), GPR32:$fval, (i32 imm:$cc), NZCV), + (CSINVWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>; +def : Pat<(AArch64csel (i64 -1), GPR64:$fval, (i32 imm:$cc), NZCV), + (CSINVXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>; + +// The inverse of the condition code from the alias instruction is what is used +// in the aliased instruction. The parser already inverts the condition code +// for these aliases.
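+// For example, "cset w0, eq" maps onto "csinc w0, wzr, wzr, ne" and
+// "cinc w0, w1, lt" onto "csinc w0, w1, w1, ge".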
+def : InstAlias<"cset $dst, $cc", + (CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>; +def : InstAlias<"cset $dst, $cc", + (CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>; + +def : InstAlias<"csetm $dst, $cc", + (CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>; +def : InstAlias<"csetm $dst, $cc", + (CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>; + +def : InstAlias<"cinc $dst, $src, $cc", + (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>; +def : InstAlias<"cinc $dst, $src, $cc", + (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>; + +def : InstAlias<"cinv $dst, $src, $cc", + (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>; +def : InstAlias<"cinv $dst, $src, $cc", + (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>; + +def : InstAlias<"cneg $dst, $src, $cc", + (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>; +def : InstAlias<"cneg $dst, $src, $cc", + (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>; + +//===----------------------------------------------------------------------===// +// PC-relative instructions. +//===----------------------------------------------------------------------===// +let isReMaterializable = 1 in { +let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in { +def ADR : ADRI<0, "adr", adrlabel, []>; +} // hasSideEffects = 0 + +def ADRP : ADRI<1, "adrp", adrplabel, + [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>; +} // isReMaterializable = 1 + +// page address of a constant pool entry, block address +def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>; +def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>; +def : Pat<(AArch64adrp texternalsym:$sym), (ADRP texternalsym:$sym)>; + +//===----------------------------------------------------------------------===// +// Unconditional branch (register) instructions. +//===----------------------------------------------------------------------===// + +let isReturn = 1, isTerminator = 1, isBarrier = 1 in { +def RET : BranchReg<0b0010, "ret", []>; +def DRPS : SpecialReturn<0b0101, "drps">; +def ERET : SpecialReturn<0b0100, "eret">; +} // isReturn = 1, isTerminator = 1, isBarrier = 1 + +// Default to the LR register. +def : InstAlias<"ret", (RET LR)>; + +let isCall = 1, Defs = [LR], Uses = [SP] in { +def BLR : BranchReg<0b0001, "blr", [(AArch64call GPR64:$Rn)]>; +} // isCall + +let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { +def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>; +} // isBranch, isTerminator, isBarrier, isIndirectBranch + +// Create a separate pseudo-instruction for codegen to use so that we don't +// flag lr as used in every function. It'll be restored before the RET by the +// epilogue if it's legitimately used. +def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retflag)]>, + Sched<[WriteBrReg]> { + let isTerminator = 1; + let isBarrier = 1; + let isReturn = 1; +} + +// This is a directive-like pseudo-instruction. The purpose is to insert an +// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction +// (which in the usual case is a BLR). +let hasSideEffects = 1 in +def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> { + let AsmString = ".tlsdesccall $sym"; +} + +// FIXME: maybe the scratch register used shouldn't be fixed to X1? +// FIXME: can "hasSideEffects be dropped? 
+let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1, + isCodeGenOnly = 1 in +def TLSDESC_CALLSEQ + : Pseudo<(outs), (ins i64imm:$sym), + [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>, + Sched<[WriteI, WriteLD, WriteI, WriteBrReg]>; +def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym), + (TLSDESC_CALLSEQ texternalsym:$sym)>; + +//===----------------------------------------------------------------------===// +// Conditional branch (immediate) instruction. +//===----------------------------------------------------------------------===// +def Bcc : BranchCond; + +//===----------------------------------------------------------------------===// +// Compare-and-branch instructions. +//===----------------------------------------------------------------------===// +defm CBZ : CmpBranch<0, "cbz", AArch64cbz>; +defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>; + +//===----------------------------------------------------------------------===// +// Test-bit-and-branch instructions. +//===----------------------------------------------------------------------===// +defm TBZ : TestBranch<0, "tbz", AArch64tbz>; +defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>; + +//===----------------------------------------------------------------------===// +// Unconditional branch (immediate) instructions. +//===----------------------------------------------------------------------===// +let isBranch = 1, isTerminator = 1, isBarrier = 1 in { +def B : BranchImm<0, "b", [(br bb:$addr)]>; +} // isBranch, isTerminator, isBarrier + +let isCall = 1, Defs = [LR], Uses = [SP] in { +def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>; +} // isCall +def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>; + +//===----------------------------------------------------------------------===// +// Exception generation instructions. +//===----------------------------------------------------------------------===// +let isTrap = 1 in { +def BRK : ExceptionGeneration<0b001, 0b00, "brk">; +} +def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">; +def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">; +def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">; +def HLT : ExceptionGeneration<0b010, 0b00, "hlt">; +def HVC : ExceptionGeneration<0b000, 0b10, "hvc">; +def SMC : ExceptionGeneration<0b000, 0b11, "smc">; +def SVC : ExceptionGeneration<0b000, 0b01, "svc">; + +// DCPSn defaults to an immediate operand of zero if unspecified. +def : InstAlias<"dcps1", (DCPS1 0)>; +def : InstAlias<"dcps2", (DCPS2 0)>; +def : InstAlias<"dcps3", (DCPS3 0)>; + +//===----------------------------------------------------------------------===// +// Load instructions. 
+//===----------------------------------------------------------------------===// + +// Pair (indexed, offset) +defm LDPW : LoadPairOffset<0b00, 0, GPR32z, simm7s4, "ldp">; +defm LDPX : LoadPairOffset<0b10, 0, GPR64z, simm7s8, "ldp">; +defm LDPS : LoadPairOffset<0b00, 1, FPR32Op, simm7s4, "ldp">; +defm LDPD : LoadPairOffset<0b01, 1, FPR64Op, simm7s8, "ldp">; +defm LDPQ : LoadPairOffset<0b10, 1, FPR128Op, simm7s16, "ldp">; + +defm LDPSW : LoadPairOffset<0b01, 0, GPR64z, simm7s4, "ldpsw">; + +// Pair (pre-indexed) +def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32z, simm7s4, "ldp">; +def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64z, simm7s8, "ldp">; +def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32Op, simm7s4, "ldp">; +def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64Op, simm7s8, "ldp">; +def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128Op, simm7s16, "ldp">; + +def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">; + +// Pair (post-indexed) +def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32z, simm7s4, "ldp">; +def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64z, simm7s8, "ldp">; +def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32Op, simm7s4, "ldp">; +def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64Op, simm7s8, "ldp">; +def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128Op, simm7s16, "ldp">; + +def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">; + + +// Pair (no allocate) +defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32z, simm7s4, "ldnp">; +defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64z, simm7s8, "ldnp">; +defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32Op, simm7s4, "ldnp">; +defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64Op, simm7s8, "ldnp">; +defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128Op, simm7s16, "ldnp">; + +//--- +// (register offset) +//--- + +// Integer +defm LDRBB : Load8RO<0b00, 0, 0b01, GPR32, "ldrb", i32, zextloadi8>; +defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>; +defm LDRW : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>; +defm LDRX : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>; + +// Floating-point +defm LDRB : Load8RO<0b00, 1, 0b01, FPR8Op, "ldr", untyped, load>; +defm LDRH : Load16RO<0b01, 1, 0b01, FPR16Op, "ldr", f16, load>; +defm LDRS : Load32RO<0b10, 1, 0b01, FPR32Op, "ldr", f32, load>; +defm LDRD : Load64RO<0b11, 1, 0b01, FPR64Op, "ldr", f64, load>; +defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128Op, "ldr", f128, load>; + +// Load sign-extended half-word +defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>; +defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>; + +// Load sign-extended byte +defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>; +defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>; + +// Load sign-extended word +defm LDRSW : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>; + +// Pre-fetch. +defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">; + +// For regular load, we do not have any alignment requirement. +// Thus, it is safe to directly map the vector loads with interesting +// addressing modes. +// FIXME: We could do the same for bitconvert to floating point vectors. 
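+// For example, (v4i32 (scalar_to_vector (load addr))) can be selected as a
+// single LDR into the S sub-register of an otherwise undefined vector,
+// avoiding a GPR load followed by a copy into the vector register file.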
+multiclass ScalToVecROLoadPat<ROAddrMode ro, SDPatternOperator loadop, + ValueType ScalTy, ValueType VecTy, + Instruction LOADW, Instruction LOADX, + SubRegIndex sub> { + def : Pat<(VecTy (scalar_to_vector (ScalTy + (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))), + (INSERT_SUBREG (VecTy (IMPLICIT_DEF)), + (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset), + sub)>; + + def : Pat<(VecTy (scalar_to_vector (ScalTy + (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))), + (INSERT_SUBREG (VecTy (IMPLICIT_DEF)), + (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset), + sub)>; +} + +let AddedComplexity = 10 in { +defm : ScalToVecROLoadPat<ro8, extloadi8, i32, v8i8, LDRBroW, LDRBroX, bsub>; +defm : ScalToVecROLoadPat<ro8, extloadi8, i32, v16i8, LDRBroW, LDRBroX, bsub>; + +defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v4i16, LDRHroW, LDRHroX, hsub>; +defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v8i16, LDRHroW, LDRHroX, hsub>; + +defm : ScalToVecROLoadPat<ro16, load, i32, v4f16, LDRHroW, LDRHroX, hsub>; +defm : ScalToVecROLoadPat<ro16, load, i32, v8f16, LDRHroW, LDRHroX, hsub>; + +defm : ScalToVecROLoadPat<ro32, load, i32, v2i32, LDRSroW, LDRSroX, ssub>; +defm : ScalToVecROLoadPat<ro32, load, i32, v4i32, LDRSroW, LDRSroX, ssub>; + +defm : ScalToVecROLoadPat<ro32, load, f32, v2f32, LDRSroW, LDRSroX, ssub>; +defm : ScalToVecROLoadPat<ro32, load, f32, v4f32, LDRSroW, LDRSroX, ssub>; + +defm : ScalToVecROLoadPat<ro64, load, i64, v2i64, LDRDroW, LDRDroX, dsub>; + +defm : ScalToVecROLoadPat<ro64, load, f64, v2f64, LDRDroW, LDRDroX, dsub>; + + +def : Pat <(v1i64 (scalar_to_vector (i64 + (load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend64:$extend))))), + (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>; + +def : Pat <(v1i64 (scalar_to_vector (i64 + (load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend64:$extend))))), + (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>; +} + +// Match all load 64 bits width whose type is compatible with FPR64 +multiclass VecROLoadPat<ROAddrMode ro, ValueType VecTy, + Instruction LOADW, Instruction LOADX> { + + def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), + (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; + + def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), + (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; +} + +let AddedComplexity = 10 in { +let Predicates = [IsLE] in { + // We must do vector loads with LD1 in big-endian. + defm : VecROLoadPat<ro64, v2i32, LDRDroW, LDRDroX>; + defm : VecROLoadPat<ro64, v2f32, LDRDroW, LDRDroX>; + defm : VecROLoadPat<ro64, v8i8, LDRDroW, LDRDroX>; + defm : VecROLoadPat<ro64, v4i16, LDRDroW, LDRDroX>; + defm : VecROLoadPat<ro64, v4f16, LDRDroW, LDRDroX>; +} + +defm : VecROLoadPat<ro64, v1i64, LDRDroW, LDRDroX>; +defm : VecROLoadPat<ro64, v1f64, LDRDroW, LDRDroX>; + +// Match all load 128 bits width whose type is compatible with FPR128 +let Predicates = [IsLE] in { + // We must do vector loads with LD1 in big-endian. 
+ defm : VecROLoadPat<ro128, v2i64, LDRQroW, LDRQroX>; + defm : VecROLoadPat<ro128, v2f64, LDRQroW, LDRQroX>; + defm : VecROLoadPat<ro128, v4i32, LDRQroW, LDRQroX>; + defm : VecROLoadPat<ro128, v4f32, LDRQroW, LDRQroX>; + defm : VecROLoadPat<ro128, v8i16, LDRQroW, LDRQroX>; + defm : VecROLoadPat<ro128, v8f16, LDRQroW, LDRQroX>; + defm : VecROLoadPat<ro128, v16i8, LDRQroW, LDRQroX>; +} +} // AddedComplexity = 10 + +// zextload -> i64 +multiclass ExtLoadTo64ROPat<ROAddrMode ro, SDPatternOperator loadop, + Instruction INSTW, Instruction INSTX> { + def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), + (SUBREG_TO_REG (i64 0), + (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend), + sub_32)>; + + def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), + (SUBREG_TO_REG (i64 0), + (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend), + sub_32)>; +} + +let AddedComplexity = 10 in { + defm : ExtLoadTo64ROPat<ro8, zextloadi8, LDRBBroW, LDRBBroX>; + defm : ExtLoadTo64ROPat<ro16, zextloadi16, LDRHHroW, LDRHHroX>; + defm : ExtLoadTo64ROPat<ro32, zextloadi32, LDRWroW, LDRWroX>; + + // zextloadi1 -> zextloadi8 + defm : ExtLoadTo64ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>; + + // extload -> zextload + defm : ExtLoadTo64ROPat<ro8, extloadi8, LDRBBroW, LDRBBroX>; + defm : ExtLoadTo64ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>; + defm : ExtLoadTo64ROPat<ro32, extloadi32, LDRWroW, LDRWroX>; + + // extloadi1 -> zextloadi8 + defm : ExtLoadTo64ROPat<ro8, extloadi1, LDRBBroW, LDRBBroX>; +} + + +// zextload -> i32 +multiclass ExtLoadTo32ROPat<ROAddrMode ro, SDPatternOperator loadop, + Instruction INSTW, Instruction INSTX> { + def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), + (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; + + def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), + (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; + +} + +let AddedComplexity = 10 in { + // extload -> zextload + defm : ExtLoadTo32ROPat<ro8, extloadi8, LDRBBroW, LDRBBroX>; + defm : ExtLoadTo32ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>; + defm : ExtLoadTo32ROPat<ro32, extloadi32, LDRWroW, LDRWroX>; + + // zextloadi1 -> zextloadi8 + defm : ExtLoadTo32ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>; +} + +//--- +// (unsigned immediate) +//--- +defm LDRX : LoadUI<0b11, 0, 0b01, GPR64z, uimm12s8, "ldr", + [(set GPR64z:$Rt, + (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>; +defm LDRW : LoadUI<0b10, 0, 0b01, GPR32z, uimm12s4, "ldr", + [(set GPR32z:$Rt, + (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>; +defm LDRB : LoadUI<0b00, 1, 0b01, FPR8Op, uimm12s1, "ldr", + [(set FPR8Op:$Rt, + (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>; +defm LDRH : LoadUI<0b01, 1, 0b01, FPR16Op, uimm12s2, "ldr", + [(set (f16 FPR16Op:$Rt), + (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>; +defm LDRS : LoadUI<0b10, 1, 0b01, FPR32Op, uimm12s4, "ldr", + [(set (f32 FPR32Op:$Rt), + (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>; +defm LDRD : LoadUI<0b11, 1, 0b01, FPR64Op, uimm12s8, "ldr", + [(set (f64 FPR64Op:$Rt), + (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>; +defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128Op, uimm12s16, "ldr", + [(set (f128 FPR128Op:$Rt), + (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>; + +// For regular load, we do not have any alignment requirement. +// Thus, it is safe to directly map the vector loads with interesting +// addressing modes. +// FIXME: We could do the same for bitconvert to floating point vectors.
+def : Pat <(v8i8 (scalar_to_vector (i32 + (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), + (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), + (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; +def : Pat <(v16i8 (scalar_to_vector (i32 + (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; +def : Pat <(v4i16 (scalar_to_vector (i32 + (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), + (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), + (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; +def : Pat <(v8i16 (scalar_to_vector (i32 + (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), + (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), + (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; +def : Pat <(v2i32 (scalar_to_vector (i32 + (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), + (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), + (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>; +def : Pat <(v4i32 (scalar_to_vector (i32 + (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), + (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>; +def : Pat <(v1i64 (scalar_to_vector (i64 + (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; +def : Pat <(v2i64 (scalar_to_vector (i64 + (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))), + (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>; + +// Match all load 64 bits width whose type is compatible with FPR64 +let Predicates = [IsLE] in { + // We must use LD1 to perform vector loads in big-endian. + def : Pat<(v2f32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; + def : Pat<(v8i8 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; + def : Pat<(v4i16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; + def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; + def : Pat<(v4f16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; +} +def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; +def : Pat<(v1i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; + +// Match all load 128 bits width whose type is compatible with FPR128 +let Predicates = [IsLE] in { + // We must use LD1 to perform vector loads in big-endian. 
+ def : Pat<(v4f32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), + (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(v2f64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), + (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(v16i8 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), + (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(v8i16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), + (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(v4i32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), + (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), + (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(v8f16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), + (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; +} +def : Pat<(f128 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), + (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; + +defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh", + [(set GPR32:$Rt, + (zextloadi16 (am_indexed16 GPR64sp:$Rn, + uimm12s2:$offset)))]>; +defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb", + [(set GPR32:$Rt, + (zextloadi8 (am_indexed8 GPR64sp:$Rn, + uimm12s1:$offset)))]>; +// zextload -> i64 +def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), + (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; +def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), + (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>; + +// zextloadi1 -> zextloadi8 +def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), + (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; +def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), + (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; + +// extload -> zextload +def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), + (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>; +def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), + (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; +def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), + (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; +def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))), + (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>; +def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), + (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>; +def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), + (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; +def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), + (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; + +// load sign-extended half-word +defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh", + [(set GPR32:$Rt, + (sextloadi16 (am_indexed16 GPR64sp:$Rn, + uimm12s2:$offset)))]>; +defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh", + [(set GPR64:$Rt, + (sextloadi16 (am_indexed16 GPR64sp:$Rn, + uimm12s2:$offset)))]>; + +// load sign-extended byte +defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb", + [(set GPR32:$Rt, + (sextloadi8 (am_indexed8 GPR64sp:$Rn, + uimm12s1:$offset)))]>; +defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb", + [(set GPR64:$Rt, + (sextloadi8 (am_indexed8 GPR64sp:$Rn, + uimm12s1:$offset)))]>; + +// load sign-extended word 
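+// For illustration: LDRSW below handles the 32-to-64-bit sign-extending load;
+// the zero-extending i64 cases above need no extra instruction because any
+// write to a W register clears the upper 32 bits of the corresponding X
+// register, which is why those patterns only wrap the 32-bit load in
+// SUBREG_TO_REG.  A rough C sketch (hypothetical names; typical AArch64
+// compiler output shown):
+//
+//   #include <stdint.h>
+//   uint64_t zext_u8(const uint8_t *p)  { return *p; }  // -> ldrb  w0, [x0]
+//   int64_t  sext_s16(const int16_t *p) { return *p; }  // -> ldrsh x0, [x0]
+//   int64_t  sext_s32(const int32_t *p) { return *p; }  // -> ldrsw x0, [x0]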
+defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw", + [(set GPR64:$Rt, + (sextloadi32 (am_indexed32 GPR64sp:$Rn, + uimm12s4:$offset)))]>; + +// load zero-extended word +def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))), + (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>; + +// Pre-fetch. +def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm", + [(AArch64Prefetch imm:$Rt, + (am_indexed64 GPR64sp:$Rn, + uimm12s8:$offset))]>; + +def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>; + +//--- +// (literal) +def LDRWl : LoadLiteral<0b00, 0, GPR32z, "ldr">; +def LDRXl : LoadLiteral<0b01, 0, GPR64z, "ldr">; +def LDRSl : LoadLiteral<0b00, 1, FPR32Op, "ldr">; +def LDRDl : LoadLiteral<0b01, 1, FPR64Op, "ldr">; +def LDRQl : LoadLiteral<0b10, 1, FPR128Op, "ldr">; + +// load sign-extended word +def LDRSWl : LoadLiteral<0b10, 0, GPR64z, "ldrsw">; + +// prefetch +def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>; +// [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>; + +//--- +// (unscaled immediate) +defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64z, "ldur", + [(set GPR64z:$Rt, + (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>; +defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32z, "ldur", + [(set GPR32z:$Rt, + (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>; +defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8Op, "ldur", + [(set FPR8Op:$Rt, + (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>; +defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16Op, "ldur", + [(set FPR16Op:$Rt, + (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; +defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32Op, "ldur", + [(set (f32 FPR32Op:$Rt), + (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>; +defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64Op, "ldur", + [(set (f64 FPR64Op:$Rt), + (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>; +defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128Op, "ldur", + [(set (f128 FPR128Op:$Rt), + (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset)))]>; + +defm LDURHH + : LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh", + [(set GPR32:$Rt, + (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; +defm LDURBB + : LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb", + [(set GPR32:$Rt, + (zextloadi8 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; + +// Match all load 64 bits width whose type is compatible with FPR64 +let Predicates = [IsLE] in { + def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), + (LDURDi GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), + (LDURDi GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), + (LDURDi GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), + (LDURDi GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(v4f16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), + (LDURDi GPR64sp:$Rn, simm9:$offset)>; +} +def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), + (LDURDi GPR64sp:$Rn, simm9:$offset)>; +def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), + (LDURDi GPR64sp:$Rn, simm9:$offset)>; + +// Match all load 128 bits width whose type is compatible with FPR128 +let Predicates = [IsLE] in { + def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), + (LDURQi GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), + (LDURQi GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(v4f32 (load 
(am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
+            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
+  def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
+            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
+  def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
+            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
+  def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
+            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
+  def : Pat<(v8f16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
+            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
+}
+
+// anyext -> zext
+def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
+          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
+    (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
+    (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+// unscaled zext
+def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
+          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
+    (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
+    (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+
+
+//---
+// LDR mnemonics fall back to LDUR for negative or unaligned offsets.
+
+// Define new assembler match classes as we want to only match these when
+// they don't otherwise match the scaled addressing mode for LDR/STR. Don't
+// associate a DiagnosticType either, as we want the diagnostic for the
+// canonical form (the scaled operand) to take precedence.
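+// For illustration: the effect of this fallback machinery is assembler-level
+// only; an offset that the scaled LDR encoding cannot represent, e.g.
+// "ldr x0, [x1, #-8]", is still accepted and is encoded as LDUR.  At the
+// codegen level the same situation is handled by the am_unscaled* patterns
+// above.  A rough C sketch (hypothetical name; typical AArch64 compiler
+// output shown):
+//
+//   #include <stdint.h>
+//   int64_t load_prev(const int64_t *p) { return p[-1]; }
+//   // -> ldur x0, [x0, #-8]   (offset not representable as a scaled uimm12)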
+class SImm9OffsetOperand<int Width> : AsmOperandClass { + let Name = "SImm9OffsetFB" # Width; + let PredicateMethod = "isSImm9OffsetFB<" # Width # ">"; + let RenderMethod = "addImmOperands"; +} + +def SImm9OffsetFB8Operand : SImm9OffsetOperand<8>; +def SImm9OffsetFB16Operand : SImm9OffsetOperand<16>; +def SImm9OffsetFB32Operand : SImm9OffsetOperand<32>; +def SImm9OffsetFB64Operand : SImm9OffsetOperand<64>; +def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>; + +def simm9_offset_fb8 : Operand<i64> { + let ParserMatchClass = SImm9OffsetFB8Operand; +} +def simm9_offset_fb16 : Operand<i64> { + let ParserMatchClass = SImm9OffsetFB16Operand; +} +def simm9_offset_fb32 : Operand<i64> { + let ParserMatchClass = SImm9OffsetFB32Operand; +} +def simm9_offset_fb64 : Operand<i64> { + let ParserMatchClass = SImm9OffsetFB64Operand; +} +def simm9_offset_fb128 : Operand<i64> { + let ParserMatchClass = SImm9OffsetFB128Operand; +} + +def : InstAlias<"ldr $Rt, [$Rn, $offset]", + (LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; +def : InstAlias<"ldr $Rt, [$Rn, $offset]", + (LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; +def : InstAlias<"ldr $Rt, [$Rn, $offset]", + (LDURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; +def : InstAlias<"ldr $Rt, [$Rn, $offset]", + (LDURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; +def : InstAlias<"ldr $Rt, [$Rn, $offset]", + (LDURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; +def : InstAlias<"ldr $Rt, [$Rn, $offset]", + (LDURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; +def : InstAlias<"ldr $Rt, [$Rn, $offset]", + (LDURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>; + +// zextload -> i64 +def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), + (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; +def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), + (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>; + +// load sign-extended half-word +defm LDURSHW + : LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh", + [(set GPR32:$Rt, + (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; +defm LDURSHX + : LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh", + [(set GPR64:$Rt, + (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; + +// load sign-extended byte +defm LDURSBW + : LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb", + [(set GPR32:$Rt, + (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>; +defm LDURSBX + : LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb", + [(set GPR64:$Rt, + (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>; + +// load sign-extended word +defm LDURSW + : LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw", + [(set GPR64:$Rt, + (sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>; + +// zero and sign extending aliases from generic LDR* mnemonics to LDUR*. 
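+// For illustration: the unscaled extending loads above follow the same
+// scheme, e.g. (hypothetical name; typical AArch64 compiler output shown):
+//
+//   #include <stdint.h>
+//   int64_t load_prev_s8(const int8_t *p) { return p[-1]; }
+//   // -> ldursb x0, [x0, #-1]   (LDURSBXi via the am_unscaled8 pattern)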
+def : InstAlias<"ldrb $Rt, [$Rn, $offset]", + (LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; +def : InstAlias<"ldrh $Rt, [$Rn, $offset]", + (LDURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; +def : InstAlias<"ldrsb $Rt, [$Rn, $offset]", + (LDURSBWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; +def : InstAlias<"ldrsb $Rt, [$Rn, $offset]", + (LDURSBXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; +def : InstAlias<"ldrsh $Rt, [$Rn, $offset]", + (LDURSHWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; +def : InstAlias<"ldrsh $Rt, [$Rn, $offset]", + (LDURSHXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; +def : InstAlias<"ldrsw $Rt, [$Rn, $offset]", + (LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; + +// Pre-fetch. +defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum", + [(AArch64Prefetch imm:$Rt, + (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; + +//--- +// (unscaled immediate, unprivileged) +defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">; +defm LDTRW : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">; + +defm LDTRH : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">; +defm LDTRB : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">; + +// load sign-extended half-word +defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">; +defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">; + +// load sign-extended byte +defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">; +defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">; + +// load sign-extended word +defm LDTRSW : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">; + +//--- +// (immediate pre-indexed) +def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32z, "ldr">; +def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64z, "ldr">; +def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8Op, "ldr">; +def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16Op, "ldr">; +def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32Op, "ldr">; +def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64Op, "ldr">; +def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128Op, "ldr">; + +// load sign-extended half-word +def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32z, "ldrsh">; +def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64z, "ldrsh">; + +// load sign-extended byte +def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32z, "ldrsb">; +def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64z, "ldrsb">; + +// load zero-extended byte +def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32z, "ldrb">; +def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32z, "ldrh">; + +// load sign-extended word +def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64z, "ldrsw">; + +//--- +// (immediate post-indexed) +def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32z, "ldr">; +def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64z, "ldr">; +def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8Op, "ldr">; +def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16Op, "ldr">; +def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32Op, "ldr">; +def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64Op, "ldr">; +def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128Op, "ldr">; + +// load sign-extended half-word +def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32z, "ldrsh">; +def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64z, "ldrsh">; + +// load sign-extended byte +def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32z, "ldrsb">; +def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64z, "ldrsb">; + +// load zero-extended byte +def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32z, "ldrb">; +def LDRHHpost : LoadPostIdx<0b01, 0, 
0b01, GPR32z, "ldrh">; + +// load sign-extended word +def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64z, "ldrsw">; + +//===----------------------------------------------------------------------===// +// Store instructions. +//===----------------------------------------------------------------------===// + +// Pair (indexed, offset) +// FIXME: Use dedicated range-checked addressing mode operand here. +defm STPW : StorePairOffset<0b00, 0, GPR32z, simm7s4, "stp">; +defm STPX : StorePairOffset<0b10, 0, GPR64z, simm7s8, "stp">; +defm STPS : StorePairOffset<0b00, 1, FPR32Op, simm7s4, "stp">; +defm STPD : StorePairOffset<0b01, 1, FPR64Op, simm7s8, "stp">; +defm STPQ : StorePairOffset<0b10, 1, FPR128Op, simm7s16, "stp">; + +// Pair (pre-indexed) +def STPWpre : StorePairPreIdx<0b00, 0, GPR32z, simm7s4, "stp">; +def STPXpre : StorePairPreIdx<0b10, 0, GPR64z, simm7s8, "stp">; +def STPSpre : StorePairPreIdx<0b00, 1, FPR32Op, simm7s4, "stp">; +def STPDpre : StorePairPreIdx<0b01, 1, FPR64Op, simm7s8, "stp">; +def STPQpre : StorePairPreIdx<0b10, 1, FPR128Op, simm7s16, "stp">; + +// Pair (pre-indexed) +def STPWpost : StorePairPostIdx<0b00, 0, GPR32z, simm7s4, "stp">; +def STPXpost : StorePairPostIdx<0b10, 0, GPR64z, simm7s8, "stp">; +def STPSpost : StorePairPostIdx<0b00, 1, FPR32Op, simm7s4, "stp">; +def STPDpost : StorePairPostIdx<0b01, 1, FPR64Op, simm7s8, "stp">; +def STPQpost : StorePairPostIdx<0b10, 1, FPR128Op, simm7s16, "stp">; + +// Pair (no allocate) +defm STNPW : StorePairNoAlloc<0b00, 0, GPR32z, simm7s4, "stnp">; +defm STNPX : StorePairNoAlloc<0b10, 0, GPR64z, simm7s8, "stnp">; +defm STNPS : StorePairNoAlloc<0b00, 1, FPR32Op, simm7s4, "stnp">; +defm STNPD : StorePairNoAlloc<0b01, 1, FPR64Op, simm7s8, "stnp">; +defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128Op, simm7s16, "stnp">; + +//--- +// (Register offset) + +// Integer +defm STRBB : Store8RO< 0b00, 0, 0b00, GPR32, "strb", i32, truncstorei8>; +defm STRHH : Store16RO<0b01, 0, 0b00, GPR32, "strh", i32, truncstorei16>; +defm STRW : Store32RO<0b10, 0, 0b00, GPR32, "str", i32, store>; +defm STRX : Store64RO<0b11, 0, 0b00, GPR64, "str", i64, store>; + + +// Floating-point +defm STRB : Store8RO< 0b00, 1, 0b00, FPR8Op, "str", untyped, store>; +defm STRH : Store16RO<0b01, 1, 0b00, FPR16Op, "str", f16, store>; +defm STRS : Store32RO<0b10, 1, 0b00, FPR32Op, "str", f32, store>; +defm STRD : Store64RO<0b11, 1, 0b00, FPR64Op, "str", f64, store>; +defm STRQ : Store128RO<0b00, 1, 0b10, FPR128Op, "str", f128, store>; + +let Predicates = [UseSTRQro], AddedComplexity = 10 in { + def : Pat<(store (f128 FPR128:$Rt), + (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend128:$extend)), + (STRQroW FPR128:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend)>; + def : Pat<(store (f128 FPR128:$Rt), + (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend128:$extend)), + (STRQroX FPR128:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Wextend128:$extend)>; +} + +multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop, + Instruction STRW, Instruction STRX> { + + def : Pat<(storeop GPR64:$Rt, + (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)), + (STRW (EXTRACT_SUBREG GPR64:$Rt, sub_32), + GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; + + def : Pat<(storeop GPR64:$Rt, + (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)), + (STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32), + GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; +} + +let AddedComplexity = 10 in { + // truncstore i64 + defm : TruncStoreFrom64ROPat<ro8, truncstorei8, STRBBroW, STRBBroX>; + defm : TruncStoreFrom64ROPat<ro16, 
truncstorei16, STRHHroW, STRHHroX>; + defm : TruncStoreFrom64ROPat<ro32, truncstorei32, STRWroW, STRWroX>; +} + +multiclass VecROStorePat<ROAddrMode ro, ValueType VecTy, RegisterClass FPR, + Instruction STRW, Instruction STRX> { + def : Pat<(store (VecTy FPR:$Rt), + (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)), + (STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; + + def : Pat<(store (VecTy FPR:$Rt), + (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)), + (STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; +} + +let AddedComplexity = 10 in { +// Match all store 64 bits width whose type is compatible with FPR64 +let Predicates = [IsLE] in { + // We must use ST1 to store vectors in big-endian. + defm : VecROStorePat<ro64, v2i32, FPR64, STRDroW, STRDroX>; + defm : VecROStorePat<ro64, v2f32, FPR64, STRDroW, STRDroX>; + defm : VecROStorePat<ro64, v4i16, FPR64, STRDroW, STRDroX>; + defm : VecROStorePat<ro64, v8i8, FPR64, STRDroW, STRDroX>; + defm : VecROStorePat<ro64, v4f16, FPR64, STRDroW, STRDroX>; +} + +defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>; +defm : VecROStorePat<ro64, v1f64, FPR64, STRDroW, STRDroX>; + +// Match all store 128 bits width whose type is compatible with FPR128 +let Predicates = [IsLE, UseSTRQro] in { + // We must use ST1 to store vectors in big-endian. + defm : VecROStorePat<ro128, v2i64, FPR128, STRQroW, STRQroX>; + defm : VecROStorePat<ro128, v2f64, FPR128, STRQroW, STRQroX>; + defm : VecROStorePat<ro128, v4i32, FPR128, STRQroW, STRQroX>; + defm : VecROStorePat<ro128, v4f32, FPR128, STRQroW, STRQroX>; + defm : VecROStorePat<ro128, v8i16, FPR128, STRQroW, STRQroX>; + defm : VecROStorePat<ro128, v16i8, FPR128, STRQroW, STRQroX>; + defm : VecROStorePat<ro128, v8f16, FPR128, STRQroW, STRQroX>; +} +} // AddedComplexity = 10 + +// Match stores from lane 0 to the appropriate subreg's store. 
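+// For illustration: these lane-0 patterns let an element be stored with a
+// plain scalar FP store instead of ST1 or a GPR round-trip, since lane 0 of a
+// Q register is simply its H/S/D sub-register.  A rough sketch with NEON
+// intrinsics (hypothetical name; typical AArch64 compiler output shown):
+//
+//   #include <arm_neon.h>
+//   void store_lane0(float32x4_t v, float *p) { *p = vgetq_lane_f32(v, 0); }
+//   // -> str s0, [x0]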
+multiclass VecROStoreLane0Pat<ROAddrMode ro, SDPatternOperator storeop, + ValueType VecTy, ValueType STy, + SubRegIndex SubRegIdx, + Instruction STRW, Instruction STRX> { + + def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)), + (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)), + (STRW (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx), + GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; + + def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)), + (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)), + (STRX (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx), + GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; +} + +let AddedComplexity = 19 in { + defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, hsub, STRHroW, STRHroX>; + defm : VecROStoreLane0Pat<ro16, store, v8f16, f16, hsub, STRHroW, STRHroX>; + defm : VecROStoreLane0Pat<ro32, store, v4i32, i32, ssub, STRSroW, STRSroX>; + defm : VecROStoreLane0Pat<ro32, store, v4f32, f32, ssub, STRSroW, STRSroX>; + defm : VecROStoreLane0Pat<ro64, store, v2i64, i64, dsub, STRDroW, STRDroX>; + defm : VecROStoreLane0Pat<ro64, store, v2f64, f64, dsub, STRDroW, STRDroX>; +} + +//--- +// (unsigned immediate) +defm STRX : StoreUIz<0b11, 0, 0b00, GPR64z, uimm12s8, "str", + [(store GPR64z:$Rt, + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>; +defm STRW : StoreUIz<0b10, 0, 0b00, GPR32z, uimm12s4, "str", + [(store GPR32z:$Rt, + (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>; +defm STRB : StoreUI<0b00, 1, 0b00, FPR8Op, uimm12s1, "str", + [(store FPR8Op:$Rt, + (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>; +defm STRH : StoreUI<0b01, 1, 0b00, FPR16Op, uimm12s2, "str", + [(store (f16 FPR16Op:$Rt), + (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>; +defm STRS : StoreUI<0b10, 1, 0b00, FPR32Op, uimm12s4, "str", + [(store (f32 FPR32Op:$Rt), + (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>; +defm STRD : StoreUI<0b11, 1, 0b00, FPR64Op, uimm12s8, "str", + [(store (f64 FPR64Op:$Rt), + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>; +defm STRQ : StoreUI<0b00, 1, 0b10, FPR128Op, uimm12s16, "str", []>; + +defm STRHH : StoreUIz<0b01, 0, 0b00, GPR32z, uimm12s2, "strh", + [(truncstorei16 GPR32z:$Rt, + (am_indexed16 GPR64sp:$Rn, + uimm12s2:$offset))]>; +defm STRBB : StoreUIz<0b00, 0, 0b00, GPR32z, uimm12s1, "strb", + [(truncstorei8 GPR32z:$Rt, + (am_indexed8 GPR64sp:$Rn, + uimm12s1:$offset))]>; + +let AddedComplexity = 10 in { + +// Match all store 64 bits width whose type is compatible with FPR64 +def : Pat<(store (v1i64 FPR64:$Rt), + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), + (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; +def : Pat<(store (v1f64 FPR64:$Rt), + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), + (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; + +let Predicates = [IsLE] in { + // We must use ST1 to store vectors in big-endian. 
+ def : Pat<(store (v2f32 FPR64:$Rt), + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), + (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; + def : Pat<(store (v8i8 FPR64:$Rt), + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), + (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; + def : Pat<(store (v4i16 FPR64:$Rt), + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), + (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; + def : Pat<(store (v2i32 FPR64:$Rt), + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), + (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; + def : Pat<(store (v4f16 FPR64:$Rt), + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), + (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; +} + +// Match all store 128 bits width whose type is compatible with FPR128 +def : Pat<(store (f128 FPR128:$Rt), + (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), + (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; + +let Predicates = [IsLE] in { + // We must use ST1 to store vectors in big-endian. + def : Pat<(store (v4f32 FPR128:$Rt), + (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), + (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(store (v2f64 FPR128:$Rt), + (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), + (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(store (v16i8 FPR128:$Rt), + (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), + (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(store (v8i16 FPR128:$Rt), + (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), + (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(store (v4i32 FPR128:$Rt), + (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), + (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(store (v2i64 FPR128:$Rt), + (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), + (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(store (v8f16 FPR128:$Rt), + (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), + (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; +} + +// truncstore i64 +def : Pat<(truncstorei32 GPR64:$Rt, + (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)), + (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s4:$offset)>; +def : Pat<(truncstorei16 GPR64:$Rt, + (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)), + (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s2:$offset)>; +def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)), + (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s1:$offset)>; + +} // AddedComplexity = 10 + +// Match stores from lane 0 to the appropriate subreg's store. 
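+// For illustration, two things meet at this point.  First, the truncstore
+// patterns just above need no explicit truncation instruction: storing the
+// low 8/16/32 bits of an X register is done by storing its W sub-register
+// (EXTRACT_SUBREG ... sub_32), e.g. (hypothetical name; typical AArch64
+// compiler output shown):
+//
+//   #include <stdint.h>
+//   void store_low32(uint32_t *p, uint64_t x) { *p = (uint32_t)x; }
+//   // -> str w1, [x0]
+//
+// Second, the multiclass below mirrors VecROStoreLane0Pat for the scaled
+// unsigned-immediate addressing mode.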
+multiclass VecStoreLane0Pat<Operand UIAddrMode, SDPatternOperator storeop, + ValueType VTy, ValueType STy, + SubRegIndex SubRegIdx, Operand IndexType, + Instruction STR> { + def : Pat<(storeop (STy (vector_extract (VTy VecListOne128:$Vt), 0)), + (UIAddrMode GPR64sp:$Rn, IndexType:$offset)), + (STR (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx), + GPR64sp:$Rn, IndexType:$offset)>; +} + +let AddedComplexity = 19 in { + defm : VecStoreLane0Pat<am_indexed16, truncstorei16, v8i16, i32, hsub, uimm12s2, STRHui>; + defm : VecStoreLane0Pat<am_indexed16, store, v8f16, f16, hsub, uimm12s2, STRHui>; + defm : VecStoreLane0Pat<am_indexed32, store, v4i32, i32, ssub, uimm12s4, STRSui>; + defm : VecStoreLane0Pat<am_indexed32, store, v4f32, f32, ssub, uimm12s4, STRSui>; + defm : VecStoreLane0Pat<am_indexed64, store, v2i64, i64, dsub, uimm12s8, STRDui>; + defm : VecStoreLane0Pat<am_indexed64, store, v2f64, f64, dsub, uimm12s8, STRDui>; +} + +//--- +// (unscaled immediate) +defm STURX : StoreUnscaled<0b11, 0, 0b00, GPR64z, "stur", + [(store GPR64z:$Rt, + (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; +defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32z, "stur", + [(store GPR32z:$Rt, + (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>; +defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8Op, "stur", + [(store FPR8Op:$Rt, + (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>; +defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16Op, "stur", + [(store (f16 FPR16Op:$Rt), + (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>; +defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32Op, "stur", + [(store (f32 FPR32Op:$Rt), + (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>; +defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64Op, "stur", + [(store (f64 FPR64Op:$Rt), + (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; +defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128Op, "stur", + [(store (f128 FPR128Op:$Rt), + (am_unscaled128 GPR64sp:$Rn, simm9:$offset))]>; +defm STURHH : StoreUnscaled<0b01, 0, 0b00, GPR32z, "sturh", + [(truncstorei16 GPR32z:$Rt, + (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>; +defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32z, "sturb", + [(truncstorei8 GPR32z:$Rt, + (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>; + +// Armv8.4 LDAPR & STLR with Immediate Offset instruction +let Predicates = [HasV8_4a] in { +defm STLURB : BaseStoreUnscaleV84<"stlurb", 0b00, 0b00, GPR32>; +defm STLURH : BaseStoreUnscaleV84<"stlurh", 0b01, 0b00, GPR32>; +defm STLURW : BaseStoreUnscaleV84<"stlur", 0b10, 0b00, GPR32>; +defm STLURX : BaseStoreUnscaleV84<"stlur", 0b11, 0b00, GPR64>; +defm LDAPURB : BaseLoadUnscaleV84<"ldapurb", 0b00, 0b01, GPR32>; +defm LDAPURSBW : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b11, GPR32>; +defm LDAPURSBX : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b10, GPR64>; +defm LDAPURH : BaseLoadUnscaleV84<"ldapurh", 0b01, 0b01, GPR32>; +defm LDAPURSHW : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b11, GPR32>; +defm LDAPURSHX : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b10, GPR64>; +defm LDAPUR : BaseLoadUnscaleV84<"ldapur", 0b10, 0b01, GPR32>; +defm LDAPURSW : BaseLoadUnscaleV84<"ldapursw", 0b10, 0b10, GPR64>; +defm LDAPURX : BaseLoadUnscaleV84<"ldapur", 0b11, 0b01, GPR64>; +} + +// Match all store 64 bits width whose type is compatible with FPR64 +def : Pat<(store (v1f64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), + (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; +def : Pat<(store (v1i64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), + (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; + +let AddedComplexity = 10 in { + +let Predicates = [IsLE] in { 
+ // We must use ST1 to store vectors in big-endian. + def : Pat<(store (v2f32 FPR64:$Rt), + (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), + (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v8i8 FPR64:$Rt), + (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), + (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v4i16 FPR64:$Rt), + (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), + (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v2i32 FPR64:$Rt), + (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), + (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v4f16 FPR64:$Rt), + (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), + (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; +} + +// Match all store 128 bits width whose type is compatible with FPR128 +def : Pat<(store (f128 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), + (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; + +let Predicates = [IsLE] in { + // We must use ST1 to store vectors in big-endian. + def : Pat<(store (v4f32 FPR128:$Rt), + (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), + (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v2f64 FPR128:$Rt), + (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), + (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v16i8 FPR128:$Rt), + (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), + (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v8i16 FPR128:$Rt), + (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), + (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v4i32 FPR128:$Rt), + (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), + (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v2i64 FPR128:$Rt), + (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), + (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v2f64 FPR128:$Rt), + (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), + (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; + def : Pat<(store (v8f16 FPR128:$Rt), + (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), + (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; +} + +} // AddedComplexity = 10 + +// unscaled i64 truncating stores +def : Pat<(truncstorei32 GPR64:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset)), + (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>; +def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)), + (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>; +def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)), + (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>; + +// Match stores from lane 0 to the appropriate subreg's store. +multiclass VecStoreULane0Pat<SDPatternOperator StoreOp, + ValueType VTy, ValueType STy, + SubRegIndex SubRegIdx, Instruction STR> { + defm : VecStoreLane0Pat<am_unscaled128, StoreOp, VTy, STy, SubRegIdx, simm9, STR>; +} + +let AddedComplexity = 19 in { + defm : VecStoreULane0Pat<truncstorei16, v8i16, i32, hsub, STURHi>; + defm : VecStoreULane0Pat<store, v8f16, f16, hsub, STURHi>; + defm : VecStoreULane0Pat<store, v4i32, i32, ssub, STURSi>; + defm : VecStoreULane0Pat<store, v4f32, f32, ssub, STURSi>; + defm : VecStoreULane0Pat<store, v2i64, i64, dsub, STURDi>; + defm : VecStoreULane0Pat<store, v2f64, f64, dsub, STURDi>; +} + +//--- +// STR mnemonics fall back to STUR for negative or unaligned offsets. 
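+// For illustration, mirroring the LDR-to-LDUR case earlier: "str x0, [x1, #-8]"
+// assembles to STUR through these aliases, and at the codegen level
+// (hypothetical name; typical AArch64 compiler output shown):
+//
+//   #include <stdint.h>
+//   void store_prev(int64_t *p, int64_t v) { p[-1] = v; }
+//   // -> stur x1, [x0, #-8]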
+def : InstAlias<"str $Rt, [$Rn, $offset]", + (STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; +def : InstAlias<"str $Rt, [$Rn, $offset]", + (STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; +def : InstAlias<"str $Rt, [$Rn, $offset]", + (STURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; +def : InstAlias<"str $Rt, [$Rn, $offset]", + (STURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; +def : InstAlias<"str $Rt, [$Rn, $offset]", + (STURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; +def : InstAlias<"str $Rt, [$Rn, $offset]", + (STURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; +def : InstAlias<"str $Rt, [$Rn, $offset]", + (STURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>; + +def : InstAlias<"strb $Rt, [$Rn, $offset]", + (STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; +def : InstAlias<"strh $Rt, [$Rn, $offset]", + (STURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; + +//--- +// (unscaled immediate, unprivileged) +defm STTRW : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">; +defm STTRX : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">; + +defm STTRH : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">; +defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">; + +//--- +// (immediate pre-indexed) +def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32z, "str", pre_store, i32>; +def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64z, "str", pre_store, i64>; +def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op, "str", pre_store, untyped>; +def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16Op, "str", pre_store, f16>; +def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32Op, "str", pre_store, f32>; +def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64Op, "str", pre_store, f64>; +def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128Op, "str", pre_store, f128>; + +def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32z, "strb", pre_truncsti8, i32>; +def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32z, "strh", pre_truncsti16, i32>; + +// truncstore i64 +def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off), + (STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, + simm9:$off)>; +def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off), + (STRHHpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, + simm9:$off)>; +def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off), + (STRBBpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, + simm9:$off)>; + +def : Pat<(pre_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + +def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpre FPR128:$Rt, 
GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; + +//--- +// (immediate post-indexed) +def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32z, "str", post_store, i32>; +def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64z, "str", post_store, i64>; +def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op, "str", post_store, untyped>; +def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16Op, "str", post_store, f16>; +def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32Op, "str", post_store, f32>; +def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64Op, "str", post_store, f64>; +def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128Op, "str", post_store, f128>; + +def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32z, "strb", post_truncsti8, i32>; +def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32z, "strh", post_truncsti16, i32>; + +// truncstore i64 +def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off), + (STRWpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, + simm9:$off)>; +def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off), + (STRHHpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, + simm9:$off)>; +def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off), + (STRBBpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, + simm9:$off)>; + +def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + +def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, 
GPR64sp:$addr, simm9:$off)>; + +//===----------------------------------------------------------------------===// +// Load/store exclusive instructions. +//===----------------------------------------------------------------------===// + +def LDARW : LoadAcquire <0b10, 1, 1, 0, 1, GPR32, "ldar">; +def LDARX : LoadAcquire <0b11, 1, 1, 0, 1, GPR64, "ldar">; +def LDARB : LoadAcquire <0b00, 1, 1, 0, 1, GPR32, "ldarb">; +def LDARH : LoadAcquire <0b01, 1, 1, 0, 1, GPR32, "ldarh">; + +def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">; +def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">; +def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">; +def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">; + +def LDXRW : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">; +def LDXRX : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">; +def LDXRB : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">; +def LDXRH : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">; + +def STLRW : StoreRelease <0b10, 1, 0, 0, 1, GPR32, "stlr">; +def STLRX : StoreRelease <0b11, 1, 0, 0, 1, GPR64, "stlr">; +def STLRB : StoreRelease <0b00, 1, 0, 0, 1, GPR32, "stlrb">; +def STLRH : StoreRelease <0b01, 1, 0, 0, 1, GPR32, "stlrh">; + +def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">; +def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">; +def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">; +def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">; + +def STXRW : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">; +def STXRX : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">; +def STXRB : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">; +def STXRH : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">; + +def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">; +def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">; + +def LDXPW : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">; +def LDXPX : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">; + +def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">; +def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">; + +def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">; +def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">; + +let Predicates = [HasV8_1a] in { + // v8.1a "Limited Order Region" extension load-acquire instructions + def LDLARW : LoadAcquire <0b10, 1, 1, 0, 0, GPR32, "ldlar">; + def LDLARX : LoadAcquire <0b11, 1, 1, 0, 0, GPR64, "ldlar">; + def LDLARB : LoadAcquire <0b00, 1, 1, 0, 0, GPR32, "ldlarb">; + def LDLARH : LoadAcquire <0b01, 1, 1, 0, 0, GPR32, "ldlarh">; + + // v8.1a "Limited Order Region" extension store-release instructions + def STLLRW : StoreRelease <0b10, 1, 0, 0, 0, GPR32, "stllr">; + def STLLRX : StoreRelease <0b11, 1, 0, 0, 0, GPR64, "stllr">; + def STLLRB : StoreRelease <0b00, 1, 0, 0, 0, GPR32, "stllrb">; + def STLLRH : StoreRelease <0b01, 1, 0, 0, 0, GPR32, "stllrh">; +} + +//===----------------------------------------------------------------------===// +// Scaled floating point to integer conversion instructions. 
+//===----------------------------------------------------------------------===// + +defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>; +defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>; +defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>; +defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>; +defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>; +defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>; +defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>; +defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>; +defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", fp_to_sint>; +defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", fp_to_uint>; +defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", fp_to_sint>; +defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", fp_to_uint>; + +multiclass FPToIntegerIntPats<Intrinsic round, string INST> { + def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>; + def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # UXHr) $Rn)>; + def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # UWSr) $Rn)>; + def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # UXSr) $Rn)>; + def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>; + def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>; + + def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))), + (!cast<Instruction>(INST # SWHri) $Rn, $scale)>; + def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))), + (!cast<Instruction>(INST # SXHri) $Rn, $scale)>; + def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))), + (!cast<Instruction>(INST # SWSri) $Rn, $scale)>; + def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))), + (!cast<Instruction>(INST # SXSri) $Rn, $scale)>; + def : Pat<(i32 (round (fmul f64:$Rn, fixedpoint_f64_i32:$scale))), + (!cast<Instruction>(INST # SWDri) $Rn, $scale)>; + def : Pat<(i64 (round (fmul f64:$Rn, fixedpoint_f64_i64:$scale))), + (!cast<Instruction>(INST # SXDri) $Rn, $scale)>; +} + +defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">; +defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">; + +multiclass FPToIntegerPats<SDNode to_int, SDNode round, string INST> { + def : Pat<(i32 (to_int (round f32:$Rn))), + (!cast<Instruction>(INST # UWSr) f32:$Rn)>; + def : Pat<(i64 (to_int (round f32:$Rn))), + (!cast<Instruction>(INST # UXSr) f32:$Rn)>; + def : Pat<(i32 (to_int (round f64:$Rn))), + (!cast<Instruction>(INST # UWDr) f64:$Rn)>; + def : Pat<(i64 (to_int (round f64:$Rn))), + (!cast<Instruction>(INST # UXDr) f64:$Rn)>; +} + +defm : FPToIntegerPats<fp_to_sint, fceil, "FCVTPS">; +defm : FPToIntegerPats<fp_to_uint, fceil, "FCVTPU">; +defm : FPToIntegerPats<fp_to_sint, ffloor, "FCVTMS">; +defm : FPToIntegerPats<fp_to_uint, ffloor, "FCVTMU">; +defm : FPToIntegerPats<fp_to_sint, ftrunc, "FCVTZS">; +defm : FPToIntegerPats<fp_to_uint, ftrunc, "FCVTZU">; +defm : FPToIntegerPats<fp_to_sint, fround, "FCVTAS">; +defm : FPToIntegerPats<fp_to_uint, fround, "FCVTAU">; + +//===----------------------------------------------------------------------===// +// Scaled integer to floating point conversion instructions. 
+//===----------------------------------------------------------------------===// + +defm SCVTF : IntegerToFP<0, "scvtf", sint_to_fp>; +defm UCVTF : IntegerToFP<1, "ucvtf", uint_to_fp>; + +//===----------------------------------------------------------------------===// +// Unscaled integer to floating point conversion instruction. +//===----------------------------------------------------------------------===// + +defm FMOV : UnscaledConversion<"fmov">; + +// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable +let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in { +def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>, + Sched<[WriteF]>, Requires<[HasFullFP16]>; +def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>, + Sched<[WriteF]>; +def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>, + Sched<[WriteF]>; +} +// Similarly add aliases +def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>, + Requires<[HasFullFP16]>; +def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>; +def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>; + +//===----------------------------------------------------------------------===// +// Floating point conversion instruction. +//===----------------------------------------------------------------------===// + +defm FCVT : FPConversion<"fcvt">; + +//===----------------------------------------------------------------------===// +// Floating point single operand instructions. +//===----------------------------------------------------------------------===// + +defm FABS : SingleOperandFPData<0b0001, "fabs", fabs>; +defm FMOV : SingleOperandFPData<0b0000, "fmov">; +defm FNEG : SingleOperandFPData<0b0010, "fneg", fneg>; +defm FRINTA : SingleOperandFPData<0b1100, "frinta", fround>; +defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>; +defm FRINTM : SingleOperandFPData<0b1010, "frintm", ffloor>; +defm FRINTN : SingleOperandFPData<0b1000, "frintn", int_aarch64_neon_frintn>; +defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>; + +def : Pat<(v1f64 (int_aarch64_neon_frintn (v1f64 FPR64:$Rn))), + (FRINTNDr FPR64:$Rn)>; + +defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>; +defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>; + +let SchedRW = [WriteFDiv] in { +defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", fsqrt>; +} + +//===----------------------------------------------------------------------===// +// Floating point two operand instructions. 
+//===----------------------------------------------------------------------===// + +defm FADD : TwoOperandFPData<0b0010, "fadd", fadd>; +let SchedRW = [WriteFDiv] in { +defm FDIV : TwoOperandFPData<0b0001, "fdiv", fdiv>; +} +defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", fmaxnum>; +defm FMAX : TwoOperandFPData<0b0100, "fmax", fmaxnan>; +defm FMINNM : TwoOperandFPData<0b0111, "fminnm", fminnum>; +defm FMIN : TwoOperandFPData<0b0101, "fmin", fminnan>; +let SchedRW = [WriteFMul] in { +defm FMUL : TwoOperandFPData<0b0000, "fmul", fmul>; +defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>; +} +defm FSUB : TwoOperandFPData<0b0011, "fsub", fsub>; + +def : Pat<(v1f64 (fmaxnan (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), + (FMAXDrr FPR64:$Rn, FPR64:$Rm)>; +def : Pat<(v1f64 (fminnan (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), + (FMINDrr FPR64:$Rn, FPR64:$Rm)>; +def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), + (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>; +def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), + (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>; + +//===----------------------------------------------------------------------===// +// Floating point three operand instructions. +//===----------------------------------------------------------------------===// + +defm FMADD : ThreeOperandFPData<0, 0, "fmadd", fma>; +defm FMSUB : ThreeOperandFPData<0, 1, "fmsub", + TriOpFrag<(fma node:$LHS, (fneg node:$MHS), node:$RHS)> >; +defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd", + TriOpFrag<(fneg (fma node:$LHS, node:$MHS, node:$RHS))> >; +defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub", + TriOpFrag<(fma node:$LHS, node:$MHS, (fneg node:$RHS))> >; + +// The following def pats catch the case where the LHS of an FMA is negated. +// The TriOpFrag above catches the case where the middle operand is negated. + +// N.b. FMSUB etc have the accumulator at the *end* of (outs), unlike +// the NEON variant. +def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)), + (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; + +def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)), + (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; + +// We handled -(a + b*c) for FNMADD above, now it's time for "(-a) + (-b)*c" and +// "(-a) + b*(-c)". +def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))), + (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; + +def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))), + (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; + +def : Pat<(f32 (fma FPR32:$Rn, (fneg FPR32:$Rm), (fneg FPR32:$Ra))), + (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; + +def : Pat<(f64 (fma FPR64:$Rn, (fneg FPR64:$Rm), (fneg FPR64:$Ra))), + (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; + +//===----------------------------------------------------------------------===// +// Floating point comparison instructions. +//===----------------------------------------------------------------------===// + +defm FCMPE : FPComparison<1, "fcmpe">; +defm FCMP : FPComparison<0, "fcmp", AArch64fcmp>; + +//===----------------------------------------------------------------------===// +// Floating point conditional comparison instructions. +//===----------------------------------------------------------------------===// + +defm FCCMPE : FPCondComparison<1, "fccmpe">; +defm FCCMP : FPCondComparison<0, "fccmp", AArch64fccmp>; + +//===----------------------------------------------------------------------===// +// Floating point conditional select instruction. 
+//===----------------------------------------------------------------------===// + +defm FCSEL : FPCondSelect<"fcsel">; + +// CSEL instructions providing f128 types need to be handled by a +// pseudo-instruction since the eventual code will need to introduce basic +// blocks and control flow. +def F128CSEL : Pseudo<(outs FPR128:$Rd), + (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond), + [(set (f128 FPR128:$Rd), + (AArch64csel FPR128:$Rn, FPR128:$Rm, + (i32 imm:$cond), NZCV))]> { + let Uses = [NZCV]; + let usesCustomInserter = 1; + let hasNoSchedulingInfo = 1; +} + + +//===----------------------------------------------------------------------===// +// Floating point immediate move. +//===----------------------------------------------------------------------===// + +let isReMaterializable = 1 in { +defm FMOV : FPMoveImmediate<"fmov">; +} + +//===----------------------------------------------------------------------===// +// Advanced SIMD two vector instructions. +//===----------------------------------------------------------------------===// + +defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl", + int_aarch64_neon_uabd>; +// Match UABDL in log2-shuffle patterns. +def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)), + (zext (v8i8 V64:$opB))))), + (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>; +def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))), + (v8i16 (add (sub (zext (v8i8 V64:$opA)), + (zext (v8i8 V64:$opB))), + (AArch64vashr v8i16:$src, (i32 15))))), + (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>; +def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 V128:$opA)), + (zext (extract_high_v16i8 V128:$opB))))), + (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>; +def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))), + (v8i16 (add (sub (zext (extract_high_v16i8 V128:$opA)), + (zext (extract_high_v16i8 V128:$opB))), + (AArch64vashr v8i16:$src, (i32 15))))), + (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>; +def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)), + (zext (v4i16 V64:$opB))))), + (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>; +def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 V128:$opA)), + (zext (extract_high_v8i16 V128:$opB))))), + (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>; +def : Pat<(abs (v2i64 (sub (zext (v2i32 V64:$opA)), + (zext (v2i32 V64:$opB))))), + (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>; +def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 V128:$opA)), + (zext (extract_high_v4i32 V128:$opB))))), + (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>; + +defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", abs>; +defm CLS : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>; +defm CLZ : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>; +defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>; +defm CMGE : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>; +defm CMGT : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>; +defm CMLE : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>; +defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>; +defm CNT : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>; +defm FABS : SIMDTwoVectorFP<0, 1, 0b01111, "fabs", fabs>; + +defm FCMEQ : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>; +defm FCMGE : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>; +defm FCMGT : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>; +defm FCMLE : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>; +defm FCMLT : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>; +defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, 
"fcvtas",int_aarch64_neon_fcvtas>; +defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>; +defm FCVTL : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">; +def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))), + (FCVTLv4i16 V64:$Rn)>; +def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn), + (i64 4)))), + (FCVTLv8i16 V128:$Rn)>; +def : Pat<(v2f64 (fpextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>; +def : Pat<(v2f64 (fpextend (v2f32 (extract_subvector (v4f32 V128:$Rn), + (i64 2))))), + (FCVTLv4i32 V128:$Rn)>; + +def : Pat<(v4f32 (fpextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>; +def : Pat<(v4f32 (fpextend (v4f16 (extract_subvector (v8f16 V128:$Rn), + (i64 4))))), + (FCVTLv8i16 V128:$Rn)>; + +defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>; +defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>; +defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>; +defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>; +defm FCVTN : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">; +def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))), + (FCVTNv4i16 V128:$Rn)>; +def : Pat<(concat_vectors V64:$Rd, + (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))), + (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; +def : Pat<(v2f32 (fpround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>; +def : Pat<(v4f16 (fpround (v4f32 V128:$Rn))), (FCVTNv4i16 V128:$Rn)>; +def : Pat<(concat_vectors V64:$Rd, (v2f32 (fpround (v2f64 V128:$Rn)))), + (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; +defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>; +defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>; +defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn", + int_aarch64_neon_fcvtxn>; +defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", fp_to_sint>; +defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", fp_to_uint>; + +def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>; +def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>; +def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>; +def : Pat<(v4i32 (int_aarch64_neon_fcvtzs v4f32:$Rn)), (FCVTZSv4f32 $Rn)>; +def : Pat<(v2i64 (int_aarch64_neon_fcvtzs v2f64:$Rn)), (FCVTZSv2f64 $Rn)>; + +def : Pat<(v4i16 (int_aarch64_neon_fcvtzu v4f16:$Rn)), (FCVTZUv4f16 $Rn)>; +def : Pat<(v8i16 (int_aarch64_neon_fcvtzu v8f16:$Rn)), (FCVTZUv8f16 $Rn)>; +def : Pat<(v2i32 (int_aarch64_neon_fcvtzu v2f32:$Rn)), (FCVTZUv2f32 $Rn)>; +def : Pat<(v4i32 (int_aarch64_neon_fcvtzu v4f32:$Rn)), (FCVTZUv4f32 $Rn)>; +def : Pat<(v2i64 (int_aarch64_neon_fcvtzu v2f64:$Rn)), (FCVTZUv2f64 $Rn)>; + +defm FNEG : SIMDTwoVectorFP<1, 1, 0b01111, "fneg", fneg>; +defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>; +defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", fround>; +defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>; +defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", ffloor>; +defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", int_aarch64_neon_frintn>; +defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>; +defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>; +defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>; +defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", 
int_aarch64_neon_frsqrte>; +defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", fsqrt>; +defm NEG : SIMDTwoVectorBHSD<1, 0b01011, "neg", + UnOpFrag<(sub immAllZerosV, node:$LHS)> >; +defm NOT : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>; +// Aliases for MVN -> NOT. +def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}", + (NOTv8i8 V64:$Vd, V64:$Vn)>; +def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}", + (NOTv16i8 V128:$Vd, V128:$Vn)>; + +def : Pat<(AArch64neg (v8i8 V64:$Rn)), (NEGv8i8 V64:$Rn)>; +def : Pat<(AArch64neg (v16i8 V128:$Rn)), (NEGv16i8 V128:$Rn)>; +def : Pat<(AArch64neg (v4i16 V64:$Rn)), (NEGv4i16 V64:$Rn)>; +def : Pat<(AArch64neg (v8i16 V128:$Rn)), (NEGv8i16 V128:$Rn)>; +def : Pat<(AArch64neg (v2i32 V64:$Rn)), (NEGv2i32 V64:$Rn)>; +def : Pat<(AArch64neg (v4i32 V128:$Rn)), (NEGv4i32 V128:$Rn)>; +def : Pat<(AArch64neg (v2i64 V128:$Rn)), (NEGv2i64 V128:$Rn)>; + +def : Pat<(AArch64not (v8i8 V64:$Rn)), (NOTv8i8 V64:$Rn)>; +def : Pat<(AArch64not (v16i8 V128:$Rn)), (NOTv16i8 V128:$Rn)>; +def : Pat<(AArch64not (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>; +def : Pat<(AArch64not (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>; +def : Pat<(AArch64not (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>; +def : Pat<(AArch64not (v1i64 V64:$Rn)), (NOTv8i8 V64:$Rn)>; +def : Pat<(AArch64not (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>; +def : Pat<(AArch64not (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>; + +def : Pat<(vnot (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>; +def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>; +def : Pat<(vnot (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>; +def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>; +def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>; + +defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", int_aarch64_neon_rbit>; +defm REV16 : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>; +defm REV32 : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>; +defm REV64 : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>; +defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp", + BinOpFrag<(add node:$LHS, (int_aarch64_neon_saddlp node:$RHS))> >; +defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", int_aarch64_neon_saddlp>; +defm SCVTF : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", sint_to_fp>; +defm SHLL : SIMDVectorLShiftLongBySizeBHS; +defm SQABS : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>; +defm SQNEG : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>; +defm SQXTN : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>; +defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>; +defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>; +defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp", + BinOpFrag<(add node:$LHS, (int_aarch64_neon_uaddlp node:$RHS))> >; +defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", + int_aarch64_neon_uaddlp>; +defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", uint_to_fp>; +defm UQXTN : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>; +defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>; +defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>; +defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>; +defm XTN : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>; + +def : Pat<(v4f16 (AArch64rev32 V64:$Rn)), (REV32v4i16 V64:$Rn)>; +def : Pat<(v4f16 (AArch64rev64 V64:$Rn)), (REV64v4i16 V64:$Rn)>; +def : Pat<(v8f16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>; 
+def : Pat<(v8f16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>; +def : Pat<(v2f32 (AArch64rev64 V64:$Rn)), (REV64v2i32 V64:$Rn)>; +def : Pat<(v4f32 (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>; + +// Patterns for vector long shift (by element width). These need to match all +// three of zext, sext and anyext so it's easier to pull the patterns out of the +// definition. +multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> { + def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)), + (SHLLv8i8 V64:$Rn)>; + def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 V128:$Rn))), (i32 8)), + (SHLLv16i8 V128:$Rn)>; + def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)), + (SHLLv4i16 V64:$Rn)>; + def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 V128:$Rn))), (i32 16)), + (SHLLv8i16 V128:$Rn)>; + def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)), + (SHLLv2i32 V64:$Rn)>; + def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 V128:$Rn))), (i32 32)), + (SHLLv4i32 V128:$Rn)>; +} + +defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>; +defm : SIMDVectorLShiftLongBySizeBHSPats<zext>; +defm : SIMDVectorLShiftLongBySizeBHSPats<sext>; + +//===----------------------------------------------------------------------===// +// Advanced SIMD three vector instructions. +//===----------------------------------------------------------------------===// + +defm ADD : SIMDThreeSameVector<0, 0b10000, "add", add>; +defm ADDP : SIMDThreeSameVector<0, 0b10111, "addp", int_aarch64_neon_addp>; +defm CMEQ : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>; +defm CMGE : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>; +defm CMGT : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>; +defm CMHI : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>; +defm CMHS : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>; +defm CMTST : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>; +defm FABD : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>; +let Predicates = [HasNEON] in { +foreach VT = [ v2f32, v4f32, v2f64 ] in +def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>; +} +let Predicates = [HasNEON, HasFullFP16] in { +foreach VT = [ v4f16, v8f16 ] in +def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>; +} +defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>; +defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>; +defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_addp>; +defm FADD : SIMDThreeSameVectorFP<0,0,0b010,"fadd", fadd>; +defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>; +defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>; +defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; +defm FDIV : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", fdiv>; +defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>; +defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", fmaxnum>; +defm FMAXP : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>; +defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", fmaxnan>; +defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>; +defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", fminnum>; +defm FMINP : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>; +defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", fminnan>; + +// 
NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the +// instruction expects the addend first, while the fma intrinsic puts it last. +defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla", + TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >; +defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls", + TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; + +// The following def pats catch the case where the LHS of an FMA is negated. +// The TriOpFrag above catches the case where the middle operand is negated. +def : Pat<(v2f32 (fma (fneg V64:$Rn), V64:$Rm, V64:$Rd)), + (FMLSv2f32 V64:$Rd, V64:$Rn, V64:$Rm)>; + +def : Pat<(v4f32 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)), + (FMLSv4f32 V128:$Rd, V128:$Rn, V128:$Rm)>; + +def : Pat<(v2f64 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)), + (FMLSv2f64 V128:$Rd, V128:$Rn, V128:$Rm)>; + +defm FMULX : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>; +defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", fmul>; +defm FRECPS : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>; +defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>; +defm FSUB : SIMDThreeSameVectorFP<0,1,0b010,"fsub", fsub>; +defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", + TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))> >; +defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", + TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))> >; +defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>; +defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>; +defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba", + TriOpFrag<(add node:$LHS, (int_aarch64_neon_sabd node:$MHS, node:$RHS))> >; +defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_aarch64_neon_sabd>; +defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_aarch64_neon_shadd>; +defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>; +defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>; +defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>; +defm SMINP : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>; +defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", smin>; +defm SQADD : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>; +defm SQDMULH : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>; +defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>; +defm SQRSHL : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>; +defm SQSHL : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>; +defm SQSUB : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>; +defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd",int_aarch64_neon_srhadd>; +defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>; +defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>; +defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>; +defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba", + TriOpFrag<(add node:$LHS, (int_aarch64_neon_uabd node:$MHS, node:$RHS))> >; +defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_aarch64_neon_uabd>; +defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_aarch64_neon_uhadd>; +defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>; +defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>; +defm UMAX : 
SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>; +defm UMINP : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>; +defm UMIN : SIMDThreeSameVectorBHS<1,0b01101,"umin", umin>; +defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>; +defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>; +defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>; +defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>; +defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_aarch64_neon_urhadd>; +defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>; +defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>; +defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah", + int_aarch64_neon_sqadd>; +defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh", + int_aarch64_neon_sqsub>; + +defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>; +defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic", + BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >; +defm BIF : SIMDLogicalThreeVector<1, 0b11, "bif">; +defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", AArch64bit>; +defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl", + TriOpFrag<(or (and node:$LHS, node:$MHS), (and (vnot node:$LHS), node:$RHS))>>; +defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>; +defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn", + BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >; +defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>; + + +def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm), + (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; +def : Pat<(AArch64bsl (v4i16 V64:$Rd), V64:$Rn, V64:$Rm), + (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; +def : Pat<(AArch64bsl (v2i32 V64:$Rd), V64:$Rn, V64:$Rm), + (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; +def : Pat<(AArch64bsl (v1i64 V64:$Rd), V64:$Rn, V64:$Rm), + (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; + +def : Pat<(AArch64bsl (v16i8 V128:$Rd), V128:$Rn, V128:$Rm), + (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; +def : Pat<(AArch64bsl (v8i16 V128:$Rd), V128:$Rn, V128:$Rm), + (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; +def : Pat<(AArch64bsl (v4i32 V128:$Rd), V128:$Rn, V128:$Rm), + (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; +def : Pat<(AArch64bsl (v2i64 V128:$Rd), V128:$Rn, V128:$Rm), + (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; + +def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}", + (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>; +def : InstAlias<"mov{\t$dst.8h, $src.8h|.8h\t$dst, $src}", + (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>; +def : InstAlias<"mov{\t$dst.4s, $src.4s|.4s\t$dst, $src}", + (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>; +def : InstAlias<"mov{\t$dst.2d, $src.2d|.2d\t$dst, $src}", + (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>; + +def : InstAlias<"mov{\t$dst.8b, $src.8b|.8b\t$dst, $src}", + (ORRv8i8 V64:$dst, V64:$src, V64:$src), 1>; +def : InstAlias<"mov{\t$dst.4h, $src.4h|.4h\t$dst, $src}", + (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>; +def : InstAlias<"mov{\t$dst.2s, $src.2s|.2s\t$dst, $src}", + (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>; +def : InstAlias<"mov{\t$dst.1d, $src.1d|.1d\t$dst, $src}", + (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>; + +def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" # + "|cmls.8b\t$dst, $src1, $src2}", + (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" # + "|cmls.16b\t$dst, $src1, $src2}", + (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 
0>; +def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" # + "|cmls.4h\t$dst, $src1, $src2}", + (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" # + "|cmls.8h\t$dst, $src1, $src2}", + (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" # + "|cmls.2s\t$dst, $src1, $src2}", + (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" # + "|cmls.4s\t$dst, $src1, $src2}", + (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" # + "|cmls.2d\t$dst, $src1, $src2}", + (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; + +def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" # + "|cmlo.8b\t$dst, $src1, $src2}", + (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" # + "|cmlo.16b\t$dst, $src1, $src2}", + (CMHIv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" # + "|cmlo.4h\t$dst, $src1, $src2}", + (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" # + "|cmlo.8h\t$dst, $src1, $src2}", + (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" # + "|cmlo.2s\t$dst, $src1, $src2}", + (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" # + "|cmlo.4s\t$dst, $src1, $src2}", + (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" # + "|cmlo.2d\t$dst, $src1, $src2}", + (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; + +def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" # + "|cmle.8b\t$dst, $src1, $src2}", + (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" # + "|cmle.16b\t$dst, $src1, $src2}", + (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" # + "|cmle.4h\t$dst, $src1, $src2}", + (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" # + "|cmle.8h\t$dst, $src1, $src2}", + (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" # + "|cmle.2s\t$dst, $src1, $src2}", + (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" # + "|cmle.4s\t$dst, $src1, $src2}", + (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" # + "|cmle.2d\t$dst, $src1, $src2}", + (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; + +def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" # + "|cmlt.8b\t$dst, $src1, $src2}", + (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" # + "|cmlt.16b\t$dst, $src1, $src2}", + (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" # + "|cmlt.4h\t$dst, $src1, $src2}", + (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" # + "|cmlt.8h\t$dst, $src1, $src2}", + (CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" # + "|cmlt.2s\t$dst, $src1, $src2}", + (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" # + "|cmlt.4s\t$dst, $src1, $src2}", + (CMGTv4i32 
V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" # + "|cmlt.2d\t$dst, $src1, $src2}", + (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; + +let Predicates = [HasNEON, HasFullFP16] in { +def : InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" # + "|fcmle.4h\t$dst, $src1, $src2}", + (FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{fcmle\t$dst.8h, $src1.8h, $src2.8h" # + "|fcmle.8h\t$dst, $src1, $src2}", + (FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; +} +def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" # + "|fcmle.2s\t$dst, $src1, $src2}", + (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" # + "|fcmle.4s\t$dst, $src1, $src2}", + (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" # + "|fcmle.2d\t$dst, $src1, $src2}", + (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; + +let Predicates = [HasNEON, HasFullFP16] in { +def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" # + "|fcmlt.4h\t$dst, $src1, $src2}", + (FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" # + "|fcmlt.8h\t$dst, $src1, $src2}", + (FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; +} +def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" # + "|fcmlt.2s\t$dst, $src1, $src2}", + (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" # + "|fcmlt.4s\t$dst, $src1, $src2}", + (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" # + "|fcmlt.2d\t$dst, $src1, $src2}", + (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; + +let Predicates = [HasNEON, HasFullFP16] in { +def : InstAlias<"{facle\t$dst.4h, $src1.4h, $src2.4h" # + "|facle.4h\t$dst, $src1, $src2}", + (FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{facle\t$dst.8h, $src1.8h, $src2.8h" # + "|facle.8h\t$dst, $src1, $src2}", + (FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; +} +def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" # + "|facle.2s\t$dst, $src1, $src2}", + (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" # + "|facle.4s\t$dst, $src1, $src2}", + (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" # + "|facle.2d\t$dst, $src1, $src2}", + (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; + +let Predicates = [HasNEON, HasFullFP16] in { +def : InstAlias<"{faclt\t$dst.4h, $src1.4h, $src2.4h" # + "|faclt.4h\t$dst, $src1, $src2}", + (FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{faclt\t$dst.8h, $src1.8h, $src2.8h" # + "|faclt.8h\t$dst, $src1, $src2}", + (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; +} +def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" # + "|faclt.2s\t$dst, $src1, $src2}", + (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" # + "|faclt.4s\t$dst, $src1, $src2}", + (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; +def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" # + "|faclt.2d\t$dst, $src1, $src2}", + (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; + +//===----------------------------------------------------------------------===// +// Advanced SIMD three scalar instructions. 
+//===----------------------------------------------------------------------===// + +defm ADD : SIMDThreeScalarD<0, 0b10000, "add", add>; +defm CMEQ : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>; +defm CMGE : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>; +defm CMGT : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>; +defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>; +defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>; +defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>; +defm FABD : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>; +def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), + (FABD64 FPR64:$Rn, FPR64:$Rm)>; +let Predicates = [HasFullFP16] in { +def : Pat<(fabs (fsub f16:$Rn, f16:$Rm)), (FABD16 f16:$Rn, f16:$Rm)>; +} +def : Pat<(fabs (fsub f32:$Rn, f32:$Rm)), (FABD32 f32:$Rn, f32:$Rm)>; +def : Pat<(fabs (fsub f64:$Rn, f64:$Rm)), (FABD64 f64:$Rn, f64:$Rm)>; +defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge", + int_aarch64_neon_facge>; +defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt", + int_aarch64_neon_facgt>; +defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>; +defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>; +defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; +defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx>; +defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps>; +defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts>; +defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>; +defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>; +defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>; +defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>; +defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>; +defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>; +defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", int_aarch64_neon_srshl>; +defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_aarch64_neon_sshl>; +defm SUB : SIMDThreeScalarD< 1, 0b10000, "sub", sub>; +defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>; +defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>; +defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>; +defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>; +defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>; +defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>; +let Predicates = [HasRDM] in { + defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">; + defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">; + def : Pat<(i32 (int_aarch64_neon_sqadd + (i32 FPR32:$Rd), + (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn), + (i32 FPR32:$Rm))))), + (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>; + def : Pat<(i32 (int_aarch64_neon_sqsub + (i32 FPR32:$Rd), + (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn), + (i32 FPR32:$Rm))))), + (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>; +} + +def : InstAlias<"cmls $dst, $src1, $src2", + (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; +def : InstAlias<"cmle $dst, $src1, $src2", + (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; +def : InstAlias<"cmlo $dst, 
$src1, $src2", + (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; +def : InstAlias<"cmlt $dst, $src1, $src2", + (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; +def : InstAlias<"fcmle $dst, $src1, $src2", + (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; +def : InstAlias<"fcmle $dst, $src1, $src2", + (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; +def : InstAlias<"fcmlt $dst, $src1, $src2", + (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; +def : InstAlias<"fcmlt $dst, $src1, $src2", + (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; +def : InstAlias<"facle $dst, $src1, $src2", + (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; +def : InstAlias<"facle $dst, $src1, $src2", + (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; +def : InstAlias<"faclt $dst, $src1, $src2", + (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; +def : InstAlias<"faclt $dst, $src1, $src2", + (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; + +//===----------------------------------------------------------------------===// +// Advanced SIMD three scalar instructions (mixed operands). +//===----------------------------------------------------------------------===// +defm SQDMULL : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull", + int_aarch64_neon_sqdmulls_scalar>; +defm SQDMLAL : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">; +defm SQDMLSL : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">; + +def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd), + (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), + (i32 FPR32:$Rm))))), + (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>; +def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd), + (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), + (i32 FPR32:$Rm))))), + (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>; + +//===----------------------------------------------------------------------===// +// Advanced SIMD two scalar instructions. 
+//===----------------------------------------------------------------------===// + +defm ABS : SIMDTwoScalarD< 0, 0b01011, "abs", abs>; +defm CMEQ : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", AArch64cmeqz>; +defm CMGE : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>; +defm CMGT : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>; +defm CMLE : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>; +defm CMLT : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>; +defm FCMEQ : SIMDFPCmpTwoScalar<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>; +defm FCMGE : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>; +defm FCMGT : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>; +defm FCMLE : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>; +defm FCMLT : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>; +defm FCVTAS : SIMDFPTwoScalar< 0, 0, 0b11100, "fcvtas">; +defm FCVTAU : SIMDFPTwoScalar< 1, 0, 0b11100, "fcvtau">; +defm FCVTMS : SIMDFPTwoScalar< 0, 0, 0b11011, "fcvtms">; +defm FCVTMU : SIMDFPTwoScalar< 1, 0, 0b11011, "fcvtmu">; +defm FCVTNS : SIMDFPTwoScalar< 0, 0, 0b11010, "fcvtns">; +defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu">; +defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps">; +defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">; +def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">; +defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">; +defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">; +defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe">; +defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx">; +defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte">; +defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg", + UnOpFrag<(sub immAllZerosV, node:$LHS)> >; +defm SCVTF : SIMDFPTwoScalarCVT< 0, 0, 0b11101, "scvtf", AArch64sitof>; +defm SQABS : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>; +defm SQNEG : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>; +defm SQXTN : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>; +defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>; +defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd", + int_aarch64_neon_suqadd>; +defm UCVTF : SIMDFPTwoScalarCVT< 1, 0, 0b11101, "ucvtf", AArch64uitof>; +defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>; +defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd", + int_aarch64_neon_usqadd>; + +def : Pat<(AArch64neg (v1i64 V64:$Rn)), (NEGv1i64 V64:$Rn)>; + +def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))), + (FCVTASv1i64 FPR64:$Rn)>; +def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))), + (FCVTAUv1i64 FPR64:$Rn)>; +def : Pat<(v1i64 (int_aarch64_neon_fcvtms (v1f64 FPR64:$Rn))), + (FCVTMSv1i64 FPR64:$Rn)>; +def : Pat<(v1i64 (int_aarch64_neon_fcvtmu (v1f64 FPR64:$Rn))), + (FCVTMUv1i64 FPR64:$Rn)>; +def : Pat<(v1i64 (int_aarch64_neon_fcvtns (v1f64 FPR64:$Rn))), + (FCVTNSv1i64 FPR64:$Rn)>; +def : Pat<(v1i64 (int_aarch64_neon_fcvtnu (v1f64 FPR64:$Rn))), + (FCVTNUv1i64 FPR64:$Rn)>; +def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))), + (FCVTPSv1i64 FPR64:$Rn)>; +def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))), + (FCVTPUv1i64 FPR64:$Rn)>; + +def : Pat<(f16 (int_aarch64_neon_frecpe (f16 FPR16:$Rn))), + (FRECPEv1f16 FPR16:$Rn)>; +def : Pat<(f32 (int_aarch64_neon_frecpe (f32 FPR32:$Rn))), + (FRECPEv1i32 FPR32:$Rn)>; +def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))), + (FRECPEv1i64 
FPR64:$Rn)>; +def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))), + (FRECPEv1i64 FPR64:$Rn)>; + +def : Pat<(f32 (AArch64frecpe (f32 FPR32:$Rn))), + (FRECPEv1i32 FPR32:$Rn)>; +def : Pat<(v2f32 (AArch64frecpe (v2f32 V64:$Rn))), + (FRECPEv2f32 V64:$Rn)>; +def : Pat<(v4f32 (AArch64frecpe (v4f32 FPR128:$Rn))), + (FRECPEv4f32 FPR128:$Rn)>; +def : Pat<(f64 (AArch64frecpe (f64 FPR64:$Rn))), + (FRECPEv1i64 FPR64:$Rn)>; +def : Pat<(v1f64 (AArch64frecpe (v1f64 FPR64:$Rn))), + (FRECPEv1i64 FPR64:$Rn)>; +def : Pat<(v2f64 (AArch64frecpe (v2f64 FPR128:$Rn))), + (FRECPEv2f64 FPR128:$Rn)>; + +def : Pat<(f32 (AArch64frecps (f32 FPR32:$Rn), (f32 FPR32:$Rm))), + (FRECPS32 FPR32:$Rn, FPR32:$Rm)>; +def : Pat<(v2f32 (AArch64frecps (v2f32 V64:$Rn), (v2f32 V64:$Rm))), + (FRECPSv2f32 V64:$Rn, V64:$Rm)>; +def : Pat<(v4f32 (AArch64frecps (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))), + (FRECPSv4f32 FPR128:$Rn, FPR128:$Rm)>; +def : Pat<(f64 (AArch64frecps (f64 FPR64:$Rn), (f64 FPR64:$Rm))), + (FRECPS64 FPR64:$Rn, FPR64:$Rm)>; +def : Pat<(v2f64 (AArch64frecps (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))), + (FRECPSv2f64 FPR128:$Rn, FPR128:$Rm)>; + +def : Pat<(f16 (int_aarch64_neon_frecpx (f16 FPR16:$Rn))), + (FRECPXv1f16 FPR16:$Rn)>; +def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))), + (FRECPXv1i32 FPR32:$Rn)>; +def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))), + (FRECPXv1i64 FPR64:$Rn)>; + +def : Pat<(f16 (int_aarch64_neon_frsqrte (f16 FPR16:$Rn))), + (FRSQRTEv1f16 FPR16:$Rn)>; +def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))), + (FRSQRTEv1i32 FPR32:$Rn)>; +def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))), + (FRSQRTEv1i64 FPR64:$Rn)>; +def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))), + (FRSQRTEv1i64 FPR64:$Rn)>; + +def : Pat<(f32 (AArch64frsqrte (f32 FPR32:$Rn))), + (FRSQRTEv1i32 FPR32:$Rn)>; +def : Pat<(v2f32 (AArch64frsqrte (v2f32 V64:$Rn))), + (FRSQRTEv2f32 V64:$Rn)>; +def : Pat<(v4f32 (AArch64frsqrte (v4f32 FPR128:$Rn))), + (FRSQRTEv4f32 FPR128:$Rn)>; +def : Pat<(f64 (AArch64frsqrte (f64 FPR64:$Rn))), + (FRSQRTEv1i64 FPR64:$Rn)>; +def : Pat<(v1f64 (AArch64frsqrte (v1f64 FPR64:$Rn))), + (FRSQRTEv1i64 FPR64:$Rn)>; +def : Pat<(v2f64 (AArch64frsqrte (v2f64 FPR128:$Rn))), + (FRSQRTEv2f64 FPR128:$Rn)>; + +def : Pat<(f32 (AArch64frsqrts (f32 FPR32:$Rn), (f32 FPR32:$Rm))), + (FRSQRTS32 FPR32:$Rn, FPR32:$Rm)>; +def : Pat<(v2f32 (AArch64frsqrts (v2f32 V64:$Rn), (v2f32 V64:$Rm))), + (FRSQRTSv2f32 V64:$Rn, V64:$Rm)>; +def : Pat<(v4f32 (AArch64frsqrts (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))), + (FRSQRTSv4f32 FPR128:$Rn, FPR128:$Rm)>; +def : Pat<(f64 (AArch64frsqrts (f64 FPR64:$Rn), (f64 FPR64:$Rm))), + (FRSQRTS64 FPR64:$Rn, FPR64:$Rm)>; +def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))), + (FRSQRTSv2f64 FPR128:$Rn, FPR128:$Rm)>; + +// If an integer is about to be converted to a floating point value, +// just load it on the floating point unit. +// Here are the patterns for 8 and 16-bits to float. +// 8-bits -> float. 
+multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy, + SDPatternOperator loadop, Instruction UCVTF, + ROAddrMode ro, Instruction LDRW, Instruction LDRX, + SubRegIndex sub> { + def : Pat<(DstTy (uint_to_fp (SrcTy + (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, + ro.Wext:$extend))))), + (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)), + (LDRW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend), + sub))>; + + def : Pat<(DstTy (uint_to_fp (SrcTy + (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, + ro.Wext:$extend))))), + (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)), + (LDRX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend), + sub))>; +} + +defm : UIntToFPROLoadPat<f32, i32, zextloadi8, + UCVTFv1i32, ro8, LDRBroW, LDRBroX, bsub>; +def : Pat <(f32 (uint_to_fp (i32 + (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), + (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), + (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>; +def : Pat <(f32 (uint_to_fp (i32 + (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))), + (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), + (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>; +// 16-bits -> float. +defm : UIntToFPROLoadPat<f32, i32, zextloadi16, + UCVTFv1i32, ro16, LDRHroW, LDRHroX, hsub>; +def : Pat <(f32 (uint_to_fp (i32 + (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), + (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), + (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>; +def : Pat <(f32 (uint_to_fp (i32 + (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))), + (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), + (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>; +// 32-bits are handled in target specific dag combine: +// performIntToFpCombine. +// 64-bits integer to 32-bits floating point, not possible with +// UCVTF on floating point registers (both source and destination +// must have the same size). + +// Here are the patterns for 8, 16, 32, and 64-bits to double. +// 8-bits -> double. +defm : UIntToFPROLoadPat<f64, i32, zextloadi8, + UCVTFv1i64, ro8, LDRBroW, LDRBroX, bsub>; +def : Pat <(f64 (uint_to_fp (i32 + (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), + (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>; +def : Pat <(f64 (uint_to_fp (i32 + (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))), + (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>; +// 16-bits -> double. +defm : UIntToFPROLoadPat<f64, i32, zextloadi16, + UCVTFv1i64, ro16, LDRHroW, LDRHroX, hsub>; +def : Pat <(f64 (uint_to_fp (i32 + (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), + (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>; +def : Pat <(f64 (uint_to_fp (i32 + (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))), + (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>; +// 32-bits -> double. +defm : UIntToFPROLoadPat<f64, i32, load, + UCVTFv1i64, ro32, LDRSroW, LDRSroX, ssub>; +def : Pat <(f64 (uint_to_fp (i32 + (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), + (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>; +def : Pat <(f64 (uint_to_fp (i32 + (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))), + (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>; +// 64-bits -> double are handled in target specific dag combine: +// performIntToFpCombine. 
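+
+// The effect of the patterns above is that, for example, a sketch like
+//   float f(const unsigned char *p) { return *p; }
+// can be selected as
+//   ldr   b0, [x0]
+//   ucvtf s0, s0
+// keeping the value on the FP/SIMD register file, instead of
+//   ldrb  w8, [x0]
+//   ucvtf s0, w8
+// which would transfer it across from a general-purpose register.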
+ +//===----------------------------------------------------------------------===// +// Advanced SIMD three different-sized vector instructions. +//===----------------------------------------------------------------------===// + +defm ADDHN : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>; +defm SUBHN : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>; +defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>; +defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>; +defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_aarch64_neon_pmull>; +defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal", + int_aarch64_neon_sabd>; +defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl", + int_aarch64_neon_sabd>; +defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl", + BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>; +defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw", + BinOpFrag<(add node:$LHS, (sext node:$RHS))>>; +defm SMLAL : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal", + TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; +defm SMLSL : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl", + TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; +defm SMULL : SIMDLongThreeVectorBHS<0, 0b1100, "smull", int_aarch64_neon_smull>; +defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal", + int_aarch64_neon_sqadd>; +defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl", + int_aarch64_neon_sqsub>; +defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull", + int_aarch64_neon_sqdmull>; +defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl", + BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>; +defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw", + BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>; +defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal", + int_aarch64_neon_uabd>; +defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl", + BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>; +defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw", + BinOpFrag<(add node:$LHS, (zext node:$RHS))>>; +defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal", + TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; +defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl", + TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; +defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", int_aarch64_neon_umull>; +defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl", + BinOpFrag<(sub (zext node:$LHS), (zext node:$RHS))>>; +defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw", + BinOpFrag<(sub node:$LHS, (zext node:$RHS))>>; + +// Additional patterns for SMULL and UMULL +multiclass Neon_mul_widen_patterns<SDPatternOperator opnode, + Instruction INST8B, Instruction INST4H, Instruction INST2S> { + def : Pat<(v8i16 (opnode (v8i8 V64:$Rn), (v8i8 V64:$Rm))), + (INST8B V64:$Rn, V64:$Rm)>; + def : Pat<(v4i32 (opnode (v4i16 V64:$Rn), (v4i16 V64:$Rm))), + (INST4H V64:$Rn, V64:$Rm)>; + def : Pat<(v2i64 (opnode (v2i32 V64:$Rn), (v2i32 V64:$Rm))), + (INST2S V64:$Rn, V64:$Rm)>; +} + +defm : Neon_mul_widen_patterns<AArch64smull, SMULLv8i8_v8i16, + SMULLv4i16_v4i32, SMULLv2i32_v2i64>; +defm : Neon_mul_widen_patterns<AArch64umull, UMULLv8i8_v8i16, + UMULLv4i16_v4i32, UMULLv2i32_v2i64>; + +// Patterns for smull2/umull2. 
+multiclass Neon_mul_high_patterns<SDPatternOperator opnode, + Instruction INST8B, Instruction INST4H, Instruction INST2S> { + def : Pat<(v8i16 (opnode (extract_high_v16i8 V128:$Rn), + (extract_high_v16i8 V128:$Rm))), + (INST8B V128:$Rn, V128:$Rm)>; + def : Pat<(v4i32 (opnode (extract_high_v8i16 V128:$Rn), + (extract_high_v8i16 V128:$Rm))), + (INST4H V128:$Rn, V128:$Rm)>; + def : Pat<(v2i64 (opnode (extract_high_v4i32 V128:$Rn), + (extract_high_v4i32 V128:$Rm))), + (INST2S V128:$Rn, V128:$Rm)>; +} + +defm : Neon_mul_high_patterns<AArch64smull, SMULLv16i8_v8i16, + SMULLv8i16_v4i32, SMULLv4i32_v2i64>; +defm : Neon_mul_high_patterns<AArch64umull, UMULLv16i8_v8i16, + UMULLv8i16_v4i32, UMULLv4i32_v2i64>; + +// Additional patterns for SMLAL/SMLSL and UMLAL/UMLSL +multiclass Neon_mulacc_widen_patterns<SDPatternOperator opnode, + Instruction INST8B, Instruction INST4H, Instruction INST2S> { + def : Pat<(v8i16 (opnode (v8i16 V128:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm))), + (INST8B V128:$Rd, V64:$Rn, V64:$Rm)>; + def : Pat<(v4i32 (opnode (v4i32 V128:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm))), + (INST4H V128:$Rd, V64:$Rn, V64:$Rm)>; + def : Pat<(v2i64 (opnode (v2i64 V128:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm))), + (INST2S V128:$Rd, V64:$Rn, V64:$Rm)>; +} + +defm : Neon_mulacc_widen_patterns< + TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>, + SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>; +defm : Neon_mulacc_widen_patterns< + TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>, + UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>; +defm : Neon_mulacc_widen_patterns< + TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>, + SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>; +defm : Neon_mulacc_widen_patterns< + TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>, + UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>; + +// Patterns for 64-bit pmull +def : Pat<(int_aarch64_neon_pmull64 V64:$Rn, V64:$Rm), + (PMULLv1i64 V64:$Rn, V64:$Rm)>; +def : Pat<(int_aarch64_neon_pmull64 (extractelt (v2i64 V128:$Rn), (i64 1)), + (extractelt (v2i64 V128:$Rm), (i64 1))), + (PMULLv2i64 V128:$Rn, V128:$Rm)>; + +// CodeGen patterns for addhn and subhn instructions, which can actually be +// written in LLVM IR without too much difficulty. 
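+// For instance, the v8i16 -> v8i8 addhn computation can be written roughly as:
+//   %sum    = add  <8 x i16> %a, %b
+//   %high   = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8,
+//                                   i16 8, i16 8, i16 8, i16 8>
+//   %narrow = trunc <8 x i16> %high to <8 x i8>
+// which the first ADDHN pattern below turns into a single instruction.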
+ +// ADDHN +def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))), + (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>; +def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm), + (i32 16))))), + (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>; +def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm), + (i32 32))))), + (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>; +def : Pat<(concat_vectors (v8i8 V64:$Rd), + (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), + (i32 8))))), + (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), + V128:$Rn, V128:$Rm)>; +def : Pat<(concat_vectors (v4i16 V64:$Rd), + (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm), + (i32 16))))), + (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), + V128:$Rn, V128:$Rm)>; +def : Pat<(concat_vectors (v2i32 V64:$Rd), + (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm), + (i32 32))))), + (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), + V128:$Rn, V128:$Rm)>; + +// SUBHN +def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))), + (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>; +def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm), + (i32 16))))), + (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>; +def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm), + (i32 32))))), + (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>; +def : Pat<(concat_vectors (v8i8 V64:$Rd), + (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), + (i32 8))))), + (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), + V128:$Rn, V128:$Rm)>; +def : Pat<(concat_vectors (v4i16 V64:$Rd), + (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm), + (i32 16))))), + (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), + V128:$Rn, V128:$Rm)>; +def : Pat<(concat_vectors (v2i32 V64:$Rd), + (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm), + (i32 32))))), + (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), + V128:$Rn, V128:$Rm)>; + +//---------------------------------------------------------------------------- +// AdvSIMD bitwise extract from vector instruction. +//---------------------------------------------------------------------------- + +defm EXT : SIMDBitwiseExtract<"ext">; + +def : Pat<(v4i16 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), + (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; +def : Pat<(v8i16 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), + (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; +def : Pat<(v2i32 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), + (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; +def : Pat<(v2f32 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), + (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; +def : Pat<(v4i32 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), + (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; +def : Pat<(v4f32 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), + (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; +def : Pat<(v2i64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), + (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; +def : Pat<(v2f64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), + (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; +def : Pat<(v4f16 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), + (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; +def : Pat<(v8f16 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), + (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; + +// We use EXT to handle extract_subvector to copy the upper 64-bits of a +// 128-bit vector. 
+def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 8))), + (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; +def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 4))), + (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; +def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 2))), + (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; +def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 1))), + (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; +def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 4))), + (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; +def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 2))), + (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; +def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 1))), + (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; + + +//---------------------------------------------------------------------------- +// AdvSIMD zip vector +//---------------------------------------------------------------------------- + +defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>; +defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>; +defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>; +defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>; +defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>; +defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>; + +//---------------------------------------------------------------------------- +// AdvSIMD TBL/TBX instructions +//---------------------------------------------------------------------------- + +defm TBL : SIMDTableLookup< 0, "tbl">; +defm TBX : SIMDTableLookupTied<1, "tbx">; + +def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))), + (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>; +def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))), + (TBLv16i8One V128:$Ri, V128:$Rn)>; + +def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd), + (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))), + (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>; +def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd), + (v16i8 V128:$Ri), (v16i8 V128:$Rn))), + (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>; + + +//---------------------------------------------------------------------------- +// AdvSIMD scalar CPY instruction +//---------------------------------------------------------------------------- + +defm CPY : SIMDScalarCPY<"cpy">; + +//---------------------------------------------------------------------------- +// AdvSIMD scalar pairwise instructions +//---------------------------------------------------------------------------- + +defm ADDP : SIMDPairwiseScalarD<0, 0b11011, "addp">; +defm FADDP : SIMDFPPairwiseScalar<0, 0b01101, "faddp">; +defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">; +defm FMAXP : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">; +defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">; +defm FMINP : SIMDFPPairwiseScalar<1, 0b01111, "fminp">; +def : Pat<(v2i64 (AArch64saddv V128:$Rn)), + (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>; +def : Pat<(v2i64 (AArch64uaddv V128:$Rn)), + (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>; +def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))), + (FADDPv2i32p V64:$Rn)>; +def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))), + (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>; +def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))), + (FADDPv2i64p V128:$Rn)>; +def : Pat<(f32 (int_aarch64_neon_fmaxnmv (v2f32 
V64:$Rn))), + (FMAXNMPv2i32p V64:$Rn)>; +def : Pat<(f64 (int_aarch64_neon_fmaxnmv (v2f64 V128:$Rn))), + (FMAXNMPv2i64p V128:$Rn)>; +def : Pat<(f32 (int_aarch64_neon_fmaxv (v2f32 V64:$Rn))), + (FMAXPv2i32p V64:$Rn)>; +def : Pat<(f64 (int_aarch64_neon_fmaxv (v2f64 V128:$Rn))), + (FMAXPv2i64p V128:$Rn)>; +def : Pat<(f32 (int_aarch64_neon_fminnmv (v2f32 V64:$Rn))), + (FMINNMPv2i32p V64:$Rn)>; +def : Pat<(f64 (int_aarch64_neon_fminnmv (v2f64 V128:$Rn))), + (FMINNMPv2i64p V128:$Rn)>; +def : Pat<(f32 (int_aarch64_neon_fminv (v2f32 V64:$Rn))), + (FMINPv2i32p V64:$Rn)>; +def : Pat<(f64 (int_aarch64_neon_fminv (v2f64 V128:$Rn))), + (FMINPv2i64p V128:$Rn)>; + +//---------------------------------------------------------------------------- +// AdvSIMD INS/DUP instructions +//---------------------------------------------------------------------------- + +def DUPv8i8gpr : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>; +def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>; +def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>; +def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>; +def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>; +def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>; +def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>; + +def DUPv2i64lane : SIMDDup64FromElement; +def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>; +def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>; +def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>; +def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>; +def DUPv8i8lane : SIMDDup8FromElement <0, ".8b", v8i8, V64>; +def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>; + +def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))), + (v2f32 (DUPv2i32lane + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub), + (i64 0)))>; +def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))), + (v4f32 (DUPv4i32lane + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub), + (i64 0)))>; +def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))), + (v2f64 (DUPv2i64lane + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub), + (i64 0)))>; +def : Pat<(v4f16 (AArch64dup (f16 FPR16:$Rn))), + (v4f16 (DUPv4i16lane + (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub), + (i64 0)))>; +def : Pat<(v8f16 (AArch64dup (f16 FPR16:$Rn))), + (v8f16 (DUPv8i16lane + (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub), + (i64 0)))>; + +def : Pat<(v4f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)), + (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>; +def : Pat<(v8f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)), + (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>; + +def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), + (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>; +def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), + (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>; +def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)), + (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>; + +// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane +// instruction even if the types don't match: we just have to remap the lane +// carefully. N.b. this trick only applies to truncations. 
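+// For example, broadcasting the truncation of lane 1 of a v4i32 into a v8i8
+// can reuse DUPv8i8lane with byte index 4 (1 * 4): each 32-bit lane spans four
+// byte lanes and the truncation keeps the lowest of them. The VecIndex_x2/_x4/
+// _x8 transforms below compute exactly that rescaled index.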
+def VecIndex_x2 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64); +}]>; +def VecIndex_x4 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64); +}]>; +def VecIndex_x8 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64); +}]>; + +multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT, + ValueType Src128VT, ValueType ScalVT, + Instruction DUP, SDNodeXForm IdxXFORM> { + def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src128VT V128:$Rn), + imm:$idx)))), + (DUP V128:$Rn, (IdxXFORM imm:$idx))>; + + def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src64VT V64:$Rn), + imm:$idx)))), + (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>; +} + +defm : DUPWithTruncPats<v8i8, v4i16, v8i16, i32, DUPv8i8lane, VecIndex_x2>; +defm : DUPWithTruncPats<v8i8, v2i32, v4i32, i32, DUPv8i8lane, VecIndex_x4>; +defm : DUPWithTruncPats<v4i16, v2i32, v4i32, i32, DUPv4i16lane, VecIndex_x2>; + +defm : DUPWithTruncPats<v16i8, v4i16, v8i16, i32, DUPv16i8lane, VecIndex_x2>; +defm : DUPWithTruncPats<v16i8, v2i32, v4i32, i32, DUPv16i8lane, VecIndex_x4>; +defm : DUPWithTruncPats<v8i16, v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>; + +multiclass DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP, + SDNodeXForm IdxXFORM> { + def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v2i64 V128:$Rn), + imm:$idx))))), + (DUP V128:$Rn, (IdxXFORM imm:$idx))>; + + def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v1i64 V64:$Rn), + imm:$idx))))), + (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>; +} + +defm : DUPWithTrunci64Pats<v8i8, DUPv8i8lane, VecIndex_x8>; +defm : DUPWithTrunci64Pats<v4i16, DUPv4i16lane, VecIndex_x4>; +defm : DUPWithTrunci64Pats<v2i32, DUPv2i32lane, VecIndex_x2>; + +defm : DUPWithTrunci64Pats<v16i8, DUPv16i8lane, VecIndex_x8>; +defm : DUPWithTrunci64Pats<v8i16, DUPv8i16lane, VecIndex_x4>; +defm : DUPWithTrunci64Pats<v4i32, DUPv4i32lane, VecIndex_x2>; + +// SMOV and UMOV definitions, with some extra patterns for convenience +defm SMOV : SMov; +defm UMOV : UMov; + +def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8), + (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>; +def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8), + (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>; +def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), + (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>; +def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), + (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>; +def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), + (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>; +def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))), + (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>; + +def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn), + VectorIndexB:$idx)))), i8), + (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>; +def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn), + VectorIndexH:$idx)))), i16), + (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>; + +// Extracting i8 or i16 elements will have the zero-extend transformed to +// an 'and' mask by type legalization since neither i8 nor i16 are legal types +// for AArch64. 
Match these patterns here since UMOV already zeroes out the high +// bits of the destination register. +def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), + (i32 0xff)), + (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>; +def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx), + (i32 0xffff)), + (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>; + +defm INS : SIMDIns; + +def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)), + (SUBREG_TO_REG (i32 0), + (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; +def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)), + (SUBREG_TO_REG (i32 0), + (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; + +def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)), + (SUBREG_TO_REG (i32 0), + (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; +def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)), + (SUBREG_TO_REG (i32 0), + (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; + +def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))), + (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; +def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))), + (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; + +def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))), + (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), + (i32 FPR32:$Rn), ssub))>; +def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))), + (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), + (i32 FPR32:$Rn), ssub))>; + +def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))), + (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), + (i64 FPR64:$Rn), dsub))>; + +def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))), + (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; +def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))), + (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; + +def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>; +def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))), + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>; + +def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))), + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>; + +def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn), + (f16 FPR16:$Rm), (i64 VectorIndexS:$imm))), + (EXTRACT_SUBREG + (INSvi16lane + (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)), + VectorIndexS:$imm, + (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)), + (i64 0)), + dsub)>; + +def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn), + (f16 FPR16:$Rm), (i64 VectorIndexH:$imm))), + (INSvi16lane + V128:$Rn, VectorIndexH:$imm, + (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)), + (i64 0))>; + +def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn), + (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))), + (EXTRACT_SUBREG + (INSvi32lane + (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)), + VectorIndexS:$imm, + (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)), + (i64 0)), + dsub)>; +def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn), + (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))), + (INSvi32lane + V128:$Rn, VectorIndexS:$imm, + (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)), + (i64 0))>; +def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn), + (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))), + (INSvi64lane + V128:$Rn, VectorIndexD:$imm, + (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)), + (i64 0))>; + +// Copy an element at a constant index in one vector into a constant indexed +// element of another. 
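+// Each variant selects a single INS (element) instruction; only the element
+// size and index operand types differ.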
+// FIXME refactor to a shared class/dev parameterized on vector type, vector +// index type and INS extension +def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane + (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs), + VectorIndexB:$idx2)), + (v16i8 (INSvi8lane + V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2) + )>; +def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane + (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs), + VectorIndexH:$idx2)), + (v8i16 (INSvi16lane + V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2) + )>; +def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane + (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs), + VectorIndexS:$idx2)), + (v4i32 (INSvi32lane + V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2) + )>; +def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane + (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs), + VectorIndexD:$idx2)), + (v2i64 (INSvi64lane + V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2) + )>; + +multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64, + ValueType VTScal, Instruction INS> { + def : Pat<(VT128 (vector_insert V128:$src, + (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)), + imm:$Immd)), + (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>; + + def : Pat<(VT128 (vector_insert V128:$src, + (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)), + imm:$Immd)), + (INS V128:$src, imm:$Immd, + (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>; + + def : Pat<(VT64 (vector_insert V64:$src, + (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)), + imm:$Immd)), + (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), + imm:$Immd, V128:$Rn, imm:$Immn), + dsub)>; + + def : Pat<(VT64 (vector_insert V64:$src, + (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)), + imm:$Immd)), + (EXTRACT_SUBREG + (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd, + (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn), + dsub)>; +} + +defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>; +defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>; +defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>; + + +// Floating point vector extractions are codegen'd as either a sequence of +// subregister extractions, or a MOV (aka CPY here, alias for DUP) if +// the lane number is anything other than zero. +def : Pat<(vector_extract (v2f64 V128:$Rn), 0), + (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>; +def : Pat<(vector_extract (v4f32 V128:$Rn), 0), + (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>; +def : Pat<(vector_extract (v8f16 V128:$Rn), 0), + (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>; + +def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx), + (f64 (CPYi64 V128:$Rn, VectorIndexD:$idx))>; +def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx), + (f32 (CPYi32 V128:$Rn, VectorIndexS:$idx))>; +def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx), + (f16 (CPYi16 V128:$Rn, VectorIndexH:$idx))>; + +// All concat_vectors operations are canonicalised to act on i64 vectors for +// AArch64. In the general case we need an instruction, which had just as well be +// INS. 
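+// The low half is placed with a subregister insert (normally free after
+// register coalescing) and the high half is copied into lane 1 of the result
+// with a single INS on the .d lanes.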
+class ConcatPat<ValueType DstTy, ValueType SrcTy> + : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)), + (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1, + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>; + +def : ConcatPat<v2i64, v1i64>; +def : ConcatPat<v2f64, v1f64>; +def : ConcatPat<v4i32, v2i32>; +def : ConcatPat<v4f32, v2f32>; +def : ConcatPat<v8i16, v4i16>; +def : ConcatPat<v8f16, v4f16>; +def : ConcatPat<v16i8, v8i8>; + +// If the high lanes are undef, though, we can just ignore them: +class ConcatUndefPat<ValueType DstTy, ValueType SrcTy> + : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)), + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>; + +def : ConcatUndefPat<v2i64, v1i64>; +def : ConcatUndefPat<v2f64, v1f64>; +def : ConcatUndefPat<v4i32, v2i32>; +def : ConcatUndefPat<v4f32, v2f32>; +def : ConcatUndefPat<v8i16, v4i16>; +def : ConcatUndefPat<v16i8, v8i8>; + +//---------------------------------------------------------------------------- +// AdvSIMD across lanes instructions +//---------------------------------------------------------------------------- + +defm ADDV : SIMDAcrossLanesBHS<0, 0b11011, "addv">; +defm SMAXV : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">; +defm SMINV : SIMDAcrossLanesBHS<0, 0b11010, "sminv">; +defm UMAXV : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">; +defm UMINV : SIMDAcrossLanesBHS<1, 0b11010, "uminv">; +defm SADDLV : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">; +defm UADDLV : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">; +defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>; +defm FMAXV : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>; +defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>; +defm FMINV : SIMDFPAcrossLanes<0b01111, 1, "fminv", int_aarch64_neon_fminv>; + +// Patterns for across-vector intrinsics, that have a node equivalent, that +// returns a vector (with only the low lane defined) instead of a scalar. +// In effect, opNode is the same as (scalar_to_vector (IntNode)). +multiclass SIMDAcrossLanesIntrinsic<string baseOpc, + SDPatternOperator opNode> { +// If a lane instruction caught the vector_extract around opNode, we can +// directly match the latter to the instruction. +def : Pat<(v8i8 (opNode V64:$Rn)), + (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>; +def : Pat<(v16i8 (opNode V128:$Rn)), + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>; +def : Pat<(v4i16 (opNode V64:$Rn)), + (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>; +def : Pat<(v8i16 (opNode V128:$Rn)), + (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>; +def : Pat<(v4i32 (opNode V128:$Rn)), + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>; + + +// If none did, fallback to the explicit patterns, consuming the vector_extract. 
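+// These match (vector_extract ... (i64 0)) of the node result directly and
+// emit the across-lanes instruction followed by a subregister extract of the
+// scalar result.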
+def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)), + (i32 0)), (i64 0))), + (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), + bsub), ssub)>; +def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))), + (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), + bsub), ssub)>; +def : Pat<(i32 (vector_extract (insert_subvector undef, + (v4i16 (opNode V64:$Rn)), (i32 0)), (i64 0))), + (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), + hsub), ssub)>; +def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))), + (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), + hsub), ssub)>; +def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))), + (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), + ssub), ssub)>; + +} + +multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc, + SDPatternOperator opNode> + : SIMDAcrossLanesIntrinsic<baseOpc, opNode> { +// If there is a sign extension after this intrinsic, consume it as smov already +// performed it +def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef, + (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), i8)), + (i32 (SMOVvi8to32 + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), + (i64 0)))>; +def : Pat<(i32 (sext_inreg (i32 (vector_extract + (opNode (v16i8 V128:$Rn)), (i64 0))), i8)), + (i32 (SMOVvi8to32 + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), + (i64 0)))>; +def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef, + (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), i16)), + (i32 (SMOVvi16to32 + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), + (i64 0)))>; +def : Pat<(i32 (sext_inreg (i32 (vector_extract + (opNode (v8i16 V128:$Rn)), (i64 0))), i16)), + (i32 (SMOVvi16to32 + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), + (i64 0)))>; +} + +multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc, + SDPatternOperator opNode> + : SIMDAcrossLanesIntrinsic<baseOpc, opNode> { +// If there is a masking operation keeping only what has been actually +// generated, consume it. 
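+// For the byte and halfword forms the mask keeps at least the low 8 or 16
+// bits; the across-lanes result already fits in the element width, so the
+// 'and' can simply be dropped.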
+def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef, + (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), maski8_or_more)), + (i32 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), + ssub))>; +def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))), + maski8_or_more)), + (i32 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), + ssub))>; +def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef, + (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), maski16_or_more)), + (i32 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), + ssub))>; +def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))), + maski16_or_more)), + (i32 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), + ssub))>; +} + +defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", AArch64saddv>; +// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm +def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))), + (ADDPv2i32 V64:$Rn, V64:$Rn)>; + +defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>; +// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm +def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))), + (ADDPv2i32 V64:$Rn, V64:$Rn)>; + +defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>; +def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))), + (SMAXPv2i32 V64:$Rn, V64:$Rn)>; + +defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>; +def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))), + (SMINPv2i32 V64:$Rn, V64:$Rn)>; + +defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>; +def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))), + (UMAXPv2i32 V64:$Rn, V64:$Rn)>; + +defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>; +def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))), + (UMINPv2i32 V64:$Rn, V64:$Rn)>; + +multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> { + def : Pat<(i32 (intOp (v8i8 V64:$Rn))), + (i32 (SMOVvi16to32 + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub), + (i64 0)))>; +def : Pat<(i32 (intOp (v16i8 V128:$Rn))), + (i32 (SMOVvi16to32 + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub), + (i64 0)))>; + +def : Pat<(i32 (intOp (v4i16 V64:$Rn))), + (i32 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub), + ssub))>; +def : Pat<(i32 (intOp (v8i16 V128:$Rn))), + (i32 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub), + ssub))>; + +def : Pat<(i64 (intOp (v4i32 V128:$Rn))), + (i64 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub), + dsub))>; +} + +multiclass SIMDAcrossLanesUnsignedLongIntrinsic<string baseOpc, + Intrinsic intOp> { + def : Pat<(i32 (intOp (v8i8 V64:$Rn))), + (i32 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub), + ssub))>; +def : Pat<(i32 (intOp (v16i8 V128:$Rn))), + (i32 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + 
(!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub), + ssub))>; + +def : Pat<(i32 (intOp (v4i16 V64:$Rn))), + (i32 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub), + ssub))>; +def : Pat<(i32 (intOp (v8i16 V128:$Rn))), + (i32 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub), + ssub))>; + +def : Pat<(i64 (intOp (v4i32 V128:$Rn))), + (i64 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub), + dsub))>; +} + +defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>; +defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>; + +// The vaddlv_s32 intrinsic gets mapped to SADDLP. +def : Pat<(i64 (int_aarch64_neon_saddlv (v2i32 V64:$Rn))), + (i64 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (SADDLPv2i32_v1i64 V64:$Rn), dsub), + dsub))>; +// The vaddlv_u32 intrinsic gets mapped to UADDLP. +def : Pat<(i64 (int_aarch64_neon_uaddlv (v2i32 V64:$Rn))), + (i64 (EXTRACT_SUBREG + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (UADDLPv2i32_v1i64 V64:$Rn), dsub), + dsub))>; + +//------------------------------------------------------------------------------ +// AdvSIMD modified immediate instructions +//------------------------------------------------------------------------------ + +// AdvSIMD BIC +defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>; +// AdvSIMD ORR +defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>; + +def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>; +def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>; +def : InstAlias<"bic $Vd.2s, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>; +def : InstAlias<"bic $Vd.4s, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>; + +def : InstAlias<"bic.4h $Vd, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>; +def : InstAlias<"bic.8h $Vd, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>; +def : InstAlias<"bic.2s $Vd, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>; +def : InstAlias<"bic.4s $Vd, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>; + +def : InstAlias<"orr $Vd.4h, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>; +def : InstAlias<"orr $Vd.8h, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>; +def : InstAlias<"orr $Vd.2s, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>; +def : InstAlias<"orr $Vd.4s, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>; + +def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>; +def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>; +def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>; +def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>; + +// AdvSIMD FMOV +def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8, + "fmov", ".2d", + [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; +def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64, fpimm8, + "fmov", ".2s", + [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>; +def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8, + "fmov", ".4s", + [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; +let Predicates = [HasNEON, HasFullFP16] in { +def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64, fpimm8, + "fmov", ".4h", + [(set (v4f16 V64:$Rd), 
(AArch64fmov imm0_255:$imm8))]>; +def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8, + "fmov", ".8h", + [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; +} // Predicates = [HasNEON, HasFullFP16] + +// AdvSIMD MOVI + +// EDIT byte mask: scalar +let isReMaterializable = 1, isAsCheapAsAMove = 1 in +def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi", + [(set FPR64:$Rd, simdimmtype10:$imm8)]>; +// The movi_edit node has the immediate value already encoded, so we use +// a plain imm0_255 here. +def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)), + (MOVID imm0_255:$shift)>; + +def : Pat<(v1i64 immAllZerosV), (MOVID (i32 0))>; +def : Pat<(v2i32 immAllZerosV), (MOVID (i32 0))>; +def : Pat<(v4i16 immAllZerosV), (MOVID (i32 0))>; +def : Pat<(v8i8 immAllZerosV), (MOVID (i32 0))>; + +def : Pat<(v1i64 immAllOnesV), (MOVID (i32 255))>; +def : Pat<(v2i32 immAllOnesV), (MOVID (i32 255))>; +def : Pat<(v4i16 immAllOnesV), (MOVID (i32 255))>; +def : Pat<(v8i8 immAllOnesV), (MOVID (i32 255))>; + +// EDIT byte mask: 2d + +// The movi_edit node has the immediate value already encoded, so we use +// a plain imm0_255 in the pattern +let isReMaterializable = 1, isAsCheapAsAMove = 1 in +def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128, + simdimmtype10, + "movi", ".2d", + [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>; + +def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>; +def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>; +def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>; +def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>; + +def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>; +def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>; +def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>; +def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>; + +// EDIT per word & halfword: 2s, 4h, 4s, & 8h +let isReMaterializable = 1, isAsCheapAsAMove = 1 in +defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">; + +def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"movi $Vd.4s, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; + +def : InstAlias<"movi.4h $Vd, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; + +def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), + (MOVIv2i32 imm0_255:$imm8, imm:$shift)>; +def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), + (MOVIv4i32 imm0_255:$imm8, imm:$shift)>; +def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), + (MOVIv4i16 imm0_255:$imm8, imm:$shift)>; +def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), + (MOVIv8i16 imm0_255:$imm8, imm:$shift)>; + +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { +// EDIT per word: 2s & 4s with MSL shifter +def MOVIv2s_msl : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s", + [(set (v2i32 V64:$Rd), + (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; +def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s", + [(set (v4i32 V128:$Rd), + (AArch64movi_msl imm0_255:$imm8, (i32 
imm:$shift)))]>; + +// Per byte: 8b & 16b +def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64, imm0_255, + "movi", ".8b", + [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>; + +def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255, + "movi", ".16b", + [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>; +} + +// AdvSIMD MVNI + +// EDIT per word & halfword: 2s, 4h, 4s, & 8h +let isReMaterializable = 1, isAsCheapAsAMove = 1 in +defm MVNI : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">; + +def : InstAlias<"mvni $Vd.4h, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"mvni $Vd.8h, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"mvni $Vd.2s, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"mvni $Vd.4s, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; + +def : InstAlias<"mvni.4h $Vd, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; + +def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), + (MVNIv2i32 imm0_255:$imm8, imm:$shift)>; +def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), + (MVNIv4i32 imm0_255:$imm8, imm:$shift)>; +def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), + (MVNIv4i16 imm0_255:$imm8, imm:$shift)>; +def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), + (MVNIv8i16 imm0_255:$imm8, imm:$shift)>; + +// EDIT per word: 2s & 4s with MSL shifter +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { +def MVNIv2s_msl : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s", + [(set (v2i32 V64:$Rd), + (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; +def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s", + [(set (v4i32 V128:$Rd), + (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; +} + +//---------------------------------------------------------------------------- +// AdvSIMD indexed element +//---------------------------------------------------------------------------- + +let hasSideEffects = 0 in { + defm FMLA : SIMDFPIndexedTied<0, 0b0001, "fmla">; + defm FMLS : SIMDFPIndexedTied<0, 0b0101, "fmls">; +} + +// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the +// instruction expects the addend first, while the intrinsic expects it last. + +// On the other hand, there are quite a few valid combinatorial options due to +// the commutativity of multiplication and the fact that (-x) * y = x * (-y). +defm : SIMDFPIndexedTiedPatterns<"FMLA", + TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)>>; +defm : SIMDFPIndexedTiedPatterns<"FMLA", + TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)>>; + +defm : SIMDFPIndexedTiedPatterns<"FMLS", + TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; +defm : SIMDFPIndexedTiedPatterns<"FMLS", + TriOpFrag<(fma node:$RHS, (fneg node:$MHS), node:$LHS)> >; +defm : SIMDFPIndexedTiedPatterns<"FMLS", + TriOpFrag<(fma (fneg node:$RHS), node:$MHS, node:$LHS)> >; +defm : SIMDFPIndexedTiedPatterns<"FMLS", + TriOpFrag<(fma (fneg node:$MHS), node:$RHS, node:$LHS)> >; + +multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> { + // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit + // and DUP scalar. 
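+  // In every variant the fneg has already been hoisted in front of the
+  // dup/duplane, so it is folded away by selecting the FMLS form rather than
+  // negating separately.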
+ def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), + (AArch64duplane32 (v4f32 (fneg V128:$Rm)), + VectorIndexS:$idx))), + (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>; + def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), + (v2f32 (AArch64duplane32 + (v4f32 (insert_subvector undef, + (v2f32 (fneg V64:$Rm)), + (i32 0))), + VectorIndexS:$idx)))), + (FMLSv2i32_indexed V64:$Rd, V64:$Rn, + (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), + VectorIndexS:$idx)>; + def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), + (AArch64dup (f32 (fneg FPR32Op:$Rm))))), + (FMLSv2i32_indexed V64:$Rd, V64:$Rn, + (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; + + // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit + // and DUP scalar. + def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), + (AArch64duplane32 (v4f32 (fneg V128:$Rm)), + VectorIndexS:$idx))), + (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm, + VectorIndexS:$idx)>; + def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), + (v4f32 (AArch64duplane32 + (v4f32 (insert_subvector undef, + (v2f32 (fneg V64:$Rm)), + (i32 0))), + VectorIndexS:$idx)))), + (FMLSv4i32_indexed V128:$Rd, V128:$Rn, + (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), + VectorIndexS:$idx)>; + def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), + (AArch64dup (f32 (fneg FPR32Op:$Rm))))), + (FMLSv4i32_indexed V128:$Rd, V128:$Rn, + (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; + + // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar + // (DUPLANE from 64-bit would be trivial). + def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), + (AArch64duplane64 (v2f64 (fneg V128:$Rm)), + VectorIndexD:$idx))), + (FMLSv2i64_indexed + V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; + def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), + (AArch64dup (f64 (fneg FPR64Op:$Rm))))), + (FMLSv2i64_indexed V128:$Rd, V128:$Rn, + (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>; + + // 2 variants for 32-bit scalar version: extract from .2s or from .4s + def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), + (vector_extract (v4f32 (fneg V128:$Rm)), + VectorIndexS:$idx))), + (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn, + V128:$Rm, VectorIndexS:$idx)>; + def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), + (vector_extract (v4f32 (insert_subvector undef, + (v2f32 (fneg V64:$Rm)), + (i32 0))), + VectorIndexS:$idx))), + (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn, + (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>; + + // 1 variant for 64-bit scalar version: extract from .1d or from .2d + def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn), + (vector_extract (v2f64 (fneg V128:$Rm)), + VectorIndexS:$idx))), + (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn, + V128:$Rm, VectorIndexS:$idx)>; +} + +defm : FMLSIndexedAfterNegPatterns< + TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >; +defm : FMLSIndexedAfterNegPatterns< + TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)> >; + +defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>; +defm FMUL : SIMDFPIndexed<0, 0b1001, "fmul", fmul>; + +def : Pat<(v2f32 (fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))), + (FMULv2i32_indexed V64:$Rn, + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), + (i64 0))>; +def : Pat<(v4f32 (fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))), + (FMULv4i32_indexed V128:$Rn, + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), + (i64 0))>; +def : Pat<(v2f64 (fmul V128:$Rn, (AArch64dup 
(f64 FPR64:$Rm)))), + (FMULv2i64_indexed V128:$Rn, + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub), + (i64 0))>; + +defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>; +defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>; +defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla", + TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))>>; +defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls", + TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))>>; +defm MUL : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>; +defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal", + TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; +defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl", + TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; +defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", + int_aarch64_neon_smull>; +defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal", + int_aarch64_neon_sqadd>; +defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", + int_aarch64_neon_sqsub>; +defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah", + int_aarch64_neon_sqadd>; +defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh", + int_aarch64_neon_sqsub>; +defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>; +defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal", + TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; +defm UMLSL : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl", + TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; +defm UMULL : SIMDVectorIndexedLongSD<1, 0b1010, "umull", + int_aarch64_neon_umull>; + +// A scalar sqdmull with the second operand being a vector lane can be +// handled directly with the indexed instruction encoding. +def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), + (vector_extract (v4i32 V128:$Vm), + VectorIndexS:$idx)), + (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>; + +//---------------------------------------------------------------------------- +// AdvSIMD scalar shift instructions +//---------------------------------------------------------------------------- +defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">; +defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">; +defm SCVTF : SIMDFPScalarRShift<0, 0b11100, "scvtf">; +defm UCVTF : SIMDFPScalarRShift<1, 0b11100, "ucvtf">; +// Codegen patterns for the above. We don't put these directly on the +// instructions because TableGen's type inference can't handle the truth. +// Having the same base pattern for fp <--> int totally freaks it out. 
+def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm), + (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>; +def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm), + (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>; +def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)), + (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>; +def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)), + (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>; +def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn), + vecshiftR64:$imm)), + (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>; +def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn), + vecshiftR64:$imm)), + (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>; +def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm), + (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>; +def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)), + (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>; +def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn), + vecshiftR64:$imm)), + (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>; +def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)), + (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>; +def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn), + vecshiftR64:$imm)), + (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>; +def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm), + (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>; + +// Patterns for FP16 Instrinsics - requires reg copy to/from as i16s not supported. + +def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), vecshiftR16:$imm)), + (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>; +def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)), + (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>; +def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp + (and FPR32:$Rn, (i32 65535)), + vecshiftR16:$imm)), + (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>; +def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)), + (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>; +def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR16:$imm)), + (UCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>; +def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR32:$imm)), + (i32 (INSERT_SUBREG + (i32 (IMPLICIT_DEF)), + (FCVTZSh FPR16:$Rn, vecshiftR32:$imm), + hsub))>; +def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)), + (i64 (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), + (FCVTZSh FPR16:$Rn, vecshiftR64:$imm), + hsub))>; +def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR32:$imm)), + (i32 (INSERT_SUBREG + (i32 (IMPLICIT_DEF)), + (FCVTZUh FPR16:$Rn, vecshiftR32:$imm), + hsub))>; +def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR64:$imm)), + (i64 (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), + (FCVTZUh FPR16:$Rn, vecshiftR64:$imm), + hsub))>; + +defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", AArch64vshl>; +defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli">; +defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn", + int_aarch64_neon_sqrshrn>; +defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun", + int_aarch64_neon_sqrshrun>; +defm SQSHLU : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>; +defm SQSHL : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>; +defm SQSHRN : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn", + 
int_aarch64_neon_sqshrn>; +defm SQSHRUN : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun", + int_aarch64_neon_sqshrun>; +defm SRI : SIMDScalarRShiftDTied< 1, 0b01000, "sri">; +defm SRSHR : SIMDScalarRShiftD< 0, 0b00100, "srshr", AArch64srshri>; +defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra", + TriOpFrag<(add node:$LHS, + (AArch64srshri node:$MHS, node:$RHS))>>; +defm SSHR : SIMDScalarRShiftD< 0, 0b00000, "sshr", AArch64vashr>; +defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra", + TriOpFrag<(add node:$LHS, + (AArch64vashr node:$MHS, node:$RHS))>>; +defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn", + int_aarch64_neon_uqrshrn>; +defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>; +defm UQSHRN : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn", + int_aarch64_neon_uqshrn>; +defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", AArch64urshri>; +defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra", + TriOpFrag<(add node:$LHS, + (AArch64urshri node:$MHS, node:$RHS))>>; +defm USHR : SIMDScalarRShiftD< 1, 0b00000, "ushr", AArch64vlshr>; +defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra", + TriOpFrag<(add node:$LHS, + (AArch64vlshr node:$MHS, node:$RHS))>>; + +//---------------------------------------------------------------------------- +// AdvSIMD vector shift instructions +//---------------------------------------------------------------------------- +defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>; +defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>; +defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf", + int_aarch64_neon_vcvtfxs2fp>; +defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", + int_aarch64_neon_rshrn>; +defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>; +defm SHRN : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn", + BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>; +defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", int_aarch64_neon_vsli>; +def : Pat<(v1i64 (int_aarch64_neon_vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), + (i32 vecshiftL64:$imm))), + (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>; +defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn", + int_aarch64_neon_sqrshrn>; +defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun", + int_aarch64_neon_sqrshrun>; +defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>; +defm SQSHL : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>; +defm SQSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn", + int_aarch64_neon_sqshrn>; +defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun", + int_aarch64_neon_sqshrun>; +defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", int_aarch64_neon_vsri>; +def : Pat<(v1i64 (int_aarch64_neon_vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), + (i32 vecshiftR64:$imm))), + (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>; +defm SRSHR : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>; +defm SRSRA : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra", + TriOpFrag<(add node:$LHS, + (AArch64srshri node:$MHS, node:$RHS))> >; +defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll", + BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>; + +defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>; +defm SSRA : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra", + TriOpFrag<(add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>; +defm UCVTF : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf", + int_aarch64_neon_vcvtfxu2fp>; +defm UQRSHRN : 
SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn", + int_aarch64_neon_uqrshrn>; +defm UQSHL : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>; +defm UQSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn", + int_aarch64_neon_uqshrn>; +defm URSHR : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>; +defm URSRA : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra", + TriOpFrag<(add node:$LHS, + (AArch64urshri node:$MHS, node:$RHS))> >; +defm USHLL : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll", + BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>; +defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>; +defm USRA : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra", + TriOpFrag<(add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >; + +// SHRN patterns for when a logical right shift was used instead of arithmetic +// (the immediate guarantees no sign bits actually end up in the result so it +// doesn't matter). +def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))), + (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>; +def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))), + (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>; +def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))), + (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>; + +def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd), + (trunc (AArch64vlshr (v8i16 V128:$Rn), + vecshiftR16Narrow:$imm)))), + (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), + V128:$Rn, vecshiftR16Narrow:$imm)>; +def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd), + (trunc (AArch64vlshr (v4i32 V128:$Rn), + vecshiftR32Narrow:$imm)))), + (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), + V128:$Rn, vecshiftR32Narrow:$imm)>; +def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd), + (trunc (AArch64vlshr (v2i64 V128:$Rn), + vecshiftR64Narrow:$imm)))), + (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), + V128:$Rn, vecshiftR32Narrow:$imm)>; + +// Vector sign and zero extensions are implemented with SSHLL and USSHLL. +// Anyexts are implemented as zexts. +def : Pat<(v8i16 (sext (v8i8 V64:$Rn))), (SSHLLv8i8_shift V64:$Rn, (i32 0))>; +def : Pat<(v8i16 (zext (v8i8 V64:$Rn))), (USHLLv8i8_shift V64:$Rn, (i32 0))>; +def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))), (USHLLv8i8_shift V64:$Rn, (i32 0))>; +def : Pat<(v4i32 (sext (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>; +def : Pat<(v4i32 (zext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>; +def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>; +def : Pat<(v2i64 (sext (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>; +def : Pat<(v2i64 (zext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>; +def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>; +// Also match an extend from the upper half of a 128 bit source register. 
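+// The v16i8/v8i16/v4i32 source variants used here are the sshll2/ushll2
+// forms, which read the high 64 bits of the source register directly.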
+def : Pat<(v8i16 (anyext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), + (USHLLv16i8_shift V128:$Rn, (i32 0))>; +def : Pat<(v8i16 (zext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), + (USHLLv16i8_shift V128:$Rn, (i32 0))>; +def : Pat<(v8i16 (sext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), + (SSHLLv16i8_shift V128:$Rn, (i32 0))>; +def : Pat<(v4i32 (anyext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), + (USHLLv8i16_shift V128:$Rn, (i32 0))>; +def : Pat<(v4i32 (zext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), + (USHLLv8i16_shift V128:$Rn, (i32 0))>; +def : Pat<(v4i32 (sext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), + (SSHLLv8i16_shift V128:$Rn, (i32 0))>; +def : Pat<(v2i64 (anyext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), + (USHLLv4i32_shift V128:$Rn, (i32 0))>; +def : Pat<(v2i64 (zext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), + (USHLLv4i32_shift V128:$Rn, (i32 0))>; +def : Pat<(v2i64 (sext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), + (SSHLLv4i32_shift V128:$Rn, (i32 0))>; + +// Vector shift sxtl aliases +def : InstAlias<"sxtl.8h $dst, $src1", + (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>; +def : InstAlias<"sxtl $dst.8h, $src1.8b", + (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>; +def : InstAlias<"sxtl.4s $dst, $src1", + (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>; +def : InstAlias<"sxtl $dst.4s, $src1.4h", + (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>; +def : InstAlias<"sxtl.2d $dst, $src1", + (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>; +def : InstAlias<"sxtl $dst.2d, $src1.2s", + (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>; + +// Vector shift sxtl2 aliases +def : InstAlias<"sxtl2.8h $dst, $src1", + (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>; +def : InstAlias<"sxtl2 $dst.8h, $src1.16b", + (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>; +def : InstAlias<"sxtl2.4s $dst, $src1", + (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>; +def : InstAlias<"sxtl2 $dst.4s, $src1.8h", + (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>; +def : InstAlias<"sxtl2.2d $dst, $src1", + (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>; +def : InstAlias<"sxtl2 $dst.2d, $src1.4s", + (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>; + +// Vector shift uxtl aliases +def : InstAlias<"uxtl.8h $dst, $src1", + (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>; +def : InstAlias<"uxtl $dst.8h, $src1.8b", + (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>; +def : InstAlias<"uxtl.4s $dst, $src1", + (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>; +def : InstAlias<"uxtl $dst.4s, $src1.4h", + (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>; +def : InstAlias<"uxtl.2d $dst, $src1", + (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>; +def : InstAlias<"uxtl $dst.2d, $src1.2s", + (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>; + +// Vector shift uxtl2 aliases +def : InstAlias<"uxtl2.8h $dst, $src1", + (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>; +def : InstAlias<"uxtl2 $dst.8h, $src1.16b", + (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>; +def : InstAlias<"uxtl2.4s $dst, $src1", + (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>; +def : InstAlias<"uxtl2 $dst.4s, $src1.8h", + (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>; +def : InstAlias<"uxtl2.2d $dst, $src1", + (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>; +def : InstAlias<"uxtl2 $dst.2d, $src1.4s", + (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>; + +// If an integer is about to be converted to a floating point value, +// just load it on the floating point unit. +// These patterns are more complex because floating point loads do not +// support sign extension. 
+// The sign extension has to be explicitly added and is only supported for +// one step: byte-to-half, half-to-word, word-to-doubleword. +// SCVTF GPR -> FPR is 9 cycles. +// SCVTF FPR -> FPR is 4 cyclces. +// (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles. +// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR +// and still being faster. +// However, this is not good for code size. +// 8-bits -> float. 2 sizes step-up. +class SExtLoadi8CVTf32Pat<dag addrmode, dag INST> + : Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))), + (SCVTFv1i32 (f32 (EXTRACT_SUBREG + (SSHLLv4i16_shift + (f64 + (EXTRACT_SUBREG + (SSHLLv8i8_shift + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + INST, + bsub), + 0), + dsub)), + 0), + ssub)))>, + Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>; + +def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext), + (LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>; +def : SExtLoadi8CVTf32Pat<(ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext), + (LDRBroX GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext)>; +def : SExtLoadi8CVTf32Pat<(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset), + (LDRBui GPR64sp:$Rn, uimm12s1:$offset)>; +def : SExtLoadi8CVTf32Pat<(am_unscaled8 GPR64sp:$Rn, simm9:$offset), + (LDURBi GPR64sp:$Rn, simm9:$offset)>; + +// 16-bits -> float. 1 size step-up. +class SExtLoadi16CVTf32Pat<dag addrmode, dag INST> + : Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))), + (SCVTFv1i32 (f32 (EXTRACT_SUBREG + (SSHLLv4i16_shift + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + INST, + hsub), + 0), + ssub)))>, Requires<[NotForCodeSize]>; + +def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext), + (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>; +def : SExtLoadi16CVTf32Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext), + (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>; +def : SExtLoadi16CVTf32Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset), + (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>; +def : SExtLoadi16CVTf32Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset), + (LDURHi GPR64sp:$Rn, simm9:$offset)>; + +// 32-bits to 32-bits are handled in target specific dag combine: +// performIntToFpCombine. +// 64-bits integer to 32-bits floating point, not possible with +// SCVTF on floating point registers (both source and destination +// must have the same size). + +// Here are the patterns for 8, 16, 32, and 64-bits to double. +// 8-bits -> double. 3 size step-up: give up. +// 16-bits -> double. 2 size step. +class SExtLoadi16CVTf64Pat<dag addrmode, dag INST> + : Pat <(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))), + (SCVTFv1i64 (f64 (EXTRACT_SUBREG + (SSHLLv2i32_shift + (f64 + (EXTRACT_SUBREG + (SSHLLv4i16_shift + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + INST, + hsub), + 0), + dsub)), + 0), + dsub)))>, + Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>; + +def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext), + (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>; +def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext), + (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>; +def : SExtLoadi16CVTf64Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset), + (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>; +def : SExtLoadi16CVTf64Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset), + (LDURHi GPR64sp:$Rn, simm9:$offset)>; +// 32-bits -> double. 1 size step-up. 
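+// Only one lengthening step is needed: SSHLL .2d, .2s sign-extends the loaded
+// word on the SIMD side before the FPR-to-FPR SCVTF.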
+class SExtLoadi32CVTf64Pat<dag addrmode, dag INST> + : Pat <(f64 (sint_to_fp (i32 (load addrmode)))), + (SCVTFv1i64 (f64 (EXTRACT_SUBREG + (SSHLLv2i32_shift + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + INST, + ssub), + 0), + dsub)))>, Requires<[NotForCodeSize]>; + +def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext), + (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>; +def : SExtLoadi32CVTf64Pat<(ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext), + (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext)>; +def : SExtLoadi32CVTf64Pat<(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset), + (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>; +def : SExtLoadi32CVTf64Pat<(am_unscaled32 GPR64sp:$Rn, simm9:$offset), + (LDURSi GPR64sp:$Rn, simm9:$offset)>; + +// 64-bits -> double are handled in target specific dag combine: +// performIntToFpCombine. + + +//---------------------------------------------------------------------------- +// AdvSIMD Load-Store Structure +//---------------------------------------------------------------------------- +defm LD1 : SIMDLd1Multiple<"ld1">; +defm LD2 : SIMDLd2Multiple<"ld2">; +defm LD3 : SIMDLd3Multiple<"ld3">; +defm LD4 : SIMDLd4Multiple<"ld4">; + +defm ST1 : SIMDSt1Multiple<"st1">; +defm ST2 : SIMDSt2Multiple<"st2">; +defm ST3 : SIMDSt3Multiple<"st3">; +defm ST4 : SIMDSt4Multiple<"st4">; + +class Ld1Pat<ValueType ty, Instruction INST> + : Pat<(ty (load GPR64sp:$Rn)), (INST GPR64sp:$Rn)>; + +def : Ld1Pat<v16i8, LD1Onev16b>; +def : Ld1Pat<v8i16, LD1Onev8h>; +def : Ld1Pat<v4i32, LD1Onev4s>; +def : Ld1Pat<v2i64, LD1Onev2d>; +def : Ld1Pat<v8i8, LD1Onev8b>; +def : Ld1Pat<v4i16, LD1Onev4h>; +def : Ld1Pat<v2i32, LD1Onev2s>; +def : Ld1Pat<v1i64, LD1Onev1d>; + +class St1Pat<ValueType ty, Instruction INST> + : Pat<(store ty:$Vt, GPR64sp:$Rn), + (INST ty:$Vt, GPR64sp:$Rn)>; + +def : St1Pat<v16i8, ST1Onev16b>; +def : St1Pat<v8i16, ST1Onev8h>; +def : St1Pat<v4i32, ST1Onev4s>; +def : St1Pat<v2i64, ST1Onev2d>; +def : St1Pat<v8i8, ST1Onev8b>; +def : St1Pat<v4i16, ST1Onev4h>; +def : St1Pat<v2i32, ST1Onev2s>; +def : St1Pat<v1i64, ST1Onev1d>; + +//--- +// Single-element +//--- + +defm LD1R : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>; +defm LD2R : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>; +defm LD3R : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>; +defm LD4R : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>; +let mayLoad = 1, hasSideEffects = 0 in { +defm LD1 : SIMDLdSingleBTied<0, 0b000, "ld1", VecListOneb, GPR64pi1>; +defm LD1 : SIMDLdSingleHTied<0, 0b010, 0, "ld1", VecListOneh, GPR64pi2>; +defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes, GPR64pi4>; +defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned, GPR64pi8>; +defm LD2 : SIMDLdSingleBTied<1, 0b000, "ld2", VecListTwob, GPR64pi2>; +defm LD2 : SIMDLdSingleHTied<1, 0b010, 0, "ld2", VecListTwoh, GPR64pi4>; +defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos, GPR64pi8>; +defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod, GPR64pi16>; +defm LD3 : SIMDLdSingleBTied<0, 0b001, "ld3", VecListThreeb, GPR64pi3>; +defm LD3 : SIMDLdSingleHTied<0, 0b011, 0, "ld3", VecListThreeh, GPR64pi6>; +defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>; +defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>; +defm LD4 : SIMDLdSingleBTied<1, 0b001, "ld4", VecListFourb, GPR64pi4>; +defm LD4 : SIMDLdSingleHTied<1, 0b011, 0, "ld4", VecListFourh, GPR64pi8>; +defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours, 
GPR64pi16>; +defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd, GPR64pi32>; +} + +def : Pat<(v8i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))), + (LD1Rv8b GPR64sp:$Rn)>; +def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))), + (LD1Rv16b GPR64sp:$Rn)>; +def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))), + (LD1Rv4h GPR64sp:$Rn)>; +def : Pat<(v8i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))), + (LD1Rv8h GPR64sp:$Rn)>; +def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), + (LD1Rv2s GPR64sp:$Rn)>; +def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), + (LD1Rv4s GPR64sp:$Rn)>; +def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), + (LD1Rv2d GPR64sp:$Rn)>; +def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), + (LD1Rv1d GPR64sp:$Rn)>; +// Grab the floating point version too +def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))), + (LD1Rv2s GPR64sp:$Rn)>; +def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))), + (LD1Rv4s GPR64sp:$Rn)>; +def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))), + (LD1Rv2d GPR64sp:$Rn)>; +def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))), + (LD1Rv1d GPR64sp:$Rn)>; +def : Pat<(v4f16 (AArch64dup (f16 (load GPR64sp:$Rn)))), + (LD1Rv4h GPR64sp:$Rn)>; +def : Pat<(v8f16 (AArch64dup (f16 (load GPR64sp:$Rn)))), + (LD1Rv8h GPR64sp:$Rn)>; + +class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex, + ValueType VTy, ValueType STy, Instruction LD1> + : Pat<(vector_insert (VTy VecListOne128:$Rd), + (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), + (LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>; + +def : Ld1Lane128Pat<extloadi8, VectorIndexB, v16i8, i32, LD1i8>; +def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>; +def : Ld1Lane128Pat<load, VectorIndexS, v4i32, i32, LD1i32>; +def : Ld1Lane128Pat<load, VectorIndexS, v4f32, f32, LD1i32>; +def : Ld1Lane128Pat<load, VectorIndexD, v2i64, i64, LD1i64>; +def : Ld1Lane128Pat<load, VectorIndexD, v2f64, f64, LD1i64>; +def : Ld1Lane128Pat<load, VectorIndexH, v8f16, f16, LD1i16>; + +class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex, + ValueType VTy, ValueType STy, Instruction LD1> + : Pat<(vector_insert (VTy VecListOne64:$Rd), + (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), + (EXTRACT_SUBREG + (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub), + VecIndex:$idx, GPR64sp:$Rn), + dsub)>; + +def : Ld1Lane64Pat<extloadi8, VectorIndexB, v8i8, i32, LD1i8>; +def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>; +def : Ld1Lane64Pat<load, VectorIndexS, v2i32, i32, LD1i32>; +def : Ld1Lane64Pat<load, VectorIndexS, v2f32, f32, LD1i32>; +def : Ld1Lane64Pat<load, VectorIndexH, v4f16, f16, LD1i16>; + + +defm LD1 : SIMDLdSt1SingleAliases<"ld1">; +defm LD2 : SIMDLdSt2SingleAliases<"ld2">; +defm LD3 : SIMDLdSt3SingleAliases<"ld3">; +defm LD4 : SIMDLdSt4SingleAliases<"ld4">; + +// Stores +defm ST1 : SIMDStSingleB<0, 0b000, "st1", VecListOneb, GPR64pi1>; +defm ST1 : SIMDStSingleH<0, 0b010, 0, "st1", VecListOneh, GPR64pi2>; +defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>; +defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>; + +let AddedComplexity = 19 in +class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex, + ValueType VTy, ValueType STy, Instruction ST1> + : Pat<(scalar_store + (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), + GPR64sp:$Rn), + (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn)>; + +def : St1Lane128Pat<truncstorei8, VectorIndexB, v16i8, i32, 
ST1i8>; +def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16, i32, ST1i16>; +def : St1Lane128Pat<store, VectorIndexS, v4i32, i32, ST1i32>; +def : St1Lane128Pat<store, VectorIndexS, v4f32, f32, ST1i32>; +def : St1Lane128Pat<store, VectorIndexD, v2i64, i64, ST1i64>; +def : St1Lane128Pat<store, VectorIndexD, v2f64, f64, ST1i64>; +def : St1Lane128Pat<store, VectorIndexH, v8f16, f16, ST1i16>; + +let AddedComplexity = 19 in +class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex, + ValueType VTy, ValueType STy, Instruction ST1> + : Pat<(scalar_store + (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), + GPR64sp:$Rn), + (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), + VecIndex:$idx, GPR64sp:$Rn)>; + +def : St1Lane64Pat<truncstorei8, VectorIndexB, v8i8, i32, ST1i8>; +def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>; +def : St1Lane64Pat<store, VectorIndexS, v2i32, i32, ST1i32>; +def : St1Lane64Pat<store, VectorIndexS, v2f32, f32, ST1i32>; +def : St1Lane64Pat<store, VectorIndexH, v4f16, f16, ST1i16>; + +multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex, + ValueType VTy, ValueType STy, Instruction ST1, + int offset> { + def : Pat<(scalar_store + (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), + GPR64sp:$Rn, offset), + (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), + VecIndex:$idx, GPR64sp:$Rn, XZR)>; + + def : Pat<(scalar_store + (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), + GPR64sp:$Rn, GPR64:$Rm), + (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), + VecIndex:$idx, GPR64sp:$Rn, $Rm)>; +} + +defm : St1LanePost64Pat<post_truncsti8, VectorIndexB, v8i8, i32, ST1i8_POST, 1>; +defm : St1LanePost64Pat<post_truncsti16, VectorIndexH, v4i16, i32, ST1i16_POST, + 2>; +defm : St1LanePost64Pat<post_store, VectorIndexS, v2i32, i32, ST1i32_POST, 4>; +defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>; +defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>; +defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>; +defm : St1LanePost64Pat<post_store, VectorIndexH, v4f16, f16, ST1i16_POST, 2>; + +multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex, + ValueType VTy, ValueType STy, Instruction ST1, + int offset> { + def : Pat<(scalar_store + (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), + GPR64sp:$Rn, offset), + (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, XZR)>; + + def : Pat<(scalar_store + (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), + GPR64sp:$Rn, GPR64:$Rm), + (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, $Rm)>; +} + +defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i32, ST1i8_POST, + 1>; +defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i32, ST1i16_POST, + 2>; +defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>; +defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>; +defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>; +defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>; +defm : St1LanePost128Pat<post_store, VectorIndexH, v8f16, f16, ST1i16_POST, 2>; + +let mayStore = 1, hasSideEffects = 0 in { +defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, GPR64pi2>; +defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, GPR64pi4>; +defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos, GPR64pi8>; +defm ST2 : 
SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod, GPR64pi16>; +defm ST3 : SIMDStSingleB<0, 0b001, "st3", VecListThreeb, GPR64pi3>; +defm ST3 : SIMDStSingleH<0, 0b011, 0, "st3", VecListThreeh, GPR64pi6>; +defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>; +defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>; +defm ST4 : SIMDStSingleB<1, 0b001, "st4", VecListFourb, GPR64pi4>; +defm ST4 : SIMDStSingleH<1, 0b011, 0, "st4", VecListFourh, GPR64pi8>; +defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours, GPR64pi16>; +defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd, GPR64pi32>; +} + +defm ST1 : SIMDLdSt1SingleAliases<"st1">; +defm ST2 : SIMDLdSt2SingleAliases<"st2">; +defm ST3 : SIMDLdSt3SingleAliases<"st3">; +defm ST4 : SIMDLdSt4SingleAliases<"st4">; + +//---------------------------------------------------------------------------- +// Crypto extensions +//---------------------------------------------------------------------------- + +let Predicates = [HasAES] in { +def AESErr : AESTiedInst<0b0100, "aese", int_aarch64_crypto_aese>; +def AESDrr : AESTiedInst<0b0101, "aesd", int_aarch64_crypto_aesd>; +def AESMCrr : AESInst< 0b0110, "aesmc", int_aarch64_crypto_aesmc>; +def AESIMCrr : AESInst< 0b0111, "aesimc", int_aarch64_crypto_aesimc>; +} + +// Pseudo instructions for AESMCrr/AESIMCrr with a register constraint required +// for AES fusion on some CPUs. +let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in { +def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">, + Sched<[WriteV]>; +def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">, + Sched<[WriteV]>; +} + +// Only use constrained versions of AES(I)MC instructions if they are paired with +// AESE/AESD. +def : Pat<(v16i8 (int_aarch64_crypto_aesmc + (v16i8 (int_aarch64_crypto_aese (v16i8 V128:$src1), + (v16i8 V128:$src2))))), + (v16i8 (AESMCrrTied (v16i8 (AESErr (v16i8 V128:$src1), + (v16i8 V128:$src2)))))>, + Requires<[HasFuseAES]>; + +def : Pat<(v16i8 (int_aarch64_crypto_aesimc + (v16i8 (int_aarch64_crypto_aesd (v16i8 V128:$src1), + (v16i8 V128:$src2))))), + (v16i8 (AESIMCrrTied (v16i8 (AESDrr (v16i8 V128:$src1), + (v16i8 V128:$src2)))))>, + Requires<[HasFuseAES]>; + +let Predicates = [HasSHA2] in { +def SHA1Crrr : SHATiedInstQSV<0b000, "sha1c", int_aarch64_crypto_sha1c>; +def SHA1Prrr : SHATiedInstQSV<0b001, "sha1p", int_aarch64_crypto_sha1p>; +def SHA1Mrrr : SHATiedInstQSV<0b010, "sha1m", int_aarch64_crypto_sha1m>; +def SHA1SU0rrr : SHATiedInstVVV<0b011, "sha1su0", int_aarch64_crypto_sha1su0>; +def SHA256Hrrr : SHATiedInstQQV<0b100, "sha256h", int_aarch64_crypto_sha256h>; +def SHA256H2rrr : SHATiedInstQQV<0b101, "sha256h2",int_aarch64_crypto_sha256h2>; +def SHA256SU1rrr :SHATiedInstVVV<0b110, "sha256su1",int_aarch64_crypto_sha256su1>; + +def SHA1Hrr : SHAInstSS< 0b0000, "sha1h", int_aarch64_crypto_sha1h>; +def SHA1SU1rr : SHATiedInstVV<0b0001, "sha1su1", int_aarch64_crypto_sha1su1>; +def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_aarch64_crypto_sha256su0>; +} + +//---------------------------------------------------------------------------- +// Compiler-pseudos +//---------------------------------------------------------------------------- +// FIXME: Like for X86, these should go in their own separate .td file. + +def def32 : PatLeaf<(i32 GPR32:$src), [{ + return isDef32(*N); +}]>; + +// In the case of a 32-bit def that is known to implicitly zero-extend, +// we can use a SUBREG_TO_REG. 
+def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>; + +// For an anyext, we don't care what the high bits are, so we can perform an +// INSERT_SUBREF into an IMPLICIT_DEF. +def : Pat<(i64 (anyext GPR32:$src)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>; + +// When we need to explicitly zero-extend, we use a 32-bit MOV instruction and +// then assert the extension has happened. +def : Pat<(i64 (zext GPR32:$src)), + (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>; + +// To sign extend, we use a signed bitfield move instruction (SBFM) on the +// containing super-reg. +def : Pat<(i64 (sext GPR32:$src)), + (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>; +def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>; +def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>; +def : Pat<(i64 (sext_inreg GPR64:$src, i8)), (SBFMXri GPR64:$src, 0, 7)>; +def : Pat<(i64 (sext_inreg GPR64:$src, i1)), (SBFMXri GPR64:$src, 0, 0)>; +def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>; +def : Pat<(i32 (sext_inreg GPR32:$src, i8)), (SBFMWri GPR32:$src, 0, 7)>; +def : Pat<(i32 (sext_inreg GPR32:$src, i1)), (SBFMWri GPR32:$src, 0, 0)>; + +def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)), + (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), + (i64 (i32shift_sext_i8 imm0_31:$imm)))>; +def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)), + (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), + (i64 (i64shift_sext_i8 imm0_63:$imm)))>; + +def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)), + (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), + (i64 (i32shift_sext_i16 imm0_31:$imm)))>; +def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)), + (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), + (i64 (i64shift_sext_i16 imm0_63:$imm)))>; + +def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)), + (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), + (i64 (i64shift_a imm0_63:$imm)), + (i64 (i64shift_sext_i32 imm0_63:$imm)))>; + +// sra patterns have an AddedComplexity of 10, so make sure we have a higher +// AddedComplexity for the following patterns since we want to match sext + sra +// patterns before we attempt to match a single sra node. +let AddedComplexity = 20 in { +// We support all sext + sra combinations which preserve at least one bit of the +// original value which is to be sign extended. E.g. we support shifts up to +// bitwidth-1 bits. +def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)), + (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>; +def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)), + (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>; + +def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)), + (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>; +def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)), + (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>; + +def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)), + (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), + (i64 imm0_31:$imm), 31)>; +} // AddedComplexity = 20 + +// To truncate, we can simply extract from a subregister. +def : Pat<(i32 (trunc GPR64sp:$src)), + (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>; + +// __builtin_trap() uses the BRK instruction on AArch64. +def : Pat<(trap), (BRK 1)>; + +// Conversions within AdvSIMD types in the same register size are free. 
+// But because we need a consistent lane ordering, in big endian many +// conversions require one or more REV instructions. +// +// Consider a simple memory load followed by a bitconvert then a store. +// v0 = load v2i32 +// v1 = BITCAST v2i32 v0 to v4i16 +// store v4i16 v2 +// +// In big endian mode every memory access has an implicit byte swap. LDR and +// STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that +// is, they treat the vector as a sequence of elements to be byte-swapped. +// The two pairs of instructions are fundamentally incompatible. We've decided +// to use LD1/ST1 only to simplify compiler implementation. +// +// LD1/ST1 perform the equivalent of a sequence of LDR/STR + REV. This makes +// the original code sequence: +// v0 = load v2i32 +// v1 = REV v2i32 (implicit) +// v2 = BITCAST v2i32 v1 to v4i16 +// v3 = REV v4i16 v2 (implicit) +// store v4i16 v3 +// +// But this is now broken - the value stored is different to the value loaded +// due to lane reordering. To fix this, on every BITCAST we must perform two +// other REVs: +// v0 = load v2i32 +// v1 = REV v2i32 (implicit) +// v2 = REV v2i32 +// v3 = BITCAST v2i32 v2 to v4i16 +// v4 = REV v4i16 +// v5 = REV v4i16 v4 (implicit) +// store v4i16 v5 +// +// This means an extra two instructions, but actually in most cases the two REV +// instructions can be combined into one. For example: +// (REV64_2s (REV64_4h X)) === (REV32_4h X) +// +// There is also no 128-bit REV instruction. This must be synthesized with an +// EXT instruction. +// +// Most bitconverts require some sort of conversion. The only exceptions are: +// a) Identity conversions - vNfX <-> vNiX +// b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX +// + +// Natural vector casts (64 bit) +def : Pat<(v8i8 (AArch64NvCast (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v4i16 (AArch64NvCast (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4f16 (AArch64NvCast (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>; +def : Pat<(v2i32 (AArch64NvCast (v2i32 FPR64:$src))), (v2i32 FPR64:$src)>; +def : Pat<(v2f32 (AArch64NvCast (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>; +def : Pat<(v1i64 (AArch64NvCast (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>; + +def : Pat<(v8i8 (AArch64NvCast (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v4i16 (AArch64NvCast (v4i16 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4f16 (AArch64NvCast (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>; +def : Pat<(v2i32 (AArch64NvCast (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>; +def : Pat<(v1i64 (AArch64NvCast (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>; + +def : Pat<(v8i8 (AArch64NvCast (v8i8 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v4i16 (AArch64NvCast (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4f16 (AArch64NvCast (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>; +def : Pat<(v2i32 (AArch64NvCast (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>; +def : Pat<(v2f32 (AArch64NvCast (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>; +def : Pat<(v1i64 (AArch64NvCast (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>; + +def : Pat<(v8i8 (AArch64NvCast (f64 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v4i16 (AArch64NvCast (f64 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4f16 (AArch64NvCast (f64 FPR64:$src))), (v4f16 FPR64:$src)>; +def : Pat<(v2i32 (AArch64NvCast (f64 FPR64:$src))), (v2i32 FPR64:$src)>; +def : Pat<(v2f32 (AArch64NvCast (f64 FPR64:$src))), (v2f32 FPR64:$src)>; +def : Pat<(v1i64 (AArch64NvCast (f64 FPR64:$src))), (v1i64 FPR64:$src)>; +def : Pat<(v1f64 (AArch64NvCast (f64 FPR64:$src))), 
(v1f64 FPR64:$src)>; + +def : Pat<(v8i8 (AArch64NvCast (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v4i16 (AArch64NvCast (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v2i32 (AArch64NvCast (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>; +def : Pat<(v2f32 (AArch64NvCast (v2f32 FPR64:$src))), (v2f32 FPR64:$src)>; +def : Pat<(v1i64 (AArch64NvCast (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>; + +// Natural vector casts (128 bit) +def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v8i16 (AArch64NvCast (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8f16 (AArch64NvCast (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v2f64 (AArch64NvCast (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>; + +def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8f16 (AArch64NvCast (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v4f32 (AArch64NvCast (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v2f64 (AArch64NvCast (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>; + +def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8f16 (AArch64NvCast (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v4f32 (AArch64NvCast (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v2f64 (AArch64NvCast (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>; + +def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8f16 (AArch64NvCast (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v4i32 (AArch64NvCast (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v2i64 (AArch64NvCast (v2i64 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v4f32 (AArch64NvCast (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v2f64 (AArch64NvCast (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>; + +def : Pat<(v16i8 (AArch64NvCast (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v8i16 (AArch64NvCast (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v4i32 (AArch64NvCast (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v4f32 (AArch64NvCast (v4f32 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v2i64 (AArch64NvCast (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v8f16 (AArch64NvCast (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v2f64 (AArch64NvCast (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>; + +def : Pat<(v16i8 (AArch64NvCast (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v8i16 (AArch64NvCast (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v4i32 (AArch64NvCast (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v2i64 (AArch64NvCast (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v2f64 (AArch64NvCast (v2f64 FPR128:$src))), (v2f64 
FPR128:$src)>; +def : Pat<(v8f16 (AArch64NvCast (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v4f32 (AArch64NvCast (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; +def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; +def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; +def : Pat<(v4f16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; +def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; + +def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))), + (COPY_TO_REGCLASS V64:$Vn, GPR64)>; +def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))), + (COPY_TO_REGCLASS V64:$Vn, GPR64)>; +def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))), + (COPY_TO_REGCLASS V64:$Vn, GPR64)>; +def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))), + (COPY_TO_REGCLASS V64:$Vn, GPR64)>; +def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))), + (COPY_TO_REGCLASS V64:$Vn, GPR64)>; +def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))), + (COPY_TO_REGCLASS V64:$Vn, GPR64)>; +} +let Predicates = [IsBE] in { +def : Pat<(v8i8 (bitconvert GPR64:$Xn)), + (REV64v8i8 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; +def : Pat<(v4i16 (bitconvert GPR64:$Xn)), + (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; +def : Pat<(v2i32 (bitconvert GPR64:$Xn)), + (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; +def : Pat<(v4f16 (bitconvert GPR64:$Xn)), + (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; +def : Pat<(v2f32 (bitconvert GPR64:$Xn)), + (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; + +def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))), + (REV64v8i8 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; +def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))), + (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; +def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))), + (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; +def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))), + (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; +def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))), + (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; +} +def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; +def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; +def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))), + (COPY_TO_REGCLASS V64:$Vn, GPR64)>; +def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)), + (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; +def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)), + (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; +def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>; + +def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))), + (COPY_TO_REGCLASS GPR32:$Xn, FPR32)>; +def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))), + (COPY_TO_REGCLASS FPR32:$Xn, GPR32)>; +def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))), + (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; +def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))), + (COPY_TO_REGCLASS FPR64:$Xn, GPR64)>; +def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))), + (COPY_TO_REGCLASS V64:$Vn, GPR64)>; + +let Predicates = [IsLE] in { +def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>; +def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>; +def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>; +def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), (v1i64 FPR64:$src)>; +def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), + (v1i64 
(REV64v2i32 FPR64:$src))>; +def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), + (v1i64 (REV64v4i16 FPR64:$src))>; +def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), + (v1i64 (REV64v8i8 FPR64:$src))>; +def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), + (v1i64 (REV64v4i16 FPR64:$src))>; +def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), + (v1i64 (REV64v2i32 FPR64:$src))>; +} +def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>; +def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>; +def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>; +def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>; +def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>; +def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>; +def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), (v2i32 FPR64:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), + (v2i32 (REV64v2i32 FPR64:$src))>; +def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), + (v2i32 (REV32v4i16 FPR64:$src))>; +def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), + (v2i32 (REV32v8i8 FPR64:$src))>; +def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), + (v2i32 (REV64v2i32 FPR64:$src))>; +def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), + (v2i32 (REV64v2i32 FPR64:$src))>; +def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), + (v2i32 (REV32v4i16 FPR64:$src))>; +} +def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), + (v4i16 (REV64v4i16 FPR64:$src))>; +def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), + (v4i16 (REV32v4i16 FPR64:$src))>; +def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), + (v4i16 (REV16v8i8 FPR64:$src))>; +def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), + (v4i16 (REV64v4i16 FPR64:$src))>; +def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), + (v4i16 (REV32v4i16 FPR64:$src))>; +def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), + (v4i16 (REV64v4i16 FPR64:$src))>; +} +def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 FPR64:$src)>; +def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>; +def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>; +def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), (v4f16 FPR64:$src)>; +def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), (v4f16 FPR64:$src)>; +def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 FPR64:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), + (v4f16 (REV64v4i16 FPR64:$src))>; +def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), + (v4f16 (REV32v4i16 FPR64:$src))>; +def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))), + (v4f16 (REV16v8i8 FPR64:$src))>; +def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), + (v4f16 
(REV64v4i16 FPR64:$src))>; +def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), + (v4f16 (REV32v4i16 FPR64:$src))>; +def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), + (v4f16 (REV64v4i16 FPR64:$src))>; +} +def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))), (v8i8 FPR64:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), + (v8i8 (REV64v8i8 FPR64:$src))>; +def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), + (v8i8 (REV32v8i8 FPR64:$src))>; +def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), + (v8i8 (REV16v8i8 FPR64:$src))>; +def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), + (v8i8 (REV64v8i8 FPR64:$src))>; +def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), + (v8i8 (REV32v8i8 FPR64:$src))>; +def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), + (v8i8 (REV64v8i8 FPR64:$src))>; +def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))), + (v8i8 (REV16v8i8 FPR64:$src))>; +} + +let Predicates = [IsLE] in { +def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), (f64 FPR64:$src)>; +def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), (f64 FPR64:$src)>; +def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), (f64 FPR64:$src)>; +def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), (f64 FPR64:$src)>; +def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))), (f64 FPR64:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), + (f64 (REV64v2i32 FPR64:$src))>; +def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), + (f64 (REV64v4i16 FPR64:$src))>; +def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), + (f64 (REV64v2i32 FPR64:$src))>; +def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), + (f64 (REV64v8i8 FPR64:$src))>; +def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))), + (f64 (REV64v4i16 FPR64:$src))>; +} +def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>; +def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>; +def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>; +def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), (v1f64 FPR64:$src)>; +def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>; +def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), (v1f64 FPR64:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), + (v1f64 (REV64v2i32 FPR64:$src))>; +def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), + (v1f64 (REV64v4i16 FPR64:$src))>; +def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), + (v1f64 (REV64v8i8 FPR64:$src))>; +def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), + (v1f64 (REV64v2i32 FPR64:$src))>; +def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), + (v1f64 (REV64v4i16 FPR64:$src))>; +} +def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>; +def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>; +def : Pat<(v2f32 (bitconvert 
(v4i16 FPR64:$src))), (v2f32 FPR64:$src)>; +def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>; +def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>; +def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>; +def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), (v2f32 FPR64:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), + (v2f32 (REV64v2i32 FPR64:$src))>; +def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), + (v2f32 (REV32v4i16 FPR64:$src))>; +def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), + (v2f32 (REV32v8i8 FPR64:$src))>; +def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), + (v2f32 (REV64v2i32 FPR64:$src))>; +def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), + (v2f32 (REV64v2i32 FPR64:$src))>; +def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), + (v2f32 (REV32v4i16 FPR64:$src))>; +} +def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>; +def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>; +def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>; +def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>; +def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>; +def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), (f128 FPR128:$src)>; +def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), + (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>; +def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), + (f128 (EXTv16i8 (REV64v4i32 FPR128:$src), + (REV64v4i32 FPR128:$src), (i32 8)))>; +def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), + (f128 (EXTv16i8 (REV64v8i16 FPR128:$src), + (REV64v8i16 FPR128:$src), (i32 8)))>; +def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), + (f128 (EXTv16i8 (REV64v8i16 FPR128:$src), + (REV64v8i16 FPR128:$src), (i32 8)))>; +def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), + (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>; +def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), + (f128 (EXTv16i8 (REV64v4i32 FPR128:$src), + (REV64v4i32 FPR128:$src), (i32 8)))>; +def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), + (f128 (EXTv16i8 (REV64v16i8 FPR128:$src), + (REV64v16i8 FPR128:$src), (i32 8)))>; +} + +let Predicates = [IsLE] in { +def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>; +def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>; +def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>; +def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 FPR128:$src)>; +def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>; +def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), + (v2f64 (EXTv16i8 FPR128:$src, + FPR128:$src, (i32 8)))>; +def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), + (v2f64 (REV64v4i32 FPR128:$src))>; +def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), + (v2f64 (REV64v8i16 FPR128:$src))>; +def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), + (v2f64 (REV64v8i16 FPR128:$src))>; +def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), + (v2f64 (REV64v16i8 FPR128:$src))>; +def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), + (v2f64 (REV64v4i32 FPR128:$src))>; +} +def : Pat<(v2f64 (bitconvert (v2i64 
FPR128:$src))), (v2f64 FPR128:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), + (v4f32 (EXTv16i8 (REV64v4i32 FPR128:$src), + (REV64v4i32 FPR128:$src), (i32 8)))>; +def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), + (v4f32 (REV32v8i16 FPR128:$src))>; +def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), + (v4f32 (REV32v8i16 FPR128:$src))>; +def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), + (v4f32 (REV32v16i8 FPR128:$src))>; +def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), + (v4f32 (REV64v4i32 FPR128:$src))>; +def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), + (v4f32 (REV64v4i32 FPR128:$src))>; +} +def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), (v2i64 FPR128:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), + (v2i64 (EXTv16i8 FPR128:$src, + FPR128:$src, (i32 8)))>; +def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), + (v2i64 (REV64v4i32 FPR128:$src))>; +def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), + (v2i64 (REV64v8i16 FPR128:$src))>; +def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), + (v2i64 (REV64v16i8 FPR128:$src))>; +def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), + (v2i64 (REV64v4i32 FPR128:$src))>; +def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), + (v2i64 (REV64v8i16 FPR128:$src))>; +} +def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), (v4i32 FPR128:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), + (v4i32 (EXTv16i8 (REV64v4i32 FPR128:$src), + (REV64v4i32 FPR128:$src), + (i32 8)))>; +def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), + (v4i32 (REV64v4i32 FPR128:$src))>; +def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), + (v4i32 (REV32v8i16 FPR128:$src))>; +def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), + (v4i32 (REV32v16i8 FPR128:$src))>; +def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), + (v4i32 (REV64v4i32 FPR128:$src))>; +def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), + (v4i32 (REV32v8i16 FPR128:$src))>; +} +def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>; + 
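(Editorial aside, not part of the upstream .td source being diffed in: a minimal byte-level sketch of the big-endian rule that the IsBE bitconvert patterns in this block encode. It assumes the usual lane model, where lane k of an n-bit element occupies bits [n*k, n*k+n) of the register value; the helper names below are hypothetical and are not LLVM APIs.)

# On big endian, LD1 byte-swaps each element as it loads it, so a plain
# in-register bitconvert between element sizes scrambles lane contents.
# The check below shows that following a v4i32 -> v8i16 reinterpretation
# with REV32 on 16-bit lanes (REV32v8i16 in the patterns here) recovers
# exactly what LD1 {v.8h} would have produced from the same memory.

def ld1_be(mem: bytes, elt_size: int) -> bytes:
    """Model LD1 on big-endian AArch64: each element is loaded big-endian,
    and lane k then occupies bytes [k*elt_size, (k+1)*elt_size) of the raw
    register value; the net effect is a per-element byte reversal."""
    out = bytearray()
    for off in range(0, len(mem), elt_size):
        out += mem[off:off + elt_size][::-1]
    return bytes(out)

def rev32_h(reg: bytes) -> bytes:
    """Model REV32 with 16-bit lanes: swap the two halfwords inside every
    32-bit container of the raw register value."""
    out = bytearray()
    for off in range(0, len(reg), 4):
        out += reg[off + 2:off + 4] + reg[off:off + 2]
    return bytes(out)

mem = bytes(range(16))            # an arbitrary 128-bit chunk of memory
after_ld1_4s = ld1_be(mem, 4)     # register contents after LD1 {v.4s}
after_ld1_8h = ld1_be(mem, 2)     # register contents after LD1 {v.8h}
# A bitconvert keeps the raw register bytes; the REV32 makes the two views agree.
assert rev32_h(after_ld1_4s) == after_ld1_8h

The same style of argument yields each REV choice in this block, e.g. REV32v16i8 between the 32-bit and 8-bit views and REV64v8i16 between the 64-bit and 16-bit views.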
+let Predicates = [IsLE] in { +def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), + (v8i16 (EXTv16i8 (REV64v8i16 FPR128:$src), + (REV64v8i16 FPR128:$src), + (i32 8)))>; +def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), + (v8i16 (REV64v8i16 FPR128:$src))>; +def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), + (v8i16 (REV32v8i16 FPR128:$src))>; +def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), + (v8i16 (REV16v16i8 FPR128:$src))>; +def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), + (v8i16 (REV64v8i16 FPR128:$src))>; +def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), + (v8i16 (REV32v8i16 FPR128:$src))>; +} +def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))), + (v8f16 (EXTv16i8 (REV64v8i16 FPR128:$src), + (REV64v8i16 FPR128:$src), + (i32 8)))>; +def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), + (v8f16 (REV64v8i16 FPR128:$src))>; +def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), + (v8f16 (REV32v8i16 FPR128:$src))>; +def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), + (v8f16 (REV16v16i8 FPR128:$src))>; +def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), + (v8f16 (REV64v8i16 FPR128:$src))>; +def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), + (v8f16 (REV32v8i16 FPR128:$src))>; +} +def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 FPR128:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), + (v16i8 (EXTv16i8 (REV64v16i8 FPR128:$src), + (REV64v16i8 FPR128:$src), + (i32 8)))>; +def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), + (v16i8 (REV64v16i8 FPR128:$src))>; +def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), + (v16i8 (REV32v16i8 FPR128:$src))>; +def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), + (v16i8 (REV16v16i8 FPR128:$src))>; +def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), + (v16i8 (REV64v16i8 FPR128:$src))>; +def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), + (v16i8 (REV32v16i8 FPR128:$src))>; +def : 
Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), + (v16i8 (REV16v16i8 FPR128:$src))>; +} + +def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 0))), + (EXTRACT_SUBREG V128:$Rn, dsub)>; +def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 0))), + (EXTRACT_SUBREG V128:$Rn, dsub)>; +def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 0))), + (EXTRACT_SUBREG V128:$Rn, dsub)>; +def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 0))), + (EXTRACT_SUBREG V128:$Rn, dsub)>; +def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 0))), + (EXTRACT_SUBREG V128:$Rn, dsub)>; +def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 0))), + (EXTRACT_SUBREG V128:$Rn, dsub)>; +def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 0))), + (EXTRACT_SUBREG V128:$Rn, dsub)>; + +def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))), + (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; +def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))), + (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; +def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))), + (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; +def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))), + (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; + +// A 64-bit subvector insert to the first 128-bit vector position +// is a subregister copy that needs no instruction. +multiclass InsertSubvectorUndef<ValueType Ty> { + def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (Ty 0)), + (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; + def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (Ty 0)), + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; + def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (Ty 0)), + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>; + def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (Ty 0)), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>; + def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (Ty 0)), + (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>; + def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (Ty 0)), + (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>; + def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (Ty 0)), + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>; +} + +defm : InsertSubvectorUndef<i32>; +defm : InsertSubvectorUndef<i64>; + +// Use pair-wise add instructions when summing up the lanes for v2f64, v2i64 +// or v2f32. +def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)), + (vector_extract (v2i64 FPR128:$Rn), (i64 1)))), + (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>; +def : Pat<(f64 (fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)), + (vector_extract (v2f64 FPR128:$Rn), (i64 1)))), + (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>; + // vector_extract on 64-bit vectors gets promoted to a 128 bit vector, + // so we match on v4f32 here, not v2f32. This will also catch adding + // the low two lanes of a true v4f32 vector. +def : Pat<(fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)), + (vector_extract (v4f32 FPR128:$Rn), (i64 1))), + (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>; + +// Scalar 64-bit shifts in FPR64 registers. 
+def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), + (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; +def : Pat<(i64 (int_aarch64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), + (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>; +def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), + (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; +def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), + (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; + +// Patterns for nontemporal/no-allocate stores. +// We have to resort to tricks to turn a single-input store into a store pair, +// because there is no single-input nontemporal store, only STNP. +let Predicates = [IsLE] in { +let AddedComplexity = 15 in { +class NTStore128Pat<ValueType VT> : + Pat<(nontemporalstore (VT FPR128:$Rt), + (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)), + (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub), + (CPYi64 FPR128:$Rt, (i64 1)), + GPR64sp:$Rn, simm7s8:$offset)>; + +def : NTStore128Pat<v2i64>; +def : NTStore128Pat<v4i32>; +def : NTStore128Pat<v8i16>; +def : NTStore128Pat<v16i8>; + +class NTStore64Pat<ValueType VT> : + Pat<(nontemporalstore (VT FPR64:$Rt), + (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)), + (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub), + (CPYi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)), + GPR64sp:$Rn, simm7s4:$offset)>; + +// FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64? +def : NTStore64Pat<v1f64>; +def : NTStore64Pat<v1i64>; +def : NTStore64Pat<v2i32>; +def : NTStore64Pat<v4i16>; +def : NTStore64Pat<v8i8>; + +def : Pat<(nontemporalstore GPR64:$Rt, + (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)), + (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), + (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 32, 63), sub_32), + GPR64sp:$Rn, simm7s4:$offset)>; +} // AddedComplexity=10 +} // Predicates = [IsLE] + +// Tail call return handling. These are all compiler pseudo-instructions, +// so no encoding information or anything like that. +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in { + def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff), []>, + Sched<[WriteBrReg]>; + def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>, + Sched<[WriteBrReg]>; +} + +def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)), + (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>; +def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)), + (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>; +def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)), + (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>; + +include "AArch64InstrAtomics.td" +include "AArch64SVEInstrInfo.td" diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64RegisterBanks.td b/capstone/suite/synctools/tablegen/AArch64/AArch64RegisterBanks.td new file mode 100644 index 000000000..eee584708 --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64RegisterBanks.td @@ -0,0 +1,20 @@ +//=- AArch64RegisterBank.td - Describe the AArch64 Banks -----*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +/// General Purpose Registers: W, X. +def GPRRegBank : RegisterBank<"GPR", [GPR64all]>; + +/// Floating Point/Vector Registers: B, H, S, D, Q. 
+def FPRRegBank : RegisterBank<"FPR", [QQQQ]>; + +/// Conditional register: NZCV. +def CCRegBank : RegisterBank<"CC", [CCR]>; diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64RegisterInfo.td b/capstone/suite/synctools/tablegen/AArch64/AArch64RegisterInfo.td new file mode 100644 index 000000000..bbf401b47 --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64RegisterInfo.td @@ -0,0 +1,1113 @@ +//=- AArch64RegisterInfo.td - Describe the AArch64 Registers -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + + +class AArch64Reg<bits<16> enc, string n, list<Register> subregs = [], + list<string> altNames = []> + : Register<n, altNames> { + let HWEncoding = enc; + let Namespace = "AArch64"; + let SubRegs = subregs; +} + +let Namespace = "AArch64" in { + def sub_32 : SubRegIndex<32>; + + def bsub : SubRegIndex<8>; + def hsub : SubRegIndex<16>; + def ssub : SubRegIndex<32>; + def dsub : SubRegIndex<32>; + def sube32 : SubRegIndex<32>; + def subo32 : SubRegIndex<32>; + def qhisub : SubRegIndex<64>; + def qsub : SubRegIndex<64>; + def sube64 : SubRegIndex<64>; + def subo64 : SubRegIndex<64>; + // SVE + def zsub : SubRegIndex<128>; + // Note: zsub_hi should never be used directly because it represents + // the scalable part of the SVE vector and cannot be manipulated as a + // subvector in the same way the lower 128bits can. + def zsub_hi : SubRegIndex<128>; + // Note: Code depends on these having consecutive numbers + def dsub0 : SubRegIndex<64>; + def dsub1 : SubRegIndex<64>; + def dsub2 : SubRegIndex<64>; + def dsub3 : SubRegIndex<64>; + // Note: Code depends on these having consecutive numbers + def qsub0 : SubRegIndex<128>; + def qsub1 : SubRegIndex<128>; + def qsub2 : SubRegIndex<128>; + def qsub3 : SubRegIndex<128>; +} + +let Namespace = "AArch64" in { + def vreg : RegAltNameIndex; + def vlist1 : RegAltNameIndex; +} + +//===----------------------------------------------------------------------===// +// Registers +//===----------------------------------------------------------------------===// +def W0 : AArch64Reg<0, "w0" >, DwarfRegNum<[0]>; +def W1 : AArch64Reg<1, "w1" >, DwarfRegNum<[1]>; +def W2 : AArch64Reg<2, "w2" >, DwarfRegNum<[2]>; +def W3 : AArch64Reg<3, "w3" >, DwarfRegNum<[3]>; +def W4 : AArch64Reg<4, "w4" >, DwarfRegNum<[4]>; +def W5 : AArch64Reg<5, "w5" >, DwarfRegNum<[5]>; +def W6 : AArch64Reg<6, "w6" >, DwarfRegNum<[6]>; +def W7 : AArch64Reg<7, "w7" >, DwarfRegNum<[7]>; +def W8 : AArch64Reg<8, "w8" >, DwarfRegNum<[8]>; +def W9 : AArch64Reg<9, "w9" >, DwarfRegNum<[9]>; +def W10 : AArch64Reg<10, "w10">, DwarfRegNum<[10]>; +def W11 : AArch64Reg<11, "w11">, DwarfRegNum<[11]>; +def W12 : AArch64Reg<12, "w12">, DwarfRegNum<[12]>; +def W13 : AArch64Reg<13, "w13">, DwarfRegNum<[13]>; +def W14 : AArch64Reg<14, "w14">, DwarfRegNum<[14]>; +def W15 : AArch64Reg<15, "w15">, DwarfRegNum<[15]>; +def W16 : AArch64Reg<16, "w16">, DwarfRegNum<[16]>; +def W17 : AArch64Reg<17, "w17">, DwarfRegNum<[17]>; +def W18 : AArch64Reg<18, "w18">, DwarfRegNum<[18]>; +def W19 : AArch64Reg<19, "w19">, DwarfRegNum<[19]>; +def W20 : AArch64Reg<20, "w20">, DwarfRegNum<[20]>; +def W21 : AArch64Reg<21, "w21">, DwarfRegNum<[21]>; +def W22 : AArch64Reg<22, "w22">, DwarfRegNum<[22]>; +def 
W23 : AArch64Reg<23, "w23">, DwarfRegNum<[23]>; +def W24 : AArch64Reg<24, "w24">, DwarfRegNum<[24]>; +def W25 : AArch64Reg<25, "w25">, DwarfRegNum<[25]>; +def W26 : AArch64Reg<26, "w26">, DwarfRegNum<[26]>; +def W27 : AArch64Reg<27, "w27">, DwarfRegNum<[27]>; +def W28 : AArch64Reg<28, "w28">, DwarfRegNum<[28]>; +def W29 : AArch64Reg<29, "w29">, DwarfRegNum<[29]>; +def W30 : AArch64Reg<30, "w30">, DwarfRegNum<[30]>; +def WSP : AArch64Reg<31, "wsp">, DwarfRegNum<[31]>; +def WZR : AArch64Reg<31, "wzr">, DwarfRegAlias<WSP>; + +let SubRegIndices = [sub_32] in { +def X0 : AArch64Reg<0, "x0", [W0]>, DwarfRegAlias<W0>; +def X1 : AArch64Reg<1, "x1", [W1]>, DwarfRegAlias<W1>; +def X2 : AArch64Reg<2, "x2", [W2]>, DwarfRegAlias<W2>; +def X3 : AArch64Reg<3, "x3", [W3]>, DwarfRegAlias<W3>; +def X4 : AArch64Reg<4, "x4", [W4]>, DwarfRegAlias<W4>; +def X5 : AArch64Reg<5, "x5", [W5]>, DwarfRegAlias<W5>; +def X6 : AArch64Reg<6, "x6", [W6]>, DwarfRegAlias<W6>; +def X7 : AArch64Reg<7, "x7", [W7]>, DwarfRegAlias<W7>; +def X8 : AArch64Reg<8, "x8", [W8]>, DwarfRegAlias<W8>; +def X9 : AArch64Reg<9, "x9", [W9]>, DwarfRegAlias<W9>; +def X10 : AArch64Reg<10, "x10", [W10]>, DwarfRegAlias<W10>; +def X11 : AArch64Reg<11, "x11", [W11]>, DwarfRegAlias<W11>; +def X12 : AArch64Reg<12, "x12", [W12]>, DwarfRegAlias<W12>; +def X13 : AArch64Reg<13, "x13", [W13]>, DwarfRegAlias<W13>; +def X14 : AArch64Reg<14, "x14", [W14]>, DwarfRegAlias<W14>; +def X15 : AArch64Reg<15, "x15", [W15]>, DwarfRegAlias<W15>; +def X16 : AArch64Reg<16, "x16", [W16]>, DwarfRegAlias<W16>; +def X17 : AArch64Reg<17, "x17", [W17]>, DwarfRegAlias<W17>; +def X18 : AArch64Reg<18, "x18", [W18]>, DwarfRegAlias<W18>; +def X19 : AArch64Reg<19, "x19", [W19]>, DwarfRegAlias<W19>; +def X20 : AArch64Reg<20, "x20", [W20]>, DwarfRegAlias<W20>; +def X21 : AArch64Reg<21, "x21", [W21]>, DwarfRegAlias<W21>; +def X22 : AArch64Reg<22, "x22", [W22]>, DwarfRegAlias<W22>; +def X23 : AArch64Reg<23, "x23", [W23]>, DwarfRegAlias<W23>; +def X24 : AArch64Reg<24, "x24", [W24]>, DwarfRegAlias<W24>; +def X25 : AArch64Reg<25, "x25", [W25]>, DwarfRegAlias<W25>; +def X26 : AArch64Reg<26, "x26", [W26]>, DwarfRegAlias<W26>; +def X27 : AArch64Reg<27, "x27", [W27]>, DwarfRegAlias<W27>; +def X28 : AArch64Reg<28, "x28", [W28]>, DwarfRegAlias<W28>; +def FP : AArch64Reg<29, "x29", [W29]>, DwarfRegAlias<W29>; +def LR : AArch64Reg<30, "x30", [W30]>, DwarfRegAlias<W30>; +def SP : AArch64Reg<31, "sp", [WSP]>, DwarfRegAlias<WSP>; +def XZR : AArch64Reg<31, "xzr", [WZR]>, DwarfRegAlias<WSP>; +} + +// Condition code register. +def NZCV : AArch64Reg<0, "nzcv">; + +// First fault status register +def FFR : AArch64Reg<0, "ffr">, DwarfRegNum<[47]>; + +// GPR register classes with the intersections of GPR32/GPR32sp and +// GPR64/GPR64sp for use by the coalescer. +def GPR32common : RegisterClass<"AArch64", [i32], 32, (sequence "W%u", 0, 30)> { + let AltOrders = [(rotl GPR32common, 8)]; + let AltOrderSelect = [{ return 1; }]; +} +def GPR64common : RegisterClass<"AArch64", [i64], 64, + (add (sequence "X%u", 0, 28), FP, LR)> { + let AltOrders = [(rotl GPR64common, 8)]; + let AltOrderSelect = [{ return 1; }]; +} +// GPR register classes which exclude SP/WSP. +def GPR32 : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WZR)> { + let AltOrders = [(rotl GPR32, 8)]; + let AltOrderSelect = [{ return 1; }]; +} +def GPR64 : RegisterClass<"AArch64", [i64], 64, (add GPR64common, XZR)> { + let AltOrders = [(rotl GPR64, 8)]; + let AltOrderSelect = [{ return 1; }]; +} + +// GPR register classes which include SP/WSP. 
+def GPR32sp : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WSP)> { + let AltOrders = [(rotl GPR32sp, 8)]; + let AltOrderSelect = [{ return 1; }]; +} +def GPR64sp : RegisterClass<"AArch64", [i64], 64, (add GPR64common, SP)> { + let AltOrders = [(rotl GPR64sp, 8)]; + let AltOrderSelect = [{ return 1; }]; +} + +def GPR32sponly : RegisterClass<"AArch64", [i32], 32, (add WSP)>; +def GPR64sponly : RegisterClass<"AArch64", [i64], 64, (add SP)>; + +def GPR64spPlus0Operand : AsmOperandClass { + let Name = "GPR64sp0"; + let RenderMethod = "addRegOperands"; + let PredicateMethod = "isGPR64<AArch64::GPR64spRegClassID>"; + let ParserMethod = "tryParseGPR64sp0Operand"; +} + +def GPR64sp0 : RegisterOperand<GPR64sp> { + let ParserMatchClass = GPR64spPlus0Operand; +} + +// GPR32/GPR64 but with zero-register substitution enabled. +// TODO: Roll this out to GPR32/GPR64/GPR32all/GPR64all. +def GPR32z : RegisterOperand<GPR32> { + let GIZeroRegister = WZR; +} +def GPR64z : RegisterOperand<GPR64> { + let GIZeroRegister = XZR; +} + +// GPR register classes which include WZR/XZR AND SP/WSP. This is not a +// constraint used by any instructions, it is used as a common super-class. +def GPR32all : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WZR, WSP)>; +def GPR64all : RegisterClass<"AArch64", [i64], 64, (add GPR64common, XZR, SP)>; + +// For tail calls, we can't use callee-saved registers, as they are restored +// to the saved value before the tail call, which would clobber a call address. +// This is for indirect tail calls to store the address of the destination. +def tcGPR64 : RegisterClass<"AArch64", [i64], 64, (sub GPR64common, X19, X20, X21, + X22, X23, X24, X25, X26, + X27, X28, FP, LR)>; + +// GPR register classes for post increment amount of vector load/store that +// has alternate printing when Rm=31 and prints a constant immediate value +// equal to the total number of bytes transferred. + +// FIXME: TableGen *should* be able to do these itself now. There appears to be +// a bug in counting how many operands a Post-indexed MCInst should have which +// means the aliases don't trigger. +def GPR64pi1 : RegisterOperand<GPR64, "printPostIncOperand<1>">; +def GPR64pi2 : RegisterOperand<GPR64, "printPostIncOperand<2>">; +def GPR64pi3 : RegisterOperand<GPR64, "printPostIncOperand<3>">; +def GPR64pi4 : RegisterOperand<GPR64, "printPostIncOperand<4>">; +def GPR64pi6 : RegisterOperand<GPR64, "printPostIncOperand<6>">; +def GPR64pi8 : RegisterOperand<GPR64, "printPostIncOperand<8>">; +def GPR64pi12 : RegisterOperand<GPR64, "printPostIncOperand<12>">; +def GPR64pi16 : RegisterOperand<GPR64, "printPostIncOperand<16>">; +def GPR64pi24 : RegisterOperand<GPR64, "printPostIncOperand<24>">; +def GPR64pi32 : RegisterOperand<GPR64, "printPostIncOperand<32>">; +def GPR64pi48 : RegisterOperand<GPR64, "printPostIncOperand<48>">; +def GPR64pi64 : RegisterOperand<GPR64, "printPostIncOperand<64>">; + +// Condition code regclass. +def CCR : RegisterClass<"AArch64", [i32], 32, (add NZCV)> { + let CopyCost = -1; // Don't allow copying of status registers. + + // CCR is not allocatable. 
+ let isAllocatable = 0; +} + +//===----------------------------------------------------------------------===// +// Floating Point Scalar Registers +//===----------------------------------------------------------------------===// + +def B0 : AArch64Reg<0, "b0">, DwarfRegNum<[64]>; +def B1 : AArch64Reg<1, "b1">, DwarfRegNum<[65]>; +def B2 : AArch64Reg<2, "b2">, DwarfRegNum<[66]>; +def B3 : AArch64Reg<3, "b3">, DwarfRegNum<[67]>; +def B4 : AArch64Reg<4, "b4">, DwarfRegNum<[68]>; +def B5 : AArch64Reg<5, "b5">, DwarfRegNum<[69]>; +def B6 : AArch64Reg<6, "b6">, DwarfRegNum<[70]>; +def B7 : AArch64Reg<7, "b7">, DwarfRegNum<[71]>; +def B8 : AArch64Reg<8, "b8">, DwarfRegNum<[72]>; +def B9 : AArch64Reg<9, "b9">, DwarfRegNum<[73]>; +def B10 : AArch64Reg<10, "b10">, DwarfRegNum<[74]>; +def B11 : AArch64Reg<11, "b11">, DwarfRegNum<[75]>; +def B12 : AArch64Reg<12, "b12">, DwarfRegNum<[76]>; +def B13 : AArch64Reg<13, "b13">, DwarfRegNum<[77]>; +def B14 : AArch64Reg<14, "b14">, DwarfRegNum<[78]>; +def B15 : AArch64Reg<15, "b15">, DwarfRegNum<[79]>; +def B16 : AArch64Reg<16, "b16">, DwarfRegNum<[80]>; +def B17 : AArch64Reg<17, "b17">, DwarfRegNum<[81]>; +def B18 : AArch64Reg<18, "b18">, DwarfRegNum<[82]>; +def B19 : AArch64Reg<19, "b19">, DwarfRegNum<[83]>; +def B20 : AArch64Reg<20, "b20">, DwarfRegNum<[84]>; +def B21 : AArch64Reg<21, "b21">, DwarfRegNum<[85]>; +def B22 : AArch64Reg<22, "b22">, DwarfRegNum<[86]>; +def B23 : AArch64Reg<23, "b23">, DwarfRegNum<[87]>; +def B24 : AArch64Reg<24, "b24">, DwarfRegNum<[88]>; +def B25 : AArch64Reg<25, "b25">, DwarfRegNum<[89]>; +def B26 : AArch64Reg<26, "b26">, DwarfRegNum<[90]>; +def B27 : AArch64Reg<27, "b27">, DwarfRegNum<[91]>; +def B28 : AArch64Reg<28, "b28">, DwarfRegNum<[92]>; +def B29 : AArch64Reg<29, "b29">, DwarfRegNum<[93]>; +def B30 : AArch64Reg<30, "b30">, DwarfRegNum<[94]>; +def B31 : AArch64Reg<31, "b31">, DwarfRegNum<[95]>; + +let SubRegIndices = [bsub] in { +def H0 : AArch64Reg<0, "h0", [B0]>, DwarfRegAlias<B0>; +def H1 : AArch64Reg<1, "h1", [B1]>, DwarfRegAlias<B1>; +def H2 : AArch64Reg<2, "h2", [B2]>, DwarfRegAlias<B2>; +def H3 : AArch64Reg<3, "h3", [B3]>, DwarfRegAlias<B3>; +def H4 : AArch64Reg<4, "h4", [B4]>, DwarfRegAlias<B4>; +def H5 : AArch64Reg<5, "h5", [B5]>, DwarfRegAlias<B5>; +def H6 : AArch64Reg<6, "h6", [B6]>, DwarfRegAlias<B6>; +def H7 : AArch64Reg<7, "h7", [B7]>, DwarfRegAlias<B7>; +def H8 : AArch64Reg<8, "h8", [B8]>, DwarfRegAlias<B8>; +def H9 : AArch64Reg<9, "h9", [B9]>, DwarfRegAlias<B9>; +def H10 : AArch64Reg<10, "h10", [B10]>, DwarfRegAlias<B10>; +def H11 : AArch64Reg<11, "h11", [B11]>, DwarfRegAlias<B11>; +def H12 : AArch64Reg<12, "h12", [B12]>, DwarfRegAlias<B12>; +def H13 : AArch64Reg<13, "h13", [B13]>, DwarfRegAlias<B13>; +def H14 : AArch64Reg<14, "h14", [B14]>, DwarfRegAlias<B14>; +def H15 : AArch64Reg<15, "h15", [B15]>, DwarfRegAlias<B15>; +def H16 : AArch64Reg<16, "h16", [B16]>, DwarfRegAlias<B16>; +def H17 : AArch64Reg<17, "h17", [B17]>, DwarfRegAlias<B17>; +def H18 : AArch64Reg<18, "h18", [B18]>, DwarfRegAlias<B18>; +def H19 : AArch64Reg<19, "h19", [B19]>, DwarfRegAlias<B19>; +def H20 : AArch64Reg<20, "h20", [B20]>, DwarfRegAlias<B20>; +def H21 : AArch64Reg<21, "h21", [B21]>, DwarfRegAlias<B21>; +def H22 : AArch64Reg<22, "h22", [B22]>, DwarfRegAlias<B22>; +def H23 : AArch64Reg<23, "h23", [B23]>, DwarfRegAlias<B23>; +def H24 : AArch64Reg<24, "h24", [B24]>, DwarfRegAlias<B24>; +def H25 : AArch64Reg<25, "h25", [B25]>, DwarfRegAlias<B25>; +def H26 : AArch64Reg<26, "h26", [B26]>, DwarfRegAlias<B26>; +def H27 : 
AArch64Reg<27, "h27", [B27]>, DwarfRegAlias<B27>; +def H28 : AArch64Reg<28, "h28", [B28]>, DwarfRegAlias<B28>; +def H29 : AArch64Reg<29, "h29", [B29]>, DwarfRegAlias<B29>; +def H30 : AArch64Reg<30, "h30", [B30]>, DwarfRegAlias<B30>; +def H31 : AArch64Reg<31, "h31", [B31]>, DwarfRegAlias<B31>; +} + +let SubRegIndices = [hsub] in { +def S0 : AArch64Reg<0, "s0", [H0]>, DwarfRegAlias<B0>; +def S1 : AArch64Reg<1, "s1", [H1]>, DwarfRegAlias<B1>; +def S2 : AArch64Reg<2, "s2", [H2]>, DwarfRegAlias<B2>; +def S3 : AArch64Reg<3, "s3", [H3]>, DwarfRegAlias<B3>; +def S4 : AArch64Reg<4, "s4", [H4]>, DwarfRegAlias<B4>; +def S5 : AArch64Reg<5, "s5", [H5]>, DwarfRegAlias<B5>; +def S6 : AArch64Reg<6, "s6", [H6]>, DwarfRegAlias<B6>; +def S7 : AArch64Reg<7, "s7", [H7]>, DwarfRegAlias<B7>; +def S8 : AArch64Reg<8, "s8", [H8]>, DwarfRegAlias<B8>; +def S9 : AArch64Reg<9, "s9", [H9]>, DwarfRegAlias<B9>; +def S10 : AArch64Reg<10, "s10", [H10]>, DwarfRegAlias<B10>; +def S11 : AArch64Reg<11, "s11", [H11]>, DwarfRegAlias<B11>; +def S12 : AArch64Reg<12, "s12", [H12]>, DwarfRegAlias<B12>; +def S13 : AArch64Reg<13, "s13", [H13]>, DwarfRegAlias<B13>; +def S14 : AArch64Reg<14, "s14", [H14]>, DwarfRegAlias<B14>; +def S15 : AArch64Reg<15, "s15", [H15]>, DwarfRegAlias<B15>; +def S16 : AArch64Reg<16, "s16", [H16]>, DwarfRegAlias<B16>; +def S17 : AArch64Reg<17, "s17", [H17]>, DwarfRegAlias<B17>; +def S18 : AArch64Reg<18, "s18", [H18]>, DwarfRegAlias<B18>; +def S19 : AArch64Reg<19, "s19", [H19]>, DwarfRegAlias<B19>; +def S20 : AArch64Reg<20, "s20", [H20]>, DwarfRegAlias<B20>; +def S21 : AArch64Reg<21, "s21", [H21]>, DwarfRegAlias<B21>; +def S22 : AArch64Reg<22, "s22", [H22]>, DwarfRegAlias<B22>; +def S23 : AArch64Reg<23, "s23", [H23]>, DwarfRegAlias<B23>; +def S24 : AArch64Reg<24, "s24", [H24]>, DwarfRegAlias<B24>; +def S25 : AArch64Reg<25, "s25", [H25]>, DwarfRegAlias<B25>; +def S26 : AArch64Reg<26, "s26", [H26]>, DwarfRegAlias<B26>; +def S27 : AArch64Reg<27, "s27", [H27]>, DwarfRegAlias<B27>; +def S28 : AArch64Reg<28, "s28", [H28]>, DwarfRegAlias<B28>; +def S29 : AArch64Reg<29, "s29", [H29]>, DwarfRegAlias<B29>; +def S30 : AArch64Reg<30, "s30", [H30]>, DwarfRegAlias<B30>; +def S31 : AArch64Reg<31, "s31", [H31]>, DwarfRegAlias<B31>; +} + +let SubRegIndices = [ssub], RegAltNameIndices = [vreg, vlist1] in { +def D0 : AArch64Reg<0, "d0", [S0], ["v0", ""]>, DwarfRegAlias<B0>; +def D1 : AArch64Reg<1, "d1", [S1], ["v1", ""]>, DwarfRegAlias<B1>; +def D2 : AArch64Reg<2, "d2", [S2], ["v2", ""]>, DwarfRegAlias<B2>; +def D3 : AArch64Reg<3, "d3", [S3], ["v3", ""]>, DwarfRegAlias<B3>; +def D4 : AArch64Reg<4, "d4", [S4], ["v4", ""]>, DwarfRegAlias<B4>; +def D5 : AArch64Reg<5, "d5", [S5], ["v5", ""]>, DwarfRegAlias<B5>; +def D6 : AArch64Reg<6, "d6", [S6], ["v6", ""]>, DwarfRegAlias<B6>; +def D7 : AArch64Reg<7, "d7", [S7], ["v7", ""]>, DwarfRegAlias<B7>; +def D8 : AArch64Reg<8, "d8", [S8], ["v8", ""]>, DwarfRegAlias<B8>; +def D9 : AArch64Reg<9, "d9", [S9], ["v9", ""]>, DwarfRegAlias<B9>; +def D10 : AArch64Reg<10, "d10", [S10], ["v10", ""]>, DwarfRegAlias<B10>; +def D11 : AArch64Reg<11, "d11", [S11], ["v11", ""]>, DwarfRegAlias<B11>; +def D12 : AArch64Reg<12, "d12", [S12], ["v12", ""]>, DwarfRegAlias<B12>; +def D13 : AArch64Reg<13, "d13", [S13], ["v13", ""]>, DwarfRegAlias<B13>; +def D14 : AArch64Reg<14, "d14", [S14], ["v14", ""]>, DwarfRegAlias<B14>; +def D15 : AArch64Reg<15, "d15", [S15], ["v15", ""]>, DwarfRegAlias<B15>; +def D16 : AArch64Reg<16, "d16", [S16], ["v16", ""]>, DwarfRegAlias<B16>; +def D17 : AArch64Reg<17, "d17", [S17], ["v17", 
""]>, DwarfRegAlias<B17>; +def D18 : AArch64Reg<18, "d18", [S18], ["v18", ""]>, DwarfRegAlias<B18>; +def D19 : AArch64Reg<19, "d19", [S19], ["v19", ""]>, DwarfRegAlias<B19>; +def D20 : AArch64Reg<20, "d20", [S20], ["v20", ""]>, DwarfRegAlias<B20>; +def D21 : AArch64Reg<21, "d21", [S21], ["v21", ""]>, DwarfRegAlias<B21>; +def D22 : AArch64Reg<22, "d22", [S22], ["v22", ""]>, DwarfRegAlias<B22>; +def D23 : AArch64Reg<23, "d23", [S23], ["v23", ""]>, DwarfRegAlias<B23>; +def D24 : AArch64Reg<24, "d24", [S24], ["v24", ""]>, DwarfRegAlias<B24>; +def D25 : AArch64Reg<25, "d25", [S25], ["v25", ""]>, DwarfRegAlias<B25>; +def D26 : AArch64Reg<26, "d26", [S26], ["v26", ""]>, DwarfRegAlias<B26>; +def D27 : AArch64Reg<27, "d27", [S27], ["v27", ""]>, DwarfRegAlias<B27>; +def D28 : AArch64Reg<28, "d28", [S28], ["v28", ""]>, DwarfRegAlias<B28>; +def D29 : AArch64Reg<29, "d29", [S29], ["v29", ""]>, DwarfRegAlias<B29>; +def D30 : AArch64Reg<30, "d30", [S30], ["v30", ""]>, DwarfRegAlias<B30>; +def D31 : AArch64Reg<31, "d31", [S31], ["v31", ""]>, DwarfRegAlias<B31>; +} + +let SubRegIndices = [dsub], RegAltNameIndices = [vreg, vlist1] in { +def Q0 : AArch64Reg<0, "q0", [D0], ["v0", ""]>, DwarfRegAlias<B0>; +def Q1 : AArch64Reg<1, "q1", [D1], ["v1", ""]>, DwarfRegAlias<B1>; +def Q2 : AArch64Reg<2, "q2", [D2], ["v2", ""]>, DwarfRegAlias<B2>; +def Q3 : AArch64Reg<3, "q3", [D3], ["v3", ""]>, DwarfRegAlias<B3>; +def Q4 : AArch64Reg<4, "q4", [D4], ["v4", ""]>, DwarfRegAlias<B4>; +def Q5 : AArch64Reg<5, "q5", [D5], ["v5", ""]>, DwarfRegAlias<B5>; +def Q6 : AArch64Reg<6, "q6", [D6], ["v6", ""]>, DwarfRegAlias<B6>; +def Q7 : AArch64Reg<7, "q7", [D7], ["v7", ""]>, DwarfRegAlias<B7>; +def Q8 : AArch64Reg<8, "q8", [D8], ["v8", ""]>, DwarfRegAlias<B8>; +def Q9 : AArch64Reg<9, "q9", [D9], ["v9", ""]>, DwarfRegAlias<B9>; +def Q10 : AArch64Reg<10, "q10", [D10], ["v10", ""]>, DwarfRegAlias<B10>; +def Q11 : AArch64Reg<11, "q11", [D11], ["v11", ""]>, DwarfRegAlias<B11>; +def Q12 : AArch64Reg<12, "q12", [D12], ["v12", ""]>, DwarfRegAlias<B12>; +def Q13 : AArch64Reg<13, "q13", [D13], ["v13", ""]>, DwarfRegAlias<B13>; +def Q14 : AArch64Reg<14, "q14", [D14], ["v14", ""]>, DwarfRegAlias<B14>; +def Q15 : AArch64Reg<15, "q15", [D15], ["v15", ""]>, DwarfRegAlias<B15>; +def Q16 : AArch64Reg<16, "q16", [D16], ["v16", ""]>, DwarfRegAlias<B16>; +def Q17 : AArch64Reg<17, "q17", [D17], ["v17", ""]>, DwarfRegAlias<B17>; +def Q18 : AArch64Reg<18, "q18", [D18], ["v18", ""]>, DwarfRegAlias<B18>; +def Q19 : AArch64Reg<19, "q19", [D19], ["v19", ""]>, DwarfRegAlias<B19>; +def Q20 : AArch64Reg<20, "q20", [D20], ["v20", ""]>, DwarfRegAlias<B20>; +def Q21 : AArch64Reg<21, "q21", [D21], ["v21", ""]>, DwarfRegAlias<B21>; +def Q22 : AArch64Reg<22, "q22", [D22], ["v22", ""]>, DwarfRegAlias<B22>; +def Q23 : AArch64Reg<23, "q23", [D23], ["v23", ""]>, DwarfRegAlias<B23>; +def Q24 : AArch64Reg<24, "q24", [D24], ["v24", ""]>, DwarfRegAlias<B24>; +def Q25 : AArch64Reg<25, "q25", [D25], ["v25", ""]>, DwarfRegAlias<B25>; +def Q26 : AArch64Reg<26, "q26", [D26], ["v26", ""]>, DwarfRegAlias<B26>; +def Q27 : AArch64Reg<27, "q27", [D27], ["v27", ""]>, DwarfRegAlias<B27>; +def Q28 : AArch64Reg<28, "q28", [D28], ["v28", ""]>, DwarfRegAlias<B28>; +def Q29 : AArch64Reg<29, "q29", [D29], ["v29", ""]>, DwarfRegAlias<B29>; +def Q30 : AArch64Reg<30, "q30", [D30], ["v30", ""]>, DwarfRegAlias<B30>; +def Q31 : AArch64Reg<31, "q31", [D31], ["v31", ""]>, DwarfRegAlias<B31>; +} + +def FPR8 : RegisterClass<"AArch64", [untyped], 8, (sequence "B%u", 0, 31)> { + let Size = 8; +} +def 
FPR16 : RegisterClass<"AArch64", [f16], 16, (sequence "H%u", 0, 31)> {
+ let Size = 16;
+}
+def FPR32 : RegisterClass<"AArch64", [f32, i32], 32,(sequence "S%u", 0, 31)>;
+def FPR64 : RegisterClass<"AArch64", [f64, i64, v2f32, v1f64, v8i8, v4i16, v2i32,
+ v1i64, v4f16],
+ 64, (sequence "D%u", 0, 31)>;
+// We don't (yet) have an f128 legal type, so don't use that here. We
+// normalize 128-bit vectors to v2f64 for arg passing and such, so use
+// that here.
+def FPR128 : RegisterClass<"AArch64",
+ [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, f128,
+ v8f16],
+ 128, (sequence "Q%u", 0, 31)>;
+
+// The lower 16 vector registers. Some instructions can only take registers
+// in this range.
+def FPR128_lo : RegisterClass<"AArch64",
+ [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16],
+ 128, (trunc FPR128, 16)>;
+
+// Pairs, triples, and quads of 64-bit vector registers.
+def DSeqPairs : RegisterTuples<[dsub0, dsub1], [(rotl FPR64, 0), (rotl FPR64, 1)]>;
+def DSeqTriples : RegisterTuples<[dsub0, dsub1, dsub2],
+ [(rotl FPR64, 0), (rotl FPR64, 1),
+ (rotl FPR64, 2)]>;
+def DSeqQuads : RegisterTuples<[dsub0, dsub1, dsub2, dsub3],
+ [(rotl FPR64, 0), (rotl FPR64, 1),
+ (rotl FPR64, 2), (rotl FPR64, 3)]>;
+def DD : RegisterClass<"AArch64", [untyped], 64, (add DSeqPairs)> {
+ let Size = 128;
+}
+def DDD : RegisterClass<"AArch64", [untyped], 64, (add DSeqTriples)> {
+ let Size = 192;
+}
+def DDDD : RegisterClass<"AArch64", [untyped], 64, (add DSeqQuads)> {
+ let Size = 256;
+}
+
+// Pairs, triples, and quads of 128-bit vector registers.
+def QSeqPairs : RegisterTuples<[qsub0, qsub1], [(rotl FPR128, 0), (rotl FPR128, 1)]>;
+def QSeqTriples : RegisterTuples<[qsub0, qsub1, qsub2],
+ [(rotl FPR128, 0), (rotl FPR128, 1),
+ (rotl FPR128, 2)]>;
+def QSeqQuads : RegisterTuples<[qsub0, qsub1, qsub2, qsub3],
+ [(rotl FPR128, 0), (rotl FPR128, 1),
+ (rotl FPR128, 2), (rotl FPR128, 3)]>;
+def QQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqPairs)> {
+ let Size = 256;
+}
+def QQQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqTriples)> {
+ let Size = 384;
+}
+def QQQQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqQuads)> {
+ let Size = 512;
+}
+
+
+// Vector operand versions of the FP registers. Alternate name printing and
+// assembler matching.
+def VectorReg64AsmOperand : AsmOperandClass { + let Name = "VectorReg64"; + let PredicateMethod = "isNeonVectorReg"; +} +def VectorReg128AsmOperand : AsmOperandClass { + let Name = "VectorReg128"; + let PredicateMethod = "isNeonVectorReg"; +} + +def V64 : RegisterOperand<FPR64, "printVRegOperand"> { + let ParserMatchClass = VectorReg64AsmOperand; +} + +def V128 : RegisterOperand<FPR128, "printVRegOperand"> { + let ParserMatchClass = VectorReg128AsmOperand; +} + +def VectorRegLoAsmOperand : AsmOperandClass { + let Name = "VectorRegLo"; + let PredicateMethod = "isNeonVectorRegLo"; +} +def V128_lo : RegisterOperand<FPR128_lo, "printVRegOperand"> { + let ParserMatchClass = VectorRegLoAsmOperand; +} + +class TypedVecListAsmOperand<int count, string vecty, int lanes, int eltsize> + : AsmOperandClass { + let Name = "TypedVectorList" # count # "_" # lanes # eltsize; + + let PredicateMethod + = "isTypedVectorList<RegKind::NeonVector, " # count # ", " # lanes # ", " # eltsize # ">"; + let RenderMethod = "addVectorListOperands<" # vecty # ", " # count # ">"; +} + +class TypedVecListRegOperand<RegisterClass Reg, int lanes, string eltsize> + : RegisterOperand<Reg, "printTypedVectorList<" # lanes # ", '" + # eltsize # "'>">; + +multiclass VectorList<int count, RegisterClass Reg64, RegisterClass Reg128> { + // With implicit types (probably on instruction instead). E.g. { v0, v1 } + def _64AsmOperand : AsmOperandClass { + let Name = NAME # "64"; + let PredicateMethod = "isImplicitlyTypedVectorList<RegKind::NeonVector, " # count # ">"; + let RenderMethod = "addVectorListOperands<AArch64Operand::VecListIdx_DReg, " # count # ">"; + } + + def "64" : RegisterOperand<Reg64, "printImplicitlyTypedVectorList"> { + let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_64AsmOperand"); + } + + def _128AsmOperand : AsmOperandClass { + let Name = NAME # "128"; + let PredicateMethod = "isImplicitlyTypedVectorList<RegKind::NeonVector, " # count # ">"; + let RenderMethod = "addVectorListOperands<AArch64Operand::VecListIdx_QReg, " # count # ">"; + } + + def "128" : RegisterOperand<Reg128, "printImplicitlyTypedVectorList"> { + let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_128AsmOperand"); + } + + // 64-bit register lists with explicit type. 
+ + // { v0.8b, v1.8b } + def _8bAsmOperand : TypedVecListAsmOperand<count, "AArch64Operand::VecListIdx_DReg", 8, 8>; + def "8b" : TypedVecListRegOperand<Reg64, 8, "b"> { + let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_8bAsmOperand"); + } + + // { v0.4h, v1.4h } + def _4hAsmOperand : TypedVecListAsmOperand<count, "AArch64Operand::VecListIdx_DReg", 4, 16>; + def "4h" : TypedVecListRegOperand<Reg64, 4, "h"> { + let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_4hAsmOperand"); + } + + // { v0.2s, v1.2s } + def _2sAsmOperand : TypedVecListAsmOperand<count, "AArch64Operand::VecListIdx_DReg", 2, 32>; + def "2s" : TypedVecListRegOperand<Reg64, 2, "s"> { + let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_2sAsmOperand"); + } + + // { v0.1d, v1.1d } + def _1dAsmOperand : TypedVecListAsmOperand<count, "AArch64Operand::VecListIdx_DReg", 1, 64>; + def "1d" : TypedVecListRegOperand<Reg64, 1, "d"> { + let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_1dAsmOperand"); + } + + // 128-bit register lists with explicit type + + // { v0.16b, v1.16b } + def _16bAsmOperand : TypedVecListAsmOperand<count, "AArch64Operand::VecListIdx_QReg", 16, 8>; + def "16b" : TypedVecListRegOperand<Reg128, 16, "b"> { + let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_16bAsmOperand"); + } + + // { v0.8h, v1.8h } + def _8hAsmOperand : TypedVecListAsmOperand<count, "AArch64Operand::VecListIdx_QReg", 8, 16>; + def "8h" : TypedVecListRegOperand<Reg128, 8, "h"> { + let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_8hAsmOperand"); + } + + // { v0.4s, v1.4s } + def _4sAsmOperand : TypedVecListAsmOperand<count, "AArch64Operand::VecListIdx_QReg", 4, 32>; + def "4s" : TypedVecListRegOperand<Reg128, 4, "s"> { + let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_4sAsmOperand"); + } + + // { v0.2d, v1.2d } + def _2dAsmOperand : TypedVecListAsmOperand<count, "AArch64Operand::VecListIdx_QReg", 2, 64>; + def "2d" : TypedVecListRegOperand<Reg128, 2, "d"> { + let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_2dAsmOperand"); + } + + // { v0.b, v1.b } + def _bAsmOperand : TypedVecListAsmOperand<count, "AArch64Operand::VecListIdx_QReg", 0, 8>; + def "b" : TypedVecListRegOperand<Reg128, 0, "b"> { + let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_bAsmOperand"); + } + + // { v0.h, v1.h } + def _hAsmOperand : TypedVecListAsmOperand<count, "AArch64Operand::VecListIdx_QReg", 0, 16>; + def "h" : TypedVecListRegOperand<Reg128, 0, "h"> { + let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_hAsmOperand"); + } + + // { v0.s, v1.s } + def _sAsmOperand : TypedVecListAsmOperand<count, "AArch64Operand::VecListIdx_QReg", 0, 32>; + def "s" : TypedVecListRegOperand<Reg128, 0, "s"> { + let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_sAsmOperand"); + } + + // { v0.d, v1.d } + def _dAsmOperand : TypedVecListAsmOperand<count, "AArch64Operand::VecListIdx_QReg", 0, 64>; + def "d" : TypedVecListRegOperand<Reg128, 0, "d"> { + let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_dAsmOperand"); + } + + +} + +defm VecListOne : VectorList<1, FPR64, FPR128>; +defm VecListTwo : VectorList<2, DD, QQ>; +defm VecListThree : VectorList<3, DDD, QQQ>; +defm VecListFour : VectorList<4, DDDD, QQQQ>; + +class FPRAsmOperand<string RC> : AsmOperandClass { + let Name = "FPRAsmOperand" # RC; + let PredicateMethod = "isGPR64<AArch64::" # RC # "RegClassID>"; + let RenderMethod = "addRegOperands"; +} + +// Register operand versions of the scalar FP registers. 
+def FPR8Op : RegisterOperand<FPR8, "printOperand"> { + let ParserMatchClass = FPRAsmOperand<"FPR8">; +} + +def FPR16Op : RegisterOperand<FPR16, "printOperand"> { + let ParserMatchClass = FPRAsmOperand<"FPR16">; +} + +def FPR32Op : RegisterOperand<FPR32, "printOperand"> { + let ParserMatchClass = FPRAsmOperand<"FPR32">; +} + +def FPR64Op : RegisterOperand<FPR64, "printOperand"> { + let ParserMatchClass = FPRAsmOperand<"FPR64">; +} + +def FPR128Op : RegisterOperand<FPR128, "printOperand"> { + let ParserMatchClass = FPRAsmOperand<"FPR128">; +} + +//===----------------------------------------------------------------------===// +// ARMv8.1a atomic CASP register operands + + +def WSeqPairs : RegisterTuples<[sube32, subo32], + [(rotl GPR32, 0), (rotl GPR32, 1)]>; +def XSeqPairs : RegisterTuples<[sube64, subo64], + [(rotl GPR64, 0), (rotl GPR64, 1)]>; + +def WSeqPairsClass : RegisterClass<"AArch64", [untyped], 32, + (add WSeqPairs)>{ + let Size = 64; +} +def XSeqPairsClass : RegisterClass<"AArch64", [untyped], 64, + (add XSeqPairs)>{ + let Size = 128; +} + + +let RenderMethod = "addRegOperands", ParserMethod="tryParseGPRSeqPair" in { + def WSeqPairsAsmOperandClass : AsmOperandClass { let Name = "WSeqPair"; } + def XSeqPairsAsmOperandClass : AsmOperandClass { let Name = "XSeqPair"; } +} + +def WSeqPairClassOperand : + RegisterOperand<WSeqPairsClass, "printGPRSeqPairsClassOperand<32>"> { + let ParserMatchClass = WSeqPairsAsmOperandClass; +} +def XSeqPairClassOperand : + RegisterOperand<XSeqPairsClass, "printGPRSeqPairsClassOperand<64>"> { + let ParserMatchClass = XSeqPairsAsmOperandClass; +} + + +//===----- END: v8.1a atomic CASP register operands -----------------------===// + +// SVE predicate registers +def P0 : AArch64Reg<0, "p0">, DwarfRegNum<[48]>; +def P1 : AArch64Reg<1, "p1">, DwarfRegNum<[49]>; +def P2 : AArch64Reg<2, "p2">, DwarfRegNum<[50]>; +def P3 : AArch64Reg<3, "p3">, DwarfRegNum<[51]>; +def P4 : AArch64Reg<4, "p4">, DwarfRegNum<[52]>; +def P5 : AArch64Reg<5, "p5">, DwarfRegNum<[53]>; +def P6 : AArch64Reg<6, "p6">, DwarfRegNum<[54]>; +def P7 : AArch64Reg<7, "p7">, DwarfRegNum<[55]>; +def P8 : AArch64Reg<8, "p8">, DwarfRegNum<[56]>; +def P9 : AArch64Reg<9, "p9">, DwarfRegNum<[57]>; +def P10 : AArch64Reg<10, "p10">, DwarfRegNum<[58]>; +def P11 : AArch64Reg<11, "p11">, DwarfRegNum<[59]>; +def P12 : AArch64Reg<12, "p12">, DwarfRegNum<[60]>; +def P13 : AArch64Reg<13, "p13">, DwarfRegNum<[61]>; +def P14 : AArch64Reg<14, "p14">, DwarfRegNum<[62]>; +def P15 : AArch64Reg<15, "p15">, DwarfRegNum<[63]>; + +// The part of SVE registers that don't overlap Neon registers. +// These are only used as part of clobber lists. 
+def Z0_HI : AArch64Reg<0, "z0_hi">;
+def Z1_HI : AArch64Reg<1, "z1_hi">;
+def Z2_HI : AArch64Reg<2, "z2_hi">;
+def Z3_HI : AArch64Reg<3, "z3_hi">;
+def Z4_HI : AArch64Reg<4, "z4_hi">;
+def Z5_HI : AArch64Reg<5, "z5_hi">;
+def Z6_HI : AArch64Reg<6, "z6_hi">;
+def Z7_HI : AArch64Reg<7, "z7_hi">;
+def Z8_HI : AArch64Reg<8, "z8_hi">;
+def Z9_HI : AArch64Reg<9, "z9_hi">;
+def Z10_HI : AArch64Reg<10, "z10_hi">;
+def Z11_HI : AArch64Reg<11, "z11_hi">;
+def Z12_HI : AArch64Reg<12, "z12_hi">;
+def Z13_HI : AArch64Reg<13, "z13_hi">;
+def Z14_HI : AArch64Reg<14, "z14_hi">;
+def Z15_HI : AArch64Reg<15, "z15_hi">;
+def Z16_HI : AArch64Reg<16, "z16_hi">;
+def Z17_HI : AArch64Reg<17, "z17_hi">;
+def Z18_HI : AArch64Reg<18, "z18_hi">;
+def Z19_HI : AArch64Reg<19, "z19_hi">;
+def Z20_HI : AArch64Reg<20, "z20_hi">;
+def Z21_HI : AArch64Reg<21, "z21_hi">;
+def Z22_HI : AArch64Reg<22, "z22_hi">;
+def Z23_HI : AArch64Reg<23, "z23_hi">;
+def Z24_HI : AArch64Reg<24, "z24_hi">;
+def Z25_HI : AArch64Reg<25, "z25_hi">;
+def Z26_HI : AArch64Reg<26, "z26_hi">;
+def Z27_HI : AArch64Reg<27, "z27_hi">;
+def Z28_HI : AArch64Reg<28, "z28_hi">;
+def Z29_HI : AArch64Reg<29, "z29_hi">;
+def Z30_HI : AArch64Reg<30, "z30_hi">;
+def Z31_HI : AArch64Reg<31, "z31_hi">;
+
+// SVE variable-size vector registers
+let SubRegIndices = [zsub,zsub_hi] in {
+def Z0 : AArch64Reg<0, "z0", [Q0, Z0_HI]>, DwarfRegNum<[96]>;
+def Z1 : AArch64Reg<1, "z1", [Q1, Z1_HI]>, DwarfRegNum<[97]>;
+def Z2 : AArch64Reg<2, "z2", [Q2, Z2_HI]>, DwarfRegNum<[98]>;
+def Z3 : AArch64Reg<3, "z3", [Q3, Z3_HI]>, DwarfRegNum<[99]>;
+def Z4 : AArch64Reg<4, "z4", [Q4, Z4_HI]>, DwarfRegNum<[100]>;
+def Z5 : AArch64Reg<5, "z5", [Q5, Z5_HI]>, DwarfRegNum<[101]>;
+def Z6 : AArch64Reg<6, "z6", [Q6, Z6_HI]>, DwarfRegNum<[102]>;
+def Z7 : AArch64Reg<7, "z7", [Q7, Z7_HI]>, DwarfRegNum<[103]>;
+def Z8 : AArch64Reg<8, "z8", [Q8, Z8_HI]>, DwarfRegNum<[104]>;
+def Z9 : AArch64Reg<9, "z9", [Q9, Z9_HI]>, DwarfRegNum<[105]>;
+def Z10 : AArch64Reg<10, "z10", [Q10, Z10_HI]>, DwarfRegNum<[106]>;
+def Z11 : AArch64Reg<11, "z11", [Q11, Z11_HI]>, DwarfRegNum<[107]>;
+def Z12 : AArch64Reg<12, "z12", [Q12, Z12_HI]>, DwarfRegNum<[108]>;
+def Z13 : AArch64Reg<13, "z13", [Q13, Z13_HI]>, DwarfRegNum<[109]>;
+def Z14 : AArch64Reg<14, "z14", [Q14, Z14_HI]>, DwarfRegNum<[110]>;
+def Z15 : AArch64Reg<15, "z15", [Q15, Z15_HI]>, DwarfRegNum<[111]>;
+def Z16 : AArch64Reg<16, "z16", [Q16, Z16_HI]>, DwarfRegNum<[112]>;
+def Z17 : AArch64Reg<17, "z17", [Q17, Z17_HI]>, DwarfRegNum<[113]>;
+def Z18 : AArch64Reg<18, "z18", [Q18, Z18_HI]>, DwarfRegNum<[114]>;
+def Z19 : AArch64Reg<19, "z19", [Q19, Z19_HI]>, DwarfRegNum<[115]>;
+def Z20 : AArch64Reg<20, "z20", [Q20, Z20_HI]>, DwarfRegNum<[116]>;
+def Z21 : AArch64Reg<21, "z21", [Q21, Z21_HI]>, DwarfRegNum<[117]>;
+def Z22 : AArch64Reg<22, "z22", [Q22, Z22_HI]>, DwarfRegNum<[118]>;
+def Z23 : AArch64Reg<23, "z23", [Q23, Z23_HI]>, DwarfRegNum<[119]>;
+def Z24 : AArch64Reg<24, "z24", [Q24, Z24_HI]>, DwarfRegNum<[120]>;
+def Z25 : AArch64Reg<25, "z25", [Q25, Z25_HI]>, DwarfRegNum<[121]>;
+def Z26 : AArch64Reg<26, "z26", [Q26, Z26_HI]>, DwarfRegNum<[122]>;
+def Z27 : AArch64Reg<27, "z27", [Q27, Z27_HI]>, DwarfRegNum<[123]>;
+def Z28 : AArch64Reg<28, "z28", [Q28, Z28_HI]>, DwarfRegNum<[124]>;
+def Z29 : AArch64Reg<29, "z29", [Q29, Z29_HI]>, DwarfRegNum<[125]>;
+def Z30 : AArch64Reg<30, "z30", [Q30, Z30_HI]>, DwarfRegNum<[126]>;
+def Z31 : AArch64Reg<31, "z31", [Q31, Z31_HI]>, DwarfRegNum<[127]>;
+}
+
+// Enum describing the element size for destructive
+//
operations. +class ElementSizeEnum<bits<3> val> { + bits<3> Value = val; +} + +def ElementSizeNone : ElementSizeEnum<0>; +def ElementSizeB : ElementSizeEnum<1>; +def ElementSizeH : ElementSizeEnum<2>; +def ElementSizeS : ElementSizeEnum<3>; +def ElementSizeD : ElementSizeEnum<4>; +def ElementSizeQ : ElementSizeEnum<5>; // Unused + +class SVERegOp <string Suffix, AsmOperandClass C, + ElementSizeEnum Size, + RegisterClass RC> : RegisterOperand<RC> { + ElementSizeEnum ElementSize; + + let ElementSize = Size; + let PrintMethod = !if(!eq(Suffix, ""), + "printSVERegOp<>", + "printSVERegOp<'" # Suffix # "'>"); + let ParserMatchClass = C; +} + +class PPRRegOp <string Suffix, AsmOperandClass C, ElementSizeEnum Size, + RegisterClass RC> : SVERegOp<Suffix, C, Size, RC> {} +class ZPRRegOp <string Suffix, AsmOperandClass C, ElementSizeEnum Size, + RegisterClass RC> : SVERegOp<Suffix, C, Size, RC> {} + +//****************************************************************************** + +// SVE predicate register classes. +class PPRClass<int lastreg> : RegisterClass< + "AArch64", + [ nxv16i1, nxv8i1, nxv4i1, nxv2i1 ], 16, + (sequence "P%u", 0, lastreg)> { + let Size = 16; +} + +def PPR : PPRClass<15>; +def PPR_3b : PPRClass<7>; // Restricted 3 bit SVE predicate register class. + +class PPRAsmOperand <string name, string RegClass, int Width>: AsmOperandClass { + let Name = "SVE" # name # "Reg"; + let PredicateMethod = "isSVEPredicateVectorRegOfWidth<" + # Width # ", " # "AArch64::" # RegClass # "RegClassID>"; + let DiagnosticType = "InvalidSVE" # name # "Reg"; + let RenderMethod = "addRegOperands"; + let ParserMethod = "tryParseSVEPredicateVector"; +} + +def PPRAsmOpAny : PPRAsmOperand<"PredicateAny", "PPR", 0>; +def PPRAsmOp8 : PPRAsmOperand<"PredicateB", "PPR", 8>; +def PPRAsmOp16 : PPRAsmOperand<"PredicateH", "PPR", 16>; +def PPRAsmOp32 : PPRAsmOperand<"PredicateS", "PPR", 32>; +def PPRAsmOp64 : PPRAsmOperand<"PredicateD", "PPR", 64>; + +def PPRAny : PPRRegOp<"", PPRAsmOpAny, ElementSizeNone, PPR>; +def PPR8 : PPRRegOp<"b", PPRAsmOp8, ElementSizeB, PPR>; +def PPR16 : PPRRegOp<"h", PPRAsmOp16, ElementSizeH, PPR>; +def PPR32 : PPRRegOp<"s", PPRAsmOp32, ElementSizeS, PPR>; +def PPR64 : PPRRegOp<"d", PPRAsmOp64, ElementSizeD, PPR>; + +def PPRAsmOp3bAny : PPRAsmOperand<"Predicate3bAny", "PPR_3b", 0>; +def PPRAsmOp3b8 : PPRAsmOperand<"Predicate3bB", "PPR_3b", 8>; +def PPRAsmOp3b16 : PPRAsmOperand<"Predicate3bH", "PPR_3b", 16>; +def PPRAsmOp3b32 : PPRAsmOperand<"Predicate3bS", "PPR_3b", 32>; +def PPRAsmOp3b64 : PPRAsmOperand<"Predicate3bD", "PPR_3b", 64>; + +def PPR3bAny : PPRRegOp<"", PPRAsmOp3bAny, ElementSizeNone, PPR_3b>; +def PPR3b8 : PPRRegOp<"b", PPRAsmOp3b8, ElementSizeB, PPR_3b>; +def PPR3b16 : PPRRegOp<"h", PPRAsmOp3b16, ElementSizeH, PPR_3b>; +def PPR3b32 : PPRRegOp<"s", PPRAsmOp3b32, ElementSizeS, PPR_3b>; +def PPR3b64 : PPRRegOp<"d", PPRAsmOp3b64, ElementSizeD, PPR_3b>; + +//****************************************************************************** + +// SVE vector register class +def ZPR : RegisterClass<"AArch64", + [nxv16i8, nxv8i16, nxv4i32, nxv2i64, + nxv2f16, nxv4f16, nxv8f16, + nxv1f32, nxv2f32, nxv4f32, + nxv1f64, nxv2f64], + 128, (sequence "Z%u", 0, 31)> { + let Size = 128; +} + +// SVE restricted 4 bit scalable vector register class +def ZPR_4b : RegisterClass<"AArch64", + [nxv16i8, nxv8i16, nxv4i32, nxv2i64, + nxv2f16, nxv4f16, nxv8f16, + nxv1f32, nxv2f32, nxv4f32, + nxv1f64, nxv2f64], + 128, (sequence "Z%u", 0, 15)> { + let Size = 128; +} + +// SVE restricted 3 bit scalable vector 
register class +def ZPR_3b : RegisterClass<"AArch64", + [nxv16i8, nxv8i16, nxv4i32, nxv2i64, + nxv2f16, nxv4f16, nxv8f16, + nxv1f32, nxv2f32, nxv4f32, + nxv1f64, nxv2f64], + 128, (sequence "Z%u", 0, 7)> { + let Size = 128; +} + +class ZPRAsmOperand<string name, int Width, string RegClassSuffix = ""> + : AsmOperandClass { + let Name = "SVE" # name # "Reg"; + let PredicateMethod = "isSVEDataVectorRegOfWidth<" + # Width # ", AArch64::ZPR" + # RegClassSuffix # "RegClassID>"; + let RenderMethod = "addRegOperands"; + let DiagnosticType = "InvalidZPR" # RegClassSuffix # Width; + let ParserMethod = "tryParseSVEDataVector<false, " + # !if(!eq(Width, 0), "false", "true") # ">"; +} + +def ZPRAsmOpAny : ZPRAsmOperand<"VectorAny", 0>; +def ZPRAsmOp8 : ZPRAsmOperand<"VectorB", 8>; +def ZPRAsmOp16 : ZPRAsmOperand<"VectorH", 16>; +def ZPRAsmOp32 : ZPRAsmOperand<"VectorS", 32>; +def ZPRAsmOp64 : ZPRAsmOperand<"VectorD", 64>; +def ZPRAsmOp128 : ZPRAsmOperand<"VectorQ", 128>; + +def ZPRAny : ZPRRegOp<"", ZPRAsmOpAny, ElementSizeNone, ZPR>; +def ZPR8 : ZPRRegOp<"b", ZPRAsmOp8, ElementSizeB, ZPR>; +def ZPR16 : ZPRRegOp<"h", ZPRAsmOp16, ElementSizeH, ZPR>; +def ZPR32 : ZPRRegOp<"s", ZPRAsmOp32, ElementSizeS, ZPR>; +def ZPR64 : ZPRRegOp<"d", ZPRAsmOp64, ElementSizeD, ZPR>; +def ZPR128 : ZPRRegOp<"q", ZPRAsmOp128, ElementSizeQ, ZPR>; + +def ZPRAsmOp3b8 : ZPRAsmOperand<"Vector3bB", 8, "_3b">; +def ZPRAsmOp3b16 : ZPRAsmOperand<"Vector3bH", 16, "_3b">; +def ZPRAsmOp3b32 : ZPRAsmOperand<"Vector3bS", 32, "_3b">; + +def ZPR3b8 : ZPRRegOp<"b", ZPRAsmOp3b8, ElementSizeB, ZPR_3b>; +def ZPR3b16 : ZPRRegOp<"h", ZPRAsmOp3b16, ElementSizeH, ZPR_3b>; +def ZPR3b32 : ZPRRegOp<"s", ZPRAsmOp3b32, ElementSizeS, ZPR_3b>; + +def ZPRAsmOp4b16 : ZPRAsmOperand<"Vector4bH", 16, "_4b">; +def ZPRAsmOp4b32 : ZPRAsmOperand<"Vector4bS", 32, "_4b">; +def ZPRAsmOp4b64 : ZPRAsmOperand<"Vector4bD", 64, "_4b">; + +def ZPR4b16 : ZPRRegOp<"h", ZPRAsmOp4b16, ElementSizeH, ZPR_4b>; +def ZPR4b32 : ZPRRegOp<"s", ZPRAsmOp4b32, ElementSizeS, ZPR_4b>; +def ZPR4b64 : ZPRRegOp<"d", ZPRAsmOp4b64, ElementSizeD, ZPR_4b>; + +class FPRasZPR<int Width> : AsmOperandClass{ + let Name = "FPR" # Width # "asZPR"; + let PredicateMethod = "isFPRasZPR<AArch64::FPR" # Width # "RegClassID>"; + let RenderMethod = "addFPRasZPRRegOperands<" # Width # ">"; +} + +class FPRasZPROperand<int Width> : RegisterOperand<ZPR> { + let ParserMatchClass = FPRasZPR<Width>; + let PrintMethod = "printZPRasFPR<" # Width # ">"; +} + +def FPR8asZPR : FPRasZPROperand<8>; +def FPR16asZPR : FPRasZPROperand<16>; +def FPR32asZPR : FPRasZPROperand<32>; +def FPR64asZPR : FPRasZPROperand<64>; +def FPR128asZPR : FPRasZPROperand<128>; + +let Namespace = "AArch64" in { + def zsub0 : SubRegIndex<128, -1>; + def zsub1 : SubRegIndex<128, -1>; + def zsub2 : SubRegIndex<128, -1>; + def zsub3 : SubRegIndex<128, -1>; +} + +// Pairs, triples, and quads of SVE vector registers. 
+def ZSeqPairs : RegisterTuples<[zsub0, zsub1], [(rotl ZPR, 0), (rotl ZPR, 1)]>; +def ZSeqTriples : RegisterTuples<[zsub0, zsub1, zsub2], [(rotl ZPR, 0), (rotl ZPR, 1), (rotl ZPR, 2)]>; +def ZSeqQuads : RegisterTuples<[zsub0, zsub1, zsub2, zsub3], [(rotl ZPR, 0), (rotl ZPR, 1), (rotl ZPR, 2), (rotl ZPR, 3)]>; + +def ZPR2 : RegisterClass<"AArch64", [untyped], 128, (add ZSeqPairs)> { + let Size = 256; +} +def ZPR3 : RegisterClass<"AArch64", [untyped], 128, (add ZSeqTriples)> { + let Size = 384; +} +def ZPR4 : RegisterClass<"AArch64", [untyped], 128, (add ZSeqQuads)> { + let Size = 512; +} + +class ZPRVectorList<int ElementWidth, int NumRegs> : AsmOperandClass { + let Name = "SVEVectorList" # NumRegs # ElementWidth; + let ParserMethod = "tryParseVectorList<RegKind::SVEDataVector>"; + let PredicateMethod = + "isTypedVectorList<RegKind::SVEDataVector, " #NumRegs #", 0, " #ElementWidth #">"; + let RenderMethod = "addVectorListOperands<AArch64Operand::VecListIdx_ZReg, " # NumRegs # ">"; +} + +def Z_b : RegisterOperand<ZPR, "printTypedVectorList<0,'b'>"> { + let ParserMatchClass = ZPRVectorList<8, 1>; +} + +def Z_h : RegisterOperand<ZPR, "printTypedVectorList<0,'h'>"> { + let ParserMatchClass = ZPRVectorList<16, 1>; +} + +def Z_s : RegisterOperand<ZPR, "printTypedVectorList<0,'s'>"> { + let ParserMatchClass = ZPRVectorList<32, 1>; +} + +def Z_d : RegisterOperand<ZPR, "printTypedVectorList<0,'d'>"> { + let ParserMatchClass = ZPRVectorList<64, 1>; +} + +def ZZ_b : RegisterOperand<ZPR2, "printTypedVectorList<0,'b'>"> { + let ParserMatchClass = ZPRVectorList<8, 2>; +} + +def ZZ_h : RegisterOperand<ZPR2, "printTypedVectorList<0,'h'>"> { + let ParserMatchClass = ZPRVectorList<16, 2>; +} + +def ZZ_s : RegisterOperand<ZPR2, "printTypedVectorList<0,'s'>"> { + let ParserMatchClass = ZPRVectorList<32, 2>; +} + +def ZZ_d : RegisterOperand<ZPR2, "printTypedVectorList<0,'d'>"> { + let ParserMatchClass = ZPRVectorList<64, 2>; +} + +def ZZZ_b : RegisterOperand<ZPR3, "printTypedVectorList<0,'b'>"> { + let ParserMatchClass = ZPRVectorList<8, 3>; +} + +def ZZZ_h : RegisterOperand<ZPR3, "printTypedVectorList<0,'h'>"> { + let ParserMatchClass = ZPRVectorList<16, 3>; +} + +def ZZZ_s : RegisterOperand<ZPR3, "printTypedVectorList<0,'s'>"> { + let ParserMatchClass = ZPRVectorList<32, 3>; +} + +def ZZZ_d : RegisterOperand<ZPR3, "printTypedVectorList<0,'d'>"> { + let ParserMatchClass = ZPRVectorList<64, 3>; +} + +def ZZZZ_b : RegisterOperand<ZPR4, "printTypedVectorList<0,'b'>"> { + let ParserMatchClass = ZPRVectorList<8, 4>; +} + +def ZZZZ_h : RegisterOperand<ZPR4, "printTypedVectorList<0,'h'>"> { + let ParserMatchClass = ZPRVectorList<16, 4>; +} + +def ZZZZ_s : RegisterOperand<ZPR4, "printTypedVectorList<0,'s'>"> { + let ParserMatchClass = ZPRVectorList<32, 4>; +} + +def ZZZZ_d : RegisterOperand<ZPR4, "printTypedVectorList<0,'d'>"> { + let ParserMatchClass = ZPRVectorList<64, 4>; +} + +class ZPRExtendAsmOperand<string ShiftExtend, int RegWidth, int Scale, + bit ScaleAlwaysSame = 0b0> : AsmOperandClass { + let Name = "ZPRExtend" # ShiftExtend # RegWidth # Scale + # !if(ScaleAlwaysSame, "Only", ""); + + let PredicateMethod = "isSVEDataVectorRegWithShiftExtend<" + # RegWidth # ", AArch64::ZPRRegClassID, " + # "AArch64_AM::" # ShiftExtend # ", " + # Scale # ", " + # !if(ScaleAlwaysSame, "true", "false") + # ">"; + let DiagnosticType = "InvalidZPR" # RegWidth # ShiftExtend # Scale; + let RenderMethod = "addRegOperands"; + let ParserMethod = "tryParseSVEDataVector<true, true>"; +} + +class ZPRExtendRegisterOperand<bit 
SignExtend, bit IsLSL, string Repr, + int RegWidth, int Scale, string Suffix = ""> + : RegisterOperand<ZPR> { + let ParserMatchClass = + !cast<AsmOperandClass>("ZPR" # RegWidth # "AsmOpndExt" # Repr # Scale # Suffix); + let PrintMethod = "printRegWithShiftExtend<" + # !if(SignExtend, "true", "false") # ", " + # Scale # ", " + # !if(IsLSL, "'x'", "'w'") # ", " + # !if(!eq(RegWidth, 32), "'s'", "'d'") # ">"; +} + +foreach RegWidth = [32, 64] in { + // UXTW(8|16|32|64) + def ZPR#RegWidth#AsmOpndExtUXTW8Only : ZPRExtendAsmOperand<"UXTW", RegWidth, 8, 0b1>; + def ZPR#RegWidth#AsmOpndExtUXTW8 : ZPRExtendAsmOperand<"UXTW", RegWidth, 8>; + def ZPR#RegWidth#AsmOpndExtUXTW16 : ZPRExtendAsmOperand<"UXTW", RegWidth, 16>; + def ZPR#RegWidth#AsmOpndExtUXTW32 : ZPRExtendAsmOperand<"UXTW", RegWidth, 32>; + def ZPR#RegWidth#AsmOpndExtUXTW64 : ZPRExtendAsmOperand<"UXTW", RegWidth, 64>; + + def ZPR#RegWidth#ExtUXTW8Only : ZPRExtendRegisterOperand<0b0, 0b0, "UXTW", RegWidth, 8, "Only">; + def ZPR#RegWidth#ExtUXTW8 : ZPRExtendRegisterOperand<0b0, 0b0, "UXTW", RegWidth, 8>; + def ZPR#RegWidth#ExtUXTW16 : ZPRExtendRegisterOperand<0b0, 0b0, "UXTW", RegWidth, 16>; + def ZPR#RegWidth#ExtUXTW32 : ZPRExtendRegisterOperand<0b0, 0b0, "UXTW", RegWidth, 32>; + def ZPR#RegWidth#ExtUXTW64 : ZPRExtendRegisterOperand<0b0, 0b0, "UXTW", RegWidth, 64>; + + // SXTW(8|16|32|64) + def ZPR#RegWidth#AsmOpndExtSXTW8Only : ZPRExtendAsmOperand<"SXTW", RegWidth, 8, 0b1>; + def ZPR#RegWidth#AsmOpndExtSXTW8 : ZPRExtendAsmOperand<"SXTW", RegWidth, 8>; + def ZPR#RegWidth#AsmOpndExtSXTW16 : ZPRExtendAsmOperand<"SXTW", RegWidth, 16>; + def ZPR#RegWidth#AsmOpndExtSXTW32 : ZPRExtendAsmOperand<"SXTW", RegWidth, 32>; + def ZPR#RegWidth#AsmOpndExtSXTW64 : ZPRExtendAsmOperand<"SXTW", RegWidth, 64>; + + def ZPR#RegWidth#ExtSXTW8Only : ZPRExtendRegisterOperand<0b1, 0b0, "SXTW", RegWidth, 8, "Only">; + def ZPR#RegWidth#ExtSXTW8 : ZPRExtendRegisterOperand<0b1, 0b0, "SXTW", RegWidth, 8>; + def ZPR#RegWidth#ExtSXTW16 : ZPRExtendRegisterOperand<0b1, 0b0, "SXTW", RegWidth, 16>; + def ZPR#RegWidth#ExtSXTW32 : ZPRExtendRegisterOperand<0b1, 0b0, "SXTW", RegWidth, 32>; + def ZPR#RegWidth#ExtSXTW64 : ZPRExtendRegisterOperand<0b1, 0b0, "SXTW", RegWidth, 64>; + + // LSL(8|16|32|64) + def ZPR#RegWidth#AsmOpndExtLSL8 : ZPRExtendAsmOperand<"LSL", RegWidth, 8>; + def ZPR#RegWidth#AsmOpndExtLSL16 : ZPRExtendAsmOperand<"LSL", RegWidth, 16>; + def ZPR#RegWidth#AsmOpndExtLSL32 : ZPRExtendAsmOperand<"LSL", RegWidth, 32>; + def ZPR#RegWidth#AsmOpndExtLSL64 : ZPRExtendAsmOperand<"LSL", RegWidth, 64>; + def ZPR#RegWidth#ExtLSL8 : ZPRExtendRegisterOperand<0b0, 0b1, "LSL", RegWidth, 8>; + def ZPR#RegWidth#ExtLSL16 : ZPRExtendRegisterOperand<0b0, 0b1, "LSL", RegWidth, 16>; + def ZPR#RegWidth#ExtLSL32 : ZPRExtendRegisterOperand<0b0, 0b1, "LSL", RegWidth, 32>; + def ZPR#RegWidth#ExtLSL64 : ZPRExtendRegisterOperand<0b0, 0b1, "LSL", RegWidth, 64>; +} + +class GPR64ShiftExtendAsmOperand <string AsmOperandName, int Scale, string RegClass> : AsmOperandClass { + let Name = AsmOperandName # Scale; + let PredicateMethod = "isGPR64WithShiftExtend<AArch64::"#RegClass#"RegClassID, " # Scale # ">"; + let DiagnosticType = "Invalid" # AsmOperandName # Scale; + let RenderMethod = "addRegOperands"; + let ParserMethod = "tryParseGPROperand<true>"; +} + +class GPR64ExtendRegisterOperand<string Name, int Scale, RegisterClass RegClass> : RegisterOperand<RegClass>{ + let ParserMatchClass = !cast<AsmOperandClass>(Name); + let PrintMethod = "printRegWithShiftExtend<false, " # Scale # ", 'x', 0>"; +} + 
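+// The loop below instantiates the scaled 64-bit GPR register-offset operands
+// (GPR64shifted8/16/32/64 and GPR64NoXZRshifted8/16/32/64) that the SVE
+// load/store definitions use for their reg+reg addressing modes.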
+foreach Scale = [8, 16, 32, 64] in { + def GPR64shiftedAsmOpnd # Scale : GPR64ShiftExtendAsmOperand<"GPR64shifted", Scale, "GPR64">; + def GPR64shifted # Scale : GPR64ExtendRegisterOperand<"GPR64shiftedAsmOpnd" # Scale, Scale, GPR64>; + + def GPR64NoXZRshiftedAsmOpnd # Scale : GPR64ShiftExtendAsmOperand<"GPR64NoXZRshifted", Scale, "GPR64common">; + def GPR64NoXZRshifted # Scale : GPR64ExtendRegisterOperand<"GPR64NoXZRshiftedAsmOpnd" # Scale, Scale, GPR64common>; +} diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SVEInstrInfo.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SVEInstrInfo.td new file mode 100644 index 000000000..0fde68011 --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SVEInstrInfo.td @@ -0,0 +1,1024 @@ +//=- AArch64SVEInstrInfo.td - AArch64 SVE Instructions -*- tablegen -*-----=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// AArch64 Scalable Vector Extension (SVE) Instruction definitions. +// +//===----------------------------------------------------------------------===// + +let Predicates = [HasSVE] in { + + def RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr">; + def RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">; + def RDFFR_P : sve_int_rdffr_unpred<"rdffr">; + def SETFFR : sve_int_setffr<"setffr">; + def WRFFR : sve_int_wrffr<"wrffr">; + + defm ADD_ZZZ : sve_int_bin_cons_arit_0<0b000, "add">; + defm SUB_ZZZ : sve_int_bin_cons_arit_0<0b001, "sub">; + defm SQADD_ZZZ : sve_int_bin_cons_arit_0<0b100, "sqadd">; + defm UQADD_ZZZ : sve_int_bin_cons_arit_0<0b101, "uqadd">; + defm SQSUB_ZZZ : sve_int_bin_cons_arit_0<0b110, "sqsub">; + defm UQSUB_ZZZ : sve_int_bin_cons_arit_0<0b111, "uqsub">; + + def AND_ZZZ : sve_int_bin_cons_log<0b00, "and">; + def ORR_ZZZ : sve_int_bin_cons_log<0b01, "orr">; + def EOR_ZZZ : sve_int_bin_cons_log<0b10, "eor">; + def BIC_ZZZ : sve_int_bin_cons_log<0b11, "bic">; + + defm ADD_ZPmZ : sve_int_bin_pred_arit_0<0b000, "add">; + defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub">; + defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr">; + + defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr">; + defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor">; + defm AND_ZPmZ : sve_int_bin_pred_log<0b010, "and">; + defm BIC_ZPmZ : sve_int_bin_pred_log<0b011, "bic">; + + defm ADD_ZI : sve_int_arith_imm0<0b000, "add">; + defm SUB_ZI : sve_int_arith_imm0<0b001, "sub">; + defm SUBR_ZI : sve_int_arith_imm0<0b011, "subr">; + defm SQADD_ZI : sve_int_arith_imm0<0b100, "sqadd">; + defm UQADD_ZI : sve_int_arith_imm0<0b101, "uqadd">; + defm SQSUB_ZI : sve_int_arith_imm0<0b110, "sqsub">; + defm UQSUB_ZI : sve_int_arith_imm0<0b111, "uqsub">; + + defm MAD_ZPmZZ : sve_int_mladdsub_vvv_pred<0b0, "mad">; + defm MSB_ZPmZZ : sve_int_mladdsub_vvv_pred<0b1, "msb">; + defm MLA_ZPmZZ : sve_int_mlas_vvv_pred<0b0, "mla">; + defm MLS_ZPmZZ : sve_int_mlas_vvv_pred<0b1, "mls">; + + // SVE predicated integer reductions. 
+ defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv">; + defm UADDV_VPZ : sve_int_reduce_0_uaddv<0b001, "uaddv">; + defm SMAXV_VPZ : sve_int_reduce_1<0b000, "smaxv">; + defm UMAXV_VPZ : sve_int_reduce_1<0b001, "umaxv">; + defm SMINV_VPZ : sve_int_reduce_1<0b010, "sminv">; + defm UMINV_VPZ : sve_int_reduce_1<0b011, "uminv">; + defm ORV_VPZ : sve_int_reduce_2<0b000, "orv">; + defm EORV_VPZ : sve_int_reduce_2<0b001, "eorv">; + defm ANDV_VPZ : sve_int_reduce_2<0b010, "andv">; + + defm ORR_ZI : sve_int_log_imm<0b00, "orr", "orn">; + defm EOR_ZI : sve_int_log_imm<0b01, "eor", "eon">; + defm AND_ZI : sve_int_log_imm<0b10, "and", "bic">; + + defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", simm8>; + defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", simm8>; + defm UMAX_ZI : sve_int_arith_imm1<0b01, "umax", imm0_255>; + defm UMIN_ZI : sve_int_arith_imm1<0b11, "umin", imm0_255>; + + defm MUL_ZI : sve_int_arith_imm2<"mul">; + defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul">; + defm SMULH_ZPmZ : sve_int_bin_pred_arit_2<0b010, "smulh">; + defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh">; + + defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv">; + defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv">; + defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr">; + defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr">; + + defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot">; + defm UDOT_ZZZ : sve_intx_dot<0b1, "udot">; + + defm SDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b0, "sdot">; + defm UDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b1, "udot">; + + defm SXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b000, "sxtb">; + defm UXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b001, "uxtb">; + defm SXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b010, "sxth">; + defm UXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b011, "uxth">; + defm SXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b100, "sxtw">; + defm UXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b101, "uxtw">; + defm ABS_ZPmZ : sve_int_un_pred_arit_0< 0b110, "abs">; + defm NEG_ZPmZ : sve_int_un_pred_arit_0< 0b111, "neg">; + + defm CLS_ZPmZ : sve_int_un_pred_arit_1< 0b000, "cls">; + defm CLZ_ZPmZ : sve_int_un_pred_arit_1< 0b001, "clz">; + defm CNT_ZPmZ : sve_int_un_pred_arit_1< 0b010, "cnt">; + defm CNOT_ZPmZ : sve_int_un_pred_arit_1< 0b011, "cnot">; + defm NOT_ZPmZ : sve_int_un_pred_arit_1< 0b110, "not">; + defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs">; + defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg">; + + defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax">; + defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax">; + defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin">; + defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin">; + defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd">; + defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd">; + + defm FRECPE_ZZ : sve_fp_2op_u_zd<0b110, "frecpe">; + defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte">; + + defm FADD_ZPmI : sve_fp_2op_i_p_zds<0b000, "fadd", sve_fpimm_half_one>; + defm FSUB_ZPmI : sve_fp_2op_i_p_zds<0b001, "fsub", sve_fpimm_half_one>; + defm FMUL_ZPmI : sve_fp_2op_i_p_zds<0b010, "fmul", sve_fpimm_half_two>; + defm FSUBR_ZPmI : sve_fp_2op_i_p_zds<0b011, "fsubr", sve_fpimm_half_one>; + defm FMAXNM_ZPmI : sve_fp_2op_i_p_zds<0b100, "fmaxnm", sve_fpimm_zero_one>; + defm FMINNM_ZPmI : sve_fp_2op_i_p_zds<0b101, "fminnm", sve_fpimm_zero_one>; + defm FMAX_ZPmI : sve_fp_2op_i_p_zds<0b110, "fmax", sve_fpimm_zero_one>; + defm FMIN_ZPmI : sve_fp_2op_i_p_zds<0b111, "fmin", sve_fpimm_zero_one>; + + defm 
FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd">; + defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub">; + defm FMUL_ZPmZ : sve_fp_2op_p_zds<0b0010, "fmul">; + defm FSUBR_ZPmZ : sve_fp_2op_p_zds<0b0011, "fsubr">; + defm FMAXNM_ZPmZ : sve_fp_2op_p_zds<0b0100, "fmaxnm">; + defm FMINNM_ZPmZ : sve_fp_2op_p_zds<0b0101, "fminnm">; + defm FMAX_ZPmZ : sve_fp_2op_p_zds<0b0110, "fmax">; + defm FMIN_ZPmZ : sve_fp_2op_p_zds<0b0111, "fmin">; + defm FABD_ZPmZ : sve_fp_2op_p_zds<0b1000, "fabd">; + defm FSCALE_ZPmZ : sve_fp_2op_p_zds<0b1001, "fscale">; + defm FMULX_ZPmZ : sve_fp_2op_p_zds<0b1010, "fmulx">; + defm FDIVR_ZPmZ : sve_fp_2op_p_zds<0b1100, "fdivr">; + defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv">; + + defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd">; + defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub">; + defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul">; + defm FTSMUL_ZZZ : sve_fp_3op_u_zd<0b011, "ftsmul">; + defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps">; + defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts">; + + defm FTSSEL_ZZZ : sve_int_bin_cons_misc_0_b<"ftssel">; + + defm FCADD_ZPmZ : sve_fp_fcadd<"fcadd">; + defm FCMLA_ZPmZZ : sve_fp_fcmla<"fcmla">; + + defm FMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b00, "fmla">; + defm FMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b01, "fmls">; + defm FNMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b10, "fnmla">; + defm FNMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b11, "fnmls">; + + defm FMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b00, "fmad">; + defm FMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b01, "fmsb">; + defm FNMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b10, "fnmad">; + defm FNMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b11, "fnmsb">; + + defm FTMAD_ZZI : sve_fp_ftmad<"ftmad">; + + defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b0, "fmla">; + defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b1, "fmls">; + + defm FCMLA_ZZZI : sve_fp_fcmla_by_indexed_elem<"fcmla">; + defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul">; + + // SVE floating point reductions. 
+ defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda">; + defm FADDV_VPZ : sve_fp_fast_red<0b000, "faddv">; + defm FMAXNMV_VPZ : sve_fp_fast_red<0b100, "fmaxnmv">; + defm FMINNMV_VPZ : sve_fp_fast_red<0b101, "fminnmv">; + defm FMAXV_VPZ : sve_fp_fast_red<0b110, "fmaxv">; + defm FMINV_VPZ : sve_fp_fast_red<0b111, "fminv">; + + // Splat immediate (unpredicated) + defm DUP_ZI : sve_int_dup_imm<"dup">; + defm FDUP_ZI : sve_int_dup_fpimm<"fdup">; + defm DUPM_ZI : sve_int_dup_mask_imm<"dupm">; + + // Splat immediate (predicated) + defm CPY_ZPmI : sve_int_dup_imm_pred_merge<"cpy">; + defm CPY_ZPzI : sve_int_dup_imm_pred_zero<"cpy">; + defm FCPY_ZPmI : sve_int_dup_fpimm_pred<"fcpy">; + + // Splat scalar register (unpredicated, GPR or vector + element index) + defm DUP_ZR : sve_int_perm_dup_r<"dup">; + defm DUP_ZZI : sve_int_perm_dup_i<"dup">; + + // Splat scalar register (predicated) + defm CPY_ZPmR : sve_int_perm_cpy_r<"cpy">; + defm CPY_ZPmV : sve_int_perm_cpy_v<"cpy">; + + // Select elements from either vector (predicated) + defm SEL_ZPZZ : sve_int_sel_vvv<"sel">; + + defm SPLICE_ZPZ : sve_int_perm_splice<"splice">; + defm COMPACT_ZPZ : sve_int_perm_compact<"compact">; + defm INSR_ZR : sve_int_perm_insrs<"insr">; + defm INSR_ZV : sve_int_perm_insrv<"insr">; + def EXT_ZZI : sve_int_perm_extract_i<"ext">; + + defm RBIT_ZPmZ : sve_int_perm_rev_rbit<"rbit">; + defm REVB_ZPmZ : sve_int_perm_rev_revb<"revb">; + defm REVH_ZPmZ : sve_int_perm_rev_revh<"revh">; + defm REVW_ZPmZ : sve_int_perm_rev_revw<"revw">; + + defm REV_PP : sve_int_perm_reverse_p<"rev">; + defm REV_ZZ : sve_int_perm_reverse_z<"rev">; + + defm SUNPKLO_ZZ : sve_int_perm_unpk<0b00, "sunpklo">; + defm SUNPKHI_ZZ : sve_int_perm_unpk<0b01, "sunpkhi">; + defm UUNPKLO_ZZ : sve_int_perm_unpk<0b10, "uunpklo">; + defm UUNPKHI_ZZ : sve_int_perm_unpk<0b11, "uunpkhi">; + + def PUNPKLO_PP : sve_int_perm_punpk<0b0, "punpklo">; + def PUNPKHI_PP : sve_int_perm_punpk<0b1, "punpkhi">; + + defm MOVPRFX_ZPzZ : sve_int_movprfx_pred_zero<0b000, "movprfx">; + defm MOVPRFX_ZPmZ : sve_int_movprfx_pred_merge<0b001, "movprfx">; + def MOVPRFX_ZZ : sve_int_bin_cons_misc_0_c<0b00000001, "movprfx", ZPRAny>; + def FEXPA_ZZ_H : sve_int_bin_cons_misc_0_c<0b01000000, "fexpa", ZPR16>; + def FEXPA_ZZ_S : sve_int_bin_cons_misc_0_c<0b10000000, "fexpa", ZPR32>; + def FEXPA_ZZ_D : sve_int_bin_cons_misc_0_c<0b11000000, "fexpa", ZPR64>; + + def BRKPA_PPzPP : sve_int_brkp<0b00, "brkpa">; + def BRKPAS_PPzPP : sve_int_brkp<0b10, "brkpas">; + def BRKPB_PPzPP : sve_int_brkp<0b01, "brkpb">; + def BRKPBS_PPzPP : sve_int_brkp<0b11, "brkpbs">; + + def BRKN_PPzP : sve_int_brkn<0b0, "brkn">; + def BRKNS_PPzP : sve_int_brkn<0b1, "brkns">; + + defm BRKA_PPzP : sve_int_break_z<0b000, "brka">; + defm BRKA_PPmP : sve_int_break_m<0b001, "brka">; + defm BRKAS_PPzP : sve_int_break_z<0b010, "brkas">; + defm BRKB_PPzP : sve_int_break_z<0b100, "brkb">; + defm BRKB_PPmP : sve_int_break_m<0b101, "brkb">; + defm BRKBS_PPzP : sve_int_break_z<0b110, "brkbs">; + + def PTEST_PP : sve_int_ptest<0b010000, "ptest">; + def PFALSE : sve_int_pfalse<0b000000, "pfalse">; + defm PFIRST : sve_int_pfirst<0b00000, "pfirst">; + defm PNEXT : sve_int_pnext<0b00110, "pnext">; + + def AND_PPzPP : sve_int_pred_log<0b0000, "and">; + def BIC_PPzPP : sve_int_pred_log<0b0001, "bic">; + def EOR_PPzPP : sve_int_pred_log<0b0010, "eor">; + def SEL_PPPP : sve_int_pred_log<0b0011, "sel">; + def ANDS_PPzPP : sve_int_pred_log<0b0100, "ands">; + def BICS_PPzPP : sve_int_pred_log<0b0101, "bics">; + def EORS_PPzPP : sve_int_pred_log<0b0110, 
"eors">; + def ORR_PPzPP : sve_int_pred_log<0b1000, "orr">; + def ORN_PPzPP : sve_int_pred_log<0b1001, "orn">; + def NOR_PPzPP : sve_int_pred_log<0b1010, "nor">; + def NAND_PPzPP : sve_int_pred_log<0b1011, "nand">; + def ORRS_PPzPP : sve_int_pred_log<0b1100, "orrs">; + def ORNS_PPzPP : sve_int_pred_log<0b1101, "orns">; + def NORS_PPzPP : sve_int_pred_log<0b1110, "nors">; + def NANDS_PPzPP : sve_int_pred_log<0b1111, "nands">; + + defm CLASTA_RPZ : sve_int_perm_clast_rz<0, "clasta">; + defm CLASTB_RPZ : sve_int_perm_clast_rz<1, "clastb">; + defm CLASTA_VPZ : sve_int_perm_clast_vz<0, "clasta">; + defm CLASTB_VPZ : sve_int_perm_clast_vz<1, "clastb">; + defm CLASTA_ZPZ : sve_int_perm_clast_zz<0, "clasta">; + defm CLASTB_ZPZ : sve_int_perm_clast_zz<1, "clastb">; + + defm LASTA_RPZ : sve_int_perm_last_r<0, "lasta">; + defm LASTB_RPZ : sve_int_perm_last_r<1, "lastb">; + defm LASTA_VPZ : sve_int_perm_last_v<0, "lasta">; + defm LASTB_VPZ : sve_int_perm_last_v<1, "lastb">; + + // continuous load with reg+immediate + defm LD1B_IMM : sve_mem_cld_si<0b0000, "ld1b", Z_b, ZPR8>; + defm LD1B_H_IMM : sve_mem_cld_si<0b0001, "ld1b", Z_h, ZPR16>; + defm LD1B_S_IMM : sve_mem_cld_si<0b0010, "ld1b", Z_s, ZPR32>; + defm LD1B_D_IMM : sve_mem_cld_si<0b0011, "ld1b", Z_d, ZPR64>; + defm LD1SW_D_IMM : sve_mem_cld_si<0b0100, "ld1sw", Z_d, ZPR64>; + defm LD1H_IMM : sve_mem_cld_si<0b0101, "ld1h", Z_h, ZPR16>; + defm LD1H_S_IMM : sve_mem_cld_si<0b0110, "ld1h", Z_s, ZPR32>; + defm LD1H_D_IMM : sve_mem_cld_si<0b0111, "ld1h", Z_d, ZPR64>; + defm LD1SH_D_IMM : sve_mem_cld_si<0b1000, "ld1sh", Z_d, ZPR64>; + defm LD1SH_S_IMM : sve_mem_cld_si<0b1001, "ld1sh", Z_s, ZPR32>; + defm LD1W_IMM : sve_mem_cld_si<0b1010, "ld1w", Z_s, ZPR32>; + defm LD1W_D_IMM : sve_mem_cld_si<0b1011, "ld1w", Z_d, ZPR64>; + defm LD1SB_D_IMM : sve_mem_cld_si<0b1100, "ld1sb", Z_d, ZPR64>; + defm LD1SB_S_IMM : sve_mem_cld_si<0b1101, "ld1sb", Z_s, ZPR32>; + defm LD1SB_H_IMM : sve_mem_cld_si<0b1110, "ld1sb", Z_h, ZPR16>; + defm LD1D_IMM : sve_mem_cld_si<0b1111, "ld1d", Z_d, ZPR64>; + + // LD1R loads (splat scalar to vector) + defm LD1RB_IMM : sve_mem_ld_dup<0b00, 0b00, "ld1rb", Z_b, ZPR8, uimm6s1>; + defm LD1RB_H_IMM : sve_mem_ld_dup<0b00, 0b01, "ld1rb", Z_h, ZPR16, uimm6s1>; + defm LD1RB_S_IMM : sve_mem_ld_dup<0b00, 0b10, "ld1rb", Z_s, ZPR32, uimm6s1>; + defm LD1RB_D_IMM : sve_mem_ld_dup<0b00, 0b11, "ld1rb", Z_d, ZPR64, uimm6s1>; + defm LD1RSW_IMM : sve_mem_ld_dup<0b01, 0b00, "ld1rsw", Z_d, ZPR64, uimm6s4>; + defm LD1RH_IMM : sve_mem_ld_dup<0b01, 0b01, "ld1rh", Z_h, ZPR16, uimm6s2>; + defm LD1RH_S_IMM : sve_mem_ld_dup<0b01, 0b10, "ld1rh", Z_s, ZPR32, uimm6s2>; + defm LD1RH_D_IMM : sve_mem_ld_dup<0b01, 0b11, "ld1rh", Z_d, ZPR64, uimm6s2>; + defm LD1RSH_D_IMM : sve_mem_ld_dup<0b10, 0b00, "ld1rsh", Z_d, ZPR64, uimm6s2>; + defm LD1RSH_S_IMM : sve_mem_ld_dup<0b10, 0b01, "ld1rsh", Z_s, ZPR32, uimm6s2>; + defm LD1RW_IMM : sve_mem_ld_dup<0b10, 0b10, "ld1rw", Z_s, ZPR32, uimm6s4>; + defm LD1RW_D_IMM : sve_mem_ld_dup<0b10, 0b11, "ld1rw", Z_d, ZPR64, uimm6s4>; + defm LD1RSB_D_IMM : sve_mem_ld_dup<0b11, 0b00, "ld1rsb", Z_d, ZPR64, uimm6s1>; + defm LD1RSB_S_IMM : sve_mem_ld_dup<0b11, 0b01, "ld1rsb", Z_s, ZPR32, uimm6s1>; + defm LD1RSB_H_IMM : sve_mem_ld_dup<0b11, 0b10, "ld1rsb", Z_h, ZPR16, uimm6s1>; + defm LD1RD_IMM : sve_mem_ld_dup<0b11, 0b11, "ld1rd", Z_d, ZPR64, uimm6s8>; + + // LD1RQ loads (load quadword-vector and splat to scalable vector) + defm LD1RQ_B_IMM : sve_mem_ldqr_si<0b00, "ld1rqb", Z_b, ZPR8>; + defm LD1RQ_H_IMM : sve_mem_ldqr_si<0b01, "ld1rqh", Z_h, 
ZPR16>; + defm LD1RQ_W_IMM : sve_mem_ldqr_si<0b10, "ld1rqw", Z_s, ZPR32>; + defm LD1RQ_D_IMM : sve_mem_ldqr_si<0b11, "ld1rqd", Z_d, ZPR64>; + defm LD1RQ_B : sve_mem_ldqr_ss<0b00, "ld1rqb", Z_b, ZPR8, GPR64NoXZRshifted8>; + defm LD1RQ_H : sve_mem_ldqr_ss<0b01, "ld1rqh", Z_h, ZPR16, GPR64NoXZRshifted16>; + defm LD1RQ_W : sve_mem_ldqr_ss<0b10, "ld1rqw", Z_s, ZPR32, GPR64NoXZRshifted32>; + defm LD1RQ_D : sve_mem_ldqr_ss<0b11, "ld1rqd", Z_d, ZPR64, GPR64NoXZRshifted64>; + + // continuous load with reg+reg addressing. + defm LD1B : sve_mem_cld_ss<0b0000, "ld1b", Z_b, ZPR8, GPR64NoXZRshifted8>; + defm LD1B_H : sve_mem_cld_ss<0b0001, "ld1b", Z_h, ZPR16, GPR64NoXZRshifted8>; + defm LD1B_S : sve_mem_cld_ss<0b0010, "ld1b", Z_s, ZPR32, GPR64NoXZRshifted8>; + defm LD1B_D : sve_mem_cld_ss<0b0011, "ld1b", Z_d, ZPR64, GPR64NoXZRshifted8>; + defm LD1SW_D : sve_mem_cld_ss<0b0100, "ld1sw", Z_d, ZPR64, GPR64NoXZRshifted32>; + defm LD1H : sve_mem_cld_ss<0b0101, "ld1h", Z_h, ZPR16, GPR64NoXZRshifted16>; + defm LD1H_S : sve_mem_cld_ss<0b0110, "ld1h", Z_s, ZPR32, GPR64NoXZRshifted16>; + defm LD1H_D : sve_mem_cld_ss<0b0111, "ld1h", Z_d, ZPR64, GPR64NoXZRshifted16>; + defm LD1SH_D : sve_mem_cld_ss<0b1000, "ld1sh", Z_d, ZPR64, GPR64NoXZRshifted16>; + defm LD1SH_S : sve_mem_cld_ss<0b1001, "ld1sh", Z_s, ZPR32, GPR64NoXZRshifted16>; + defm LD1W : sve_mem_cld_ss<0b1010, "ld1w", Z_s, ZPR32, GPR64NoXZRshifted32>; + defm LD1W_D : sve_mem_cld_ss<0b1011, "ld1w", Z_d, ZPR64, GPR64NoXZRshifted32>; + defm LD1SB_D : sve_mem_cld_ss<0b1100, "ld1sb", Z_d, ZPR64, GPR64NoXZRshifted8>; + defm LD1SB_S : sve_mem_cld_ss<0b1101, "ld1sb", Z_s, ZPR32, GPR64NoXZRshifted8>; + defm LD1SB_H : sve_mem_cld_ss<0b1110, "ld1sb", Z_h, ZPR16, GPR64NoXZRshifted8>; + defm LD1D : sve_mem_cld_ss<0b1111, "ld1d", Z_d, ZPR64, GPR64NoXZRshifted64>; + + // non-faulting continuous load with reg+immediate + defm LDNF1B_IMM : sve_mem_cldnf_si<0b0000, "ldnf1b", Z_b, ZPR8>; + defm LDNF1B_H_IMM : sve_mem_cldnf_si<0b0001, "ldnf1b", Z_h, ZPR16>; + defm LDNF1B_S_IMM : sve_mem_cldnf_si<0b0010, "ldnf1b", Z_s, ZPR32>; + defm LDNF1B_D_IMM : sve_mem_cldnf_si<0b0011, "ldnf1b", Z_d, ZPR64>; + defm LDNF1SW_D_IMM : sve_mem_cldnf_si<0b0100, "ldnf1sw", Z_d, ZPR64>; + defm LDNF1H_IMM : sve_mem_cldnf_si<0b0101, "ldnf1h", Z_h, ZPR16>; + defm LDNF1H_S_IMM : sve_mem_cldnf_si<0b0110, "ldnf1h", Z_s, ZPR32>; + defm LDNF1H_D_IMM : sve_mem_cldnf_si<0b0111, "ldnf1h", Z_d, ZPR64>; + defm LDNF1SH_D_IMM : sve_mem_cldnf_si<0b1000, "ldnf1sh", Z_d, ZPR64>; + defm LDNF1SH_S_IMM : sve_mem_cldnf_si<0b1001, "ldnf1sh", Z_s, ZPR32>; + defm LDNF1W_IMM : sve_mem_cldnf_si<0b1010, "ldnf1w", Z_s, ZPR32>; + defm LDNF1W_D_IMM : sve_mem_cldnf_si<0b1011, "ldnf1w", Z_d, ZPR64>; + defm LDNF1SB_D_IMM : sve_mem_cldnf_si<0b1100, "ldnf1sb", Z_d, ZPR64>; + defm LDNF1SB_S_IMM : sve_mem_cldnf_si<0b1101, "ldnf1sb", Z_s, ZPR32>; + defm LDNF1SB_H_IMM : sve_mem_cldnf_si<0b1110, "ldnf1sb", Z_h, ZPR16>; + defm LDNF1D_IMM : sve_mem_cldnf_si<0b1111, "ldnf1d", Z_d, ZPR64>; + + // First-faulting loads with reg+reg addressing. 
+ defm LDFF1B : sve_mem_cldff_ss<0b0000, "ldff1b", Z_b, ZPR8, GPR64shifted8>; + defm LDFF1B_H : sve_mem_cldff_ss<0b0001, "ldff1b", Z_h, ZPR16, GPR64shifted8>; + defm LDFF1B_S : sve_mem_cldff_ss<0b0010, "ldff1b", Z_s, ZPR32, GPR64shifted8>; + defm LDFF1B_D : sve_mem_cldff_ss<0b0011, "ldff1b", Z_d, ZPR64, GPR64shifted8>; + defm LDFF1SW_D : sve_mem_cldff_ss<0b0100, "ldff1sw", Z_d, ZPR64, GPR64shifted32>; + defm LDFF1H : sve_mem_cldff_ss<0b0101, "ldff1h", Z_h, ZPR16, GPR64shifted16>; + defm LDFF1H_S : sve_mem_cldff_ss<0b0110, "ldff1h", Z_s, ZPR32, GPR64shifted16>; + defm LDFF1H_D : sve_mem_cldff_ss<0b0111, "ldff1h", Z_d, ZPR64, GPR64shifted16>; + defm LDFF1SH_D : sve_mem_cldff_ss<0b1000, "ldff1sh", Z_d, ZPR64, GPR64shifted16>; + defm LDFF1SH_S : sve_mem_cldff_ss<0b1001, "ldff1sh", Z_s, ZPR32, GPR64shifted16>; + defm LDFF1W : sve_mem_cldff_ss<0b1010, "ldff1w", Z_s, ZPR32, GPR64shifted32>; + defm LDFF1W_D : sve_mem_cldff_ss<0b1011, "ldff1w", Z_d, ZPR64, GPR64shifted32>; + defm LDFF1SB_D : sve_mem_cldff_ss<0b1100, "ldff1sb", Z_d, ZPR64, GPR64shifted8>; + defm LDFF1SB_S : sve_mem_cldff_ss<0b1101, "ldff1sb", Z_s, ZPR32, GPR64shifted8>; + defm LDFF1SB_H : sve_mem_cldff_ss<0b1110, "ldff1sb", Z_h, ZPR16, GPR64shifted8>; + defm LDFF1D : sve_mem_cldff_ss<0b1111, "ldff1d", Z_d, ZPR64, GPR64shifted64>; + + // LD(2|3|4) structured loads with reg+immediate + defm LD2B_IMM : sve_mem_eld_si<0b00, 0b01, ZZ_b, "ld2b", simm4s2>; + defm LD3B_IMM : sve_mem_eld_si<0b00, 0b10, ZZZ_b, "ld3b", simm4s3>; + defm LD4B_IMM : sve_mem_eld_si<0b00, 0b11, ZZZZ_b, "ld4b", simm4s4>; + defm LD2H_IMM : sve_mem_eld_si<0b01, 0b01, ZZ_h, "ld2h", simm4s2>; + defm LD3H_IMM : sve_mem_eld_si<0b01, 0b10, ZZZ_h, "ld3h", simm4s3>; + defm LD4H_IMM : sve_mem_eld_si<0b01, 0b11, ZZZZ_h, "ld4h", simm4s4>; + defm LD2W_IMM : sve_mem_eld_si<0b10, 0b01, ZZ_s, "ld2w", simm4s2>; + defm LD3W_IMM : sve_mem_eld_si<0b10, 0b10, ZZZ_s, "ld3w", simm4s3>; + defm LD4W_IMM : sve_mem_eld_si<0b10, 0b11, ZZZZ_s, "ld4w", simm4s4>; + defm LD2D_IMM : sve_mem_eld_si<0b11, 0b01, ZZ_d, "ld2d", simm4s2>; + defm LD3D_IMM : sve_mem_eld_si<0b11, 0b10, ZZZ_d, "ld3d", simm4s3>; + defm LD4D_IMM : sve_mem_eld_si<0b11, 0b11, ZZZZ_d, "ld4d", simm4s4>; + + // LD(2|3|4) structured loads (register + register) + def LD2B : sve_mem_eld_ss<0b00, 0b01, ZZ_b, "ld2b", GPR64NoXZRshifted8>; + def LD3B : sve_mem_eld_ss<0b00, 0b10, ZZZ_b, "ld3b", GPR64NoXZRshifted8>; + def LD4B : sve_mem_eld_ss<0b00, 0b11, ZZZZ_b, "ld4b", GPR64NoXZRshifted8>; + def LD2H : sve_mem_eld_ss<0b01, 0b01, ZZ_h, "ld2h", GPR64NoXZRshifted16>; + def LD3H : sve_mem_eld_ss<0b01, 0b10, ZZZ_h, "ld3h", GPR64NoXZRshifted16>; + def LD4H : sve_mem_eld_ss<0b01, 0b11, ZZZZ_h, "ld4h", GPR64NoXZRshifted16>; + def LD2W : sve_mem_eld_ss<0b10, 0b01, ZZ_s, "ld2w", GPR64NoXZRshifted32>; + def LD3W : sve_mem_eld_ss<0b10, 0b10, ZZZ_s, "ld3w", GPR64NoXZRshifted32>; + def LD4W : sve_mem_eld_ss<0b10, 0b11, ZZZZ_s, "ld4w", GPR64NoXZRshifted32>; + def LD2D : sve_mem_eld_ss<0b11, 0b01, ZZ_d, "ld2d", GPR64NoXZRshifted64>; + def LD3D : sve_mem_eld_ss<0b11, 0b10, ZZZ_d, "ld3d", GPR64NoXZRshifted64>; + def LD4D : sve_mem_eld_ss<0b11, 0b11, ZZZZ_d, "ld4d", GPR64NoXZRshifted64>; + + // Gathers using unscaled 32-bit offsets, e.g. 
+ // ld1h z0.s, p0/z, [x0, z0.s, uxtw] + defm GLD1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0000, "ld1sb", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>; + defm GLDFF1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0001, "ldff1sb", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>; + defm GLD1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0010, "ld1b", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>; + defm GLDFF1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0011, "ldff1b", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>; + defm GLD1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0100, "ld1sh", ZPR32ExtSXTW8, ZPR32ExtUXTW8>; + defm GLDFF1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0101, "ldff1sh", ZPR32ExtSXTW8, ZPR32ExtUXTW8>; + defm GLD1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0110, "ld1h", ZPR32ExtSXTW8, ZPR32ExtUXTW8>; + defm GLDFF1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0111, "ldff1h", ZPR32ExtSXTW8, ZPR32ExtUXTW8>; + defm GLD1W : sve_mem_32b_gld_vs_32_unscaled<0b1010, "ld1w", ZPR32ExtSXTW8, ZPR32ExtUXTW8>; + defm GLDFF1W : sve_mem_32b_gld_vs_32_unscaled<0b1011, "ldff1w", ZPR32ExtSXTW8, ZPR32ExtUXTW8>; + + // Gathers using scaled 32-bit offsets, e.g. + // ld1h z0.s, p0/z, [x0, z0.s, uxtw #1] + defm GLD1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0100, "ld1sh", ZPR32ExtSXTW16, ZPR32ExtUXTW16>; + defm GLDFF1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0101, "ldff1sh", ZPR32ExtSXTW16, ZPR32ExtUXTW16>; + defm GLD1H_S : sve_mem_32b_gld_sv_32_scaled<0b0110, "ld1h", ZPR32ExtSXTW16, ZPR32ExtUXTW16>; + defm GLDFF1H_S : sve_mem_32b_gld_sv_32_scaled<0b0111, "ldff1h", ZPR32ExtSXTW16, ZPR32ExtUXTW16>; + defm GLD1W : sve_mem_32b_gld_sv_32_scaled<0b1010, "ld1w", ZPR32ExtSXTW32, ZPR32ExtUXTW32>; + defm GLDFF1W : sve_mem_32b_gld_sv_32_scaled<0b1011, "ldff1w", ZPR32ExtSXTW32, ZPR32ExtUXTW32>; + + // Gathers using scaled 32-bit pointers with offset, e.g. + // ld1h z0.s, p0/z, [z0.s, #16] + defm GLD1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0000, "ld1sb", imm0_31>; + defm GLDFF1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0001, "ldff1sb", imm0_31>; + defm GLD1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0010, "ld1b", imm0_31>; + defm GLDFF1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0011, "ldff1b", imm0_31>; + defm GLD1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0100, "ld1sh", uimm5s2>; + defm GLDFF1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0101, "ldff1sh", uimm5s2>; + defm GLD1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0110, "ld1h", uimm5s2>; + defm GLDFF1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0111, "ldff1h", uimm5s2>; + defm GLD1W : sve_mem_32b_gld_vi_32_ptrs<0b1010, "ld1w", uimm5s4>; + defm GLDFF1W : sve_mem_32b_gld_vi_32_ptrs<0b1011, "ldff1w", uimm5s4>; + + // Gathers using scaled 64-bit pointers with offset, e.g. 
+ // ld1h z0.d, p0/z, [z0.d, #16] + defm GLD1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0000, "ld1sb", imm0_31>; + defm GLDFF1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0001, "ldff1sb", imm0_31>; + defm GLD1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0010, "ld1b", imm0_31>; + defm GLDFF1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0011, "ldff1b", imm0_31>; + defm GLD1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0100, "ld1sh", uimm5s2>; + defm GLDFF1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0101, "ldff1sh", uimm5s2>; + defm GLD1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0110, "ld1h", uimm5s2>; + defm GLDFF1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0111, "ldff1h", uimm5s2>; + defm GLD1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1000, "ld1sw", uimm5s4>; + defm GLDFF1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1001, "ldff1sw", uimm5s4>; + defm GLD1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1010, "ld1w", uimm5s4>; + defm GLDFF1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1011, "ldff1w", uimm5s4>; + defm GLD1D : sve_mem_64b_gld_vi_64_ptrs<0b1110, "ld1d", uimm5s8>; + defm GLDFF1D : sve_mem_64b_gld_vi_64_ptrs<0b1111, "ldff1d", uimm5s8>; + + // Gathers using unscaled 64-bit offsets, e.g. + // ld1h z0.d, p0/z, [x0, z0.d] + defm GLD1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0000, "ld1sb">; + defm GLDFF1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0001, "ldff1sb">; + defm GLD1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0010, "ld1b">; + defm GLDFF1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0011, "ldff1b">; + defm GLD1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0100, "ld1sh">; + defm GLDFF1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0101, "ldff1sh">; + defm GLD1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0110, "ld1h">; + defm GLDFF1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0111, "ldff1h">; + defm GLD1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1000, "ld1sw">; + defm GLDFF1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1001, "ldff1sw">; + defm GLD1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1010, "ld1w">; + defm GLDFF1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1011, "ldff1w">; + defm GLD1D : sve_mem_64b_gld_vs2_64_unscaled<0b1110, "ld1d">; + defm GLDFF1D : sve_mem_64b_gld_vs2_64_unscaled<0b1111, "ldff1d">; + + // Gathers using scaled 64-bit offsets, e.g. + // ld1h z0.d, p0/z, [x0, z0.d, lsl #1] + defm GLD1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0100, "ld1sh", ZPR64ExtLSL16>; + defm GLDFF1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0101, "ldff1sh", ZPR64ExtLSL16>; + defm GLD1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0110, "ld1h", ZPR64ExtLSL16>; + defm GLDFF1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0111, "ldff1h", ZPR64ExtLSL16>; + defm GLD1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1000, "ld1sw", ZPR64ExtLSL32>; + defm GLDFF1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1001, "ldff1sw", ZPR64ExtLSL32>; + defm GLD1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1010, "ld1w", ZPR64ExtLSL32>; + defm GLDFF1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1011, "ldff1w", ZPR64ExtLSL32>; + defm GLD1D : sve_mem_64b_gld_sv2_64_scaled<0b1110, "ld1d", ZPR64ExtLSL64>; + defm GLDFF1D : sve_mem_64b_gld_sv2_64_scaled<0b1111, "ldff1d", ZPR64ExtLSL64>; + + // Gathers using unscaled 32-bit offsets unpacked in 64-bits elements, e.g. 
+ // ld1h z0.d, p0/z, [x0, z0.d, uxtw] + defm GLD1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0000, "ld1sb", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>; + defm GLDFF1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0001, "ldff1sb", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>; + defm GLD1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0010, "ld1b", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>; + defm GLDFF1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0011, "ldff1b", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>; + defm GLD1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0100, "ld1sh", ZPR64ExtSXTW8, ZPR64ExtUXTW8>; + defm GLDFF1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0101, "ldff1sh", ZPR64ExtSXTW8, ZPR64ExtUXTW8>; + defm GLD1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0110, "ld1h", ZPR64ExtSXTW8, ZPR64ExtUXTW8>; + defm GLDFF1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0111, "ldff1h", ZPR64ExtSXTW8, ZPR64ExtUXTW8>; + defm GLD1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1000, "ld1sw", ZPR64ExtSXTW8, ZPR64ExtUXTW8>; + defm GLDFF1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1001, "ldff1sw", ZPR64ExtSXTW8, ZPR64ExtUXTW8>; + defm GLD1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1010, "ld1w", ZPR64ExtSXTW8, ZPR64ExtUXTW8>; + defm GLDFF1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1011, "ldff1w", ZPR64ExtSXTW8, ZPR64ExtUXTW8>; + defm GLD1D : sve_mem_64b_gld_vs_32_unscaled<0b1110, "ld1d", ZPR64ExtSXTW8, ZPR64ExtUXTW8>; + defm GLDFF1D : sve_mem_64b_gld_vs_32_unscaled<0b1111, "ldff1d", ZPR64ExtSXTW8, ZPR64ExtUXTW8>; + + // Gathers using scaled 32-bit offsets unpacked in 64-bits elements, e.g. + // ld1h z0.d, p0/z, [x0, z0.d, uxtw #1] + defm GLD1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0100, "ld1sh", ZPR64ExtSXTW16, ZPR64ExtUXTW16>; + defm GLDFF1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0101, "ldff1sh",ZPR64ExtSXTW16, ZPR64ExtUXTW16>; + defm GLD1H_D : sve_mem_64b_gld_sv_32_scaled<0b0110, "ld1h", ZPR64ExtSXTW16, ZPR64ExtUXTW16>; + defm GLDFF1H_D : sve_mem_64b_gld_sv_32_scaled<0b0111, "ldff1h", ZPR64ExtSXTW16, ZPR64ExtUXTW16>; + defm GLD1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1000, "ld1sw", ZPR64ExtSXTW32, ZPR64ExtUXTW32>; + defm GLDFF1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1001, "ldff1sw",ZPR64ExtSXTW32, ZPR64ExtUXTW32>; + defm GLD1W_D : sve_mem_64b_gld_sv_32_scaled<0b1010, "ld1w", ZPR64ExtSXTW32, ZPR64ExtUXTW32>; + defm GLDFF1W_D : sve_mem_64b_gld_sv_32_scaled<0b1011, "ldff1w", ZPR64ExtSXTW32, ZPR64ExtUXTW32>; + defm GLD1D : sve_mem_64b_gld_sv_32_scaled<0b1110, "ld1d", ZPR64ExtSXTW64, ZPR64ExtUXTW64>; + defm GLDFF1D : sve_mem_64b_gld_sv_32_scaled<0b1111, "ldff1d", ZPR64ExtSXTW64, ZPR64ExtUXTW64>; + + // Non-temporal contiguous loads (register + immediate) + defm LDNT1B_ZRI : sve_mem_cldnt_si<0b00, "ldnt1b", Z_b, ZPR8>; + defm LDNT1H_ZRI : sve_mem_cldnt_si<0b01, "ldnt1h", Z_h, ZPR16>; + defm LDNT1W_ZRI : sve_mem_cldnt_si<0b10, "ldnt1w", Z_s, ZPR32>; + defm LDNT1D_ZRI : sve_mem_cldnt_si<0b11, "ldnt1d", Z_d, ZPR64>; + + // Non-temporal contiguous loads (register + register) + defm LDNT1B_ZRR : sve_mem_cldnt_ss<0b00, "ldnt1b", Z_b, ZPR8, GPR64NoXZRshifted8>; + defm LDNT1H_ZRR : sve_mem_cldnt_ss<0b01, "ldnt1h", Z_h, ZPR16, GPR64NoXZRshifted16>; + defm LDNT1W_ZRR : sve_mem_cldnt_ss<0b10, "ldnt1w", Z_s, ZPR32, GPR64NoXZRshifted32>; + defm LDNT1D_ZRR : sve_mem_cldnt_ss<0b11, "ldnt1d", Z_d, ZPR64, GPR64NoXZRshifted64>; + + // contiguous store with immediates + defm ST1B_IMM : sve_mem_cst_si<0b00, 0b00, "st1b", Z_b, ZPR8>; + defm ST1B_H_IMM : sve_mem_cst_si<0b00, 0b01, "st1b", Z_h, ZPR16>; + defm ST1B_S_IMM : sve_mem_cst_si<0b00, 0b10, "st1b", Z_s, ZPR32>; + defm ST1B_D_IMM : sve_mem_cst_si<0b00, 0b11, "st1b", 
Z_d, ZPR64>; + defm ST1H_IMM : sve_mem_cst_si<0b01, 0b01, "st1h", Z_h, ZPR16>; + defm ST1H_S_IMM : sve_mem_cst_si<0b01, 0b10, "st1h", Z_s, ZPR32>; + defm ST1H_D_IMM : sve_mem_cst_si<0b01, 0b11, "st1h", Z_d, ZPR64>; + defm ST1W_IMM : sve_mem_cst_si<0b10, 0b10, "st1w", Z_s, ZPR32>; + defm ST1W_D_IMM : sve_mem_cst_si<0b10, 0b11, "st1w", Z_d, ZPR64>; + defm ST1D_IMM : sve_mem_cst_si<0b11, 0b11, "st1d", Z_d, ZPR64>; + + // contiguous store with reg+reg addressing. + defm ST1B : sve_mem_cst_ss<0b0000, "st1b", Z_b, ZPR8, GPR64NoXZRshifted8>; + defm ST1B_H : sve_mem_cst_ss<0b0001, "st1b", Z_h, ZPR16, GPR64NoXZRshifted8>; + defm ST1B_S : sve_mem_cst_ss<0b0010, "st1b", Z_s, ZPR32, GPR64NoXZRshifted8>; + defm ST1B_D : sve_mem_cst_ss<0b0011, "st1b", Z_d, ZPR64, GPR64NoXZRshifted8>; + defm ST1H : sve_mem_cst_ss<0b0101, "st1h", Z_h, ZPR16, GPR64NoXZRshifted16>; + defm ST1H_S : sve_mem_cst_ss<0b0110, "st1h", Z_s, ZPR32, GPR64NoXZRshifted16>; + defm ST1H_D : sve_mem_cst_ss<0b0111, "st1h", Z_d, ZPR64, GPR64NoXZRshifted16>; + defm ST1W : sve_mem_cst_ss<0b1010, "st1w", Z_s, ZPR32, GPR64NoXZRshifted32>; + defm ST1W_D : sve_mem_cst_ss<0b1011, "st1w", Z_d, ZPR64, GPR64NoXZRshifted32>; + defm ST1D : sve_mem_cst_ss<0b1111, "st1d", Z_d, ZPR64, GPR64NoXZRshifted64>; + + // Scatters using unscaled 32-bit offsets, e.g. + // st1h z0.s, p0, [x0, z0.s, uxtw] + // and unpacked: + // st1h z0.d, p0, [x0, z0.d, uxtw] + defm SST1B_D : sve_mem_sst_sv_32_unscaled<0b000, "st1b", Z_d, ZPR64, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>; + defm SST1B_S : sve_mem_sst_sv_32_unscaled<0b001, "st1b", Z_s, ZPR32, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>; + defm SST1H_D : sve_mem_sst_sv_32_unscaled<0b010, "st1h", Z_d, ZPR64, ZPR64ExtSXTW8, ZPR64ExtUXTW8>; + defm SST1H_S : sve_mem_sst_sv_32_unscaled<0b011, "st1h", Z_s, ZPR32, ZPR32ExtSXTW8, ZPR32ExtUXTW8>; + defm SST1W_D : sve_mem_sst_sv_32_unscaled<0b100, "st1w", Z_d, ZPR64, ZPR64ExtSXTW8, ZPR64ExtUXTW8>; + defm SST1W : sve_mem_sst_sv_32_unscaled<0b101, "st1w", Z_s, ZPR32, ZPR32ExtSXTW8, ZPR32ExtUXTW8>; + defm SST1D : sve_mem_sst_sv_32_unscaled<0b110, "st1d", Z_d, ZPR64, ZPR64ExtSXTW8, ZPR64ExtUXTW8>; + + // Scatters using scaled 32-bit offsets, e.g. + // st1h z0.s, p0, [x0, z0.s, uxtw #1] + // and unpacked: + // st1h z0.d, p0, [x0, z0.d, uxtw #1] + defm SST1H_D : sve_mem_sst_sv_32_scaled<0b010, "st1h", Z_d, ZPR64, ZPR64ExtSXTW16, ZPR64ExtUXTW16>; + defm SST1H_S : sve_mem_sst_sv_32_scaled<0b011, "st1h", Z_s, ZPR32, ZPR32ExtSXTW16, ZPR32ExtUXTW16>; + defm SST1W_D : sve_mem_sst_sv_32_scaled<0b100, "st1w", Z_d, ZPR64, ZPR64ExtSXTW32, ZPR64ExtUXTW32>; + defm SST1W : sve_mem_sst_sv_32_scaled<0b101, "st1w", Z_s, ZPR32, ZPR32ExtSXTW32, ZPR32ExtUXTW32>; + defm SST1D : sve_mem_sst_sv_32_scaled<0b110, "st1d", Z_d, ZPR64, ZPR64ExtSXTW64, ZPR64ExtUXTW64>; + + // Scatters using 32/64-bit pointers with offset, e.g. + // st1h z0.s, p0, [z0.s, #16] + // st1h z0.d, p0, [z0.d, #16] + defm SST1B_D : sve_mem_sst_vi_ptrs<0b000, "st1b", Z_d, ZPR64, imm0_31>; + defm SST1B_S : sve_mem_sst_vi_ptrs<0b001, "st1b", Z_s, ZPR32, imm0_31>; + defm SST1H_D : sve_mem_sst_vi_ptrs<0b010, "st1h", Z_d, ZPR64, uimm5s2>; + defm SST1H_S : sve_mem_sst_vi_ptrs<0b011, "st1h", Z_s, ZPR32, uimm5s2>; + defm SST1W_D : sve_mem_sst_vi_ptrs<0b100, "st1w", Z_d, ZPR64, uimm5s4>; + defm SST1W : sve_mem_sst_vi_ptrs<0b101, "st1w", Z_s, ZPR32, uimm5s4>; + defm SST1D : sve_mem_sst_vi_ptrs<0b110, "st1d", Z_d, ZPR64, uimm5s8>; + + // Scatters using unscaled 64-bit offsets, e.g. 
+ // st1h z0.d, p0, [x0, z0.d] + defm SST1B_D : sve_mem_sst_sv_64_unscaled<0b00, "st1b">; + defm SST1H_D : sve_mem_sst_sv_64_unscaled<0b01, "st1h">; + defm SST1W_D : sve_mem_sst_sv_64_unscaled<0b10, "st1w">; + defm SST1D : sve_mem_sst_sv_64_unscaled<0b11, "st1d">; + + // Scatters using scaled 64-bit offsets, e.g. + // st1h z0.d, p0, [x0, z0.d, lsl #1] + defm SST1H_D_SCALED : sve_mem_sst_sv_64_scaled<0b01, "st1h", ZPR64ExtLSL16>; + defm SST1W_D_SCALED : sve_mem_sst_sv_64_scaled<0b10, "st1w", ZPR64ExtLSL32>; + defm SST1D_SCALED : sve_mem_sst_sv_64_scaled<0b11, "st1d", ZPR64ExtLSL64>; + + // ST(2|3|4) structured stores (register + immediate) + defm ST2B_IMM : sve_mem_est_si<0b00, 0b01, ZZ_b, "st2b", simm4s2>; + defm ST3B_IMM : sve_mem_est_si<0b00, 0b10, ZZZ_b, "st3b", simm4s3>; + defm ST4B_IMM : sve_mem_est_si<0b00, 0b11, ZZZZ_b, "st4b", simm4s4>; + defm ST2H_IMM : sve_mem_est_si<0b01, 0b01, ZZ_h, "st2h", simm4s2>; + defm ST3H_IMM : sve_mem_est_si<0b01, 0b10, ZZZ_h, "st3h", simm4s3>; + defm ST4H_IMM : sve_mem_est_si<0b01, 0b11, ZZZZ_h, "st4h", simm4s4>; + defm ST2W_IMM : sve_mem_est_si<0b10, 0b01, ZZ_s, "st2w", simm4s2>; + defm ST3W_IMM : sve_mem_est_si<0b10, 0b10, ZZZ_s, "st3w", simm4s3>; + defm ST4W_IMM : sve_mem_est_si<0b10, 0b11, ZZZZ_s, "st4w", simm4s4>; + defm ST2D_IMM : sve_mem_est_si<0b11, 0b01, ZZ_d, "st2d", simm4s2>; + defm ST3D_IMM : sve_mem_est_si<0b11, 0b10, ZZZ_d, "st3d", simm4s3>; + defm ST4D_IMM : sve_mem_est_si<0b11, 0b11, ZZZZ_d, "st4d", simm4s4>; + + // ST(2|3|4) structured stores (register + register) + def ST2B : sve_mem_est_ss<0b00, 0b01, ZZ_b, "st2b", GPR64NoXZRshifted8>; + def ST3B : sve_mem_est_ss<0b00, 0b10, ZZZ_b, "st3b", GPR64NoXZRshifted8>; + def ST4B : sve_mem_est_ss<0b00, 0b11, ZZZZ_b, "st4b", GPR64NoXZRshifted8>; + def ST2H : sve_mem_est_ss<0b01, 0b01, ZZ_h, "st2h", GPR64NoXZRshifted16>; + def ST3H : sve_mem_est_ss<0b01, 0b10, ZZZ_h, "st3h", GPR64NoXZRshifted16>; + def ST4H : sve_mem_est_ss<0b01, 0b11, ZZZZ_h, "st4h", GPR64NoXZRshifted16>; + def ST2W : sve_mem_est_ss<0b10, 0b01, ZZ_s, "st2w", GPR64NoXZRshifted32>; + def ST3W : sve_mem_est_ss<0b10, 0b10, ZZZ_s, "st3w", GPR64NoXZRshifted32>; + def ST4W : sve_mem_est_ss<0b10, 0b11, ZZZZ_s, "st4w", GPR64NoXZRshifted32>; + def ST2D : sve_mem_est_ss<0b11, 0b01, ZZ_d, "st2d", GPR64NoXZRshifted64>; + def ST3D : sve_mem_est_ss<0b11, 0b10, ZZZ_d, "st3d", GPR64NoXZRshifted64>; + def ST4D : sve_mem_est_ss<0b11, 0b11, ZZZZ_d, "st4d", GPR64NoXZRshifted64>; + + // Non-temporal contiguous stores (register + immediate) + defm STNT1B_ZRI : sve_mem_cstnt_si<0b00, "stnt1b", Z_b, ZPR8>; + defm STNT1H_ZRI : sve_mem_cstnt_si<0b01, "stnt1h", Z_h, ZPR16>; + defm STNT1W_ZRI : sve_mem_cstnt_si<0b10, "stnt1w", Z_s, ZPR32>; + defm STNT1D_ZRI : sve_mem_cstnt_si<0b11, "stnt1d", Z_d, ZPR64>; + + // Non-temporal contiguous stores (register + register) + defm STNT1B_ZRR : sve_mem_cstnt_ss<0b00, "stnt1b", Z_b, ZPR8, GPR64NoXZRshifted8>; + defm STNT1H_ZRR : sve_mem_cstnt_ss<0b01, "stnt1h", Z_h, ZPR16, GPR64NoXZRshifted16>; + defm STNT1W_ZRR : sve_mem_cstnt_ss<0b10, "stnt1w", Z_s, ZPR32, GPR64NoXZRshifted32>; + defm STNT1D_ZRR : sve_mem_cstnt_ss<0b11, "stnt1d", Z_d, ZPR64, GPR64NoXZRshifted64>; + + // Fill/Spill + defm LDR_ZXI : sve_mem_z_fill<"ldr">; + defm LDR_PXI : sve_mem_p_fill<"ldr">; + defm STR_ZXI : sve_mem_z_spill<"str">; + defm STR_PXI : sve_mem_p_spill<"str">; + + // Contiguous prefetch (register + immediate) + defm PRFB_PRI : sve_mem_prfm_si<0b00, "prfb">; + defm PRFH_PRI : sve_mem_prfm_si<0b01, "prfh">; + defm PRFW_PRI : 
sve_mem_prfm_si<0b10, "prfw">; + defm PRFD_PRI : sve_mem_prfm_si<0b11, "prfd">; + + // Contiguous prefetch (register + register) + def PRFB_PRR : sve_mem_prfm_ss<0b001, "prfb", GPR64NoXZRshifted8>; + def PRFH_PRR : sve_mem_prfm_ss<0b011, "prfh", GPR64NoXZRshifted16>; + def PRFS_PRR : sve_mem_prfm_ss<0b101, "prfw", GPR64NoXZRshifted32>; + def PRFD_PRR : sve_mem_prfm_ss<0b111, "prfd", GPR64NoXZRshifted64>; + + // Gather prefetch using scaled 32-bit offsets, e.g. + // prfh pldl1keep, p0, [x0, z0.s, uxtw #1] + defm PRFB_S : sve_mem_32b_prfm_sv_scaled<0b00, "prfb", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>; + defm PRFH_S : sve_mem_32b_prfm_sv_scaled<0b01, "prfh", ZPR32ExtSXTW16, ZPR32ExtUXTW16>; + defm PRFW_S : sve_mem_32b_prfm_sv_scaled<0b10, "prfw", ZPR32ExtSXTW32, ZPR32ExtUXTW32>; + defm PRFD_S : sve_mem_32b_prfm_sv_scaled<0b11, "prfd", ZPR32ExtSXTW64, ZPR32ExtUXTW64>; + + // Gather prefetch using unpacked, scaled 32-bit offsets, e.g. + // prfh pldl1keep, p0, [x0, z0.d, uxtw #1] + defm PRFB_D : sve_mem_64b_prfm_sv_ext_scaled<0b00, "prfb", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>; + defm PRFH_D : sve_mem_64b_prfm_sv_ext_scaled<0b01, "prfh", ZPR64ExtSXTW16, ZPR64ExtUXTW16>; + defm PRFW_D : sve_mem_64b_prfm_sv_ext_scaled<0b10, "prfw", ZPR64ExtSXTW32, ZPR64ExtUXTW32>; + defm PRFD_D : sve_mem_64b_prfm_sv_ext_scaled<0b11, "prfd", ZPR64ExtSXTW64, ZPR64ExtUXTW64>; + + // Gather prefetch using scaled 64-bit offsets, e.g. + // prfh pldl1keep, p0, [x0, z0.d, lsl #1] + defm PRFB_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b00, "prfb", ZPR64ExtLSL8>; + defm PRFH_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b01, "prfh", ZPR64ExtLSL16>; + defm PRFW_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b10, "prfw", ZPR64ExtLSL32>; + defm PRFD_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b11, "prfd", ZPR64ExtLSL64>; + + // Gather prefetch using 32/64-bit pointers with offset, e.g. 
+ // prfh pldl1keep, p0, [z0.s, #16] + // prfh pldl1keep, p0, [z0.d, #16] + defm PRFB_S_PZI : sve_mem_32b_prfm_vi<0b00, "prfb", imm0_31>; + defm PRFH_S_PZI : sve_mem_32b_prfm_vi<0b01, "prfh", uimm5s2>; + defm PRFW_S_PZI : sve_mem_32b_prfm_vi<0b10, "prfw", uimm5s4>; + defm PRFD_S_PZI : sve_mem_32b_prfm_vi<0b11, "prfd", uimm5s8>; + + defm PRFB_D_PZI : sve_mem_64b_prfm_vi<0b00, "prfb", imm0_31>; + defm PRFH_D_PZI : sve_mem_64b_prfm_vi<0b01, "prfh", uimm5s2>; + defm PRFW_D_PZI : sve_mem_64b_prfm_vi<0b10, "prfw", uimm5s4>; + defm PRFD_D_PZI : sve_mem_64b_prfm_vi<0b11, "prfd", uimm5s8>; + + defm ADR_SXTW_ZZZ_D : sve_int_bin_cons_misc_0_a_sxtw<0b00, "adr">; + defm ADR_UXTW_ZZZ_D : sve_int_bin_cons_misc_0_a_uxtw<0b01, "adr">; + defm ADR_LSL_ZZZ_S : sve_int_bin_cons_misc_0_a_32_lsl<0b10, "adr">; + defm ADR_LSL_ZZZ_D : sve_int_bin_cons_misc_0_a_64_lsl<0b11, "adr">; + + defm TBL_ZZZ : sve_int_perm_tbl<"tbl">; + + defm ZIP1_ZZZ : sve_int_perm_bin_perm_zz<0b000, "zip1">; + defm ZIP2_ZZZ : sve_int_perm_bin_perm_zz<0b001, "zip2">; + defm UZP1_ZZZ : sve_int_perm_bin_perm_zz<0b010, "uzp1">; + defm UZP2_ZZZ : sve_int_perm_bin_perm_zz<0b011, "uzp2">; + defm TRN1_ZZZ : sve_int_perm_bin_perm_zz<0b100, "trn1">; + defm TRN2_ZZZ : sve_int_perm_bin_perm_zz<0b101, "trn2">; + + defm ZIP1_PPP : sve_int_perm_bin_perm_pp<0b000, "zip1">; + defm ZIP2_PPP : sve_int_perm_bin_perm_pp<0b001, "zip2">; + defm UZP1_PPP : sve_int_perm_bin_perm_pp<0b010, "uzp1">; + defm UZP2_PPP : sve_int_perm_bin_perm_pp<0b011, "uzp2">; + defm TRN1_PPP : sve_int_perm_bin_perm_pp<0b100, "trn1">; + defm TRN2_PPP : sve_int_perm_bin_perm_pp<0b101, "trn2">; + + defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs">; + defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi">; + defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge">; + defm CMPGT_PPzZZ : sve_int_cmp_0<0b101, "cmpgt">; + defm CMPEQ_PPzZZ : sve_int_cmp_0<0b110, "cmpeq">; + defm CMPNE_PPzZZ : sve_int_cmp_0<0b111, "cmpne">; + + defm CMPEQ_WIDE_PPzZZ : sve_int_cmp_0_wide<0b010, "cmpeq">; + defm CMPNE_WIDE_PPzZZ : sve_int_cmp_0_wide<0b011, "cmpne">; + defm CMPGE_WIDE_PPzZZ : sve_int_cmp_1_wide<0b000, "cmpge">; + defm CMPGT_WIDE_PPzZZ : sve_int_cmp_1_wide<0b001, "cmpgt">; + defm CMPLT_WIDE_PPzZZ : sve_int_cmp_1_wide<0b010, "cmplt">; + defm CMPLE_WIDE_PPzZZ : sve_int_cmp_1_wide<0b011, "cmple">; + defm CMPHS_WIDE_PPzZZ : sve_int_cmp_1_wide<0b100, "cmphs">; + defm CMPHI_WIDE_PPzZZ : sve_int_cmp_1_wide<0b101, "cmphi">; + defm CMPLO_WIDE_PPzZZ : sve_int_cmp_1_wide<0b110, "cmplo">; + defm CMPLS_WIDE_PPzZZ : sve_int_cmp_1_wide<0b111, "cmpls">; + + defm CMPGE_PPzZI : sve_int_scmp_vi<0b000, "cmpge">; + defm CMPGT_PPzZI : sve_int_scmp_vi<0b001, "cmpgt">; + defm CMPLT_PPzZI : sve_int_scmp_vi<0b010, "cmplt">; + defm CMPLE_PPzZI : sve_int_scmp_vi<0b011, "cmple">; + defm CMPEQ_PPzZI : sve_int_scmp_vi<0b100, "cmpeq">; + defm CMPNE_PPzZI : sve_int_scmp_vi<0b101, "cmpne">; + defm CMPHS_PPzZI : sve_int_ucmp_vi<0b00, "cmphs">; + defm CMPHI_PPzZI : sve_int_ucmp_vi<0b01, "cmphi">; + defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo">; + defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls">; + + defm FCMGE_PPzZZ : sve_fp_3op_p_pd<0b000, "fcmge">; + defm FCMGT_PPzZZ : sve_fp_3op_p_pd<0b001, "fcmgt">; + defm FCMEQ_PPzZZ : sve_fp_3op_p_pd<0b010, "fcmeq">; + defm FCMNE_PPzZZ : sve_fp_3op_p_pd<0b011, "fcmne">; + defm FCMUO_PPzZZ : sve_fp_3op_p_pd<0b100, "fcmuo">; + defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge">; + defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt">; + + defm FCMGE_PPzZ0 : sve_fp_2op_p_pd<0b000, "fcmge">; + defm FCMGT_PPzZ0 : 
sve_fp_2op_p_pd<0b001, "fcmgt">; + defm FCMLT_PPzZ0 : sve_fp_2op_p_pd<0b010, "fcmlt">; + defm FCMLE_PPzZ0 : sve_fp_2op_p_pd<0b011, "fcmle">; + defm FCMEQ_PPzZ0 : sve_fp_2op_p_pd<0b100, "fcmeq">; + defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne">; + + defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt">; + defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele">; + defm WHILELO_PWW : sve_int_while4_rr<0b110, "whilelo">; + defm WHILELS_PWW : sve_int_while4_rr<0b111, "whilels">; + + defm WHILELT_PXX : sve_int_while8_rr<0b010, "whilelt">; + defm WHILELE_PXX : sve_int_while8_rr<0b011, "whilele">; + defm WHILELO_PXX : sve_int_while8_rr<0b110, "whilelo">; + defm WHILELS_PXX : sve_int_while8_rr<0b111, "whilels">; + + def CTERMEQ_WW : sve_int_cterm<0b0, 0b0, "ctermeq", GPR32>; + def CTERMNE_WW : sve_int_cterm<0b0, 0b1, "ctermne", GPR32>; + def CTERMEQ_XX : sve_int_cterm<0b1, 0b0, "ctermeq", GPR64>; + def CTERMNE_XX : sve_int_cterm<0b1, 0b1, "ctermne", GPR64>; + + def RDVLI_XI : sve_int_read_vl_a<0b0, 0b11111, "rdvl">; + def ADDVL_XXI : sve_int_arith_vl<0b0, "addvl">; + def ADDPL_XXI : sve_int_arith_vl<0b1, "addpl">; + + defm CNTB_XPiI : sve_int_count<0b000, "cntb">; + defm CNTH_XPiI : sve_int_count<0b010, "cnth">; + defm CNTW_XPiI : sve_int_count<0b100, "cntw">; + defm CNTD_XPiI : sve_int_count<0b110, "cntd">; + defm CNTP_XPP : sve_int_pcount_pred<0b0000, "cntp">; + + defm INCB_XPiI : sve_int_pred_pattern_a<0b000, "incb">; + defm DECB_XPiI : sve_int_pred_pattern_a<0b001, "decb">; + defm INCH_XPiI : sve_int_pred_pattern_a<0b010, "inch">; + defm DECH_XPiI : sve_int_pred_pattern_a<0b011, "dech">; + defm INCW_XPiI : sve_int_pred_pattern_a<0b100, "incw">; + defm DECW_XPiI : sve_int_pred_pattern_a<0b101, "decw">; + defm INCD_XPiI : sve_int_pred_pattern_a<0b110, "incd">; + defm DECD_XPiI : sve_int_pred_pattern_a<0b111, "decd">; + + defm SQINCB_XPiWdI : sve_int_pred_pattern_b_s32<0b00000, "sqincb">; + defm UQINCB_WPiI : sve_int_pred_pattern_b_u32<0b00001, "uqincb">; + defm SQDECB_XPiWdI : sve_int_pred_pattern_b_s32<0b00010, "sqdecb">; + defm UQDECB_WPiI : sve_int_pred_pattern_b_u32<0b00011, "uqdecb">; + defm SQINCB_XPiI : sve_int_pred_pattern_b_x64<0b00100, "sqincb">; + defm UQINCB_XPiI : sve_int_pred_pattern_b_x64<0b00101, "uqincb">; + defm SQDECB_XPiI : sve_int_pred_pattern_b_x64<0b00110, "sqdecb">; + defm UQDECB_XPiI : sve_int_pred_pattern_b_x64<0b00111, "uqdecb">; + + defm SQINCH_XPiWdI : sve_int_pred_pattern_b_s32<0b01000, "sqinch">; + defm UQINCH_WPiI : sve_int_pred_pattern_b_u32<0b01001, "uqinch">; + defm SQDECH_XPiWdI : sve_int_pred_pattern_b_s32<0b01010, "sqdech">; + defm UQDECH_WPiI : sve_int_pred_pattern_b_u32<0b01011, "uqdech">; + defm SQINCH_XPiI : sve_int_pred_pattern_b_x64<0b01100, "sqinch">; + defm UQINCH_XPiI : sve_int_pred_pattern_b_x64<0b01101, "uqinch">; + defm SQDECH_XPiI : sve_int_pred_pattern_b_x64<0b01110, "sqdech">; + defm UQDECH_XPiI : sve_int_pred_pattern_b_x64<0b01111, "uqdech">; + + defm SQINCW_XPiWdI : sve_int_pred_pattern_b_s32<0b10000, "sqincw">; + defm UQINCW_WPiI : sve_int_pred_pattern_b_u32<0b10001, "uqincw">; + defm SQDECW_XPiWdI : sve_int_pred_pattern_b_s32<0b10010, "sqdecw">; + defm UQDECW_WPiI : sve_int_pred_pattern_b_u32<0b10011, "uqdecw">; + defm SQINCW_XPiI : sve_int_pred_pattern_b_x64<0b10100, "sqincw">; + defm UQINCW_XPiI : sve_int_pred_pattern_b_x64<0b10101, "uqincw">; + defm SQDECW_XPiI : sve_int_pred_pattern_b_x64<0b10110, "sqdecw">; + defm UQDECW_XPiI : sve_int_pred_pattern_b_x64<0b10111, "uqdecw">; + + defm SQINCD_XPiWdI : 
sve_int_pred_pattern_b_s32<0b11000, "sqincd">; + defm UQINCD_WPiI : sve_int_pred_pattern_b_u32<0b11001, "uqincd">; + defm SQDECD_XPiWdI : sve_int_pred_pattern_b_s32<0b11010, "sqdecd">; + defm UQDECD_WPiI : sve_int_pred_pattern_b_u32<0b11011, "uqdecd">; + defm SQINCD_XPiI : sve_int_pred_pattern_b_x64<0b11100, "sqincd">; + defm UQINCD_XPiI : sve_int_pred_pattern_b_x64<0b11101, "uqincd">; + defm SQDECD_XPiI : sve_int_pred_pattern_b_x64<0b11110, "sqdecd">; + defm UQDECD_XPiI : sve_int_pred_pattern_b_x64<0b11111, "uqdecd">; + + defm SQINCH_ZPiI : sve_int_countvlv<0b01000, "sqinch", ZPR16>; + defm UQINCH_ZPiI : sve_int_countvlv<0b01001, "uqinch", ZPR16>; + defm SQDECH_ZPiI : sve_int_countvlv<0b01010, "sqdech", ZPR16>; + defm UQDECH_ZPiI : sve_int_countvlv<0b01011, "uqdech", ZPR16>; + defm INCH_ZPiI : sve_int_countvlv<0b01100, "inch", ZPR16>; + defm DECH_ZPiI : sve_int_countvlv<0b01101, "dech", ZPR16>; + defm SQINCW_ZPiI : sve_int_countvlv<0b10000, "sqincw", ZPR32>; + defm UQINCW_ZPiI : sve_int_countvlv<0b10001, "uqincw", ZPR32>; + defm SQDECW_ZPiI : sve_int_countvlv<0b10010, "sqdecw", ZPR32>; + defm UQDECW_ZPiI : sve_int_countvlv<0b10011, "uqdecw", ZPR32>; + defm INCW_ZPiI : sve_int_countvlv<0b10100, "incw", ZPR32>; + defm DECW_ZPiI : sve_int_countvlv<0b10101, "decw", ZPR32>; + defm SQINCD_ZPiI : sve_int_countvlv<0b11000, "sqincd", ZPR64>; + defm UQINCD_ZPiI : sve_int_countvlv<0b11001, "uqincd", ZPR64>; + defm SQDECD_ZPiI : sve_int_countvlv<0b11010, "sqdecd", ZPR64>; + defm UQDECD_ZPiI : sve_int_countvlv<0b11011, "uqdecd", ZPR64>; + defm INCD_ZPiI : sve_int_countvlv<0b11100, "incd", ZPR64>; + defm DECD_ZPiI : sve_int_countvlv<0b11101, "decd", ZPR64>; + + defm SQINCP_XPWd : sve_int_count_r_s32<0b00000, "sqincp">; + defm SQINCP_XP : sve_int_count_r_x64<0b00010, "sqincp">; + defm UQINCP_WP : sve_int_count_r_u32<0b00100, "uqincp">; + defm UQINCP_XP : sve_int_count_r_x64<0b00110, "uqincp">; + defm SQDECP_XPWd : sve_int_count_r_s32<0b01000, "sqdecp">; + defm SQDECP_XP : sve_int_count_r_x64<0b01010, "sqdecp">; + defm UQDECP_WP : sve_int_count_r_u32<0b01100, "uqdecp">; + defm UQDECP_XP : sve_int_count_r_x64<0b01110, "uqdecp">; + defm INCP_XP : sve_int_count_r_x64<0b10000, "incp">; + defm DECP_XP : sve_int_count_r_x64<0b10100, "decp">; + + defm SQINCP_ZP : sve_int_count_v<0b00000, "sqincp">; + defm UQINCP_ZP : sve_int_count_v<0b00100, "uqincp">; + defm SQDECP_ZP : sve_int_count_v<0b01000, "sqdecp">; + defm UQDECP_ZP : sve_int_count_v<0b01100, "uqdecp">; + defm INCP_ZP : sve_int_count_v<0b10000, "incp">; + defm DECP_ZP : sve_int_count_v<0b10100, "decp">; + + defm INDEX_RR : sve_int_index_rr<"index">; + defm INDEX_IR : sve_int_index_ir<"index">; + defm INDEX_RI : sve_int_index_ri<"index">; + defm INDEX_II : sve_int_index_ii<"index">; + + // Unpredicated shifts + defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr">; + defm LSR_ZZI : sve_int_bin_cons_shift_imm_right<0b01, "lsr">; + defm LSL_ZZI : sve_int_bin_cons_shift_imm_left< 0b11, "lsl">; + + defm ASR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b00, "asr">; + defm LSR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b01, "lsr">; + defm LSL_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b11, "lsl">; + + // Predicated shifts + defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right<0b000, "asr">; + defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right<0b001, "lsr">; + defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b011, "lsl">; + defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b100, "asrd">; + + defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr">; + defm LSR_ZPmZ : 
sve_int_bin_pred_shift<0b001, "lsr">; + defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl">; + defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr">; + defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr">; + defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr">; + + defm ASR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b000, "asr">; + defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr">; + defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl">; + + def FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, ElementSizeS>; + def FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, ElementSizeS>; + def SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16, ElementSizeH>; + def SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32, ElementSizeS>; + def UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32, ElementSizeS>; + def UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16, ElementSizeH>; + def FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16, ElementSizeH>; + def FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32, ElementSizeS>; + def FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16, ElementSizeH>; + def FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32, ElementSizeS>; + def FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16, ElementSizeD>; + def FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64, ElementSizeD>; + def FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32, ElementSizeD>; + def FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64, ElementSizeD>; + def SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64, ElementSizeD>; + def UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64, ElementSizeD>; + def UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16, ElementSizeS>; + def SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32, ElementSizeD>; + def SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16, ElementSizeS>; + def SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16, ElementSizeD>; + def UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32, ElementSizeD>; + def UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16, ElementSizeD>; + def SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64, ElementSizeD>; + def UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64, ElementSizeD>; + def FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32, ElementSizeD>; + def FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32, ElementSizeD>; + def FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64, ElementSizeD>; + def FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32, ElementSizeS>; + def FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, ElementSizeD>; + def FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, ElementSizeS>; + def FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, ElementSizeD>; + def FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, ElementSizeD>; + def FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, ElementSizeD>; + def FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, ElementSizeD>; + + defm FRINTN_ZPmZ : 
sve_fp_2op_p_zd_HSD<0b00000, "frintn">; + defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp">; + defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm">; + defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz">; + defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta">; + defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx">; + defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti">; + defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx">; + defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt">; + + // InstAliases + def : InstAlias<"mov $Zd, $Zn", + (ORR_ZZZ ZPR64:$Zd, ZPR64:$Zn, ZPR64:$Zn), 1>; + def : InstAlias<"mov $Pd, $Pg/m, $Pn", + (SEL_PPPP PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, PPR8:$Pd), 1>; + def : InstAlias<"mov $Pd, $Pn", + (ORR_PPzPP PPR8:$Pd, PPR8:$Pn, PPR8:$Pn, PPR8:$Pn), 1>; + def : InstAlias<"mov $Pd, $Pg/z, $Pn", + (AND_PPzPP PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, PPR8:$Pn), 1>; + + def : InstAlias<"movs $Pd, $Pn", + (ORRS_PPzPP PPR8:$Pd, PPR8:$Pn, PPR8:$Pn, PPR8:$Pn), 1>; + def : InstAlias<"movs $Pd, $Pg/z, $Pn", + (ANDS_PPzPP PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, PPR8:$Pn), 1>; + + def : InstAlias<"not $Pd, $Pg/z, $Pn", + (EOR_PPzPP PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, PPRAny:$Pg), 1>; + + def : InstAlias<"nots $Pd, $Pg/z, $Pn", + (EORS_PPzPP PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, PPRAny:$Pg), 1>; + + def : InstAlias<"cmple $Zd, $Pg/z, $Zm, $Zn", + (CMPGE_PPzZZ_B PPR8:$Zd, PPR3bAny:$Pg, ZPR8:$Zn, ZPR8:$Zm), 0>; + def : InstAlias<"cmple $Zd, $Pg/z, $Zm, $Zn", + (CMPGE_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>; + def : InstAlias<"cmple $Zd, $Pg/z, $Zm, $Zn", + (CMPGE_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; + def : InstAlias<"cmple $Zd, $Pg/z, $Zm, $Zn", + (CMPGE_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; + + def : InstAlias<"cmplo $Zd, $Pg/z, $Zm, $Zn", + (CMPHI_PPzZZ_B PPR8:$Zd, PPR3bAny:$Pg, ZPR8:$Zn, ZPR8:$Zm), 0>; + def : InstAlias<"cmplo $Zd, $Pg/z, $Zm, $Zn", + (CMPHI_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>; + def : InstAlias<"cmplo $Zd, $Pg/z, $Zm, $Zn", + (CMPHI_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; + def : InstAlias<"cmplo $Zd, $Pg/z, $Zm, $Zn", + (CMPHI_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; + + def : InstAlias<"cmpls $Zd, $Pg/z, $Zm, $Zn", + (CMPHS_PPzZZ_B PPR8:$Zd, PPR3bAny:$Pg, ZPR8:$Zn, ZPR8:$Zm), 0>; + def : InstAlias<"cmpls $Zd, $Pg/z, $Zm, $Zn", + (CMPHS_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>; + def : InstAlias<"cmpls $Zd, $Pg/z, $Zm, $Zn", + (CMPHS_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; + def : InstAlias<"cmpls $Zd, $Pg/z, $Zm, $Zn", + (CMPHS_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; + + def : InstAlias<"cmplt $Zd, $Pg/z, $Zm, $Zn", + (CMPGT_PPzZZ_B PPR8:$Zd, PPR3bAny:$Pg, ZPR8:$Zn, ZPR8:$Zm), 0>; + def : InstAlias<"cmplt $Zd, $Pg/z, $Zm, $Zn", + (CMPGT_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>; + def : InstAlias<"cmplt $Zd, $Pg/z, $Zm, $Zn", + (CMPGT_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; + def : InstAlias<"cmplt $Zd, $Pg/z, $Zm, $Zn", + (CMPGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; + + def : InstAlias<"facle $Zd, $Pg/z, $Zm, $Zn", + (FACGE_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>; + def : InstAlias<"facle $Zd, $Pg/z, $Zm, $Zn", + (FACGE_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; + def : InstAlias<"facle $Zd, $Pg/z, $Zm, $Zn", + (FACGE_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; + 
+ def : InstAlias<"faclt $Zd, $Pg/z, $Zm, $Zn", + (FACGT_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>; + def : InstAlias<"faclt $Zd, $Pg/z, $Zm, $Zn", + (FACGT_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; + def : InstAlias<"faclt $Zd, $Pg/z, $Zm, $Zn", + (FACGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; + + def : InstAlias<"fcmle $Zd, $Pg/z, $Zm, $Zn", + (FCMGE_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>; + def : InstAlias<"fcmle $Zd, $Pg/z, $Zm, $Zn", + (FCMGE_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; + def : InstAlias<"fcmle $Zd, $Pg/z, $Zm, $Zn", + (FCMGE_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; + + def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn", + (FCMGT_PPzZZ_H PPR16:$Zd, PPR3bAny:$Pg, ZPR16:$Zn, ZPR16:$Zm), 0>; + def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn", + (FCMGT_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; + def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn", + (FCMGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; +} diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedA53.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedA53.td new file mode 100644 index 000000000..f253a4f3e --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedA53.td @@ -0,0 +1,295 @@ +//==- AArch64SchedA53.td - Cortex-A53 Scheduling Definitions -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the ARM Cortex A53 processors. +// +//===----------------------------------------------------------------------===// + +// ===---------------------------------------------------------------------===// +// The following definitions describe the simpler per-operand machine model. +// This works with MachineScheduler. See MCSchedule.h for details. + +// Cortex-A53 machine model for scheduling and other instruction cost heuristics. +def CortexA53Model : SchedMachineModel { + let MicroOpBufferSize = 0; // Explicitly set to zero since A53 is in-order. + let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. + let LoadLatency = 3; // Optimistic load latency assuming bypass. + // This is overriden by OperandCycles if the + // Itineraries are queried instead. + let MispredictPenalty = 9; // Based on "Cortex-A53 Software Optimisation + // Specification - Instruction Timings" + // v 1.0 Spreadsheet + let CompleteModel = 1; + + list<Predicate> UnsupportedFeatures = [HasSVE]; +} + + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available. + +// Modeling each pipeline as a ProcResource using the BufferSize = 0 since +// Cortex-A53 is in-order. 
+ +def A53UnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU +def A53UnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC +def A53UnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division +def A53UnitLdSt : ProcResource<1> { let BufferSize = 0; } // Load/Store +def A53UnitB : ProcResource<1> { let BufferSize = 0; } // Branch +def A53UnitFPALU : ProcResource<1> { let BufferSize = 0; } // FP ALU +def A53UnitFPMDS : ProcResource<1> { let BufferSize = 0; } // FP Mult/Div/Sqrt + + +//===----------------------------------------------------------------------===// +// Subtarget-specific SchedWrite types which both map the ProcResources and +// set the latency. + +let SchedModel = CortexA53Model in { + +// ALU - Despite having a full latency of 4, most of the ALU instructions can +// forward a cycle earlier and then two cycles earlier in the case of a +// shift-only instruction. These latencies will be incorrect when the +// result cannot be forwarded, but modeling isn't rocket surgery. +def : WriteRes<WriteImm, [A53UnitALU]> { let Latency = 3; } +def : WriteRes<WriteI, [A53UnitALU]> { let Latency = 3; } +def : WriteRes<WriteISReg, [A53UnitALU]> { let Latency = 3; } +def : WriteRes<WriteIEReg, [A53UnitALU]> { let Latency = 3; } +def : WriteRes<WriteIS, [A53UnitALU]> { let Latency = 2; } +def : WriteRes<WriteExtr, [A53UnitALU]> { let Latency = 3; } + +// MAC +def : WriteRes<WriteIM32, [A53UnitMAC]> { let Latency = 4; } +def : WriteRes<WriteIM64, [A53UnitMAC]> { let Latency = 4; } + +// Div +def : WriteRes<WriteID32, [A53UnitDiv]> { let Latency = 4; } +def : WriteRes<WriteID64, [A53UnitDiv]> { let Latency = 4; } + +// Load +def : WriteRes<WriteLD, [A53UnitLdSt]> { let Latency = 4; } +def : WriteRes<WriteLDIdx, [A53UnitLdSt]> { let Latency = 4; } +def : WriteRes<WriteLDHi, [A53UnitLdSt]> { let Latency = 4; } + +// Vector Load - Vector loads take 1-5 cycles to issue. For the WriteVecLd +// below, choosing the median of 3 which makes the latency 6. +// May model this more carefully in the future. The remaining +// A53WriteVLD# types represent the 1-5 cycle issues explicitly. +def : WriteRes<WriteVLD, [A53UnitLdSt]> { let Latency = 6; + let ResourceCycles = [3]; } +def A53WriteVLD1 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 4; } +def A53WriteVLD2 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 5; + let ResourceCycles = [2]; } +def A53WriteVLD3 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 6; + let ResourceCycles = [3]; } +def A53WriteVLD4 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 7; + let ResourceCycles = [4]; } +def A53WriteVLD5 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 8; + let ResourceCycles = [5]; } + +// Pre/Post Indexing - Performed as part of address generation which is already +// accounted for in the WriteST* latencies below +def : WriteRes<WriteAdr, []> { let Latency = 0; } + +// Store +def : WriteRes<WriteST, [A53UnitLdSt]> { let Latency = 4; } +def : WriteRes<WriteSTP, [A53UnitLdSt]> { let Latency = 4; } +def : WriteRes<WriteSTIdx, [A53UnitLdSt]> { let Latency = 4; } +def : WriteRes<WriteSTX, [A53UnitLdSt]> { let Latency = 4; } + +// Vector Store - Similar to vector loads, can take 1-3 cycles to issue. 
+def : WriteRes<WriteVST, [A53UnitLdSt]> { let Latency = 5; + let ResourceCycles = [2];} +def A53WriteVST1 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 4; } +def A53WriteVST2 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 5; + let ResourceCycles = [2]; } +def A53WriteVST3 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 6; + let ResourceCycles = [3]; } + +def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } + +// Branch +def : WriteRes<WriteBr, [A53UnitB]>; +def : WriteRes<WriteBrReg, [A53UnitB]>; +def : WriteRes<WriteSys, [A53UnitB]>; +def : WriteRes<WriteBarrier, [A53UnitB]>; +def : WriteRes<WriteHint, [A53UnitB]>; + +// FP ALU +def : WriteRes<WriteF, [A53UnitFPALU]> { let Latency = 6; } +def : WriteRes<WriteFCmp, [A53UnitFPALU]> { let Latency = 6; } +def : WriteRes<WriteFCvt, [A53UnitFPALU]> { let Latency = 6; } +def : WriteRes<WriteFCopy, [A53UnitFPALU]> { let Latency = 6; } +def : WriteRes<WriteFImm, [A53UnitFPALU]> { let Latency = 6; } +def : WriteRes<WriteV, [A53UnitFPALU]> { let Latency = 6; } + +// FP Mul, Div, Sqrt +def : WriteRes<WriteFMul, [A53UnitFPMDS]> { let Latency = 6; } +def : WriteRes<WriteFDiv, [A53UnitFPMDS]> { let Latency = 33; + let ResourceCycles = [29]; } +def A53WriteFMAC : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 10; } +def A53WriteFDivSP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 18; + let ResourceCycles = [14]; } +def A53WriteFDivDP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 33; + let ResourceCycles = [29]; } +def A53WriteFSqrtSP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 17; + let ResourceCycles = [13]; } +def A53WriteFSqrtDP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 32; + let ResourceCycles = [28]; } + +//===----------------------------------------------------------------------===// +// Subtarget-specific SchedRead types. + +// No forwarding for these reads. +def : ReadAdvance<ReadExtrHi, 0>; +def : ReadAdvance<ReadAdrBase, 0>; +def : ReadAdvance<ReadVLD, 0>; + +// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable +// operands are needed one cycle later if and only if they are to be +// shifted. Otherwise, they too are needed two cycles later. This same +// ReadAdvance applies to Extended registers as well, even though there is +// a separate SchedPredicate for them. +def : ReadAdvance<ReadI, 2, [WriteImm,WriteI, + WriteISReg, WriteIEReg,WriteIS, + WriteID32,WriteID64, + WriteIM32,WriteIM64]>; +def A53ReadShifted : SchedReadAdvance<1, [WriteImm,WriteI, + WriteISReg, WriteIEReg,WriteIS, + WriteID32,WriteID64, + WriteIM32,WriteIM64]>; +def A53ReadNotShifted : SchedReadAdvance<2, [WriteImm,WriteI, + WriteISReg, WriteIEReg,WriteIS, + WriteID32,WriteID64, + WriteIM32,WriteIM64]>; +def A53ReadISReg : SchedReadVariant<[ + SchedVar<RegShiftedPred, [A53ReadShifted]>, + SchedVar<NoSchedPred, [A53ReadNotShifted]>]>; +def : SchedAlias<ReadISReg, A53ReadISReg>; + +def A53ReadIEReg : SchedReadVariant<[ + SchedVar<RegExtendedPred, [A53ReadShifted]>, + SchedVar<NoSchedPred, [A53ReadNotShifted]>]>; +def : SchedAlias<ReadIEReg, A53ReadIEReg>; + +// MAC - Operands are generally needed one cycle later in the MAC pipe. +// Accumulator operands are needed two cycles later. 
+def : ReadAdvance<ReadIM, 1, [WriteImm,WriteI, + WriteISReg, WriteIEReg,WriteIS, + WriteID32,WriteID64, + WriteIM32,WriteIM64]>; +def : ReadAdvance<ReadIMA, 2, [WriteImm,WriteI, + WriteISReg, WriteIEReg,WriteIS, + WriteID32,WriteID64, + WriteIM32,WriteIM64]>; + +// Div +def : ReadAdvance<ReadID, 1, [WriteImm,WriteI, + WriteISReg, WriteIEReg,WriteIS, + WriteID32,WriteID64, + WriteIM32,WriteIM64]>; + +//===----------------------------------------------------------------------===// +// Subtarget-specific InstRWs. + +//--- +// Miscellaneous +//--- +def : InstRW<[WriteI], (instrs COPY)>; + +//--- +// Vector Loads +//--- +def : InstRW<[A53WriteVLD1], (instregex "LD1i(8|16|32|64)$")>; +def : InstRW<[A53WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>; +def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +def : InstRW<[A53WriteVLD1], (instregex "LD2i(8|16|32|64)$")>; +def : InstRW<[A53WriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>; +def : InstRW<[A53WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>; +def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[A53WriteVLD2], (instregex "LD3i(8|16|32|64)$")>; +def : InstRW<[A53WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD4], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[A53WriteVLD3], (instregex "LD3Threev2d$")>; +def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>; +def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[A53WriteVLD3, WriteAdr], (instregex "LD3Threev2d_POST$")>; + +def : InstRW<[A53WriteVLD2], (instregex "LD4i(8|16|32|64)$")>; +def : InstRW<[A53WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD5], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[A53WriteVLD4], (instregex "LD4Fourv(2d)$")>; +def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>; +def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVLD5, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>; + +//--- +// Vector Stores 
+//--- +def : InstRW<[A53WriteVST1], (instregex "ST1i(8|16|32|64)$")>; +def : InstRW<[A53WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVST2], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>; +def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +def : InstRW<[A53WriteVST1], (instregex "ST2i(8|16|32|64)$")>; +def : InstRW<[A53WriteVST1], (instregex "ST2Twov(8b|4h|2s)$")>; +def : InstRW<[A53WriteVST2], (instregex "ST2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>; +def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[A53WriteVST2], (instregex "ST3i(8|16|32|64)$")>; +def : InstRW<[A53WriteVST3], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[A53WriteVST2], (instregex "ST3Threev(2d)$")>; +def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>; +def : InstRW<[A53WriteVST3, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST3Threev(2d)_POST$")>; + +def : InstRW<[A53WriteVST2], (instregex "ST4i(8|16|32|64)$")>; +def : InstRW<[A53WriteVST3], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[A53WriteVST2], (instregex "ST4Fourv(2d)$")>; +def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>; +def : InstRW<[A53WriteVST3, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>; + +//--- +// Floating Point MAC, DIV, SQRT +//--- +def : InstRW<[A53WriteFMAC], (instregex "^FN?M(ADD|SUB).*")>; +def : InstRW<[A53WriteFMAC], (instregex "^FML(A|S).*")>; +def : InstRW<[A53WriteFDivSP], (instrs FDIVSrr)>; +def : InstRW<[A53WriteFDivDP], (instrs FDIVDrr)>; +def : InstRW<[A53WriteFDivSP], (instregex "^FDIVv.*32$")>; +def : InstRW<[A53WriteFDivDP], (instregex "^FDIVv.*64$")>; +def : InstRW<[A53WriteFSqrtSP], (instregex "^.*SQRT.*32$")>; +def : InstRW<[A53WriteFSqrtDP], (instregex "^.*SQRT.*64$")>; + +} diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedA57.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedA57.td new file mode 100644 index 000000000..ade03f23f --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedA57.td @@ -0,0 +1,668 @@ +//=- AArch64SchedA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for ARM Cortex-A57 to support +// instruction scheduling and other instruction cost heuristics. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// The Cortex-A57 is a traditional superscalar microprocessor with a +// conservative 3-wide in-order stage for decode and dispatch. Combined with the +// much wider out-of-order issue stage, this produced a need to carefully +// schedule micro-ops so that all three decoded each cycle are successfully +// issued as the reservation station(s) simply don't stay occupied for long. +// Therefore, IssueWidth is set to the narrower of the two at three, while still +// modeling the machine as out-of-order. + +def CortexA57Model : SchedMachineModel { + let IssueWidth = 3; // 3-way decode and dispatch + let MicroOpBufferSize = 128; // 128 micro-op re-order buffer + let LoadLatency = 4; // Optimistic load latency + let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch + + // Enable partial & runtime unrolling. The magic number is chosen based on + // experiments and benchmarking data. + let LoopMicroOpBufferSize = 16; + let CompleteModel = 1; + + list<Predicate> UnsupportedFeatures = [HasSVE]; +} + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available on Cortex-A57. +// Cortex A-57 has 8 pipelines that each has its own 8-entry queue where +// micro-ops wait for their operands and then issue out-of-order. + +def A57UnitB : ProcResource<1>; // Type B micro-ops +def A57UnitI : ProcResource<2>; // Type I micro-ops +def A57UnitM : ProcResource<1>; // Type M micro-ops +def A57UnitL : ProcResource<1>; // Type L micro-ops +def A57UnitS : ProcResource<1>; // Type S micro-ops +def A57UnitX : ProcResource<1>; // Type X micro-ops +def A57UnitW : ProcResource<1>; // Type W micro-ops +let SchedModel = CortexA57Model in { + def A57UnitV : ProcResGroup<[A57UnitX, A57UnitW]>; // Type V micro-ops +} + +let SchedModel = CortexA57Model in { + +//===----------------------------------------------------------------------===// +// Define customized scheduler read/write types specific to the Cortex-A57. + +include "AArch64SchedA57WriteRes.td" + +//===----------------------------------------------------------------------===// +// Map the target-defined scheduler read/write resources and latency for +// Cortex-A57. The Cortex-A57 types are directly associated with resources, so +// defining the aliases precludes the need for mapping them using WriteRes. The +// aliases are sufficient for creating a coarse, working model. As the model +// evolves, InstRWs will be used to override some of these SchedAliases. +// +// WARNING: Using SchedAliases is convenient and works well for latency and +// resource lookup for instructions. However, this creates an entry in +// AArch64WriteLatencyTable with a WriteResourceID of 0, breaking +// any SchedReadAdvance since the lookup will fail. 
+
+def : SchedAlias<WriteImm, A57Write_1cyc_1I>;
+def : SchedAlias<WriteI, A57Write_1cyc_1I>;
+def : SchedAlias<WriteISReg, A57Write_2cyc_1M>;
+def : SchedAlias<WriteIEReg, A57Write_2cyc_1M>;
+def : SchedAlias<WriteExtr, A57Write_1cyc_1I>;
+def : SchedAlias<WriteIS, A57Write_1cyc_1I>;
+def : SchedAlias<WriteID32, A57Write_19cyc_1M>;
+def : SchedAlias<WriteID64, A57Write_35cyc_1M>;
+def : WriteRes<WriteIM32, [A57UnitM]> { let Latency = 3; }
+def : WriteRes<WriteIM64, [A57UnitM]> { let Latency = 5; }
+def : SchedAlias<WriteBr, A57Write_1cyc_1B>;
+def : SchedAlias<WriteBrReg, A57Write_1cyc_1B>;
+def : SchedAlias<WriteLD, A57Write_4cyc_1L>;
+def : SchedAlias<WriteST, A57Write_1cyc_1S>;
+def : SchedAlias<WriteSTP, A57Write_1cyc_1S>;
+def : SchedAlias<WriteAdr, A57Write_1cyc_1I>;
+def : SchedAlias<WriteLDIdx, A57Write_4cyc_1I_1L>;
+def : SchedAlias<WriteSTIdx, A57Write_1cyc_1I_1S>;
+def : SchedAlias<WriteF, A57Write_3cyc_1V>;
+def : SchedAlias<WriteFCmp, A57Write_3cyc_1V>;
+def : SchedAlias<WriteFCvt, A57Write_5cyc_1V>;
+def : SchedAlias<WriteFCopy, A57Write_5cyc_1L>;
+def : SchedAlias<WriteFImm, A57Write_3cyc_1V>;
+def : SchedAlias<WriteFMul, A57Write_5cyc_1V>;
+def : SchedAlias<WriteFDiv, A57Write_17cyc_1W>;
+def : SchedAlias<WriteV, A57Write_3cyc_1V>;
+def : SchedAlias<WriteVLD, A57Write_5cyc_1L>;
+def : SchedAlias<WriteVST, A57Write_1cyc_1S>;
+
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
+def : WriteRes<WriteSys, []> { let Latency = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+
+def : WriteRes<WriteLDHi, []> { let Latency = 4; }
+
+// Forwarding logic is only modeled for multiply and accumulate
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+def : ReadAdvance<ReadIMA, 2, [WriteIM32, WriteIM64]>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD, 0>;
+
+
+//===----------------------------------------------------------------------===//
+// Specialize the coarse model by associating instruction groups with the
+// subtarget-defined types. As the model is refined, this will override most
+// of the above SchedAlias mappings.
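[Editor's note, illustrative only; not part of the upstream patch.] The ReadAdvance entries above are how this model expresses operand forwarding: a read tagged with an advance of N may begin N cycles after the producing write starts, so the consumer effectively sees the producer's Latency minus N. With the values defined in this file, the accumulator operand of a dependent multiply-accumulate is ready after 3 - 2 = 1 cycle when fed by a WriteIM32 producer and after 5 - 2 = 3 cycles when fed by a WriteIM64 producer. A minimal TableGen sketch of the same mechanism follows; MyWriteMAC and MyReadMACAcc are hypothetical names that do not appear anywhere in this patch, and such defs would still need to be mapped onto instructions (via InstRW or SchedAlias inside the model's scope) to have any effect.

// Hypothetical sketch only: a 4-cycle MAC-style write whose accumulator
// input is read 2 cycles late, so a dependent MAC chain has an effective
// recurrence of 4 - 2 = 2 cycles per link.
def MyWriteMAC   : SchedWriteRes<[A57UnitM]> { let Latency = 4; }
def MyReadMACAcc : SchedReadAdvance<2, [MyWriteMAC]>;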
+ +// Miscellaneous +// ----------------------------------------------------------------------------- + +def : InstRW<[WriteI], (instrs COPY)>; + + +// Branch Instructions +// ----------------------------------------------------------------------------- + +def : InstRW<[A57Write_1cyc_1B_1I], (instrs BL)>; +def : InstRW<[A57Write_2cyc_1B_1I], (instrs BLR)>; + + +// Shifted Register with Shift == 0 +// ---------------------------------------------------------------------------- + +def A57WriteISReg : SchedWriteVariant<[ + SchedVar<RegShiftedPred, [WriteISReg]>, + SchedVar<NoSchedPred, [WriteI]>]>; +def : InstRW<[A57WriteISReg], (instregex ".*rs$")>; + + +// Divide and Multiply Instructions +// ----------------------------------------------------------------------------- + +// Multiply high +def : InstRW<[A57Write_6cyc_1M], (instrs SMULHrr, UMULHrr)>; + + +// Miscellaneous Data-Processing Instructions +// ----------------------------------------------------------------------------- + +def : InstRW<[A57Write_1cyc_1I], (instrs EXTRWrri)>; +def : InstRW<[A57Write_3cyc_1I_1M], (instrs EXTRXrri)>; +def : InstRW<[A57Write_2cyc_1M], (instregex "BFM")>; + + +// Cryptography Extensions +// ----------------------------------------------------------------------------- + +def A57ReadAES : SchedReadAdvance<3, [A57Write_3cyc_1W]>; +def : InstRW<[A57Write_3cyc_1W], (instregex "^AES[DE]")>; +def : InstRW<[A57Write_3cyc_1W, A57ReadAES], (instregex "^AESI?MC")>; +def : InstRW<[A57Write_6cyc_2V], (instregex "^SHA1SU0")>; +def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA1(H|SU1)")>; +def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA1[CMP]")>; +def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA256SU0")>; +def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA256(H|H2|SU1)")>; +def : InstRW<[A57Write_3cyc_1W], (instregex "^CRC32")>; + + +// Vector Load +// ----------------------------------------------------------------------------- + +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD1i(8|16|32)$")>; +def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD1i(8|16|32)_POST$")>; +def : InstRW<[A57Write_5cyc_1L], (instregex "LD1i(64)$")>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1i(64)_POST$")>; + +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s)$")>; +def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Rv(1d)$")>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Rv(1d)_POST$")>; +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Onev(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Onev(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_7cyc_3L], (instregex 
"LD1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_7cyc_3L, WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_8cyc_4L], (instregex "LD1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_8cyc_4L, WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD2i(8|16)$")>; +def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD2i(8|16)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD2i(32)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD2i(32)_POST$")>; +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD2i(64)$")>; +def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD2i(64)_POST$")>; + +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD2Rv(8b|4h|2s)$")>; +def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD2Rv(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_5cyc_1L], (instregex "LD2Rv(1d)$")>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD2Rv(1d)_POST$")>; +def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD2Twov(8b|4h|2s)$")>; +def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_9cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s)$")>; +def : InstRW<[A57Write_9cyc_2L_2V, WriteAdr], (instregex "LD2Twov(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD2Twov(2d)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD2Twov(2d)_POST$")>; + +def : InstRW<[A57Write_9cyc_1L_3V], (instregex "LD3i(8|16)$")>; +def : InstRW<[A57Write_9cyc_1L_3V, WriteAdr], (instregex "LD3i(8|16)_POST$")>; +def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD3i(32)$")>; +def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD3i(32)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD3i(64)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD3i(64)_POST$")>; + +def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD3Rv(8b|4h|2s)$")>; +def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD3Rv(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD3Rv(1d)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD3Rv(1d)_POST$")>; +def : InstRW<[A57Write_9cyc_1L_3V], (instregex "LD3Rv(16b|8h|4s)$")>; +def : InstRW<[A57Write_9cyc_1L_3V, WriteAdr], (instregex "LD3Rv(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD3Rv(2d)$")>; +def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD3Rv(2d)_POST$")>; + +def : InstRW<[A57Write_9cyc_2L_2V], (instregex "LD3Threev(8b|4h|2s)$")>; +def : InstRW<[A57Write_9cyc_2L_2V, WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_10cyc_3L_4V], (instregex "LD3Threev(16b|8h|4s)$")>; +def : InstRW<[A57Write_10cyc_3L_4V, WriteAdr], (instregex "LD3Threev(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_8cyc_4L], (instregex "LD3Threev(2d)$")>; +def : InstRW<[A57Write_8cyc_4L, WriteAdr], (instregex "LD3Threev(2d)_POST$")>; + +def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD4i(8|16)$")>; +def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD4i(8|16)_POST$")>; +def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD4i(32)$")>; +def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex 
"LD4i(32)_POST$")>; +def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD4i(64)$")>; +def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD4i(64)_POST$")>; + +def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD4Rv(8b|4h|2s)$")>; +def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD4Rv(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD4Rv(1d)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD4Rv(1d)_POST$")>; +def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD4Rv(16b|8h|4s)$")>; +def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD4Rv(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_9cyc_2L_4V], (instregex "LD4Rv(2d)$")>; +def : InstRW<[A57Write_9cyc_2L_4V, WriteAdr], (instregex "LD4Rv(2d)_POST$")>; + +def : InstRW<[A57Write_9cyc_2L_2V], (instregex "LD4Fourv(8b|4h|2s)$")>; +def : InstRW<[A57Write_9cyc_2L_2V, WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_11cyc_4L_4V], (instregex "LD4Fourv(16b|8h|4s)$")>; +def : InstRW<[A57Write_11cyc_4L_4V, WriteAdr], (instregex "LD4Fourv(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_8cyc_4L], (instregex "LD4Fourv(2d)$")>; +def : InstRW<[A57Write_8cyc_4L, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>; + +// Vector Store +// ----------------------------------------------------------------------------- + +def : InstRW<[A57Write_1cyc_1S], (instregex "ST1i(8|16|32)$")>; +def : InstRW<[A57Write_1cyc_1S, WriteAdr], (instregex "ST1i(8|16|32)_POST$")>; +def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST1i(64)$")>; +def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST1i(64)_POST$")>; + +def : InstRW<[A57Write_1cyc_1S], (instregex "ST1Onev(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_1cyc_1S, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_2cyc_2S], (instregex "ST1Onev(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_2cyc_2S, WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[A57Write_2cyc_2S], (instregex "ST1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_2cyc_2S, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_4cyc_4S], (instregex "ST1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[A57Write_3cyc_3S], (instregex "ST1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_3cyc_3S, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_6cyc_6S], (instregex "ST1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_6cyc_6S, WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[A57Write_4cyc_4S], (instregex "ST1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_8cyc_8S], (instregex "ST1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_8cyc_8S, WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST2i(8|16|32)$")>; +def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST2i(8|16|32)_POST$")>; +def : InstRW<[A57Write_2cyc_2S], (instregex "ST2i(64)$")>; +def : InstRW<[A57Write_2cyc_2S, WriteAdr], (instregex "ST2i(64)_POST$")>; + +def : InstRW<[A57Write_3cyc_2S_1V], (instregex "ST2Twov(8b|4h|2s)$")>; +def : InstRW<[A57Write_3cyc_2S_1V, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_4cyc_4S_2V], (instregex "ST2Twov(16b|8h|4s)$")>; +def : InstRW<[A57Write_4cyc_4S_2V, WriteAdr], (instregex "ST2Twov(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_4cyc_4S], 
(instregex "ST2Twov(2d)$")>; +def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST2Twov(2d)_POST$")>; + +def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST3i(8|16)$")>; +def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST3i(8|16)_POST$")>; +def : InstRW<[A57Write_3cyc_3S], (instregex "ST3i(32)$")>; +def : InstRW<[A57Write_3cyc_3S, WriteAdr], (instregex "ST3i(32)_POST$")>; +def : InstRW<[A57Write_3cyc_2S_1V], (instregex "ST3i(64)$")>; +def : InstRW<[A57Write_3cyc_2S_1V, WriteAdr], (instregex "ST3i(64)_POST$")>; + +def : InstRW<[A57Write_3cyc_3S_2V], (instregex "ST3Threev(8b|4h|2s)$")>; +def : InstRW<[A57Write_3cyc_3S_2V, WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_6cyc_6S_4V], (instregex "ST3Threev(16b|8h|4s)$")>; +def : InstRW<[A57Write_6cyc_6S_4V, WriteAdr], (instregex "ST3Threev(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_6cyc_6S], (instregex "ST3Threev(2d)$")>; +def : InstRW<[A57Write_6cyc_6S, WriteAdr], (instregex "ST3Threev(2d)_POST$")>; + +def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST4i(8|16)$")>; +def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST4i(8|16)_POST$")>; +def : InstRW<[A57Write_4cyc_4S], (instregex "ST4i(32)$")>; +def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST4i(32)_POST$")>; +def : InstRW<[A57Write_3cyc_2S_1V], (instregex "ST4i(64)$")>; +def : InstRW<[A57Write_3cyc_2S_1V, WriteAdr], (instregex "ST4i(64)_POST$")>; + +def : InstRW<[A57Write_4cyc_4S_2V], (instregex "ST4Fourv(8b|4h|2s)$")>; +def : InstRW<[A57Write_4cyc_4S_2V, WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_8cyc_8S_4V], (instregex "ST4Fourv(16b|8h|4s)$")>; +def : InstRW<[A57Write_8cyc_8S_4V, WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_8cyc_8S], (instregex "ST4Fourv(2d)$")>; +def : InstRW<[A57Write_8cyc_8S, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>; + +// Vector - Integer +// ----------------------------------------------------------------------------- + +// Reference for forms in this group +// D form - v8i8, v4i16, v2i32 +// Q form - v16i8, v8i16, v4i32 +// D form - v1i8, v1i16, v1i32, v1i64 +// Q form - v16i8, v8i16, v4i32, v2i64 +// D form - v8i8_v8i16, v4i16_v4i32, v2i32_v2i64 +// Q form - v16i8_v8i16, v8i16_v4i32, v4i32_v2i64 + +// ASIMD absolute diff accum, D-form +def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>; +// ASIMD absolute diff accum, Q-form +def : InstRW<[A57Write_5cyc_2X], (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>; +// ASIMD absolute diff accum long +def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]ABAL")>; + +// ASIMD arith, reduce, 4H/4S +def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>; +// ASIMD arith, reduce, 8B/8H +def : InstRW<[A57Write_7cyc_1V_1X], (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>; +// ASIMD arith, reduce, 16B +def : InstRW<[A57Write_8cyc_2X], (instregex "^[SU]?ADDL?Vv16i8v$")>; + +// ASIMD max/min, reduce, 4H/4S +def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>; +// ASIMD max/min, reduce, 8B/8H +def : InstRW<[A57Write_7cyc_1V_1X], (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>; +// ASIMD max/min, reduce, 16B +def : InstRW<[A57Write_8cyc_2X], (instregex "^[SU](MIN|MAX)Vv16i8v$")>; + +// ASIMD multiply, D-form +def : InstRW<[A57Write_5cyc_1W], (instregex "^(P?MUL|SQR?DMULH)(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)(_indexed)?$")>; +// ASIMD multiply, Q-form +def : InstRW<[A57Write_6cyc_2W], (instregex 
"^(P?MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>; + +// ASIMD multiply accumulate, D-form +def : InstRW<[A57Write_5cyc_1W], (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>; +// ASIMD multiply accumulate, Q-form +def : InstRW<[A57Write_6cyc_2W], (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>; + +// ASIMD multiply accumulate long +// ASIMD multiply accumulate saturating long +def A57WriteIVMA : SchedWriteRes<[A57UnitW]> { let Latency = 5; } +def A57ReadIVMA4 : SchedReadAdvance<4, [A57WriteIVMA]>; +def : InstRW<[A57WriteIVMA, A57ReadIVMA4], (instregex "^(S|U|SQD)ML[AS]L")>; + +// ASIMD multiply long +def : InstRW<[A57Write_5cyc_1W], (instregex "^(S|U|SQD)MULL")>; +def : InstRW<[A57Write_5cyc_1W], (instregex "^PMULL(v8i8|v16i8)")>; +def : InstRW<[A57Write_3cyc_1W], (instregex "^PMULL(v1i64|v2i64)")>; + +// ASIMD pairwise add and accumulate +// ASIMD shift accumulate +def A57WriteIVA : SchedWriteRes<[A57UnitX]> { let Latency = 4; } +def A57ReadIVA3 : SchedReadAdvance<3, [A57WriteIVA]>; +def : InstRW<[A57WriteIVA, A57ReadIVA3], (instregex "^[SU]ADALP")>; +def : InstRW<[A57WriteIVA, A57ReadIVA3], (instregex "^(S|SR|U|UR)SRA")>; + +// ASIMD shift by immed, complex +def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]?(Q|R){1,2}SHR")>; +def : InstRW<[A57Write_4cyc_1X], (instregex "^SQSHLU")>; + + +// ASIMD shift by register, basic, Q-form +def : InstRW<[A57Write_4cyc_2X], (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>; + +// ASIMD shift by register, complex, D-form +def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU][QR]{1,2}SHL(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>; + +// ASIMD shift by register, complex, Q-form +def : InstRW<[A57Write_5cyc_2X], (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>; + + +// Vector - Floating Point +// ----------------------------------------------------------------------------- + +// Reference for forms in this group +// D form - v2f32 +// Q form - v4f32, v2f64 +// D form - 32, 64 +// D form - v1i32, v1i64 +// D form - v2i32 +// Q form - v4i32, v2i64 + +// ASIMD FP arith, normal, D-form +def : InstRW<[A57Write_5cyc_1V], (instregex "^(FABD|FADD|FSUB)(v2f32|32|64|v2i32p)")>; +// ASIMD FP arith, normal, Q-form +def : InstRW<[A57Write_5cyc_2V], (instregex "^(FABD|FADD|FSUB)(v4f32|v2f64|v2i64p)")>; + +// ASIMD FP arith, pairwise, D-form +def : InstRW<[A57Write_5cyc_1V], (instregex "^FADDP(v2f32|32|64|v2i32)")>; +// ASIMD FP arith, pairwise, Q-form +def : InstRW<[A57Write_9cyc_3V], (instregex "^FADDP(v4f32|v2f64|v2i64)")>; + +// ASIMD FP compare, D-form +def : InstRW<[A57Write_5cyc_1V], (instregex "^(FACGE|FACGT|FCMEQ|FCMGE|FCMGT|FCMLE|FCMLT)(v2f32|32|64|v1i32|v2i32|v1i64)")>; +// ASIMD FP compare, Q-form +def : InstRW<[A57Write_5cyc_2V], (instregex "^(FACGE|FACGT|FCMEQ|FCMGE|FCMGT|FCMLE|FCMLT)(v4f32|v2f64|v4i32|v2i64)")>; + +// ASIMD FP convert, long and narrow +def : InstRW<[A57Write_8cyc_3V], (instregex "^FCVT(L|N|XN)v")>; +// ASIMD FP convert, other, D-form +def : InstRW<[A57Write_5cyc_1V], (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>; +// ASIMD FP convert, other, Q-form +def : InstRW<[A57Write_5cyc_2V], (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>; + +// ASIMD FP divide, D-form, F32 +def : InstRW<[A57Write_17cyc_1W], (instregex "FDIVv2f32")>; +// ASIMD FP divide, Q-form, F32 +def : InstRW<[A57Write_34cyc_2W], (instregex "FDIVv4f32")>; +// ASIMD FP divide, Q-form, F64 +def : InstRW<[A57Write_64cyc_2W], (instregex "FDIVv2f64")>; + +// Note: These were simply duplicated from ASIMD 
FDIV because of missing documentation +// ASIMD FP square root, D-form, F32 +def : InstRW<[A57Write_17cyc_1W], (instregex "FSQRTv2f32")>; +// ASIMD FP square root, Q-form, F32 +def : InstRW<[A57Write_34cyc_2W], (instregex "FSQRTv4f32")>; +// ASIMD FP square root, Q-form, F64 +def : InstRW<[A57Write_64cyc_2W], (instregex "FSQRTv2f64")>; + +// ASIMD FP max/min, normal, D-form +def : InstRW<[A57Write_5cyc_1V], (instregex "^(FMAX|FMIN)(NM)?(v2f32)")>; +// ASIMD FP max/min, normal, Q-form +def : InstRW<[A57Write_5cyc_2V], (instregex "^(FMAX|FMIN)(NM)?(v4f32|v2f64)")>; +// ASIMD FP max/min, pairwise, D-form +def : InstRW<[A57Write_5cyc_1V], (instregex "^(FMAX|FMIN)(NM)?P(v2f32|v2i32)")>; +// ASIMD FP max/min, pairwise, Q-form +def : InstRW<[A57Write_9cyc_3V], (instregex "^(FMAX|FMIN)(NM)?P(v4f32|v2f64|v2i64)")>; +// ASIMD FP max/min, reduce +def : InstRW<[A57Write_10cyc_3V], (instregex "^(FMAX|FMIN)(NM)?Vv")>; + +// ASIMD FP multiply, D-form, FZ +def : InstRW<[A57Write_5cyc_1V], (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>; +// ASIMD FP multiply, Q-form, FZ +def : InstRW<[A57Write_5cyc_2V], (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>; + +// ASIMD FP multiply accumulate, D-form, FZ +// ASIMD FP multiply accumulate, Q-form, FZ +def A57WriteFPVMAD : SchedWriteRes<[A57UnitV]> { let Latency = 9; } +def A57WriteFPVMAQ : SchedWriteRes<[A57UnitV, A57UnitV]> { let Latency = 10; } +def A57ReadFPVMA5 : SchedReadAdvance<5, [A57WriteFPVMAD, A57WriteFPVMAQ]>; +def : InstRW<[A57WriteFPVMAD, A57ReadFPVMA5], (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>; +def : InstRW<[A57WriteFPVMAQ, A57ReadFPVMA5], (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>; + +// ASIMD FP round, D-form +def : InstRW<[A57Write_5cyc_1V], (instregex "^FRINT[AIMNPXZ](v2f32)")>; +// ASIMD FP round, Q-form +def : InstRW<[A57Write_5cyc_2V], (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>; + + +// Vector - Miscellaneous +// ----------------------------------------------------------------------------- + +// Reference for forms in this group +// D form - v8i8, v4i16, v2i32 +// Q form - v16i8, v8i16, v4i32 +// D form - v1i8, v1i16, v1i32, v1i64 +// Q form - v16i8, v8i16, v4i32, v2i64 + +// ASIMD bitwise insert, Q-form +def : InstRW<[A57Write_3cyc_2V], (instregex "^(BIF|BIT|BSL)v16i8")>; + +// ASIMD duplicate, gen reg, D-form and Q-form +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^CPY")>; +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^DUPv.+gpr")>; + +// ASIMD move, saturating +def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]QXTU?N")>; + +// ASIMD reciprocal estimate, D-form +def : InstRW<[A57Write_5cyc_1V], (instregex "^[FU](RECP|RSQRT)(E|X)(v2f32|v1i32|v2i32|v1i64)")>; +// ASIMD reciprocal estimate, Q-form +def : InstRW<[A57Write_5cyc_2V], (instregex "^[FU](RECP|RSQRT)(E|X)(v2f64|v4f32|v4i32)")>; + +// ASIMD reciprocal step, D-form, FZ +def : InstRW<[A57Write_9cyc_1V], (instregex "^F(RECP|RSQRT)S(v2f32|v1i32|v2i32|v1i64|32|64)")>; +// ASIMD reciprocal step, Q-form, FZ +def : InstRW<[A57Write_9cyc_2V], (instregex "^F(RECP|RSQRT)S(v2f64|v4f32|v4i32)")>; + +// ASIMD table lookup, D-form +def : InstRW<[A57Write_3cyc_1V], (instregex "^TB[LX]v8i8One")>; +def : InstRW<[A57Write_6cyc_2V], (instregex "^TB[LX]v8i8Two")>; +def : InstRW<[A57Write_9cyc_3V], (instregex "^TB[LX]v8i8Three")>; +def : InstRW<[A57Write_12cyc_4V], (instregex "^TB[LX]v8i8Four")>; +// ASIMD table lookup, Q-form +def : InstRW<[A57Write_6cyc_3V], (instregex "^TB[LX]v16i8One")>; +def : InstRW<[A57Write_9cyc_5V], (instregex "^TB[LX]v16i8Two")>; +def : 
InstRW<[A57Write_12cyc_7V], (instregex "^TB[LX]v16i8Three")>; +def : InstRW<[A57Write_15cyc_9V], (instregex "^TB[LX]v16i8Four")>; + +// ASIMD transfer, element to gen reg +def : InstRW<[A57Write_6cyc_1I_1L], (instregex "^[SU]MOVv")>; + +// ASIMD transfer, gen reg to element +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^INSv")>; + +// ASIMD unzip/zip, Q-form +def : InstRW<[A57Write_6cyc_3V], (instregex "^(UZP|ZIP)(1|2)(v16i8|v8i16|v4i32|v2i64)")>; + + +// Remainder +// ----------------------------------------------------------------------------- + +def : InstRW<[A57Write_5cyc_1V], (instregex "^F(ADD|SUB)[DS]rr")>; + +def A57WriteFPMA : SchedWriteRes<[A57UnitV]> { let Latency = 9; } +def A57ReadFPMA5 : SchedReadAdvance<5, [A57WriteFPMA]>; +def A57ReadFPM : SchedReadAdvance<0>; +def : InstRW<[A57WriteFPMA, A57ReadFPM, A57ReadFPM, A57ReadFPMA5], (instregex "^FN?M(ADD|SUB)[DS]rrr")>; + +def : InstRW<[A57Write_10cyc_1L_1V], (instregex "^[FSU]CVT[AMNPZ][SU](_Int)?[SU]?[XW]?[DS]?[rds]i?")>; +def : InstRW<[A57Write_10cyc_1L_1V], (instregex "^[SU]CVTF")>; + +def : InstRW<[A57Write_32cyc_1W], (instrs FDIVDrr)>; +def : InstRW<[A57Write_17cyc_1W], (instrs FDIVSrr)>; + +def : InstRW<[A57Write_5cyc_1V], (instregex "^F(MAX|MIN).+rr")>; + +def : InstRW<[A57Write_5cyc_1V], (instregex "^FRINT.+r")>; + +def : InstRW<[A57Write_32cyc_1W], (instrs FSQRTDr)>; +def : InstRW<[A57Write_17cyc_1W], (instrs FSQRTSr)>; + +def : InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDNPDi)>; +def : InstRW<[A57Write_6cyc_2L, WriteLDHi], (instrs LDNPQi)>; +def : InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDNPSi)>; +def : InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDPDi)>; +def : InstRW<[A57Write_5cyc_1L, WriteLDHi, WriteAdr], (instrs LDPDpost)>; +def : InstRW<[A57Write_5cyc_1L, WriteLDHi, WriteAdr], (instrs LDPDpre)>; +def : InstRW<[A57Write_6cyc_2L, WriteLDHi], (instrs LDPQi)>; +def : InstRW<[A57Write_6cyc_2L, WriteLDHi, WriteAdr], (instrs LDPQpost)>; +def : InstRW<[A57Write_6cyc_2L, WriteLDHi, WriteAdr], (instrs LDPQpre)>; +def : InstRW<[A57Write_5cyc_1I_2L, WriteLDHi], (instrs LDPSWi)>; +def : InstRW<[A57Write_5cyc_1I_2L, WriteLDHi, WriteAdr], (instrs LDPSWpost)>; +def : InstRW<[A57Write_5cyc_1I_2L, WriteLDHi, WriteAdr], (instrs LDPSWpre)>; +def : InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDPSi)>; +def : InstRW<[A57Write_5cyc_1L, WriteLDHi, WriteAdr], (instrs LDPSpost)>; +def : InstRW<[A57Write_5cyc_1L, WriteLDHi, WriteAdr], (instrs LDPSpre)>; +def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRBpost)>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRBpre)>; +def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRBroW)>; +def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRBroX)>; +def : InstRW<[A57Write_5cyc_1L], (instrs LDRBui)>; +def : InstRW<[A57Write_5cyc_1L], (instrs LDRDl)>; +def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRDpost)>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRDpre)>; +def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRDroW)>; +def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRDroX)>; +def : InstRW<[A57Write_5cyc_1L], (instrs LDRDui)>; +def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRHHroW)>; +def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRHHroX)>; +def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRHpost)>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRHpre)>; +def : InstRW<[A57Write_6cyc_1I_1L, ReadAdrBase], (instrs LDRHroW)>; +def : InstRW<[A57Write_6cyc_1I_1L, ReadAdrBase], (instrs LDRHroX)>; +def : 
InstRW<[A57Write_5cyc_1L], (instrs LDRHui)>; +def : InstRW<[A57Write_5cyc_1L], (instrs LDRQl)>; +def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRQpost)>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRQpre)>; +def : InstRW<[A57Write_6cyc_1I_1L, ReadAdrBase], (instrs LDRQroW)>; +def : InstRW<[A57Write_6cyc_1I_1L, ReadAdrBase], (instrs LDRQroX)>; +def : InstRW<[A57Write_5cyc_1L], (instrs LDRQui)>; +def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRSHWroW)>; +def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRSHWroX)>; +def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRSHXroW)>; +def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRSHXroX)>; +def : InstRW<[A57Write_5cyc_1L], (instrs LDRSl)>; +def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRSpost)>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRSpre)>; +def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRSroW)>; +def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRSroX)>; +def : InstRW<[A57Write_5cyc_1L], (instrs LDRSui)>; +def : InstRW<[A57Write_5cyc_1L], (instrs LDURBi)>; +def : InstRW<[A57Write_5cyc_1L], (instrs LDURDi)>; +def : InstRW<[A57Write_5cyc_1L], (instrs LDURHi)>; +def : InstRW<[A57Write_5cyc_1L], (instrs LDURQi)>; +def : InstRW<[A57Write_5cyc_1L], (instrs LDURSi)>; + +def : InstRW<[A57Write_2cyc_2S], (instrs STNPDi)>; +def : InstRW<[A57Write_4cyc_1I_4S], (instrs STNPQi)>; +def : InstRW<[A57Write_2cyc_2S], (instrs STNPXi)>; +def : InstRW<[A57Write_2cyc_2S], (instrs STPDi)>; +def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S], (instrs STPDpost)>; +def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S], (instrs STPDpre)>; +def : InstRW<[A57Write_4cyc_1I_4S], (instrs STPQi)>; +def : InstRW<[WriteAdr, A57Write_4cyc_1I_4S], (instrs STPQpost)>; +def : InstRW<[WriteAdr, A57Write_4cyc_2I_4S], (instrs STPQpre)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STPSpost)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STPSpre)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STPWpost)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STPWpre)>; +def : InstRW<[A57Write_2cyc_2S], (instrs STPXi)>; +def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S], (instrs STPXpost)>; +def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S], (instrs STPXpre)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRBBpost)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRBBpre)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRBpost)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STRBpre)>; +def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRBroW)>; +def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRBroX)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRDpost)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STRDpre)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRHHpost)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRHHpre)>; +def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRHHroW)>; +def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRHHroX)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRHpost)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STRHpre)>; +def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRHroW)>; +def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRHroX)>; +def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S, ReadAdrBase], (instrs STRQpost)>; 
+def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S], (instrs STRQpre)>; +def : InstRW<[A57Write_2cyc_1I_2S, ReadAdrBase], (instrs STRQroW)>; +def : InstRW<[A57Write_2cyc_1I_2S, ReadAdrBase], (instrs STRQroX)>; +def : InstRW<[A57Write_2cyc_1I_2S], (instrs STRQui)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRSpost)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STRSpre)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRWpost)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRWpre)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRXpost)>; +def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRXpre)>; +def : InstRW<[A57Write_2cyc_2S], (instrs STURQi)>; + +} // SchedModel = CortexA57Model diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedA57WriteRes.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedA57WriteRes.td new file mode 100644 index 000000000..55005e1d9 --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedA57WriteRes.td @@ -0,0 +1,544 @@ +//=- AArch64SchedA57WriteRes.td - ARM Cortex-A57 Write Res ---*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Contains all of the Cortex-A57 specific SchedWriteRes types. The approach +// below is to define a generic SchedWriteRes for every combination of +// latency and microOps. The naming conventions is to use a prefix, one field +// for latency, and one or more microOp count/type designators. +// Prefix: A57Write +// Latency: #cyc +// MicroOp Count/Types: #(B|I|M|L|S|X|W|V) +// +// e.g. A57Write_6cyc_1I_6S_4V means the total latency is 6 and there are +// 11 micro-ops to be issued down one I pipe, six S pipes and four V pipes. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Define Generic 1 micro-op types + +def A57Write_5cyc_1L : SchedWriteRes<[A57UnitL]> { let Latency = 5; } +def A57Write_5cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 5; } +def A57Write_5cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 5; } +def A57Write_5cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 5; } +def A57Write_10cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 10; } +def A57Write_17cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 17; + let ResourceCycles = [17]; } +def A57Write_19cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 19; + let ResourceCycles = [19]; } +def A57Write_1cyc_1B : SchedWriteRes<[A57UnitB]> { let Latency = 1; } +def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1; } +def A57Write_1cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 1; } +def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2; } +def A57Write_32cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 32; + let ResourceCycles = [32]; } +def A57Write_35cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 35; + let ResourceCycles = [35]; } +def A57Write_3cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 3; } +def A57Write_3cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 3; } +def A57Write_3cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 3; } +def A57Write_3cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 3; } +def A57Write_4cyc_1L : SchedWriteRes<[A57UnitL]> { let Latency = 4; } +def A57Write_4cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 4; } +def A57Write_9cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 9; } +def A57Write_6cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 6; } +def A57Write_6cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 6; } + + +//===----------------------------------------------------------------------===// +// Define Generic 2 micro-op types + +def A57Write_64cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> { + let Latency = 64; + let NumMicroOps = 2; + let ResourceCycles = [32, 32]; +} +def A57Write_6cyc_1I_1L : SchedWriteRes<[A57UnitI, + A57UnitL]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_7cyc_1V_1X : SchedWriteRes<[A57UnitV, + A57UnitX]> { + let Latency = 7; + let NumMicroOps = 2; +} +def A57Write_8cyc_1L_1V : SchedWriteRes<[A57UnitL, + A57UnitV]> { + let Latency = 8; + let NumMicroOps = 2; +} +def A57Write_9cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 2; +} +def A57Write_8cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 8; + let NumMicroOps = 2; +} +def A57Write_6cyc_2L : SchedWriteRes<[A57UnitL, A57UnitL]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_6cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_6cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_5cyc_1I_1L : SchedWriteRes<[A57UnitI, + A57UnitL]> { + let Latency = 5; + let NumMicroOps = 2; +} +def A57Write_5cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 5; + let NumMicroOps = 2; +} +def A57Write_5cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 5; + let NumMicroOps = 2; +} +def A57Write_10cyc_1L_1V : SchedWriteRes<[A57UnitL, + A57UnitV]> { + let Latency = 10; + let NumMicroOps = 2; +} +def A57Write_10cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 
10; + let NumMicroOps = 2; +} +def A57Write_1cyc_1B_1I : SchedWriteRes<[A57UnitB, + A57UnitI]> { + let Latency = 1; + let NumMicroOps = 2; +} +def A57Write_1cyc_1I_1S : SchedWriteRes<[A57UnitI, + A57UnitS]> { + let Latency = 1; + let NumMicroOps = 2; +} +def A57Write_2cyc_1B_1I : SchedWriteRes<[A57UnitB, + A57UnitI]> { + let Latency = 2; + let NumMicroOps = 2; +} +def A57Write_2cyc_2S : SchedWriteRes<[A57UnitS, A57UnitS]> { + let Latency = 2; + let NumMicroOps = 2; +} +def A57Write_2cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 2; + let NumMicroOps = 2; +} +def A57Write_34cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> { + let Latency = 34; + let NumMicroOps = 2; + let ResourceCycles = [17, 17]; +} +def A57Write_3cyc_1I_1M : SchedWriteRes<[A57UnitI, + A57UnitM]> { + let Latency = 3; + let NumMicroOps = 2; +} +def A57Write_3cyc_1I_1S : SchedWriteRes<[A57UnitI, + A57UnitS]> { + let Latency = 3; + let NumMicroOps = 2; +} +def A57Write_3cyc_1S_1V : SchedWriteRes<[A57UnitS, + A57UnitV]> { + let Latency = 3; + let NumMicroOps = 2; +} +def A57Write_3cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 3; + let NumMicroOps = 2; +} +def A57Write_4cyc_1I_1L : SchedWriteRes<[A57UnitI, + A57UnitL]> { + let Latency = 4; + let NumMicroOps = 2; +} +def A57Write_4cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 4; + let NumMicroOps = 2; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 3 micro-op types + +def A57Write_10cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 10; + let NumMicroOps = 3; +} +def A57Write_2cyc_1I_2S : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS]> { + let Latency = 2; + let NumMicroOps = 3; +} +def A57Write_3cyc_1I_1S_1V : SchedWriteRes<[A57UnitI, + A57UnitS, + A57UnitV]> { + let Latency = 3; + let NumMicroOps = 3; +} +def A57Write_3cyc_1M_2S : SchedWriteRes<[A57UnitM, + A57UnitS, A57UnitS]> { + let Latency = 3; + let NumMicroOps = 3; +} +def A57Write_3cyc_3S : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS]> { + let Latency = 3; + let NumMicroOps = 3; +} +def A57Write_3cyc_2S_1V : SchedWriteRes<[A57UnitS, A57UnitS, + A57UnitV]> { + let Latency = 3; + let NumMicroOps = 3; +} +def A57Write_5cyc_1I_2L : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL]> { + let Latency = 5; + let NumMicroOps = 3; +} +def A57Write_6cyc_1I_2L : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL]> { + let Latency = 6; + let NumMicroOps = 3; +} +def A57Write_6cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 6; + let NumMicroOps = 3; +} +def A57Write_7cyc_3L : SchedWriteRes<[A57UnitL, A57UnitL, A57UnitL]> { + let Latency = 7; + let NumMicroOps = 3; +} +def A57Write_8cyc_1I_1L_1V : SchedWriteRes<[A57UnitI, + A57UnitL, + A57UnitV]> { + let Latency = 8; + let NumMicroOps = 3; +} +def A57Write_8cyc_1L_2V : SchedWriteRes<[A57UnitL, + A57UnitV, A57UnitV]> { + let Latency = 8; + let NumMicroOps = 3; +} +def A57Write_8cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 8; + let NumMicroOps = 3; +} +def A57Write_9cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 3; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 4 micro-op types + +def A57Write_2cyc_2I_2S : SchedWriteRes<[A57UnitI, A57UnitI, + A57UnitS, A57UnitS]> { + let Latency = 2; + let NumMicroOps = 4; +} +def A57Write_3cyc_2I_2S : SchedWriteRes<[A57UnitI, A57UnitI, + A57UnitS, A57UnitS]> { + let Latency 
= 3; + let NumMicroOps = 4; +} +def A57Write_3cyc_1I_3S : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, A57UnitS]> { + let Latency = 3; + let NumMicroOps = 4; +} +def A57Write_3cyc_1I_2S_1V : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, + A57UnitV]> { + let Latency = 3; + let NumMicroOps = 4; +} +def A57Write_4cyc_4S : SchedWriteRes<[A57UnitS, A57UnitS, + A57UnitS, A57UnitS]> { + let Latency = 4; + let NumMicroOps = 4; +} +def A57Write_7cyc_1I_3L : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL, A57UnitL]> { + let Latency = 7; + let NumMicroOps = 4; +} +def A57Write_5cyc_2I_2L : SchedWriteRes<[A57UnitI, A57UnitI, + A57UnitL, A57UnitL]> { + let Latency = 5; + let NumMicroOps = 4; +} +def A57Write_8cyc_1I_1L_2V : SchedWriteRes<[A57UnitI, + A57UnitL, + A57UnitV, A57UnitV]> { + let Latency = 8; + let NumMicroOps = 4; +} +def A57Write_8cyc_4L : SchedWriteRes<[A57UnitL, A57UnitL, + A57UnitL, A57UnitL]> { + let Latency = 8; + let NumMicroOps = 4; +} +def A57Write_9cyc_2L_2V : SchedWriteRes<[A57UnitL, A57UnitL, + A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 4; +} +def A57Write_9cyc_1L_3V : SchedWriteRes<[A57UnitL, + A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 4; +} +def A57Write_12cyc_4V : SchedWriteRes<[A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 12; + let NumMicroOps = 4; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 5 micro-op types + +def A57Write_3cyc_3S_2V : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS, + A57UnitV, A57UnitV]> { + let Latency = 3; + let NumMicroOps = 5; +} +def A57Write_8cyc_1I_4L : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL, + A57UnitL, A57UnitL]> { + let Latency = 8; + let NumMicroOps = 5; +} +def A57Write_4cyc_1I_4S : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS]> { + let Latency = 4; + let NumMicroOps = 5; +} +def A57Write_9cyc_1I_2L_2V : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL, + A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 5; +} +def A57Write_9cyc_1I_1L_3V : SchedWriteRes<[A57UnitI, + A57UnitL, + A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 5; +} +def A57Write_9cyc_2L_3V : SchedWriteRes<[A57UnitL, A57UnitL, + A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 5; +} +def A57Write_9cyc_5V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 5; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 6 micro-op types + +def A57Write_3cyc_1I_3S_2V : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, A57UnitS, + A57UnitV, A57UnitV]> { + let Latency = 3; + let NumMicroOps = 6; +} +def A57Write_4cyc_2I_4S : SchedWriteRes<[A57UnitI, A57UnitI, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS]> { + let Latency = 4; + let NumMicroOps = 6; +} +def A57Write_4cyc_4S_2V : SchedWriteRes<[A57UnitS, A57UnitS, + A57UnitS, A57UnitS, + A57UnitV, A57UnitV]> { + let Latency = 4; + let NumMicroOps = 6; +} +def A57Write_6cyc_6S : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS, + A57UnitS, A57UnitS, A57UnitS]> { + let Latency = 6; + let NumMicroOps = 6; +} +def A57Write_9cyc_1I_2L_3V : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL, + A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 6; +} +def A57Write_9cyc_1I_1L_4V : SchedWriteRes<[A57UnitI, + A57UnitL, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 6; +} +def 
A57Write_9cyc_2L_4V : SchedWriteRes<[A57UnitL, A57UnitL, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 6; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 7 micro-op types + +def A57Write_10cyc_3L_4V : SchedWriteRes<[A57UnitL, A57UnitL, A57UnitL, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 10; + let NumMicroOps = 7; +} +def A57Write_4cyc_1I_4S_2V : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS, + A57UnitV, A57UnitV]> { + let Latency = 4; + let NumMicroOps = 7; +} +def A57Write_6cyc_1I_6S : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, A57UnitS, + A57UnitS, A57UnitS, A57UnitS]> { + let Latency = 6; + let NumMicroOps = 7; +} +def A57Write_9cyc_1I_2L_4V : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 7; +} +def A57Write_12cyc_7V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 12; + let NumMicroOps = 7; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 8 micro-op types + +def A57Write_10cyc_1I_3L_4V : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL, A57UnitL, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 10; + let NumMicroOps = 8; +} +def A57Write_11cyc_4L_4V : SchedWriteRes<[A57UnitL, A57UnitL, + A57UnitL, A57UnitL, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 11; + let NumMicroOps = 8; +} +def A57Write_8cyc_8S : SchedWriteRes<[A57UnitS, A57UnitS, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS]> { + let Latency = 8; + let NumMicroOps = 8; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 9 micro-op types + +def A57Write_8cyc_1I_8S : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS]> { + let Latency = 8; + let NumMicroOps = 9; +} +def A57Write_11cyc_1I_4L_4V : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL, + A57UnitL, A57UnitL, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 11; + let NumMicroOps = 9; +} +def A57Write_15cyc_9V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV, + A57UnitV, A57UnitV, A57UnitV, + A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 15; + let NumMicroOps = 9; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 10 micro-op types + +def A57Write_6cyc_6S_4V : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS, + A57UnitS, A57UnitS, A57UnitS, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 6; + let NumMicroOps = 10; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 11 micro-op types + +def A57Write_6cyc_1I_6S_4V : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, A57UnitS, + A57UnitS, A57UnitS, A57UnitS, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 6; + let NumMicroOps = 11; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 12 micro-op types + +def A57Write_8cyc_8S_4V : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS, A57UnitS, + A57UnitS, A57UnitS, A57UnitS, A57UnitS, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 8; + let NumMicroOps = 12; +} + +//===----------------------------------------------------------------------===// +// Define Generic 13 
micro-op types + +def A57Write_8cyc_1I_8S_4V : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, A57UnitS, + A57UnitS, A57UnitS, A57UnitS, + A57UnitS, A57UnitS, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 8; + let NumMicroOps = 13; +} + diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedCyclone.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedCyclone.td new file mode 100644 index 000000000..7a474ba8e --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedCyclone.td @@ -0,0 +1,871 @@ +//=- AArch64SchedCyclone.td - Cyclone Scheduling Definitions -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for AArch64 Cyclone to support +// instruction scheduling and other instruction cost heuristics. +// +//===----------------------------------------------------------------------===// + +def CycloneModel : SchedMachineModel { + let IssueWidth = 6; // 6 micro-ops are dispatched per cycle. + let MicroOpBufferSize = 192; // Based on the reorder buffer. + let LoadLatency = 4; // Optimistic load latency. + let MispredictPenalty = 16; // 14-19 cycles are typical. + let CompleteModel = 1; + + list<Predicate> UnsupportedFeatures = [HasSVE]; +} + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available on Cyclone. + +// 4 integer pipes +def CyUnitI : ProcResource<4> { + let BufferSize = 48; +} + +// 2 branch units: I[0..1] +def CyUnitB : ProcResource<2> { + let Super = CyUnitI; + let BufferSize = 24; +} + +// 1 indirect-branch unit: I[0] +def CyUnitBR : ProcResource<1> { + let Super = CyUnitB; +} + +// 2 shifter pipes: I[2..3] +// When an instruction consumes a CyUnitIS, it also consumes a CyUnitI +def CyUnitIS : ProcResource<2> { + let Super = CyUnitI; + let BufferSize = 24; +} + +// 1 mul pipe: I[0] +def CyUnitIM : ProcResource<1> { + let Super = CyUnitBR; + let BufferSize = 32; +} + +// 1 div pipe: I[1] +def CyUnitID : ProcResource<1> { + let Super = CyUnitB; + let BufferSize = 16; +} + +// 1 integer division unit. This is driven by the ID pipe, but only +// consumes the pipe for one cycle at issue and another cycle at writeback. +def CyUnitIntDiv : ProcResource<1>; + +// 2 ld/st pipes. +def CyUnitLS : ProcResource<2> { + let BufferSize = 28; +} + +// 3 fp/vector pipes. +def CyUnitV : ProcResource<3> { + let BufferSize = 48; +} +// 2 fp/vector arithmetic and multiply pipes: V[0-1] +def CyUnitVM : ProcResource<2> { + let Super = CyUnitV; + let BufferSize = 32; +} +// 1 fp/vector division/sqrt pipe: V[2] +def CyUnitVD : ProcResource<1> { + let Super = CyUnitV; + let BufferSize = 16; +} +// 1 fp compare pipe: V[0] +def CyUnitVC : ProcResource<1> { + let Super = CyUnitVM; + let BufferSize = 16; +} + +// 2 fp division/square-root units. These are driven by the VD pipe, +// but only consume the pipe for one cycle at issue and a cycle at writeback. +def CyUnitFloatDiv : ProcResource<2>; + +//===----------------------------------------------------------------------===// +// Define scheduler read/write resources and latency on Cyclone. +// This mirrors sections 7.7-7.9 of the Tuning Guide v1.0.1. + +let SchedModel = CycloneModel in { + +//--- +// 7.8.1. Moves +//--- + +// A single nop micro-op (uX). 
+def WriteX : SchedWriteRes<[]> { let Latency = 0; } + +// Move zero is a register rename (to machine register zero). +// The move is replaced by a single nop micro-op. +// MOVZ Rd, #0 +// AND Rd, Rzr, #imm +def WriteZPred : SchedPredicate<[{TII->isGPRZero(*MI)}]>; +def WriteImmZ : SchedWriteVariant<[ + SchedVar<WriteZPred, [WriteX]>, + SchedVar<NoSchedPred, [WriteImm]>]>; +def : InstRW<[WriteImmZ], (instrs MOVZWi,MOVZXi,ANDWri,ANDXri)>; + +// Move GPR is a register rename and single nop micro-op. +// ORR Xd, XZR, Xm +// ADD Xd, Xn, #0 +def WriteIMovPred : SchedPredicate<[{TII->isGPRCopy(*MI)}]>; +def WriteVMovPred : SchedPredicate<[{TII->isFPRCopy(*MI)}]>; +def WriteMov : SchedWriteVariant<[ + SchedVar<WriteIMovPred, [WriteX]>, + SchedVar<WriteVMovPred, [WriteX]>, + SchedVar<NoSchedPred, [WriteI]>]>; +def : InstRW<[WriteMov], (instrs COPY,ORRXrr,ADDXrr)>; + +// Move non-zero immediate is an integer ALU op. +// MOVN,MOVZ,MOVK +def : WriteRes<WriteImm, [CyUnitI]>; + +//--- +// 7.8.2-7.8.5. Arithmetic and Logical, Comparison, Conditional, +// Shifts and Bitfield Operations +//--- + +// ADR,ADRP +// ADD(S)ri,SUB(S)ri,AND(S)ri,EORri,ORRri +// ADD(S)rr,SUB(S)rr,AND(S)rr,BIC(S)rr,EONrr,EORrr,ORNrr,ORRrr +// ADC(S),SBC(S) +// Aliases: CMN, CMP, TST +// +// Conditional operations. +// CCMNi,CCMPi,CCMNr,CCMPr, +// CSEL,CSINC,CSINV,CSNEG +// +// Bit counting and reversal operations. +// CLS,CLZ,RBIT,REV,REV16,REV32 +def : WriteRes<WriteI, [CyUnitI]>; + +// ADD with shifted register operand is a single micro-op that +// consumes a shift pipeline for two cycles. +// ADD(S)rs,SUB(S)rs,AND(S)rs,BIC(S)rs,EONrs,EORrs,ORNrs,ORRrs +// EXAMPLE: ADDrs Xn, Xm LSL #imm +def : WriteRes<WriteISReg, [CyUnitIS]> { + let Latency = 2; + let ResourceCycles = [2]; +} + +// ADD with extended register operand is the same as shifted reg operand. +// ADD(S)re,SUB(S)re +// EXAMPLE: ADDXre Xn, Xm, UXTB #1 +def : WriteRes<WriteIEReg, [CyUnitIS]> { + let Latency = 2; + let ResourceCycles = [2]; +} + +// Variable shift and bitfield operations. +// ASRV,LSLV,LSRV,RORV,BFM,SBFM,UBFM +def : WriteRes<WriteIS, [CyUnitIS]>; + +// EXTR Shifts a pair of registers and requires two micro-ops. +// The second micro-op is delayed, as modeled by ReadExtrHi. +// EXTR Xn, Xm, #imm +def : WriteRes<WriteExtr, [CyUnitIS, CyUnitIS]> { + let Latency = 2; + let NumMicroOps = 2; +} + +// EXTR's first register read is delayed by one cycle, effectively +// shortening its writer's latency. +// EXTR Xn, Xm, #imm +def : ReadAdvance<ReadExtrHi, 1>; + +//--- +// 7.8.6. Multiplies +//--- + +// MUL/MNEG are aliases for MADD/MSUB. +// MADDW,MSUBW,SMADDL,SMSUBL,UMADDL,UMSUBL +def : WriteRes<WriteIM32, [CyUnitIM]> { + let Latency = 4; +} +// MADDX,MSUBX,SMULH,UMULH +def : WriteRes<WriteIM64, [CyUnitIM]> { + let Latency = 5; +} + +//--- +// 7.8.7. Divide +//--- + +// 32-bit divide takes 7-13 cycles. 10 cycles covers a 20-bit quotient. +// The ID pipe is consumed for 2 cycles: issue and writeback. +// SDIVW,UDIVW +def : WriteRes<WriteID32, [CyUnitID, CyUnitIntDiv]> { + let Latency = 10; + let ResourceCycles = [2, 10]; +} +// 64-bit divide takes 7-21 cycles. 13 cycles covers a 32-bit quotient. +// The ID pipe is consumed for 2 cycles: issue and writeback. +// SDIVX,UDIVX +def : WriteRes<WriteID64, [CyUnitID, CyUnitIntDiv]> { + let Latency = 13; + let ResourceCycles = [2, 13]; +} + +//--- +// 7.8.8,7.8.10. Load/Store, single element +//--- + +// Integer loads take 4 cycles and use one LS unit for one cycle. 
+def : WriteRes<WriteLD, [CyUnitLS]> { + let Latency = 4; +} + +// Store-load forwarding is 4 cycles. +// +// Note: The store-exclusive sequence incorporates this +// latency. However, general heuristics should not model the +// dependence between a store and subsequent may-alias load because +// hardware speculation works. +def : WriteRes<WriteST, [CyUnitLS]> { + let Latency = 4; +} + +// Load from base address plus an optionally scaled register offset. +// Rt latency is latency WriteIS + WriteLD. +// EXAMPLE: LDR Xn, Xm [, lsl 3] +def CyWriteLDIdx : SchedWriteVariant<[ + SchedVar<ScaledIdxPred, [WriteIS, WriteLD]>, // Load from scaled register. + SchedVar<NoSchedPred, [WriteLD]>]>; // Load from register offset. +def : SchedAlias<WriteLDIdx, CyWriteLDIdx>; // Map AArch64->Cyclone type. + +// EXAMPLE: STR Xn, Xm [, lsl 3] +def CyWriteSTIdx : SchedWriteVariant<[ + SchedVar<ScaledIdxPred, [WriteIS, WriteST]>, // Store to scaled register. + SchedVar<NoSchedPred, [WriteST]>]>; // Store to register offset. +def : SchedAlias<WriteSTIdx, CyWriteSTIdx>; // Map AArch64->Cyclone type. + +// Read the (unshifted) base register Xn in the second micro-op one cycle later. +// EXAMPLE: LDR Xn, Xm [, lsl 3] +def ReadBaseRS : SchedReadAdvance<1>; +def CyReadAdrBase : SchedReadVariant<[ + SchedVar<ScaledIdxPred, [ReadBaseRS]>, // Read base reg after shifting offset. + SchedVar<NoSchedPred, [ReadDefault]>]>; // Read base reg with no shift. +def : SchedAlias<ReadAdrBase, CyReadAdrBase>; // Map AArch64->Cyclone type. + +//--- +// 7.8.9,7.8.11. Load/Store, paired +//--- + +// Address pre/post increment is a simple ALU op with one cycle latency. +def : WriteRes<WriteAdr, [CyUnitI]>; + +// LDP high register write is fused with the load, but a nop micro-op remains. +def : WriteRes<WriteLDHi, []> { + let Latency = 4; +} + +// STP is a vector op and store, except for QQ, which is just two stores. +def : SchedAlias<WriteSTP, WriteVSTShuffle>; +def : InstRW<[WriteST, WriteST], (instrs STPQi)>; + +//--- +// 7.8.13. Branches +//--- + +// Branches take a single micro-op. +// The misprediction penalty is defined as a SchedMachineModel property. +def : WriteRes<WriteBr, [CyUnitB]> {let Latency = 0;} +def : WriteRes<WriteBrReg, [CyUnitBR]> {let Latency = 0;} + +//--- +// 7.8.14. Never-issued Instructions, Barrier and Hint Operations +//--- + +// NOP,SEV,SEVL,WFE,WFI,YIELD +def : WriteRes<WriteHint, []> {let Latency = 0;} +// ISB +def : InstRW<[WriteI], (instrs ISB)>; +// SLREX,DMB,DSB +def : WriteRes<WriteBarrier, [CyUnitLS]>; + +// System instructions get an invalid latency because the latency of +// other operations across them is meaningless. +def : WriteRes<WriteSys, []> {let Latency = -1;} + +//===----------------------------------------------------------------------===// +// 7.9 Vector Unit Instructions + +// Simple vector operations take 2 cycles. +def : WriteRes<WriteV, [CyUnitV]> {let Latency = 2;} + +// Define some longer latency vector op types for Cyclone. +def CyWriteV3 : SchedWriteRes<[CyUnitV]> {let Latency = 3;} +def CyWriteV4 : SchedWriteRes<[CyUnitV]> {let Latency = 4;} +def CyWriteV5 : SchedWriteRes<[CyUnitV]> {let Latency = 5;} +def CyWriteV6 : SchedWriteRes<[CyUnitV]> {let Latency = 6;} + +// Simple floating-point operations take 2 cycles. +def : WriteRes<WriteF, [CyUnitV]> {let Latency = 2;} + +//--- +// 7.9.1 Vector Moves +//--- + +// TODO: Add Cyclone-specific zero-cycle zeros. 
LLVM currently +// generates expensive int-float conversion instead: +// FMOVDi Dd, #0.0 +// FMOVv2f64ns Vd.2d, #0.0 + +// FMOVSi,FMOVDi +def : WriteRes<WriteFImm, [CyUnitV]> {let Latency = 2;} + +// MOVI,MVNI are WriteV +// FMOVv2f32ns,FMOVv2f64ns,FMOVv4f32ns are WriteV + +// Move FPR is a register rename and single nop micro-op. +// ORR.16b Vd,Vn,Vn +// COPY is handled above in the WriteMov Variant. +def WriteVMov : SchedWriteVariant<[ + SchedVar<WriteVMovPred, [WriteX]>, + SchedVar<NoSchedPred, [WriteV]>]>; +def : InstRW<[WriteVMov], (instrs ORRv16i8)>; + +// FMOVSr,FMOVDr are WriteF. + +// MOV V,V is a WriteV. + +// CPY D,V[x] is a WriteV + +// INS V[x],V[y] is a WriteV. + +// FMOVWSr,FMOVXDr,FMOVXDHighr +def : WriteRes<WriteFCopy, [CyUnitLS]> { + let Latency = 5; +} + +// FMOVSWr,FMOVDXr +def : InstRW<[WriteLD], (instrs FMOVSWr,FMOVDXr,FMOVDXHighr)>; + +// INS V[x],R +def CyWriteCopyToFPR : WriteSequence<[WriteVLD, WriteV]>; +def : InstRW<[CyWriteCopyToFPR], (instregex "INSv")>; + +// SMOV,UMOV R,V[x] +def CyWriteCopyToGPR : WriteSequence<[WriteLD, WriteI]>; +def : InstRW<[CyWriteCopyToGPR], (instregex "SMOVv","UMOVv")>; + +// DUP V,R +def : InstRW<[CyWriteCopyToFPR], (instregex "DUPv")>; + +// DUP V,V[x] is a WriteV. + +//--- +// 7.9.2 Integer Arithmetic, Logical, and Comparisons +//--- + +// BIC,ORR V,#imm are WriteV + +def : InstRW<[CyWriteV3], (instregex "ABSv")>; + +// MVN,NEG,NOT are WriteV + +def : InstRW<[CyWriteV3], (instregex "SQABSv","SQNEGv")>; + +// ADDP is a WriteV. +def CyWriteVADDLP : SchedWriteRes<[CyUnitV]> {let Latency = 2;} +def : InstRW<[CyWriteVADDLP], (instregex "SADDLPv","UADDLPv")>; + +def : InstRW<[CyWriteV3], + (instregex "ADDVv","SMAXVv","UMAXVv","SMINVv","UMINVv")>; + +def : InstRW<[CyWriteV3], (instregex "SADDLV","UADDLV")>; + +// ADD,SUB are WriteV + +// Forward declare. +def CyWriteVABD : SchedWriteRes<[CyUnitV]> {let Latency = 3;} + +// Add/Diff and accumulate uses the vector multiply unit. 
+def CyWriteVAccum : SchedWriteRes<[CyUnitVM]> {let Latency = 3;} +def CyReadVAccum : SchedReadAdvance<1, + [CyWriteVAccum, CyWriteVADDLP, CyWriteVABD]>; + +def : InstRW<[CyWriteVAccum, CyReadVAccum], + (instregex "SADALP","UADALP")>; + +def : InstRW<[CyWriteVAccum, CyReadVAccum], + (instregex "SABAv","UABAv","SABALv","UABALv")>; + +def : InstRW<[CyWriteV3], (instregex "SQADDv","SQSUBv","UQADDv","UQSUBv")>; + +def : InstRW<[CyWriteV3], (instregex "SUQADDv","USQADDv")>; + +def : InstRW<[CyWriteV4], (instregex "ADDHNv","RADDHNv", "RSUBHNv", "SUBHNv")>; + +// WriteV includes: +// AND,BIC,CMTST,EOR,ORN,ORR +// ADDP +// SHADD,SHSUB,SRHADD,UHADD,UHSUB,URHADD +// SADDL,SSUBL,UADDL,USUBL +// SADDW,SSUBW,UADDW,USUBW + +def : InstRW<[CyWriteV3], (instregex "CMEQv","CMGEv","CMGTv", + "CMLEv","CMLTv", + "CMHIv","CMHSv")>; + +def : InstRW<[CyWriteV3], (instregex "SMAXv","SMINv","UMAXv","UMINv", + "SMAXPv","SMINPv","UMAXPv","UMINPv")>; + +def : InstRW<[CyWriteVABD], (instregex "SABDv","UABDv", + "SABDLv","UABDLv")>; + +//--- +// 7.9.3 Floating Point Arithmetic and Comparisons +//--- + +// FABS,FNEG are WriteF + +def : InstRW<[CyWriteV4], (instrs FADDPv2i32p)>; +def : InstRW<[CyWriteV5], (instrs FADDPv2i64p)>; + +def : InstRW<[CyWriteV3], (instregex "FMAXPv2i","FMAXNMPv2i", + "FMINPv2i","FMINNMPv2i")>; + +def : InstRW<[CyWriteV4], (instregex "FMAXVv","FMAXNMVv","FMINVv","FMINNMVv")>; + +def : InstRW<[CyWriteV4], (instrs FADDSrr,FADDv2f32,FADDv4f32, + FSUBSrr,FSUBv2f32,FSUBv4f32, + FADDPv2f32,FADDPv4f32, + FABD32,FABDv2f32,FABDv4f32)>; +def : InstRW<[CyWriteV5], (instrs FADDDrr,FADDv2f64, + FSUBDrr,FSUBv2f64, + FADDPv2f64, + FABD64,FABDv2f64)>; + +def : InstRW<[CyWriteV3], (instregex "FCMEQ","FCMGT","FCMLE","FCMLT")>; + +def : InstRW<[CyWriteV3], (instregex "FACGE","FACGT", + "FMAXS","FMAXD","FMAXv", + "FMINS","FMIND","FMINv", + "FMAXNMS","FMAXNMD","FMAXNMv", + "FMINNMS","FMINNMD","FMINNMv", + "FMAXPv2f","FMAXPv4f", + "FMINPv2f","FMINPv4f", + "FMAXNMPv2f","FMAXNMPv4f", + "FMINNMPv2f","FMINNMPv4f")>; + +// FCMP,FCMPE,FCCMP,FCCMPE +def : WriteRes<WriteFCmp, [CyUnitVC]> {let Latency = 4;} + +// FCSEL is a WriteF. + +//--- +// 7.9.4 Shifts and Bitfield Operations +//--- + +// SHL is a WriteV + +def CyWriteVSHR : SchedWriteRes<[CyUnitV]> {let Latency = 2;} +def : InstRW<[CyWriteVSHR], (instregex "SSHRv","USHRv")>; + +def CyWriteVSRSHR : SchedWriteRes<[CyUnitV]> {let Latency = 3;} +def : InstRW<[CyWriteVSRSHR], (instregex "SRSHRv","URSHRv")>; + +// Shift and accumulate uses the vector multiply unit. +def CyWriteVShiftAcc : SchedWriteRes<[CyUnitVM]> {let Latency = 3;} +def CyReadVShiftAcc : SchedReadAdvance<1, + [CyWriteVShiftAcc, CyWriteVSHR, CyWriteVSRSHR]>; +def : InstRW<[CyWriteVShiftAcc, CyReadVShiftAcc], + (instregex "SRSRAv","SSRAv","URSRAv","USRAv")>; + +// SSHL,USHL are WriteV. + +def : InstRW<[CyWriteV3], (instregex "SRSHLv","URSHLv")>; + +// SQSHL,SQSHLU,UQSHL are WriteV. + +def : InstRW<[CyWriteV3], (instregex "SQRSHLv","UQRSHLv")>; + +// WriteV includes: +// SHLL,SSHLL,USHLL +// SLI,SRI +// BIF,BIT,BSL +// EXT +// CLS,CLZ,CNT,RBIT,REV16,REV32,REV64,XTN +// XTN2 + +def : InstRW<[CyWriteV4], + (instregex "RSHRNv","SHRNv", + "SQRSHRNv","SQRSHRUNv","SQSHRNv","SQSHRUNv", + "UQRSHRNv","UQSHRNv","SQXTNv","SQXTUNv","UQXTNv")>; + +//--- +// 7.9.5 Multiplication +//--- + +def CyWriteVMul : SchedWriteRes<[CyUnitVM]> { let Latency = 4;} +def : InstRW<[CyWriteVMul], (instregex "MULv","SMULLv","UMULLv", + "SQDMULLv","SQDMULHv","SQRDMULHv")>; + +// FMUL,FMULX,FNMUL default to WriteFMul. 
+def : WriteRes<WriteFMul, [CyUnitVM]> { let Latency = 4;} + +def CyWriteV64Mul : SchedWriteRes<[CyUnitVM]> { let Latency = 5;} +def : InstRW<[CyWriteV64Mul], (instrs FMULDrr,FMULv2f64,FMULv2i64_indexed, + FNMULDrr,FMULX64,FMULXv2f64,FMULXv2i64_indexed)>; + +def CyReadVMulAcc : SchedReadAdvance<1, [CyWriteVMul, CyWriteV64Mul]>; +def : InstRW<[CyWriteVMul, CyReadVMulAcc], + (instregex "MLA","MLS","SMLAL","SMLSL","UMLAL","UMLSL", + "SQDMLAL","SQDMLSL")>; + +def CyWriteSMul : SchedWriteRes<[CyUnitVM]> { let Latency = 8;} +def CyWriteDMul : SchedWriteRes<[CyUnitVM]> { let Latency = 10;} +def CyReadSMul : SchedReadAdvance<4, [CyWriteSMul]>; +def CyReadDMul : SchedReadAdvance<5, [CyWriteDMul]>; + +def : InstRW<[CyWriteSMul, CyReadSMul], + (instrs FMADDSrrr,FMSUBSrrr,FNMADDSrrr,FNMSUBSrrr, + FMLAv2f32,FMLAv4f32, + FMLAv1i32_indexed,FMLAv1i64_indexed,FMLAv2i32_indexed)>; +def : InstRW<[CyWriteDMul, CyReadDMul], + (instrs FMADDDrrr,FMSUBDrrr,FNMADDDrrr,FNMSUBDrrr, + FMLAv2f64,FMLAv2i64_indexed, + FMLSv2f64,FMLSv2i64_indexed)>; + +def CyWritePMUL : SchedWriteRes<[CyUnitVD]> { let Latency = 3; } +def : InstRW<[CyWritePMUL], (instregex "PMULv", "PMULLv")>; + +//--- +// 7.9.6 Divide and Square Root +//--- + +// FDIV,FSQRT +// TODO: Add 64-bit variant with 19 cycle latency. +// TODO: Specialize FSQRT for longer latency. +def : WriteRes<WriteFDiv, [CyUnitVD, CyUnitFloatDiv]> { + let Latency = 17; + let ResourceCycles = [2, 17]; +} + +def : InstRW<[CyWriteV4], (instregex "FRECPEv","FRECPXv","URECPEv","URSQRTEv")>; + +def WriteFRSQRTE : SchedWriteRes<[CyUnitVM]> { let Latency = 4; } +def : InstRW<[WriteFRSQRTE], (instregex "FRSQRTEv")>; + +def WriteFRECPS : SchedWriteRes<[CyUnitVM]> { let Latency = 8; } +def WriteFRSQRTS : SchedWriteRes<[CyUnitVM]> { let Latency = 10; } +def : InstRW<[WriteFRECPS], (instregex "FRECPSv")>; +def : InstRW<[WriteFRSQRTS], (instregex "FRSQRTSv")>; + +//--- +// 7.9.7 Integer-FP Conversions +//--- + +// FCVT lengthen f16/s32 +def : InstRW<[WriteV], (instrs FCVTSHr,FCVTDHr,FCVTDSr)>; + +// FCVT,FCVTN,FCVTXN +// SCVTF,UCVTF V,V +// FRINT(AIMNPXZ) V,V +def : WriteRes<WriteFCvt, [CyUnitV]> {let Latency = 4;} + +// SCVT/UCVT S/D, Rd = VLD5+V4: 9 cycles. +def CyWriteCvtToFPR : WriteSequence<[WriteVLD, CyWriteV4]>; +def : InstRW<[CyWriteCopyToFPR], (instregex "FCVT[AMNPZ][SU][SU][WX][SD]r")>; + +// FCVT Rd, S/D = V6+LD4: 10 cycles +def CyWriteCvtToGPR : WriteSequence<[CyWriteV6, WriteLD]>; +def : InstRW<[CyWriteCvtToGPR], (instregex "[SU]CVTF[SU][WX][SD]r")>; + +// FCVTL is a WriteV + +//--- +// 7.9.8-7.9.10 Cryptography, Data Transposition, Table Lookup +//--- + +def CyWriteCrypto2 : SchedWriteRes<[CyUnitVD]> {let Latency = 2;} +def : InstRW<[CyWriteCrypto2], (instrs AESIMCrr, AESMCrr, SHA1Hrr, + AESDrr, AESErr, SHA1SU1rr, SHA256SU0rr, + SHA1SU0rrr)>; + +def CyWriteCrypto3 : SchedWriteRes<[CyUnitVD]> {let Latency = 3;} +def : InstRW<[CyWriteCrypto3], (instrs SHA256SU1rrr)>; + +def CyWriteCrypto6 : SchedWriteRes<[CyUnitVD]> {let Latency = 6;} +def : InstRW<[CyWriteCrypto6], (instrs SHA1Crrr, SHA1Mrrr, SHA1Prrr, + SHA256Hrrr,SHA256H2rrr)>; + +// TRN,UZP,ZUP are WriteV. + +// TBL,TBX are WriteV. + +//--- +// 7.9.11-7.9.14 Load/Store, single element and paired +//--- + +// Loading into the vector unit takes 5 cycles vs 4 for integer loads. +def : WriteRes<WriteVLD, [CyUnitLS]> { + let Latency = 5; +} + +// Store-load forwarding is 4 cycles. +def : WriteRes<WriteVST, [CyUnitLS]> { + let Latency = 4; +} + +// WriteVLDPair/VSTPair sequences are expanded by the target description. 
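+
+// Editorial note, not taken from the Cyclone tuning guide: the ReadAdvance /
+// SchedReadAdvance entries in this model (CyReadVMulAcc, CyReadSMul and
+// CyReadDMul above, ReadVLD in the section below) describe operand forwarding.
+// An advanced read may occur N cycles after issue, so a dependent chain
+// through that operand observes the producer's latency reduced by N. For
+// example, FMLAv4f32 is mapped to [CyWriteSMul, CyReadSMul]; with a latency
+// of 8 and an advance of 4, a chain of dependent FMLAs accumulating into the
+// same register sees an effective 4-cycle accumulator-to-accumulator latency.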
+ +//--- +// 7.9.15 Load, element operations +//--- + +// Only the first WriteVLD and WriteAdr for writeback matches def operands. +// Subsequent WriteVLDs consume resources. Since all loaded values have the +// same latency, this is acceptable. + +// Vd is read 5 cycles after issuing the vector load. +def : ReadAdvance<ReadVLD, 5>; + +def : InstRW<[WriteVLD], + (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLD, WriteAdr], + (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>; + +// Register writes from the load's high half are fused micro-ops. +def : InstRW<[WriteVLD], + (instregex "LD1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[WriteVLD, WriteAdr], + (instregex "LD1Twov(8b|4h|2s|1d)_POST")>; +def : InstRW<[WriteVLD, WriteVLD], + (instregex "LD1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLD, WriteAdr, WriteVLD], + (instregex "LD1Twov(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVLD, WriteVLD], + (instregex "LD1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[WriteVLD, WriteAdr, WriteVLD], + (instregex "LD1Threev(8b|4h|2s|1d)_POST")>; +def : InstRW<[WriteVLD, WriteVLD, WriteVLD], + (instregex "LD1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLD, WriteAdr, WriteVLD, WriteVLD], + (instregex "LD1Threev(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVLD, WriteVLD], + (instregex "LD1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[WriteVLD, WriteAdr, WriteVLD], + (instregex "LD1Fourv(8b|4h|2s|1d)_POST")>; +def : InstRW<[WriteVLD, WriteVLD, WriteVLD, WriteVLD], + (instregex "LD1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLD, WriteAdr, WriteVLD, WriteVLD, WriteVLD], + (instregex "LD1Fourv(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVLDShuffle, ReadVLD], + (instregex "LD1i(8|16|32)$")>; +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr], + (instregex "LD1i(8|16|32)_POST")>; + +def : InstRW<[WriteVLDShuffle, ReadVLD], (instrs LD1i64)>; +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr],(instrs LD1i64_POST)>; + +def : InstRW<[WriteVLDShuffle], + (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLDShuffle, WriteAdr], + (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +def : InstRW<[WriteVLDShuffle, WriteV], + (instregex "LD2Twov(8b|4h|2s)$")>; +def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV], + (instregex "LD2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[WriteVLDShuffle, WriteVLDShuffle], + (instregex "LD2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle], + (instregex "LD2Twov(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV], + (instregex "LD2i(8|16|32)$")>; +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV], + (instregex "LD2i(8|16|32)_POST")>; +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV], + (instregex "LD2i64$")>; +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV], + (instregex "LD2i64_POST")>; + +def : InstRW<[WriteVLDShuffle, WriteV], + (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV], + (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV], + (instregex "LD3Threev(8b|4h|2s)$")>; +def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV], + (instregex "LD3Threev(8b|4h|2s)_POST")>; +def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVLDShuffle], + (instregex "LD3Threev(16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVLDShuffle], + (instregex "LD3Threev(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV], + 
(instregex "LD3i(8|16|32)$")>; +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV], + (instregex "LD3i(8|16|32)_POST")>; + +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV], + (instregex "LD3i64$")>; +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV], + (instregex "LD3i64_POST")>; + +def : InstRW<[WriteVLDShuffle, WriteV, WriteV], + (instregex "LD3Rv(8b|4h|2s|16b|8h|4s)$")>; +def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV], + (instregex "LD3Rv(8b|4h|2s|16b|8h|4s)_POST")>; + +def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV], + (instrs LD3Rv1d,LD3Rv2d)>; +def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV], + (instrs LD3Rv1d_POST,LD3Rv2d_POST)>; + +def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV], + (instregex "LD4Fourv(8b|4h|2s)$")>; +def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV], + (instregex "LD4Fourv(8b|4h|2s)_POST")>; +def : InstRW<[WriteVLDPairShuffle, WriteVLDPairShuffle, + WriteVLDPairShuffle, WriteVLDPairShuffle], + (instregex "LD4Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLDPairShuffle, WriteAdr, WriteVLDPairShuffle, + WriteVLDPairShuffle, WriteVLDPairShuffle], + (instregex "LD4Fourv(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV, WriteV], + (instregex "LD4i(8|16|32)$")>; +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV, WriteV], + (instregex "LD4i(8|16|32)_POST")>; + + +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV, WriteV], + (instrs LD4i64)>; +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV], + (instrs LD4i64_POST)>; + +def : InstRW<[WriteVLDShuffle, WriteV, WriteV, WriteV], + (instregex "LD4Rv(8b|4h|2s|16b|8h|4s)$")>; +def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV, WriteV], + (instregex "LD4Rv(8b|4h|2s|16b|8h|4s)_POST")>; + +def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV], + (instrs LD4Rv1d,LD4Rv2d)>; +def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV], + (instrs LD4Rv1d_POST,LD4Rv2d_POST)>; + +//--- +// 7.9.16 Store, element operations +//--- + +// Only the WriteAdr for writeback matches a def operands. +// Subsequent WriteVLDs only consume resources. 
+ +def : InstRW<[WriteVST], + (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[WriteAdr, WriteVST], + (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVSTShuffle], + (instregex "ST1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[WriteAdr, WriteVSTShuffle], + (instregex "ST1Twov(8b|4h|2s|1d)_POST")>; +def : InstRW<[WriteVST, WriteVST], + (instregex "ST1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[WriteAdr, WriteVST, WriteVST], + (instregex "ST1Twov(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVSTShuffle, WriteVST], + (instregex "ST1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVST], + (instregex "ST1Threev(8b|4h|2s|1d)_POST")>; +def : InstRW<[WriteVST, WriteVST, WriteVST], + (instregex "ST1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[WriteAdr, WriteVST, WriteVST, WriteVST], + (instregex "ST1Threev(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], + (instregex "ST1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], + (instregex "ST1Fourv(8b|4h|2s|1d)_POST")>; +def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST], + (instregex "ST1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[WriteAdr, WriteVST, WriteVST, WriteVST, WriteVST], + (instregex "ST1Fourv(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVSTShuffle], (instregex "ST1i(8|16|32)$")>; +def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST1i(8|16|32)_POST")>; + +def : InstRW<[WriteVSTShuffle], (instrs ST1i64)>; +def : InstRW<[WriteAdr, WriteVSTShuffle], (instrs ST1i64_POST)>; + +def : InstRW<[WriteVSTShuffle], + (instregex "ST2Twov(8b|4h|2s)$")>; +def : InstRW<[WriteAdr, WriteVSTShuffle], + (instregex "ST2Twov(8b|4h|2s)_POST")>; +def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], + (instregex "ST2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], + (instregex "ST2Twov(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVSTShuffle], (instregex "ST2i(8|16|32)$")>; +def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST2i(8|16|32)_POST")>; +def : InstRW<[WriteVSTShuffle], (instrs ST2i64)>; +def : InstRW<[WriteAdr, WriteVSTShuffle], (instrs ST2i64_POST)>; + +def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], + (instregex "ST3Threev(8b|4h|2s)$")>; +def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], + (instregex "ST3Threev(8b|4h|2s)_POST")>; +def : InstRW<[WriteVSTShuffle, WriteVSTShuffle, WriteVSTShuffle], + (instregex "ST3Threev(16b|8h|4s|2d)$")>; +def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle, WriteVSTShuffle], + (instregex "ST3Threev(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVSTShuffle], (instregex "ST3i(8|16|32)$")>; +def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST3i(8|16|32)_POST")>; + +def :InstRW<[WriteVSTShuffle, WriteVSTShuffle], (instrs ST3i64)>; +def :InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], (instrs ST3i64_POST)>; + +def : InstRW<[WriteVSTPairShuffle, WriteVSTPairShuffle], + (instregex "ST4Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[WriteAdr, WriteVSTPairShuffle, WriteVSTPairShuffle], + (instregex "ST4Fourv(8b|4h|2s|1d)_POST")>; +def : InstRW<[WriteVSTPairShuffle, WriteVSTPairShuffle, + WriteVSTPairShuffle, WriteVSTPairShuffle], + (instregex "ST4Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[WriteAdr, WriteVSTPairShuffle, WriteVSTPairShuffle, + WriteVSTPairShuffle, WriteVSTPairShuffle], + (instregex "ST4Fourv(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVSTPairShuffle], (instregex "ST4i(8|16|32)$")>; +def : InstRW<[WriteAdr, WriteVSTPairShuffle], (instregex 
"ST4i(8|16|32)_POST")>; + +def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], (instrs ST4i64)>; +def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],(instrs ST4i64_POST)>; + +// Atomic operations are not supported. +def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } + +//--- +// Unused SchedRead types +//--- + +def : ReadAdvance<ReadI, 0>; +def : ReadAdvance<ReadISReg, 0>; +def : ReadAdvance<ReadIEReg, 0>; +def : ReadAdvance<ReadIM, 0>; +def : ReadAdvance<ReadIMA, 0>; +def : ReadAdvance<ReadID, 0>; + +} // SchedModel = CycloneModel diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedExynosM1.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedExynosM1.td new file mode 100644 index 000000000..ecc68aed1 --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedExynosM1.td @@ -0,0 +1,847 @@ +//=- AArch64SchedExynosM1.td - Samsung Exynos M1 Sched Defs --*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for the Samsung Exynos M1 to support +// instruction scheduling and other instruction cost heuristics. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// The Exynos-M1 is a traditional superscalar microprocessor with a +// 4-wide in-order stage for decode and dispatch and a wider issue stage. +// The execution units and loads and stores are out-of-order. + +def ExynosM1Model : SchedMachineModel { + let IssueWidth = 4; // Up to 4 uops per cycle. + let MicroOpBufferSize = 96; // ROB size. + let LoopMicroOpBufferSize = 24; // Based on the instruction queue size. + let LoadLatency = 4; // Optimistic load cases. + let MispredictPenalty = 14; // Minimum branch misprediction penalty. + let CompleteModel = 1; // Use the default model otherwise. + + list<Predicate> UnsupportedFeatures = [HasSVE]; +} + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available on the Exynos-M1, +// which has 9 pipelines, each with its own queue with out-of-order dispatch. 
+ +let SchedModel = ExynosM1Model in { + +def M1UnitA : ProcResource<2>; // Simple integer +def M1UnitC : ProcResource<1>; // Simple and complex integer +def M1UnitD : ProcResource<1>; // Integer division (inside C, serialized) +def M1UnitB : ProcResource<2>; // Branch +def M1UnitL : ProcResource<1>; // Load +def M1UnitS : ProcResource<1>; // Store +def M1PipeF0 : ProcResource<1>; // FP #0 +let Super = M1PipeF0 in { + def M1UnitFMAC : ProcResource<1>; // FP multiplication + def M1UnitNAL0 : ProcResource<1>; // Simple vector + def M1UnitNMISC : ProcResource<1>; // Miscellanea + def M1UnitFCVT : ProcResource<1>; // FP conversion + def M1UnitNCRYPT : ProcResource<1>; // Cryptographic +} +def M1PipeF1 : ProcResource<1>; // FP #1 +let Super = M1PipeF1 in { + def M1UnitFADD : ProcResource<1>; // Simple FP + def M1UnitNAL1 : ProcResource<1>; // Simple vector + def M1UnitFVAR : ProcResource<1>; // FP division & square root (serialized) + def M1UnitFST : ProcResource<1>; // FP store +} + +def M1UnitALU : ProcResGroup<[M1UnitA, + M1UnitC]>; // All integer +def M1UnitNALU : ProcResGroup<[M1UnitNAL0, + M1UnitNAL1]>; // All simple vector + +//===----------------------------------------------------------------------===// +// Predicates. + +def M1BranchLinkFastPred : SchedPredicate<[{MI->getOpcode() == AArch64::BLR && + MI->getOperand(0).getReg() != AArch64::LR}]>; +def M1ShiftLeftFastPred : SchedPredicate<[{TII->isExynosShiftLeftFast(*MI)}]>; + +//===----------------------------------------------------------------------===// +// Coarse scheduling model. + +def M1WriteA1 : SchedWriteRes<[M1UnitALU]> { let Latency = 1; } +def M1WriteA2 : SchedWriteRes<[M1UnitALU]> { let Latency = 2; } +def M1WriteAA : SchedWriteRes<[M1UnitALU]> { let Latency = 2; + let ResourceCycles = [2]; } +def M1WriteAB : SchedWriteRes<[M1UnitALU, + M1UnitC]> { let Latency = 1; + let NumMicroOps = 2; } +def M1WriteAC : SchedWriteRes<[M1UnitALU, + M1UnitALU, + M1UnitC]> { let Latency = 2; + let NumMicroOps = 3; } +def M1WriteAD : SchedWriteRes<[M1UnitALU, + M1UnitC]> { let Latency = 2; + let NumMicroOps = 2; } +def M1WriteAX : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteA1]>, + SchedVar<NoSchedPred, [M1WriteAA]>]>; +def M1WriteC1 : SchedWriteRes<[M1UnitC]> { let Latency = 1; } +def M1WriteC2 : SchedWriteRes<[M1UnitC]> { let Latency = 2; } + +def M1WriteB1 : SchedWriteRes<[M1UnitB]> { let Latency = 1; } +def M1WriteBX : SchedWriteVariant<[SchedVar<M1BranchLinkFastPred, [M1WriteAB]>, + SchedVar<NoSchedPred, [M1WriteAC]>]>; + +def M1WriteL5 : SchedWriteRes<[M1UnitL]> { let Latency = 5; } +def M1WriteL6 : SchedWriteRes<[M1UnitL]> { let Latency = 6; } +def M1WriteLA : SchedWriteRes<[M1UnitL]> { let Latency = 6; + let ResourceCycles = [2]; } +def M1WriteLB : SchedWriteRes<[M1UnitL, + M1UnitA]> { let Latency = 4; + let NumMicroOps = 2; } +def M1WriteLC : SchedWriteRes<[M1UnitL, + M1UnitA]> { let Latency = 5; + let NumMicroOps = 2; } +def M1WriteLD : SchedWriteRes<[M1UnitL, + M1UnitA]> { let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [2, 1]; } +def M1WriteLH : SchedWriteRes<[]> { let Latency = 5; + let NumMicroOps = 0; } +def M1WriteLX : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteL5]>, + SchedVar<NoSchedPred, [M1WriteLC]>]>; +def M1WriteLY : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteL5]>, + SchedVar<NoSchedPred, [M1WriteLD]>]>; + +def M1WriteS1 : SchedWriteRes<[M1UnitS]> { let Latency = 1; } +def M1WriteS3 : SchedWriteRes<[M1UnitS]> { let Latency = 3; } +def M1WriteS4 : 
SchedWriteRes<[M1UnitS]> { let Latency = 4; } +def M1WriteSA : SchedWriteRes<[M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST]> { let Latency = 1; + let NumMicroOps = 2; } +def M1WriteSB : SchedWriteRes<[M1UnitS, + M1UnitFST, + M1UnitA]> { let Latency = 3; + let NumMicroOps = 2; } +def M1WriteSC : SchedWriteRes<[M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST, + M1UnitA]> { let Latency = 3; + let NumMicroOps = 3; } +def M1WriteSD : SchedWriteRes<[M1UnitS, + M1UnitFST, + M1UnitA]> { let Latency = 1; + let NumMicroOps = 2; } +def M1WriteSE : SchedWriteRes<[M1UnitS, + M1UnitA]> { let Latency = 2; + let NumMicroOps = 2; } +def M1WriteSX : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteS1]>, + SchedVar<NoSchedPred, [M1WriteSE]>]>; +def M1WriteSY : SchedWriteVariant<[SchedVar<M1ShiftLeftFastPred, [M1WriteS1]>, + SchedVar<NoSchedPred, [M1WriteSB]>]>; + +def M1ReadAdrBase : SchedReadVariant<[SchedVar<ScaledIdxPred, [ReadDefault]>, + SchedVar<NoSchedPred, [ReadDefault]>]>; + +// Branch instructions. +def : WriteRes<WriteBr, []> { let Latency = 0; } +def : WriteRes<WriteBrReg, [M1UnitC]> { let Latency = 1; } + +// Arithmetic and logical integer instructions. +def : WriteRes<WriteI, [M1UnitALU]> { let Latency = 1; } +def : WriteRes<WriteISReg, [M1UnitALU]> { let Latency = 1; } +def : WriteRes<WriteIEReg, [M1UnitALU]> { let Latency = 1; } +def : WriteRes<WriteIS, [M1UnitALU]> { let Latency = 1; } + +// Move instructions. +def : WriteRes<WriteImm, [M1UnitALU]> { let Latency = 1; } + +// Divide and multiply instructions. +def : WriteRes<WriteID32, [M1UnitC, + M1UnitD]> { let Latency = 13; + let ResourceCycles = [1, 13]; } +def : WriteRes<WriteID64, [M1UnitC, + M1UnitD]> { let Latency = 21; + let ResourceCycles = [1, 21]; } +// TODO: Long multiplication take 5 cycles and also the ALU. +def : WriteRes<WriteIM32, [M1UnitC]> { let Latency = 3; } +def : WriteRes<WriteIM64, [M1UnitC]> { let Latency = 4; + let ResourceCycles = [2]; } + +// Miscellaneous instructions. +def : WriteRes<WriteExtr, [M1UnitALU, + M1UnitALU]> { let Latency = 2; + let NumMicroOps = 2; } + +// Addressing modes. +def : WriteRes<WriteAdr, []> { let Latency = 1; + let NumMicroOps = 0; } +def : SchedAlias<ReadAdrBase, M1ReadAdrBase>; + +// Load instructions. +def : WriteRes<WriteLD, [M1UnitL]> { let Latency = 4; } +def : WriteRes<WriteLDHi, []> { let Latency = 4; + let NumMicroOps = 0; } +def : SchedAlias<WriteLDIdx, M1WriteLX>; + +// Store instructions. +def : WriteRes<WriteST, [M1UnitS]> { let Latency = 1; } +def : WriteRes<WriteSTP, [M1UnitS]> { let Latency = 1; } +def : WriteRes<WriteSTX, [M1UnitS]> { let Latency = 1; } +def : SchedAlias<WriteSTIdx, M1WriteSX>; + +// FP data instructions. +def : WriteRes<WriteF, [M1UnitFADD]> { let Latency = 3; } +def : WriteRes<WriteFCmp, [M1UnitNMISC]> { let Latency = 4; } +def : WriteRes<WriteFDiv, [M1UnitFVAR]> { let Latency = 15; + let ResourceCycles = [15]; } +def : WriteRes<WriteFMul, [M1UnitFMAC]> { let Latency = 4; } + +// FP miscellaneous instructions. +def : WriteRes<WriteFCvt, [M1UnitFCVT]> { let Latency = 3; } +def : WriteRes<WriteFImm, [M1UnitNALU]> { let Latency = 1; } +def : WriteRes<WriteFCopy, [M1UnitS]> { let Latency = 4; } + +// FP load instructions. +def : WriteRes<WriteVLD, [M1UnitL]> { let Latency = 5; } + +// FP store instructions. +def : WriteRes<WriteVST, [M1UnitS, + M1UnitFST]> { let Latency = 1; + let NumMicroOps = 1; } + +// ASIMD FP instructions. +def : WriteRes<WriteV, [M1UnitFADD]> { let Latency = 3; } + +// Other miscellaneous instructions. 
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } +def : WriteRes<WriteBarrier, []> { let Latency = 1; } +def : WriteRes<WriteHint, []> { let Latency = 1; } +def : WriteRes<WriteSys, []> { let Latency = 1; } + +//===----------------------------------------------------------------------===// +// Fast forwarding. + +// TODO: Add FP register forwarding rules. +def : ReadAdvance<ReadI, 0>; +def : ReadAdvance<ReadISReg, 0>; +def : ReadAdvance<ReadIEReg, 0>; +def : ReadAdvance<ReadIM, 0>; +// TODO: The forwarding for WriteIM32 saves actually 2 cycles. +def : ReadAdvance<ReadIMA, 3, [WriteIM32, WriteIM64]>; +def : ReadAdvance<ReadID, 0>; +def : ReadAdvance<ReadExtrHi, 0>; +def : ReadAdvance<ReadAdrBase, 0>; +def : ReadAdvance<ReadVLD, 0>; + +//===----------------------------------------------------------------------===// +// Finer scheduling model. + +def M1WriteNEONA : SchedWriteRes<[M1UnitNALU, + M1UnitNALU, + M1UnitFADD]> { let Latency = 9; + let NumMicroOps = 3; } +def M1WriteNEONB : SchedWriteRes<[M1UnitNALU, + M1UnitFST]> { let Latency = 5; + let NumMicroOps = 2;} +def M1WriteNEONC : SchedWriteRes<[M1UnitNALU, + M1UnitFST]> { let Latency = 6; + let NumMicroOps = 2; } +def M1WriteNEOND : SchedWriteRes<[M1UnitNALU, + M1UnitFST, + M1UnitL]> { let Latency = 10; + let NumMicroOps = 3; } +def M1WriteNEONE : SchedWriteRes<[M1UnitFCVT, + M1UnitFST]> { let Latency = 8; + let NumMicroOps = 2; } +def M1WriteNEONF : SchedWriteRes<[M1UnitFCVT, + M1UnitFST, + M1UnitL]> { let Latency = 13; + let NumMicroOps = 3; } +def M1WriteNEONG : SchedWriteRes<[M1UnitNMISC, + M1UnitFST]> { let Latency = 6; + let NumMicroOps = 2; } +def M1WriteNEONH : SchedWriteRes<[M1UnitNALU, + M1UnitFST]> { let Latency = 3; + let NumMicroOps = 2; } +def M1WriteNEONI : SchedWriteRes<[M1UnitFST, + M1UnitL]> { let Latency = 9; + let NumMicroOps = 2; } +def M1WriteNEONJ : SchedWriteRes<[M1UnitNMISC, + M1UnitFMAC]> { let Latency = 6; + let NumMicroOps = 2; } +def M1WriteNEONK : SchedWriteRes<[M1UnitNMISC, + M1UnitFMAC]> { let Latency = 7; + let NumMicroOps = 2; } +def M1WriteNEONL : SchedWriteRes<[M1UnitNALU]> { let Latency = 2; + let ResourceCycles = [2]; } +def M1WriteFADD3 : SchedWriteRes<[M1UnitFADD]> { let Latency = 3; } +def M1WriteFCVT3 : SchedWriteRes<[M1UnitFCVT]> { let Latency = 3; } +def M1WriteFCVT4 : SchedWriteRes<[M1UnitFCVT]> { let Latency = 4; } +def M1WriteFMAC4 : SchedWriteRes<[M1UnitFMAC]> { let Latency = 4; } +def M1WriteFMAC5 : SchedWriteRes<[M1UnitFMAC]> { let Latency = 5; } +// TODO +def M1WriteFVAR15 : SchedWriteRes<[M1UnitFVAR]> { let Latency = 15; + let ResourceCycles = [15]; } +def M1WriteFVAR23 : SchedWriteRes<[M1UnitFVAR]> { let Latency = 23; + let ResourceCycles = [23]; } +def M1WriteNALU1 : SchedWriteRes<[M1UnitNALU]> { let Latency = 1; } +def M1WriteNALU2 : SchedWriteRes<[M1UnitNALU]> { let Latency = 2; } +def M1WriteNAL11 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 1; } +def M1WriteNAL12 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 2; } +def M1WriteNAL13 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 3; } +def M1WriteNCRYPT1 : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; } +def M1WriteNCRYPT5 : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 5; } +def M1WriteNMISC1 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 1; } +def M1WriteNMISC2 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 2; } +def M1WriteNMISC3 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 3; } +def M1WriteNMISC4 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 4; } +def M1WriteTB : SchedWriteRes<[M1UnitC, + M1UnitALU]> { let Latency 
= 2; + let NumMicroOps = 2; } +def M1WriteVLDA : SchedWriteRes<[M1UnitL, + M1UnitL]> { let Latency = 6; + let NumMicroOps = 2; } +def M1WriteVLDB : SchedWriteRes<[M1UnitL, + M1UnitL, + M1UnitL]> { let Latency = 7; + let NumMicroOps = 3; } +def M1WriteVLDC : SchedWriteRes<[M1UnitL, + M1UnitL, + M1UnitL, + M1UnitL]> { let Latency = 8; + let NumMicroOps = 4; } +def M1WriteVLDD : SchedWriteRes<[M1UnitL, + M1UnitNALU]> { let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [2, 1]; } +def M1WriteVLDE : SchedWriteRes<[M1UnitL, + M1UnitNALU]> { let Latency = 6; + let NumMicroOps = 2; } +def M1WriteVLDF : SchedWriteRes<[M1UnitL, + M1UnitL]> { let Latency = 10; + let NumMicroOps = 2; + let ResourceCycles = [1, 1]; } +def M1WriteVLDG : SchedWriteRes<[M1UnitL, + M1UnitNALU, + M1UnitNALU]> { let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [2, 1, 1]; } +def M1WriteVLDH : SchedWriteRes<[M1UnitL, + M1UnitNALU, + M1UnitNALU]> { let Latency = 6; + let NumMicroOps = 3; } +def M1WriteVLDI : SchedWriteRes<[M1UnitL, + M1UnitL, + M1UnitL]> { let Latency = 12; + let NumMicroOps = 3; + let ResourceCycles = [2, 2, 2]; } +def M1WriteVLDJ : SchedWriteRes<[M1UnitL, + M1UnitNALU, + M1UnitNALU, + M1UnitNALU]> { let Latency = 9; + let NumMicroOps = 4; + let ResourceCycles = [2, 1, 1, 1]; } +def M1WriteVLDK : SchedWriteRes<[M1UnitL, + M1UnitNALU, + M1UnitNALU, + M1UnitNALU, + M1UnitNALU]> { let Latency = 9; + let NumMicroOps = 5; + let ResourceCycles = [2, 1, 1, 1, 1]; } +def M1WriteVLDL : SchedWriteRes<[M1UnitL, + M1UnitNALU, + M1UnitNALU, + M1UnitL, + M1UnitNALU]> { let Latency = 7; + let NumMicroOps = 5; + let ResourceCycles = [1, 1, 1, 1, 1]; } +def M1WriteVLDM : SchedWriteRes<[M1UnitL, + M1UnitNALU, + M1UnitNALU, + M1UnitL, + M1UnitNALU, + M1UnitNALU]> { let Latency = 7; + let NumMicroOps = 6; + let ResourceCycles = [1, 1, 1, 1, 1, 1]; } +def M1WriteVLDN : SchedWriteRes<[M1UnitL, + M1UnitL, + M1UnitL, + M1UnitL]> { let Latency = 14; + let NumMicroOps = 4; + let ResourceCycles = [2, 1, 2, 1]; } +def M1WriteVSTA : WriteSequence<[WriteVST], 2>; +def M1WriteVSTB : WriteSequence<[WriteVST], 3>; +def M1WriteVSTC : WriteSequence<[WriteVST], 4>; +def M1WriteVSTD : SchedWriteRes<[M1UnitS, + M1UnitFST, + M1UnitFST]> { let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [7, 1, 1]; } +def M1WriteVSTE : SchedWriteRes<[M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST, + M1UnitFST]> { let Latency = 8; + let NumMicroOps = 3; + let ResourceCycles = [7, 1, 1, 1, 1]; } +def M1WriteVSTF : SchedWriteRes<[M1UnitNALU, + M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST, + M1UnitFST, + M1UnitFST]> { let Latency = 15; + let NumMicroOps = 5; + let ResourceCycles = [1, 7, 1, 7, 1, 1, 1]; } +def M1WriteVSTG : SchedWriteRes<[M1UnitNALU, + M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST, + M1UnitFST, + M1UnitFST]> { let Latency = 16; + let NumMicroOps = 6; + let ResourceCycles = [1, 7, 1, 7, 1, 1, 1, 1, 1]; } +def M1WriteVSTH : SchedWriteRes<[M1UnitNALU, + M1UnitS, + M1UnitFST, + M1UnitFST, + M1UnitFST]> { let Latency = 14; + let NumMicroOps = 4; + let ResourceCycles = [1, 7, 1, 7, 1]; } +def M1WriteVSTI : SchedWriteRes<[M1UnitNALU, + M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST, + M1UnitFST, + M1UnitFST]> { let Latency = 17; + let NumMicroOps = 7; + let ResourceCycles = [1, 7, 1, 7, 1, 1, 1, 1, 1, 1, 1]; } + +// Branch instructions +def : InstRW<[M1WriteB1], (instrs Bcc)>; +def : InstRW<[M1WriteA1], (instrs BL)>; +def : InstRW<[M1WriteBX], (instrs 
BLR)>; +def : InstRW<[M1WriteC1], (instregex "^CBN?Z[WX]")>; +def : InstRW<[M1WriteAD], (instregex "^TBN?Z[WX]")>; + +// Arithmetic and logical integer instructions. +def : InstRW<[M1WriteA1], (instrs COPY)>; +def : InstRW<[M1WriteAX], (instregex ".+r[sx](64)?$")>; + +// Divide and multiply instructions. + +// Miscellaneous instructions. + +// Load instructions. +def : InstRW<[M1WriteLB, + WriteLDHi, + WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>; +def : InstRW<[M1WriteLX, + ReadAdrBase], (instregex "^PRFMro[WX]")>; + +// Store instructions. + +// FP data instructions. +def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)[DS]r")>; +def : InstRW<[M1WriteFADD3], (instregex "^F(ADD|SUB)[DS]rr")>; +def : InstRW<[M1WriteNEONG], (instregex "^FCCMPE?[DS]rr")>; +def : InstRW<[M1WriteNMISC4], (instregex "^FCMPE?[DS]r")>; +def : InstRW<[M1WriteFVAR15], (instrs FDIVSrr)>; +def : InstRW<[M1WriteFVAR23], (instrs FDIVDrr)>; +def : InstRW<[M1WriteNMISC2], (instregex "^F(MAX|MIN).+rr")>; +def : InstRW<[M1WriteFMAC4], (instregex "^FN?MUL[DS]rr")>; +def : InstRW<[M1WriteFMAC5], (instregex "^FN?M(ADD|SUB)[DS]rrr")>; +def : InstRW<[M1WriteFCVT3], (instregex "^FRINT.+r")>; +def : InstRW<[M1WriteNEONH], (instregex "^FCSEL[DS]rrr")>; +def : InstRW<[M1WriteFVAR15], (instrs FSQRTSr)>; +def : InstRW<[M1WriteFVAR23], (instrs FSQRTDr)>; + +// FP miscellaneous instructions. +def : InstRW<[M1WriteFCVT3], (instregex "^FCVT[DS][DS]r")>; +def : InstRW<[M1WriteNEONF], (instregex "^[FSU]CVT[AMNPZ][SU](_Int)?[SU]?[XW]?[DS]?[rds]i?")>; +def : InstRW<[M1WriteNEONE], (instregex "^[SU]CVTF[SU]")>; +def : InstRW<[M1WriteNALU1], (instregex "^FMOV[DS][ir]")>; +def : InstRW<[M1WriteFCVT4], (instregex "^[FU](RECP|RSQRT)Ev1")>; +def : InstRW<[M1WriteNMISC1], (instregex "^FRECPXv1")>; +def : InstRW<[M1WriteFMAC5], (instregex "^F(RECP|RSQRT)S(16|32|64)")>; +def : InstRW<[M1WriteS4], (instregex "^FMOV[WX][DS](High)?r")>; +def : InstRW<[M1WriteNEONI], (instregex "^FMOV[DS][WX](High)?r")>; + +// FP load instructions. +def : InstRW<[WriteVLD], (instregex "^LDR[DSQ]l")>; +def : InstRW<[WriteVLD], (instregex "^LDUR[BDHSQ]i")>; +def : InstRW<[WriteVLD, + WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>; +def : InstRW<[WriteVLD], (instregex "^LDR[BDHSQ]ui")>; +def : InstRW<[M1WriteLY, + ReadAdrBase], (instregex "^LDR[BDHS]ro[WX]")>; +def : InstRW<[M1WriteLD, + ReadAdrBase], (instregex "^LDRQro[WX]")>; +def : InstRW<[WriteVLD, + M1WriteLH], (instregex "^LDN?P[DS]i")>; +def : InstRW<[M1WriteLA, + M1WriteLH], (instregex "^LDN?PQi")>; +def : InstRW<[M1WriteLC, + M1WriteLH, + WriteAdr], (instregex "^LDP[DS](post|pre)")>; +def : InstRW<[M1WriteLD, + M1WriteLH, + WriteAdr], (instregex "^LDPQ(post|pre)")>; + +// FP store instructions. +def : InstRW<[WriteVST], (instregex "^STUR[BDHSQ]i")>; +def : InstRW<[WriteVST, + WriteAdr], (instregex "^STR[BDHSQ](post|pre)")>; +def : InstRW<[WriteVST], (instregex "^STR[BDHSQ]ui")>; +def : InstRW<[M1WriteSY, + ReadAdrBase], (instregex "^STR[BDHS]ro[WX]")>; +def : InstRW<[M1WriteSB, + ReadAdrBase], (instregex "^STRQro[WX]")>; +def : InstRW<[WriteVST], (instregex "^STN?P[DSQ]i")>; +def : InstRW<[WriteVST, + WriteAdr], (instregex "^STP[DS](post|pre)")>; +def : InstRW<[M1WriteSC, + WriteAdr], (instregex "^STPQ(post|pre)")>; + +// ASIMD instructions. 
+def : InstRW<[M1WriteNMISC3], (instregex "^[SU]ABAL?v")>; +def : InstRW<[M1WriteNMISC1], (instregex "^[SU]ABDL?v")>; +def : InstRW<[M1WriteNMISC1], (instregex "^(SQ)?ABSv")>; +def : InstRW<[M1WriteNMISC1], (instregex "^SQNEGv")>; +def : InstRW<[M1WriteNALU1], (instregex "^(ADD|NEG|SUB)v")>; +def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?H(ADD|SUB)v")>; +def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?AD[AD](L|LP|P|W)V?2?v")>; +def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?SUB[LW]2?v")>; +def : InstRW<[M1WriteNMISC3], (instregex "^R?(ADD|SUB)HN?2?v")>; +def : InstRW<[M1WriteNMISC3], (instregex "^[SU]+Q(ADD|SUB)v")>; +def : InstRW<[M1WriteNMISC3], (instregex "^[SU]RHADDv")>; +def : InstRW<[M1WriteNMISC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>; +def : InstRW<[M1WriteNALU1], (instregex "^CMTSTv")>; +def : InstRW<[M1WriteNALU1], (instregex "^(AND|BIC|EOR|MVNI|NOT|ORN|ORR)v")>; +def : InstRW<[M1WriteNMISC1], (instregex "^[SU](MIN|MAX)v")>; +def : InstRW<[M1WriteNMISC2], (instregex "^[SU](MIN|MAX)Pv")>; +def : InstRW<[M1WriteNMISC3], (instregex "^[SU](MIN|MAX)Vv")>; +def : InstRW<[M1WriteNMISC4], (instregex "^(MUL|SQR?DMULH)v")>; +def : InstRW<[M1WriteNMISC4], (instregex "^ML[AS]v")>; +def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD|SQRD)ML[AS][HL]v")>; +def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD)MULLv")>; +def : InstRW<[M1WriteNAL13], (instregex "^(S|SR|U|UR)SRAv")>; +def : InstRW<[M1WriteNALU1], (instregex "^SHL[dv]")>; +def : InstRW<[M1WriteNALU1], (instregex "^[SU]SH[LR][dv]")>; +def : InstRW<[M1WriteNALU1], (instregex "^S[RS]I[dv]")>; +def : InstRW<[M1WriteNAL13], (instregex "^(([SU]Q)?R)?SHRU?N[bhsv]")>; +def : InstRW<[M1WriteNAL13], (instregex "^[SU]RSH[LR][dv]")>; +def : InstRW<[M1WriteNAL13], (instregex "^[SU]QR?SHLU?[bdhsv]")>; + +// ASIMD FP instructions. +def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)v")>; +def : InstRW<[M1WriteNMISC3], (instregex "^F(ABD|ADD|SUB)v")>; +def : InstRW<[M1WriteNEONA], (instregex "^FADDP")>; +def : InstRW<[M1WriteNMISC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v[^1]")>; +def : InstRW<[M1WriteFCVT3], (instregex "^[FVSU]CVTX?[AFLMNPZ][SU]?(_Int)?v")>; +def : InstRW<[M1WriteFVAR15], (instregex "FDIVv.f32")>; +def : InstRW<[M1WriteFVAR23], (instregex "FDIVv2f64")>; +def : InstRW<[M1WriteFVAR15], (instregex "FSQRTv.f32")>; +def : InstRW<[M1WriteFVAR23], (instregex "FSQRTv2f64")>; +def : InstRW<[M1WriteNMISC1], (instregex "^F(MAX|MIN)(NM)?V?v")>; +def : InstRW<[M1WriteNMISC2], (instregex "^F(MAX|MIN)(NM)?Pv")>; +def : InstRW<[M1WriteNEONJ], (instregex "^FMULX?v.i")>; +def : InstRW<[M1WriteFMAC4], (instregex "^FMULX?v.f")>; +def : InstRW<[M1WriteNEONK], (instregex "^FML[AS]v.i")>; +def : InstRW<[M1WriteFMAC5], (instregex "^FML[AS]v.f")>; +def : InstRW<[M1WriteFCVT3], (instregex "^FRINT[AIMNPXZ]v")>; + +// ASIMD miscellaneous instructions. 
+def : InstRW<[M1WriteNALU1], (instregex "^RBITv")>; +def : InstRW<[M1WriteNAL11], (instregex "^(BIF|BIT|BSL)v")>; +def : InstRW<[M1WriteNEONB], (instregex "^DUPv.+gpr")>; +def : InstRW<[M1WriteNALU1], (instregex "^DUPv.+lane")>; +def : InstRW<[M1WriteNALU1], (instregex "^EXTv8")>; +def : InstRW<[M1WriteNEONL], (instregex "^EXTv16")>; +def : InstRW<[M1WriteNAL13], (instregex "^[SU]?Q?XTU?Nv")>; +def : InstRW<[M1WriteNALU1], (instregex "^CPY")>; +def : InstRW<[M1WriteNALU1], (instregex "^INSv.+lane")>; +def : InstRW<[M1WriteNALU1], (instregex "^MOVI[Dv]")>; +def : InstRW<[M1WriteNALU1], (instregex "^FMOVv")>; +def : InstRW<[M1WriteFCVT4], (instregex "^[FU](RECP|RSQRT)Ev[248]")>; +def : InstRW<[M1WriteFMAC5], (instregex "^F(RECP|RSQRT)Sv")>; +def : InstRW<[M1WriteNALU1], (instregex "^REV(16|32|64)v")>; +def : InstRW<[M1WriteNAL11], (instregex "^TB[LX]v8i8One")>; +def : InstRW<[WriteSequence<[M1WriteNAL11], 2>], + (instregex "^TB[LX]v8i8Two")>; +def : InstRW<[WriteSequence<[M1WriteNAL11], 3>], + (instregex "^TB[LX]v8i8Three")>; +def : InstRW<[WriteSequence<[M1WriteNAL11], 4>], + (instregex "^TB[LX]v8i8Four")>; +def : InstRW<[M1WriteNAL12], (instregex "^TB[LX]v16i8One")>; +def : InstRW<[WriteSequence<[M1WriteNAL12], 2>], + (instregex "^TB[LX]v16i8Two")>; +def : InstRW<[WriteSequence<[M1WriteNAL12], 3>], + (instregex "^TB[LX]v16i8Three")>; +def : InstRW<[WriteSequence<[M1WriteNAL12], 4>], + (instregex "^TB[LX]v16i8Four")>; +def : InstRW<[M1WriteNEOND], (instregex "^[SU]MOVv")>; +def : InstRW<[M1WriteNEONC], (instregex "^INSv.+gpr")>; +def : InstRW<[M1WriteNALU1], (instregex "^(TRN|UZP)[12](v8i8|v4i16|v2i32)")>; +def : InstRW<[M1WriteNALU2], (instregex "^(TRN|UZP)[12](v16i8|v8i16|v4i32|v2i64)")>; +def : InstRW<[M1WriteNALU1], (instregex "^ZIP[12]v")>; + +// ASIMD load instructions. 
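+// The "_POST" (post-indexed) variants below add a WriteAdr for the updated base
+// register; WriteAdr is defined above with NumMicroOps = 0, so the writeback
+// costs no extra micro-op in this model.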
+def : InstRW<[M1WriteVLDD], (instregex "LD1i(8|16|32)$")>; +def : InstRW<[M1WriteVLDD, + WriteAdr], (instregex "LD1i(8|16|32)_POST$")>; +def : InstRW<[M1WriteVLDE], (instregex "LD1i(64)$")>; +def : InstRW<[M1WriteVLDE, + WriteAdr], (instregex "LD1i(64)_POST$")>; + +def : InstRW<[M1WriteL5], (instregex "LD1Rv(8b|4h|2s)$")>; +def : InstRW<[M1WriteL5, + WriteAdr], (instregex "LD1Rv(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteL5], (instregex "LD1Rv(1d)$")>; +def : InstRW<[M1WriteL5, + WriteAdr], (instregex "LD1Rv(1d)_POST$")>; +def : InstRW<[M1WriteL5], (instregex "LD1Rv(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteL5, + WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[M1WriteL5], (instregex "LD1Onev(8b|4h|2s|1d)$")>; +def : InstRW<[M1WriteL5, + WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[M1WriteL5], (instregex "LD1Onev(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteL5, + WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[M1WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[M1WriteVLDA, + WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[M1WriteVLDA], (instregex "LD1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteVLDA, + WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[M1WriteVLDB], (instregex "LD1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[M1WriteVLDB, + WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[M1WriteVLDB], (instregex "LD1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteVLDB, + WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[M1WriteVLDC], (instregex "LD1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[M1WriteVLDC, + WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[M1WriteVLDC], (instregex "LD1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteVLDC, + WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[M1WriteVLDG], (instregex "LD2i(8|16)$")>; +def : InstRW<[M1WriteVLDG, + WriteAdr], (instregex "LD2i(8|16)_POST$")>; +def : InstRW<[M1WriteVLDG], (instregex "LD2i(32)$")>; +def : InstRW<[M1WriteVLDG, + WriteAdr], (instregex "LD2i(32)_POST$")>; +def : InstRW<[M1WriteVLDH], (instregex "LD2i(64)$")>; +def : InstRW<[M1WriteVLDH, + WriteAdr], (instregex "LD2i(64)_POST$")>; + +def : InstRW<[M1WriteVLDA], (instregex "LD2Rv(8b|4h|2s)$")>; +def : InstRW<[M1WriteVLDA, + WriteAdr], (instregex "LD2Rv(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVLDA], (instregex "LD2Rv(1d)$")>; +def : InstRW<[M1WriteVLDA, + WriteAdr], (instregex "LD2Rv(1d)_POST$")>; +def : InstRW<[M1WriteVLDA], (instregex "LD2Rv(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteVLDA, + WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[M1WriteVLDF], (instregex "LD2Twov(8b|4h|2s)$")>; +def : InstRW<[M1WriteVLDF, + WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVLDF], (instregex "LD2Twov(16b|8h|4s)$")>; +def : InstRW<[M1WriteVLDF, + WriteAdr], (instregex "LD2Twov(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVLDF], (instregex "LD2Twov(2d)$")>; +def : InstRW<[M1WriteVLDF, + WriteAdr], (instregex "LD2Twov(2d)_POST$")>; + +def : InstRW<[M1WriteVLDJ], (instregex "LD3i(8|16)$")>; +def : InstRW<[M1WriteVLDJ, + WriteAdr], (instregex "LD3i(8|16)_POST$")>; +def : InstRW<[M1WriteVLDJ], (instregex "LD3i(32)$")>; +def : InstRW<[M1WriteVLDJ, + WriteAdr], (instregex "LD3i(32)_POST$")>; +def : InstRW<[M1WriteVLDL], (instregex "LD3i(64)$")>; +def : InstRW<[M1WriteVLDL, + WriteAdr], (instregex "LD3i(64)_POST$")>; + +def : InstRW<[M1WriteVLDB], 
(instregex "LD3Rv(8b|4h|2s)$")>; +def : InstRW<[M1WriteVLDB, + WriteAdr], (instregex "LD3Rv(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(1d)$")>; +def : InstRW<[M1WriteVLDB, + WriteAdr], (instregex "LD3Rv(1d)_POST$")>; +def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(16b|8h|4s)$")>; +def : InstRW<[M1WriteVLDB, + WriteAdr], (instregex "LD3Rv(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(2d)$")>; +def : InstRW<[M1WriteVLDB, + WriteAdr], (instregex "LD3Rv(2d)_POST$")>; + +def : InstRW<[M1WriteVLDI], (instregex "LD3Threev(8b|4h|2s)$")>; +def : InstRW<[M1WriteVLDI, + WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVLDI], (instregex "LD3Threev(16b|8h|4s)$")>; +def : InstRW<[M1WriteVLDI, + WriteAdr], (instregex "LD3Threev(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVLDI], (instregex "LD3Threev(2d)$")>; +def : InstRW<[M1WriteVLDI, + WriteAdr], (instregex "LD3Threev(2d)_POST$")>; + +def : InstRW<[M1WriteVLDK], (instregex "LD4i(8|16)$")>; +def : InstRW<[M1WriteVLDK, + WriteAdr], (instregex "LD4i(8|16)_POST$")>; +def : InstRW<[M1WriteVLDK], (instregex "LD4i(32)$")>; +def : InstRW<[M1WriteVLDK, + WriteAdr], (instregex "LD4i(32)_POST$")>; +def : InstRW<[M1WriteVLDM], (instregex "LD4i(64)$")>; +def : InstRW<[M1WriteVLDM, + WriteAdr], (instregex "LD4i(64)_POST$")>; + +def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(8b|4h|2s)$")>; +def : InstRW<[M1WriteVLDC, + WriteAdr], (instregex "LD4Rv(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(1d)$")>; +def : InstRW<[M1WriteVLDC, + WriteAdr], (instregex "LD4Rv(1d)_POST$")>; +def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(16b|8h|4s)$")>; +def : InstRW<[M1WriteVLDC, + WriteAdr], (instregex "LD4Rv(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(2d)$")>; +def : InstRW<[M1WriteVLDC, + WriteAdr], (instregex "LD4Rv(2d)_POST$")>; + +def : InstRW<[M1WriteVLDN], (instregex "LD4Fourv(8b|4h|2s)$")>; +def : InstRW<[M1WriteVLDN, + WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVLDN], (instregex "LD4Fourv(16b|8h|4s)$")>; +def : InstRW<[M1WriteVLDN, + WriteAdr], (instregex "LD4Fourv(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVLDN], (instregex "LD4Fourv(2d)$")>; +def : InstRW<[M1WriteVLDN, + WriteAdr], (instregex "LD4Fourv(2d)_POST$")>; + +// ASIMD store instructions. 
+def : InstRW<[M1WriteVSTD], (instregex "ST1i(8|16|32)$")>; +def : InstRW<[M1WriteVSTD, + WriteAdr], (instregex "ST1i(8|16|32)_POST$")>; +def : InstRW<[M1WriteVSTD], (instregex "ST1i(64)$")>; +def : InstRW<[M1WriteVSTD, + WriteAdr], (instregex "ST1i(64)_POST$")>; + +def : InstRW<[WriteVST], (instregex "ST1Onev(8b|4h|2s|1d)$")>; +def : InstRW<[WriteVST, + WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[WriteVST], (instregex "ST1Onev(16b|8h|4s|2d)$")>; +def : InstRW<[WriteVST, + WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[M1WriteVSTA], (instregex "ST1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[M1WriteVSTA, + WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[M1WriteVSTA], (instregex "ST1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteVSTA, + WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[M1WriteVSTB], (instregex "ST1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[M1WriteVSTB, + WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[M1WriteVSTB], (instregex "ST1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteVSTB, + WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[M1WriteVSTC], (instregex "ST1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[M1WriteVSTC, + WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[M1WriteVSTC], (instregex "ST1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteVSTC, + WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[M1WriteVSTD], (instregex "ST2i(8|16|32)$")>; +def : InstRW<[M1WriteVSTD, + WriteAdr], (instregex "ST2i(8|16|32)_POST$")>; +def : InstRW<[M1WriteVSTD], (instregex "ST2i(64)$")>; +def : InstRW<[M1WriteVSTD, + WriteAdr], (instregex "ST2i(64)_POST$")>; + +def : InstRW<[M1WriteVSTD], (instregex "ST2Twov(8b|4h|2s)$")>; +def : InstRW<[M1WriteVSTD, + WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVSTE], (instregex "ST2Twov(16b|8h|4s)$")>; +def : InstRW<[M1WriteVSTE, + WriteAdr], (instregex "ST2Twov(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVSTE], (instregex "ST2Twov(2d)$")>; +def : InstRW<[M1WriteVSTE, + WriteAdr], (instregex "ST2Twov(2d)_POST$")>; + +def : InstRW<[M1WriteVSTH], (instregex "ST3i(8|16)$")>; +def : InstRW<[M1WriteVSTH, + WriteAdr], (instregex "ST3i(8|16)_POST$")>; +def : InstRW<[M1WriteVSTH], (instregex "ST3i(32)$")>; +def : InstRW<[M1WriteVSTH, + WriteAdr], (instregex "ST3i(32)_POST$")>; +def : InstRW<[M1WriteVSTF], (instregex "ST3i(64)$")>; +def : InstRW<[M1WriteVSTF, + WriteAdr], (instregex "ST3i(64)_POST$")>; + +def : InstRW<[M1WriteVSTF], (instregex "ST3Threev(8b|4h|2s)$")>; +def : InstRW<[M1WriteVSTF, + WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVSTG], (instregex "ST3Threev(16b|8h|4s)$")>; +def : InstRW<[M1WriteVSTG, + WriteAdr], (instregex "ST3Threev(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVSTG], (instregex "ST3Threev(2d)$")>; +def : InstRW<[M1WriteVSTG, + WriteAdr], (instregex "ST3Threev(2d)_POST$")>; + +def : InstRW<[M1WriteVSTH], (instregex "ST4i(8|16)$")>; +def : InstRW<[M1WriteVSTH, + WriteAdr], (instregex "ST4i(8|16)_POST$")>; +def : InstRW<[M1WriteVSTH], (instregex "ST4i(32)$")>; +def : InstRW<[M1WriteVSTH, + WriteAdr], (instregex "ST4i(32)_POST$")>; +def : InstRW<[M1WriteVSTF], (instregex "ST4i(64)$")>; +def : InstRW<[M1WriteVSTF, + WriteAdr], (instregex "ST4i(64)_POST$")>; + +def : InstRW<[M1WriteVSTF], (instregex "ST4Fourv(8b|4h|2s)$")>; +def : InstRW<[M1WriteVSTF, + WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>; +def : 
InstRW<[M1WriteVSTI], (instregex "ST4Fourv(16b|8h|4s)$")>; +def : InstRW<[M1WriteVSTI, + WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVSTI], (instregex "ST4Fourv(2d)$")>; +def : InstRW<[M1WriteVSTI, + WriteAdr], (instregex "ST4Fourv(2d)_POST$")>; + +// Cryptography instructions. +def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; } +def M1ReadAES : SchedReadAdvance<1, [M1WriteAES]>; +def : InstRW<[M1WriteAES], (instregex "^AES[DE]")>; +def : InstRW<[M1WriteAES, M1ReadAES], (instregex "^AESI?MC")>; + +def : InstRW<[M1WriteNCRYPT1], (instregex "^PMUL")>; +def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA1(H|SU)")>; +def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA1[CMP]")>; +def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA256SU0")>; +def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA256(H|SU1)")>; + +// CRC instructions. +def : InstRW<[M1WriteC2], (instregex "^CRC32")>; + +} // SchedModel = ExynosM1Model diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedExynosM3.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedExynosM3.td new file mode 100644 index 000000000..5e5369a5a --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedExynosM3.td @@ -0,0 +1,860 @@ +//=- AArch64SchedExynosM3.td - Samsung Exynos M3 Sched Defs --*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for the Samsung Exynos M3 to support +// instruction scheduling and other instruction cost heuristics. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// The Exynos-M3 is an advanced superscalar microprocessor with a 6-wide +// in-order stage for decode and dispatch and a wider issue stage. +// The execution units and loads and stores are out-of-order. + +def ExynosM3Model : SchedMachineModel { + let IssueWidth = 6; // Up to 6 uops per cycle. + let MicroOpBufferSize = 228; // ROB size. + let LoopMicroOpBufferSize = 40; // Based on the instruction queue size. + let LoadLatency = 4; // Optimistic load cases. + let MispredictPenalty = 16; // Minimum branch misprediction penalty. + let CompleteModel = 1; // Use the default model otherwise. + + list<Predicate> UnsupportedFeatures = [HasSVE]; + + // FIXME: Remove when all errors have been fixed. + let FullInstRWOverlapCheck = 0; +} + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available on the Exynos-M3, +// which has 12 pipelines, each with its own queue with out-of-order dispatch. 
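+// The ProcResGroup definitions below pool the equivalent units of the three FP
+// pipes (e.g. M3UnitFMAC covers FMAC0/1/2), so a micro-op may be issued to any
+// of them; the "Super" declarations tie each sub-unit to its parent pipe.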
+ +let SchedModel = ExynosM3Model in { + +def M3UnitA : ProcResource<2>; // Simple integer +def M3UnitC : ProcResource<2>; // Simple and complex integer +def M3UnitD : ProcResource<1>; // Integer division (inside C0, serialized) +def M3UnitB : ProcResource<2>; // Branch +def M3UnitL : ProcResource<2>; // Load +def M3UnitS : ProcResource<1>; // Store +def M3PipeF0 : ProcResource<1>; // FP #0 +let Super = M3PipeF0 in { + def M3UnitFMAC0 : ProcResource<1>; // FP multiplication + def M3UnitFADD0 : ProcResource<1>; // Simple FP + def M3UnitFCVT0 : ProcResource<1>; // FP conversion + def M3UnitFSQR : ProcResource<2>; // FP square root (serialized) + def M3UnitNALU0 : ProcResource<1>; // Simple vector + def M3UnitNMSC : ProcResource<1>; // FP and vector miscellanea + def M3UnitNSHT0 : ProcResource<1>; // Vector shifting + def M3UnitNSHF0 : ProcResource<1>; // Vector shuffling +} +def M3PipeF1 : ProcResource<1>; // FP #1 +let Super = M3PipeF1 in { + def M3UnitFMAC1 : ProcResource<1>; // FP multiplication + def M3UnitFADD1 : ProcResource<1>; // Simple FP + def M3UnitFDIV0 : ProcResource<2>; // FP division (serialized) + def M3UnitFCVT1 : ProcResource<1>; // FP conversion + def M3UnitFST0 : ProcResource<1>; // FP store + def M3UnitNALU1 : ProcResource<1>; // Simple vector + def M3UnitNCRY0 : ProcResource<1>; // Cryptographic + def M3UnitNMUL : ProcResource<1>; // Vector multiplication + def M3UnitNSHT1 : ProcResource<1>; // Vector shifting + def M3UnitNSHF1 : ProcResource<1>; // Vector shuffling +} +def M3PipeF2 : ProcResource<1>; // FP #2 +let Super = M3PipeF2 in { + def M3UnitFMAC2 : ProcResource<1>; // FP multiplication + def M3UnitFADD2 : ProcResource<1>; // Simple FP + def M3UnitFDIV1 : ProcResource<2>; // FP division (serialized) + def M3UnitFST1 : ProcResource<1>; // FP store + def M3UnitNALU2 : ProcResource<1>; // Simple vector + def M3UnitNCRY1 : ProcResource<1>; // Cryptographic + def M3UnitNSHT2 : ProcResource<1>; // Vector shifting + def M3UnitNSHF2 : ProcResource<1>; // Vector shuffling +} + + +def M3UnitALU : ProcResGroup<[M3UnitA, + M3UnitC]>; +def M3UnitFMAC : ProcResGroup<[M3UnitFMAC0, + M3UnitFMAC1, + M3UnitFMAC2]>; +def M3UnitFADD : ProcResGroup<[M3UnitFADD0, + M3UnitFADD1, + M3UnitFADD2]>; +def M3UnitFDIV : ProcResGroup<[M3UnitFDIV0, + M3UnitFDIV1]>; +def M3UnitFCVT : ProcResGroup<[M3UnitFCVT0, + M3UnitFCVT1]>; +def M3UnitFST : ProcResGroup<[M3UnitFST0, + M3UnitFST1]>; +def M3UnitNALU : ProcResGroup<[M3UnitNALU0, + M3UnitNALU1, + M3UnitNALU2]>; +def M3UnitNCRY : ProcResGroup<[M3UnitNCRY0, + M3UnitNCRY1]>; +def M3UnitNSHT : ProcResGroup<[M3UnitNSHT0, + M3UnitNSHT1, + M3UnitNSHT2]>; +def M3UnitNSHF : ProcResGroup<[M3UnitNSHF0, + M3UnitNSHF1, + M3UnitNSHF2]>; + +//===----------------------------------------------------------------------===// +// Predicates. + +def M3BranchLinkFastPred : SchedPredicate<[{MI->getOpcode() == AArch64::BLR && + MI->getOperand(0).isReg() && + MI->getOperand(0).getReg() != AArch64::LR}]>; +def M3ResetFastPred : SchedPredicate<[{TII->isExynosResetFast(*MI)}]>; +def M3RotateRightFastPred : SchedPredicate<[{(MI->getOpcode() == AArch64::EXTRWrri || + MI->getOpcode() == AArch64::EXTRXrri) && + MI->getOperand(1).isReg() && MI->getOperand(2).isReg() && + MI->getOperand(1).getReg() == MI->getOperand(2).getReg()}]>; +def M3ShiftLeftFastPred : SchedPredicate<[{TII->isExynosShiftLeftFast(*MI)}]>; + +//===----------------------------------------------------------------------===// +// Coarse scheduling model. 
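+// This section maps the generic SchedWrite types (WriteI, WriteLD, WriteF, ...)
+// onto M3 resources and uses SchedWriteVariant together with the predicates
+// above to model the cheaper fast-shift/reset/rotate forms.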
+ +def M3WriteZ0 : SchedWriteRes<[]> { let Latency = 0; + let NumMicroOps = 1; } + +def M3WriteA1 : SchedWriteRes<[M3UnitALU]> { let Latency = 1; } +def M3WriteAA : SchedWriteRes<[M3UnitALU]> { let Latency = 2; + let ResourceCycles = [2]; } +def M3WriteAB : SchedWriteRes<[M3UnitALU, + M3UnitC]> { let Latency = 1; + let NumMicroOps = 2; } +def M3WriteAC : SchedWriteRes<[M3UnitALU, + M3UnitALU, + M3UnitC]> { let Latency = 2; + let NumMicroOps = 3; } +def M3WriteAD : SchedWriteRes<[M3UnitALU, + M3UnitC]> { let Latency = 2; + let NumMicroOps = 2; } +def M3WriteC1 : SchedWriteRes<[M3UnitC]> { let Latency = 1; } +def M3WriteC2 : SchedWriteRes<[M3UnitC]> { let Latency = 2; } +def M3WriteAX : SchedWriteVariant<[SchedVar<M3ResetFastPred, [M3WriteZ0]>, + SchedVar<M3ShiftLeftFastPred, [M3WriteA1]>, + SchedVar<NoSchedPred, [M3WriteAA]>]>; +def M3WriteAY : SchedWriteVariant<[SchedVar<M3RotateRightFastPred, [M3WriteA1]>, + SchedVar<NoSchedPred, [M3WriteAA]>]>; + +def M3WriteB1 : SchedWriteRes<[M3UnitB]> { let Latency = 1; } +def M3WriteBX : SchedWriteVariant<[SchedVar<M3BranchLinkFastPred, [M3WriteAB]>, + SchedVar<NoSchedPred, [M3WriteAC]>]>; + +def M3WriteL4 : SchedWriteRes<[M3UnitL]> { let Latency = 4; } +def M3WriteL5 : SchedWriteRes<[M3UnitL]> { let Latency = 5; } +def M3WriteLA : SchedWriteRes<[M3UnitL, + M3UnitL]> { let Latency = 5; + let NumMicroOps = 1; } +def M3WriteLB : SchedWriteRes<[M3UnitA, + M3UnitL]> { let Latency = 5; + let NumMicroOps = 2; } +def M3WriteLC : SchedWriteRes<[M3UnitA, + M3UnitL, + M3UnitL]> { let Latency = 5; + let NumMicroOps = 2; } +def M3WriteLD : SchedWriteRes<[M3UnitA, + M3UnitL]> { let Latency = 4; + let NumMicroOps = 2; } +def M3WriteLH : SchedWriteRes<[]> { let Latency = 5; + let NumMicroOps = 0; } + +def M3WriteLX : SchedWriteVariant<[SchedVar<M3ShiftLeftFastPred, [M3WriteL5]>, + SchedVar<NoSchedPred, [M3WriteLB]>]>; + +def M3WriteS1 : SchedWriteRes<[M3UnitS]> { let Latency = 1; } +def M3WriteSA : SchedWriteRes<[M3UnitA, + M3UnitS, + M3UnitFST]> { let Latency = 2; + let NumMicroOps = 2; } +def M3WriteSB : SchedWriteRes<[M3UnitA, + M3UnitS]> { let Latency = 1; + let NumMicroOps = 2; } +def M3WriteSC : SchedWriteRes<[M3UnitA, + M3UnitS]> { let Latency = 2; + let NumMicroOps = 2; } + +def M3WriteSX : SchedWriteVariant<[SchedVar<M3ShiftLeftFastPred, [M3WriteS1]>, + SchedVar<NoSchedPred, [M3WriteSB]>]>; +def M3WriteSY : SchedWriteVariant<[SchedVar<M3ShiftLeftFastPred, [M3WriteS1]>, + SchedVar<NoSchedPred, [M3WriteSC]>]>; + +def M3ReadAdrBase : SchedReadVariant<[SchedVar<ScaledIdxPred, [ReadDefault]>, + SchedVar<NoSchedPred, [ReadDefault]>]>; + +// Branch instructions. +def : SchedAlias<WriteBr, M3WriteZ0>; +def : WriteRes<WriteBrReg, [M3UnitC]> { let Latency = 1; } + +// Arithmetic and logical integer instructions. +def : WriteRes<WriteI, [M3UnitALU]> { let Latency = 1; } +def : WriteRes<WriteISReg, [M3UnitALU]> { let Latency = 1; } +def : WriteRes<WriteIEReg, [M3UnitALU]> { let Latency = 1; } +def : WriteRes<WriteIS, [M3UnitALU]> { let Latency = 1; } + +// Move instructions. +def : WriteRes<WriteImm, [M3UnitALU]> { let Latency = 1; } + +// Divide and multiply instructions. +def : WriteRes<WriteID32, [M3UnitC, + M3UnitD]> { let Latency = 12; + let ResourceCycles = [1, 12]; } +def : WriteRes<WriteID64, [M3UnitC, + M3UnitD]> { let Latency = 21; + let ResourceCycles = [1, 21]; } +def : WriteRes<WriteIM32, [M3UnitC]> { let Latency = 3; } +def : WriteRes<WriteIM64, [M3UnitC]> { let Latency = 4; + let ResourceCycles = [2]; } + +// Miscellaneous instructions. 
+def : WriteRes<WriteExtr, [M3UnitALU, + M3UnitALU]> { let Latency = 1; + let NumMicroOps = 2; } + +// Addressing modes. +def : WriteRes<WriteAdr, []> { let Latency = 1; + let NumMicroOps = 0; } +def : SchedAlias<ReadAdrBase, M3ReadAdrBase>; + +// Load instructions. +def : SchedAlias<WriteLD, M3WriteL4>; +def : WriteRes<WriteLDHi, []> { let Latency = 4; + let NumMicroOps = 0; } +def : SchedAlias<WriteLDIdx, M3WriteLX>; + +// Store instructions. +def : SchedAlias<WriteST, M3WriteS1>; +def : SchedAlias<WriteSTP, M3WriteS1>; +def : SchedAlias<WriteSTX, M3WriteS1>; +def : SchedAlias<WriteSTIdx, M3WriteSX>; + +// FP data instructions. +def : WriteRes<WriteF, [M3UnitFADD]> { let Latency = 2; } +def : WriteRes<WriteFCmp, [M3UnitNMSC]> { let Latency = 2; } +def : WriteRes<WriteFDiv, [M3UnitFDIV]> { let Latency = 12; + let ResourceCycles = [12]; } +def : WriteRes<WriteFMul, [M3UnitFMAC]> { let Latency = 4; } + +// FP miscellaneous instructions. +// TODO: Conversion between register files is much different. +def : WriteRes<WriteFCvt, [M3UnitFCVT]> { let Latency = 3; } +def : WriteRes<WriteFImm, [M3UnitNALU]> { let Latency = 1; } +def : WriteRes<WriteFCopy, [M3UnitNALU]> { let Latency = 1; } + +// FP load instructions. +def : SchedAlias<WriteVLD, M3WriteL5>; + +// FP store instructions. +def : WriteRes<WriteVST, [M3UnitS, + M3UnitFST]> { let Latency = 1; + let NumMicroOps = 1; } + +// ASIMD FP instructions. +def : WriteRes<WriteV, [M3UnitNALU]> { let Latency = 3; } + +// Other miscellaneous instructions. +def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } +def : WriteRes<WriteBarrier, []> { let Latency = 1; } +def : WriteRes<WriteHint, []> { let Latency = 1; } +def : WriteRes<WriteSys, []> { let Latency = 1; } + +//===----------------------------------------------------------------------===// +// Generic fast forwarding. + +// TODO: Add FP register forwarding rules. + +def : ReadAdvance<ReadI, 0>; +def : ReadAdvance<ReadISReg, 0>; +def : ReadAdvance<ReadIEReg, 0>; +def : ReadAdvance<ReadIM, 0>; +// TODO: The forwarding for 32 bits actually saves 2 cycles. +def : ReadAdvance<ReadIMA, 3, [WriteIM32, WriteIM64]>; +def : ReadAdvance<ReadID, 0>; +def : ReadAdvance<ReadExtrHi, 0>; +def : ReadAdvance<ReadAdrBase, 0>; +def : ReadAdvance<ReadVLD, 0>; + +//===----------------------------------------------------------------------===// +// Finer scheduling model. 
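+// The write types defined below feed the InstRW overrides further down, which
+// take precedence over the coarse per-SchedWrite mappings above for the
+// instructions they match.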
+ +def M3WriteNEONA : SchedWriteRes<[M3UnitNSHF, + M3UnitFADD]> { let Latency = 3; + let NumMicroOps = 2; } +def M3WriteNEONB : SchedWriteRes<[M3UnitNALU, + M3UnitFST]> { let Latency = 10; + let NumMicroOps = 2; } +def M3WriteNEOND : SchedWriteRes<[M3UnitNSHF, + M3UnitFST]> { let Latency = 6; + let NumMicroOps = 2; } +def M3WriteNEONH : SchedWriteRes<[M3UnitNALU, + M3UnitS]> { let Latency = 5; + let NumMicroOps = 2; } +def M3WriteNEONI : SchedWriteRes<[M3UnitNSHF, + M3UnitS]> { let Latency = 5; + let NumMicroOps = 2; } +def M3WriteNEONV : SchedWriteRes<[M3UnitFDIV0, + M3UnitFDIV1]> { let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [8, 8]; } +def M3WriteNEONW : SchedWriteRes<[M3UnitFDIV0, + M3UnitFDIV1]> { let Latency = 12; + let NumMicroOps = 2; + let ResourceCycles = [13, 13]; } +def M3WriteNEONX : SchedWriteRes<[M3UnitFSQR, + M3UnitFSQR]> { let Latency = 18; + let NumMicroOps = 2; + let ResourceCycles = [19, 19]; } +def M3WriteNEONY : SchedWriteRes<[M3UnitFSQR, + M3UnitFSQR]> { let Latency = 25; + let NumMicroOps = 2; + let ResourceCycles = [26, 26]; } +def M3WriteNEONZ : SchedWriteRes<[M3UnitNMSC, + M3UnitNMSC]> { let Latency = 5; + let NumMicroOps = 2; } +def M3WriteFADD2 : SchedWriteRes<[M3UnitFADD]> { let Latency = 2; } +def M3WriteFCVT2 : SchedWriteRes<[M3UnitFCVT]> { let Latency = 2; } +def M3WriteFCVT3 : SchedWriteRes<[M3UnitFCVT]> { let Latency = 3; } +def M3WriteFCVT3A : SchedWriteRes<[M3UnitFCVT0]> { let Latency = 3; } +def M3WriteFCVT4A : SchedWriteRes<[M3UnitFCVT0]> { let Latency = 4; } +def M3WriteFCVT4 : SchedWriteRes<[M3UnitFCVT]> { let Latency = 4; } +def M3WriteFDIV10 : SchedWriteRes<[M3UnitFDIV]> { let Latency = 7; + let ResourceCycles = [8]; } +def M3WriteFDIV12 : SchedWriteRes<[M3UnitFDIV]> { let Latency = 12; + let ResourceCycles = [13]; } +def M3WriteFMAC3 : SchedWriteRes<[M3UnitFMAC]> { let Latency = 3; } +def M3WriteFMAC4 : SchedWriteRes<[M3UnitFMAC]> { let Latency = 4; } +def M3WriteFMAC5 : SchedWriteRes<[M3UnitFMAC]> { let Latency = 5; } +def M3WriteFSQR17 : SchedWriteRes<[M3UnitFSQR]> { let Latency = 18; + let ResourceCycles = [19]; } +def M3WriteFSQR25 : SchedWriteRes<[M3UnitFSQR]> { let Latency = 25; + let ResourceCycles = [26]; } +def M3WriteNALU1 : SchedWriteRes<[M3UnitNALU]> { let Latency = 1; } +def M3WriteNCRY1A : SchedWriteRes<[M3UnitNCRY0]> { let Latency = 1; } +def M3WriteNCRY3A : SchedWriteRes<[M3UnitNCRY0]> { let Latency = 3; } +def M3WriteNCRY5A : SchedWriteRes<[M3UnitNCRY]> { let Latency = 5; } +def M3WriteNMSC1 : SchedWriteRes<[M3UnitNMSC]> { let Latency = 1; } +def M3WriteNMSC2 : SchedWriteRes<[M3UnitNMSC]> { let Latency = 2; } +def M3WriteNMSC3 : SchedWriteRes<[M3UnitNMSC]> { let Latency = 3; } +def M3WriteNMUL3 : SchedWriteRes<[M3UnitNMUL]> { let Latency = 3; } +def M3WriteNSHF1 : SchedWriteRes<[M3UnitNSHF]> { let Latency = 1; } +def M3WriteNSHF3 : SchedWriteRes<[M3UnitNSHF]> { let Latency = 3; } +def M3WriteNSHT1 : SchedWriteRes<[M3UnitNSHT]> { let Latency = 1; } +def M3WriteNSHT2 : SchedWriteRes<[M3UnitNSHT]> { let Latency = 2; } +def M3WriteNSHT3 : SchedWriteRes<[M3UnitNSHT]> { let Latency = 3; } +def M3WriteVLDA : SchedWriteRes<[M3UnitL, + M3UnitL]> { let Latency = 5; + let NumMicroOps = 2; } +def M3WriteVLDB : SchedWriteRes<[M3UnitL, + M3UnitL, + M3UnitL]> { let Latency = 6; + let NumMicroOps = 3; } +def M3WriteVLDC : SchedWriteRes<[M3UnitL, + M3UnitL, + M3UnitL, + M3UnitL]> { let Latency = 6; + let NumMicroOps = 4; } +def M3WriteVLDD : SchedWriteRes<[M3UnitL, + M3UnitNALU]> { let Latency = 7; + let NumMicroOps = 2; + let 
ResourceCycles = [2, 1]; } +def M3WriteVLDE : SchedWriteRes<[M3UnitL, + M3UnitNALU]> { let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [2, 1]; } +def M3WriteVLDF : SchedWriteRes<[M3UnitL, + M3UnitL]> { let Latency = 10; + let NumMicroOps = 2; + let ResourceCycles = [5, 5]; } +def M3WriteVLDG : SchedWriteRes<[M3UnitL, + M3UnitNALU, + M3UnitNALU]> { let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [2, 1, 1]; } +def M3WriteVLDH : SchedWriteRes<[M3UnitL, + M3UnitNALU, + M3UnitNALU]> { let Latency = 6; + let NumMicroOps = 3; + let ResourceCycles = [2, 1, 1]; } +def M3WriteVLDI : SchedWriteRes<[M3UnitL, + M3UnitL, + M3UnitL]> { let Latency = 12; + let NumMicroOps = 3; + let ResourceCycles = [6, 6, 6]; } +def M3WriteVLDJ : SchedWriteRes<[M3UnitL, + M3UnitNALU, + M3UnitNALU, + M3UnitNALU]> { let Latency = 7; + let NumMicroOps = 4; + let ResourceCycles = [2, 1, 1, 1]; } +def M3WriteVLDK : SchedWriteRes<[M3UnitL, + M3UnitNALU, + M3UnitNALU, + M3UnitNALU, + M3UnitNALU]> { let Latency = 9; + let NumMicroOps = 5; + let ResourceCycles = [4, 1, 1, 1, 1]; } +def M3WriteVLDL : SchedWriteRes<[M3UnitL, + M3UnitNALU, + M3UnitNALU, + M3UnitL, + M3UnitNALU]> { let Latency = 6; + let NumMicroOps = 5; + let ResourceCycles = [6, 1, 1, 6, 1]; } +def M3WriteVLDM : SchedWriteRes<[M3UnitL, + M3UnitNALU, + M3UnitNALU, + M3UnitL, + M3UnitNALU, + M3UnitNALU]> { let Latency = 7; + let NumMicroOps = 6; + let ResourceCycles = [6, 1, 1, 6, 1, 1]; } +def M3WriteVLDN : SchedWriteRes<[M3UnitL, + M3UnitL, + M3UnitL, + M3UnitL]> { let Latency = 14; + let NumMicroOps = 4; + let ResourceCycles = [6, 6, 6, 6]; } +def M3WriteVSTA : WriteSequence<[WriteVST], 2>; +def M3WriteVSTB : WriteSequence<[WriteVST], 3>; +def M3WriteVSTC : WriteSequence<[WriteVST], 4>; +def M3WriteVSTD : SchedWriteRes<[M3UnitS, + M3UnitFST, + M3UnitS, + M3UnitFST]> { let Latency = 7; + let NumMicroOps = 4; + let ResourceCycles = [1, 3, 1, 3]; } +def M3WriteVSTE : SchedWriteRes<[M3UnitS, + M3UnitFST, + M3UnitS, + M3UnitFST, + M3UnitS, + M3UnitFST]> { let Latency = 8; + let NumMicroOps = 6; + let ResourceCycles = [1, 3, 1, 3, 1, 3]; } +def M3WriteVSTF : SchedWriteRes<[M3UnitNALU, + M3UnitFST, + M3UnitFST, + M3UnitS, + M3UnitFST, + M3UnitS, + M3UnitFST]> { let Latency = 15; + let NumMicroOps = 7; + let ResourceCycles = [1, 3, 3, 1, 3, 1, 3]; } +def M3WriteVSTG : SchedWriteRes<[M3UnitNALU, + M3UnitFST, + M3UnitFST, + M3UnitS, + M3UnitFST, + M3UnitS, + M3UnitFST, + M3UnitS, + M3UnitFST]> { let Latency = 16; + let NumMicroOps = 9; + let ResourceCycles = [1, 3, 3, 1, 3, 1, 3, 1, 3]; } +def M3WriteVSTH : SchedWriteRes<[M3UnitNALU, + M3UnitFST, + M3UnitFST, + M3UnitS, + M3UnitFST]> { let Latency = 14; + let NumMicroOps = 5; + let ResourceCycles = [1, 3, 3, 1, 3]; } +def M3WriteVSTI : SchedWriteRes<[M3UnitNALU, + M3UnitFST, + M3UnitFST, + M3UnitS, + M3UnitFST, + M3UnitS, + M3UnitFST, + M3UnitS, + M3UnitFST]> { let Latency = 17; + let NumMicroOps = 9; + let ResourceCycles = [1, 3, 3, 1, 3, 1, 3, 1, 3]; } + +// Special cases. 
+def M3WriteAES : SchedWriteRes<[M3UnitNCRY]> { let Latency = 1; } +def M3ReadAES : SchedReadAdvance<1, [M3WriteAES]>; +def M3ReadFMAC : SchedReadAdvance<1, [M3WriteFMAC4, + M3WriteFMAC5]>; +def M3WriteMOVI : SchedWriteVariant<[SchedVar<M3ResetFastPred, [M3WriteZ0]>, + SchedVar<NoSchedPred, [M3WriteNALU1]>]>; +def M3ReadNMUL : SchedReadAdvance<1, [M3WriteNMUL3]>; + +// Branch instructions +def : InstRW<[M3WriteB1], (instrs Bcc)>; +def : InstRW<[M3WriteA1], (instrs BL)>; +def : InstRW<[M3WriteBX], (instrs BLR)>; +def : InstRW<[M3WriteC1], (instregex "^CBN?Z[WX]")>; +def : InstRW<[M3WriteAD], (instregex "^TBN?Z[WX]")>; + +// Arithmetic and logical integer instructions. +def : InstRW<[M3WriteA1], (instrs COPY)>; +def : InstRW<[M3WriteAX], (instregex "^(ADD|SUB)S?Xrx64")>; +def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)[WX]r[sx]$")>; +def : InstRW<[M3WriteAX], (instregex "^(ADD|BIC|SUB)S[WX]r[sx]$")>; +def : InstRW<[M3WriteAX], (instregex "^(ADD|AND|EOR|ORR|SUB)[WX]ri")>; + +// Move instructions. +def : InstRW<[M3WriteZ0], (instrs ADR, ADRP)>; +def : InstRW<[M3WriteZ0], (instregex "^MOV[NZ][WX]i")>; + +// Divide and multiply instructions. + +// Miscellaneous instructions. +def : InstRW<[M3WriteAY], (instrs EXTRWrri, EXTRXrri)>; + +// Load instructions. +def : InstRW<[M3WriteLD, + WriteLDHi, + WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>; +def : InstRW<[M3WriteLX, + ReadAdrBase], (instregex "^PRFMro[WX]")>; + +// Store instructions. + +// FP data instructions. +def : InstRW<[M3WriteNSHF1], (instregex "^FABS[DS]r")>; +def : InstRW<[M3WriteFADD2], (instregex "^F(ADD|SUB)[DS]rr")>; +def : InstRW<[M3WriteFDIV10], (instrs FDIVSrr)>; +def : InstRW<[M3WriteFDIV12], (instrs FDIVDrr)>; +def : InstRW<[M3WriteNMSC1], (instregex "^F(MAX|MIN).+rr")>; +def : InstRW<[M3WriteFMAC3], (instregex "^FN?MUL[DS]rr")>; +def : InstRW<[M3WriteFMAC4, + M3ReadFMAC], (instregex "^FN?M(ADD|SUB)[DS]rrr")>; +def : InstRW<[M3WriteNALU1], (instregex "^FNEG[DS]r")>; +def : InstRW<[M3WriteFCVT3A], (instregex "^FRINT.+r")>; +def : InstRW<[M3WriteNEONH], (instregex "^FCSEL[DS]rrr")>; +def : InstRW<[M3WriteFSQR17], (instrs FSQRTSr)>; +def : InstRW<[M3WriteFSQR25], (instrs FSQRTDr)>; + +// FP miscellaneous instructions. +def : InstRW<[M3WriteFCVT3], (instregex "^FCVT[DHS][DHS]r")>; +def : InstRW<[M3WriteFCVT4A], (instregex "^[SU]CVTF[SU][XW][DHS]ri")>; +def : InstRW<[M3WriteFCVT3A], (instregex "^FCVT[AMNPZ][SU]U[XW][DHS]r")>; +def : InstRW<[M3WriteFCVT3A], (instregex "^FCVTZ[SU][dhs]")>; +def : InstRW<[M3WriteNALU1], (instregex "^FMOV[DS][ir]")>; +def : InstRW<[M3WriteFCVT4], (instregex "^[FU](RECP|RSQRT)Ev1")>; +def : InstRW<[M3WriteNMSC1], (instregex "^FRECPXv1")>; +def : InstRW<[M3WriteFMAC4, + M3ReadFMAC], (instregex "^F(RECP|RSQRT)S(16|32|64)")>; +def : InstRW<[M3WriteNALU1], (instregex "^FMOV[WX][DS]r")>; +def : InstRW<[M3WriteNALU1], (instregex "^FMOV[DS][WX]r")>; +def : InstRW<[M3WriteNEONI], (instregex "^FMOV(DX|XD)Highr")>; + +// FP load instructions. 
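The InstRW entries above bind these write types to opcodes either by exact name (instrs) or by a regular expression over the instruction definition names (instregex). A minimal sketch of how such a pattern selects opcodes, using the FP add/sub pattern from the FP data section and a handful of illustrative candidate names (not an exhaustive opcode list):

# Sketch only: checking which opcode names an instregex-style pattern accepts.
import re

pattern = re.compile(r"^F(ADD|SUB)[DS]rr")      # pattern used with M3WriteFADD2 above
candidates = ["FADDSrr", "FADDDrr", "FSUBDrr", "FADDv2f32", "FMULDrr"]
print([name for name in candidates if pattern.match(name)])
# Only the scalar register-register forms match; the vector forms are covered
# later by the ASIMD FP mappings ("^F(ABD|ADD|SUB)v").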
+def : InstRW<[WriteVLD], (instregex "^LDR[DSQ]l")>; +def : InstRW<[WriteVLD], (instregex "^LDUR[BDHSQ]i")>; +def : InstRW<[WriteVLD, + WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>; +def : InstRW<[WriteVLD], (instregex "^LDR[BDHSQ]ui")>; +def : InstRW<[M3WriteLX, + ReadAdrBase], (instregex "^LDR[BDHS]ro[WX]")>; +def : InstRW<[M3WriteLB, + ReadAdrBase], (instregex "^LDRQro[WX]")>; +def : InstRW<[WriteVLD, + M3WriteLH], (instregex "^LDN?P[DS]i")>; +def : InstRW<[M3WriteLA, + M3WriteLH], (instregex "^LDN?PQi")>; +def : InstRW<[M3WriteLB, + M3WriteLH, + WriteAdr], (instregex "^LDP[DS](post|pre)")>; +def : InstRW<[M3WriteLC, + M3WriteLH, + WriteAdr], (instregex "^LDPQ(post|pre)")>; + +// FP store instructions. +def : InstRW<[WriteVST], (instregex "^STUR[BDHSQ]i")>; +def : InstRW<[WriteVST, + WriteAdr], (instregex "^STR[BDHSQ](post|pre)")>; +def : InstRW<[WriteVST], (instregex "^STR[BDHSQ]ui")>; +def : InstRW<[M3WriteSY, + ReadAdrBase], (instregex "^STR[BDHS]ro[WX]")>; +def : InstRW<[M3WriteSA, + ReadAdrBase], (instregex "^STRQro[WX]")>; +def : InstRW<[WriteVST], (instregex "^STN?P[DSQ]i")>; +def : InstRW<[WriteVST, + WriteAdr], (instregex "^STP[DS](post|pre)")>; +def : InstRW<[M3WriteSA, + WriteAdr], (instregex "^STPQ(post|pre)")>; + +// ASIMD instructions. +def : InstRW<[M3WriteNMSC3], (instregex "^[SU]ABAL?v")>; +def : InstRW<[M3WriteNMSC1], (instregex "^[SU]ABDL?v")>; +def : InstRW<[M3WriteNMSC1], (instregex "^(SQ)?(ABS|NEG)v")>; +def : InstRW<[M3WriteNALU1], (instregex "^(ADD|NEG|SUB)v")>; +def : InstRW<[M3WriteNMSC3], (instregex "^[SU]?ADDL?Pv")>; +def : InstRW<[M3WriteNMSC3], (instregex "^[SU]H(ADD|SUB)v")>; +def : InstRW<[M3WriteNMSC3], (instregex "^[SU](ADD|SUB)[LW]V?v")>; +def : InstRW<[M3WriteNMSC3], (instregex "^R?(ADD|SUB)HN2?v")>; +def : InstRW<[M3WriteNMSC3], (instregex "^[SU]Q(ADD|SUB)v")>; +def : InstRW<[M3WriteNMSC3], (instregex "^(SU|US)QADDv")>; +def : InstRW<[M3WriteNMSC3], (instregex "^[SU]RHADDv")>; +def : InstRW<[M3WriteNMSC3], (instregex "^[SU]?ADDL?Vv")>; +def : InstRW<[M3WriteNMSC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>; +def : InstRW<[M3WriteNALU1], (instregex "^CMTSTv")>; +def : InstRW<[M3WriteNALU1], (instregex "^(AND|BIC|EOR|MVNI|NOT|ORN|ORR)v")>; +def : InstRW<[M3WriteNMSC1], (instregex "^[SU](MIN|MAX)v")>; +def : InstRW<[M3WriteNMSC2], (instregex "^[SU](MIN|MAX)Pv")>; +def : InstRW<[M3WriteNMSC3], (instregex "^[SU](MIN|MAX)Vv")>; +def : InstRW<[M3WriteNMUL3], (instregex "^(MUL|SQR?DMULH)v")>; +def : InstRW<[M3WriteNMUL3, + M3ReadNMUL], (instregex "^ML[AS]v")>; +def : InstRW<[M3WriteNMUL3], (instregex "^[SU]ML[AS]Lv")>; +def : InstRW<[M3WriteNMUL3], (instregex "^SQDML[AS]L")>; +def : InstRW<[M3WriteNMUL3], (instregex "^(S|U|SQD)MULLv")>; +def : InstRW<[M3WriteNMSC3], (instregex "^[SU]ADALPv")>; +def : InstRW<[M3WriteNSHT3], (instregex "^[SU]R?SRAv")>; +def : InstRW<[M3WriteNSHT1], (instregex "^SHL[dv]")>; +def : InstRW<[M3WriteNSHT1], (instregex "^[SU]SH[LR][dv]")>; +def : InstRW<[M3WriteNSHT1], (instregex "^S[RS]I[dv]")>; +def : InstRW<[M3WriteNSHT2], (instregex "^[SU]?SHLLv")>; +def : InstRW<[M3WriteNSHT3], (instregex "^(([SU]Q)?R)?SHRU?N[bhsv]")>; +def : InstRW<[M3WriteNSHT3], (instregex "^[SU]RSH[LR][dv]")>; +def : InstRW<[M3WriteNSHT3], (instregex "^[SU]QR?SHLU?[bdhsv]")>; + +// ASIMD FP instructions. 
+def : InstRW<[M3WriteNSHF1], (instregex "^FABSv")>; +def : InstRW<[M3WriteFADD2], (instregex "^F(ABD|ADD|SUB)v")>; +def : InstRW<[M3WriteNEONA], (instregex "^FADDP")>; +def : InstRW<[M3WriteNMSC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v[^1]")>; +def : InstRW<[M3WriteFCVT3], (instregex "^FCVT(L|N|XN)v")>; +def : InstRW<[M3WriteFCVT2], (instregex "^FCVT[AMNPZ][SU]v")>; +def : InstRW<[M3WriteFCVT2], (instregex "^[SU]CVTFv")>; +def : InstRW<[M3WriteFDIV10], (instrs FDIVv2f32)>; +def : InstRW<[M3WriteNEONV], (instrs FDIVv4f32)>; +def : InstRW<[M3WriteNEONW], (instrs FDIVv2f64)>; +def : InstRW<[M3WriteNMSC1], (instregex "^F(MAX|MIN)(NM)?v")>; +def : InstRW<[M3WriteNMSC2], (instregex "^F(MAX|MIN)(NM)?Pv")>; +def : InstRW<[M3WriteNEONZ], (instregex "^F(MAX|MIN)(NM)?Vv")>; +def : InstRW<[M3WriteFMAC3], (instregex "^FMULX?v.[fi]")>; +def : InstRW<[M3WriteFMAC4, + M3ReadFMAC], (instregex "^FML[AS]v.f")>; +def : InstRW<[M3WriteFMAC5, + M3ReadFMAC], (instregex "^FML[AS]v.i")>; +def : InstRW<[M3WriteNALU1], (instregex "^FNEGv")>; +def : InstRW<[M3WriteFCVT3A], (instregex "^FRINT[AIMNPXZ]v")>; +def : InstRW<[M3WriteFSQR17], (instrs FSQRTv2f32)>; +def : InstRW<[M3WriteNEONX], (instrs FSQRTv4f32)>; +def : InstRW<[M3WriteNEONY], (instrs FSQRTv2f64)>; + +// ASIMD miscellaneous instructions. +def : InstRW<[M3WriteNALU1], (instregex "^RBITv")>; +def : InstRW<[M3WriteNALU1], (instregex "^(BIF|BIT|BSL)v")>; +def : InstRW<[M3WriteNEONB], (instregex "^DUPv.+gpr")>; +def : InstRW<[M3WriteNSHF1], (instregex "^DUPv.+lane")>; +def : InstRW<[M3WriteNSHF1], (instregex "^EXTv")>; +def : InstRW<[M3WriteNSHF1], (instregex "^[SU]?Q?XTU?Nv")>; +def : InstRW<[M3WriteNSHF1], (instregex "^CPY")>; +def : InstRW<[M3WriteNSHF1], (instregex "^INSv.+lane")>; +def : InstRW<[M3WriteMOVI], (instregex "^MOVI")>; +def : InstRW<[M3WriteNALU1], (instregex "^FMOVv")>; +def : InstRW<[M3WriteFCVT4], (instregex "^[FU](RECP|RSQRT)Ev[248]")>; +def : InstRW<[M3WriteFMAC4, + M3ReadFMAC], (instregex "^F(RECP|RSQRT)Sv")>; +def : InstRW<[M3WriteNSHF1], (instregex "^REV(16|32|64)v")>; +def : InstRW<[M3WriteNSHF1], (instregex "^TB[LX]v")>; +def : InstRW<[M3WriteNEOND], (instregex "^[SU]MOVv")>; +def : InstRW<[M3WriteNSHF3], (instregex "^INSv.+gpr")>; +def : InstRW<[M3WriteNSHF1], (instregex "^(TRN|UZP|ZIP)[12]v")>; + +// ASIMD load instructions. 
+def : InstRW<[M3WriteL5], (instregex "LD1Onev(8b|4h|2s|1d)$")>; +def : InstRW<[M3WriteL5, + WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST")>; +def : InstRW<[M3WriteL5], (instregex "LD1Onev(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteL5, + WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[M3WriteVLDA, + WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST")>; +def : InstRW<[M3WriteVLDA], (instregex "LD1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteVLDA, + WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVLDB], (instregex "LD1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[M3WriteVLDB, + WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST")>; +def : InstRW<[M3WriteVLDB], (instregex "LD1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteVLDB, + WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVLDC], (instregex "LD1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[M3WriteVLDC, + WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST")>; +def : InstRW<[M3WriteVLDC], (instregex "LD1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteVLDC, + WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVLDD], (instregex "LD1i(8|16|32)$")>; +def : InstRW<[M3WriteVLDD, + WriteAdr], (instregex "LD1i(8|16|32)_POST")>; +def : InstRW<[M3WriteVLDE], (instregex "LD1i(64)$")>; +def : InstRW<[M3WriteVLDE, + WriteAdr], (instregex "LD1i(64)_POST")>; + +def : InstRW<[M3WriteL5], (instregex "LD1Rv(8b|4h|2s|1d)$")>; +def : InstRW<[M3WriteL5, + WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d)_POST")>; +def : InstRW<[M3WriteL5], (instregex "LD1Rv(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteL5, + WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVLDF], (instregex "LD2Twov(8b|4h|2s)$")>; +def : InstRW<[M3WriteVLDF, + WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST")>; +def : InstRW<[M3WriteVLDF], (instregex "LD2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteVLDF, + WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVLDG], (instregex "LD2i(8|16|32)$")>; +def : InstRW<[M3WriteVLDG, + WriteAdr], (instregex "LD2i(8|16|32)_POST")>; +def : InstRW<[M3WriteVLDH], (instregex "LD2i(64)$")>; +def : InstRW<[M3WriteVLDH, + WriteAdr], (instregex "LD2i(64)_POST")>; + +def : InstRW<[M3WriteVLDA], (instregex "LD2Rv(8b|4h|2s|1d)$")>; +def : InstRW<[M3WriteVLDA, + WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d)_POST")>; +def : InstRW<[M3WriteVLDA], (instregex "LD2Rv(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteVLDA, + WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVLDI], (instregex "LD3Threev(8b|4h|2s)$")>; +def : InstRW<[M3WriteVLDI, + WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST")>; +def : InstRW<[M3WriteVLDI], (instregex "LD3Threev(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteVLDI, + WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVLDJ], (instregex "LD3i(8|16|32)$")>; +def : InstRW<[M3WriteVLDJ, + WriteAdr], (instregex "LD3i(8|16|32)_POST")>; +def : InstRW<[M3WriteVLDL], (instregex "LD3i(64)$")>; +def : InstRW<[M3WriteVLDL, + WriteAdr], (instregex "LD3i(64)_POST")>; + +def : InstRW<[M3WriteVLDB], (instregex "LD3Rv(8b|4h|2s|1d)$")>; +def : InstRW<[M3WriteVLDB, + WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d)_POST")>; +def : InstRW<[M3WriteVLDB], (instregex "LD3Rv(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteVLDB, + WriteAdr], (instregex "LD3Rv(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVLDN], (instregex 
"LD4Fourv(8b|4h|2s)$")>; +def : InstRW<[M3WriteVLDN, + WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST")>; +def : InstRW<[M3WriteVLDN], (instregex "LD4Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteVLDN, + WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVLDK], (instregex "LD4i(8|16|32)$")>; +def : InstRW<[M3WriteVLDK, + WriteAdr], (instregex "LD4i(8|16|32)_POST")>; +def : InstRW<[M3WriteVLDM], (instregex "LD4i(64)$")>; +def : InstRW<[M3WriteVLDM, + WriteAdr], (instregex "LD4i(64)_POST")>; + +def : InstRW<[M3WriteVLDC], (instregex "LD4Rv(8b|4h|2s|1d)$")>; +def : InstRW<[M3WriteVLDC, + WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d)_POST")>; +def : InstRW<[M3WriteVLDC], (instregex "LD4Rv(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteVLDC, + WriteAdr], (instregex "LD4Rv(16b|8h|4s|2d)_POST")>; + +// ASIMD store instructions. +def : InstRW<[WriteVST], (instregex "ST1Onev(8b|4h|2s|1d)$")>; +def : InstRW<[WriteVST, + WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST")>; +def : InstRW<[WriteVST], (instregex "ST1Onev(16b|8h|4s|2d)$")>; +def : InstRW<[WriteVST, + WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVSTA], (instregex "ST1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[M3WriteVSTA, + WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST")>; +def : InstRW<[M3WriteVSTA], (instregex "ST1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteVSTA, + WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVSTB], (instregex "ST1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[M3WriteVSTB, + WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST")>; +def : InstRW<[M3WriteVSTB], (instregex "ST1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteVSTB, + WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVSTC], (instregex "ST1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[M3WriteVSTC, + WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST")>; +def : InstRW<[M3WriteVSTC], (instregex "ST1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteVSTC, + WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVSTD], (instregex "ST1i(8|16|32|64)$")>; +def : InstRW<[M3WriteVSTD, + WriteAdr], (instregex "ST1i(8|16|32|64)_POST")>; + +def : InstRW<[M3WriteVSTD], (instregex "ST2Twov(8b|4h|2s)$")>; +def : InstRW<[M3WriteVSTD, + WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST")>; +def : InstRW<[M3WriteVSTE], (instregex "ST2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteVSTE, + WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVSTD], (instregex "ST2i(8|16|32)$")>; +def : InstRW<[M3WriteVSTD, + WriteAdr], (instregex "ST2i(8|16|32)_POST")>; +def : InstRW<[M3WriteVSTD], (instregex "ST2i(64)$")>; +def : InstRW<[M3WriteVSTD, + WriteAdr], (instregex "ST2i(64)_POST")>; + +def : InstRW<[M3WriteVSTF], (instregex "ST3Threev(8b|4h|2s)$")>; +def : InstRW<[M3WriteVSTF, + WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST")>; +def : InstRW<[M3WriteVSTG], (instregex "ST3Threev(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteVSTG, + WriteAdr], (instregex "ST3Threev(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVSTH], (instregex "ST3i(8|16|32)$")>; +def : InstRW<[M3WriteVSTH, + WriteAdr], (instregex "ST3i(8|16|32)_POST")>; +def : InstRW<[M3WriteVSTF], (instregex "ST3i(64)$")>; +def : InstRW<[M3WriteVSTF, + WriteAdr], (instregex "ST3i(64)_POST")>; + +def : InstRW<[M3WriteVSTF], (instregex "ST4Fourv(8b|4h|2s)$")>; +def : InstRW<[M3WriteVSTF, + WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST")>; +def : InstRW<[M3WriteVSTI], (instregex 
"ST4Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[M3WriteVSTI, + WriteAdr], (instregex "ST4Fourv(16b|8h|4s|2d)_POST")>; + +def : InstRW<[M3WriteVSTF], (instregex "ST4i(8|16|32|64)$")>; +def : InstRW<[M3WriteVSTF, + WriteAdr], (instregex "ST4i(8|16|32|64)_POST")>; + +// Cryptography instructions. +def : InstRW<[M3WriteAES], (instregex "^AES[DE]")>; +def : InstRW<[M3WriteAES, + M3ReadAES], (instregex "^AESI?MC")>; + +def : InstRW<[M3WriteNCRY3A], (instregex "^PMULL?v")>; + +def : InstRW<[M3WriteNCRY1A], (instregex "^SHA1([CHMP]|SU[01])")>; +def : InstRW<[M3WriteNCRY1A], (instregex "^SHA256SU0")>; +def : InstRW<[M3WriteNCRY5A], (instregex "^SHA256(H2?|SU1)")>; + +// CRC instructions. +def : InstRW<[M3WriteC2], (instregex "^CRC32")>; + +} // SchedModel = ExynosM3Model diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedFalkor.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedFalkor.td new file mode 100644 index 000000000..84825458e --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedFalkor.td @@ -0,0 +1,119 @@ +//==- AArch64SchedFalkor.td - Falkor Scheduling Definitions -*- tablegen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for Qualcomm Falkor to support +// instruction scheduling and other instruction cost heuristics. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Define the SchedMachineModel and provide basic properties for coarse grained +// instruction cost model. + +def FalkorModel : SchedMachineModel { + let IssueWidth = 8; // 8 uops are dispatched per cycle. + let MicroOpBufferSize = 128; // Out-of-order with temporary unified issue buffer. + let LoopMicroOpBufferSize = 16; + let LoadLatency = 3; // Optimistic load latency. + let MispredictPenalty = 11; // Minimum branch misprediction penalty. + let CompleteModel = 1; + + list<Predicate> UnsupportedFeatures = [HasSVE]; + + // FIXME: Remove when all errors have been fixed. + let FullInstRWOverlapCheck = 0; +} + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available on Falkor. + +let SchedModel = FalkorModel in { + + def FalkorUnitB : ProcResource<1>; // Branch + def FalkorUnitLD : ProcResource<1>; // Load pipe + def FalkorUnitSD : ProcResource<1>; // Store data + def FalkorUnitST : ProcResource<1>; // Store pipe + def FalkorUnitX : ProcResource<1>; // Complex arithmetic + def FalkorUnitY : ProcResource<1>; // Simple arithmetic + def FalkorUnitZ : ProcResource<1>; // Simple arithmetic + + def FalkorUnitVSD : ProcResource<1>; // Vector store data + def FalkorUnitVX : ProcResource<1>; // Vector X-pipe + def FalkorUnitVY : ProcResource<1>; // Vector Y-pipe + + def FalkorUnitGTOV : ProcResource<1>; // Scalar to Vector + def FalkorUnitVTOG : ProcResource<1>; // Vector to Scalar + + // Define the resource groups. 
+ def FalkorUnitXY : ProcResGroup<[FalkorUnitX, FalkorUnitY]>; + def FalkorUnitXYZ : ProcResGroup<[FalkorUnitX, FalkorUnitY, FalkorUnitZ]>; + def FalkorUnitXYZB : ProcResGroup<[FalkorUnitX, FalkorUnitY, FalkorUnitZ, + FalkorUnitB]>; + def FalkorUnitZB : ProcResGroup<[FalkorUnitZ, FalkorUnitB]>; + def FalkorUnitVXVY : ProcResGroup<[FalkorUnitVX, FalkorUnitVY]>; + +} + +//===----------------------------------------------------------------------===// +// Map the target-defined scheduler read/write resources and latency for +// Falkor. + +let SchedModel = FalkorModel in { + +// These WriteRes entries are not used in the Falkor sched model. +def : WriteRes<WriteImm, []> { let Unsupported = 1; } +def : WriteRes<WriteI, []> { let Unsupported = 1; } +def : WriteRes<WriteISReg, []> { let Unsupported = 1; } +def : WriteRes<WriteIEReg, []> { let Unsupported = 1; } +def : WriteRes<WriteExtr, []> { let Unsupported = 1; } +def : WriteRes<WriteIS, []> { let Unsupported = 1; } +def : WriteRes<WriteID32, []> { let Unsupported = 1; } +def : WriteRes<WriteID64, []> { let Unsupported = 1; } +def : WriteRes<WriteIM32, []> { let Unsupported = 1; } +def : WriteRes<WriteIM64, []> { let Unsupported = 1; } +def : WriteRes<WriteBr, []> { let Unsupported = 1; } +def : WriteRes<WriteBrReg, []> { let Unsupported = 1; } +def : WriteRes<WriteLD, []> { let Unsupported = 1; } +def : WriteRes<WriteST, []> { let Unsupported = 1; } +def : WriteRes<WriteSTP, []> { let Unsupported = 1; } +def : WriteRes<WriteAdr, []> { let Unsupported = 1; } +def : WriteRes<WriteLDIdx, []> { let Unsupported = 1; } +def : WriteRes<WriteSTIdx, []> { let Unsupported = 1; } +def : WriteRes<WriteF, []> { let Unsupported = 1; } +def : WriteRes<WriteFCmp, []> { let Unsupported = 1; } +def : WriteRes<WriteFCvt, []> { let Unsupported = 1; } +def : WriteRes<WriteFCopy, []> { let Unsupported = 1; } +def : WriteRes<WriteFImm, []> { let Unsupported = 1; } +def : WriteRes<WriteFMul, []> { let Unsupported = 1; } +def : WriteRes<WriteFDiv, []> { let Unsupported = 1; } +def : WriteRes<WriteV, []> { let Unsupported = 1; } +def : WriteRes<WriteVLD, []> { let Unsupported = 1; } +def : WriteRes<WriteVST, []> { let Unsupported = 1; } +def : WriteRes<WriteSys, []> { let Unsupported = 1; } +def : WriteRes<WriteBarrier, []> { let Unsupported = 1; } +def : WriteRes<WriteHint, []> { let Unsupported = 1; } +def : WriteRes<WriteLDHi, []> { let Unsupported = 1; } +def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } + +// These ReadAdvance entries are not used in the Falkor sched model. +def : ReadAdvance<ReadI, 0>; +def : ReadAdvance<ReadISReg, 0>; +def : ReadAdvance<ReadIEReg, 0>; +def : ReadAdvance<ReadIM, 0>; +def : ReadAdvance<ReadIMA, 0>; +def : ReadAdvance<ReadID, 0>; +def : ReadAdvance<ReadExtrHi, 0>; +def : ReadAdvance<ReadAdrBase, 0>; +def : ReadAdvance<ReadVLD, 0>; + +// Detailed Refinements +// ----------------------------------------------------------------------------- +include "AArch64SchedFalkorDetails.td" + +} diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedFalkorDetails.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedFalkorDetails.td new file mode 100644 index 000000000..ff14e639d --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedFalkorDetails.td @@ -0,0 +1,1292 @@ +//==- AArch64SchedFalkorDetails.td - Falkor Scheduling Defs -*- tablegen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the uop and latency details for the machine model for the +// Qualcomm Falkor subtarget. +// +//===----------------------------------------------------------------------===// + +// Contains all of the Falkor specific SchedWriteRes types. The approach +// below is to define a generic SchedWriteRes for every combination of +// latency and microOps. The naming conventions is to use a prefix, one field +// for latency, and one or more microOp count/type designators. +// Prefix: FalkorWr +// MicroOp Count/Types: #(B|X|Y|Z|LD|ST|SD|VX|VY|VSD) +// Latency: #cyc +// +// e.g. FalkorWr_1Z_6SD_4VX_6cyc means there are 11 micro-ops to be issued +// down one Z pipe, six SD pipes, four VX pipes and the total latency is +// six cycles. +// +// Contains all of the Falkor specific ReadAdvance types for forwarding logic. +// +// Contains all of the Falkor specific WriteVariant types for immediate zero +// and LSLFast. +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Define 0 micro-op types +def FalkorWr_LdInc_none_2cyc : SchedWriteRes<[]> { + let Latency = 2; + let NumMicroOps = 0; +} +def FalkorWr_StInc_none_2cyc : SchedWriteRes<[]> { + let Latency = 2; + let NumMicroOps = 0; +} +def FalkorWr_none_3cyc : SchedWriteRes<[]> { + let Latency = 3; + let NumMicroOps = 0; +} +def FalkorWr_none_4cyc : SchedWriteRes<[]> { + let Latency = 4; + let NumMicroOps = 0; +} + +//===----------------------------------------------------------------------===// +// Define 1 micro-op types + +def FalkorWr_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 2; } +def FalkorWr_IMUL32_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; } +def FalkorWr_IMUL64_1X_4cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; } +def FalkorWr_IMUL64_1X_5cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 5; } +def FalkorWr_1Z_0cyc : SchedWriteRes<[FalkorUnitZ]> { let Latency = 0; } +def FalkorWr_1ZB_0cyc : SchedWriteRes<[FalkorUnitZB]> { let Latency = 0; } +def FalkorWr_1LD_3cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 3; } +def FalkorWr_1LD_4cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 4; } +def FalkorWr_1XYZ_0cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 0; } +def FalkorWr_1XYZ_1cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 1; } +def FalkorWr_1XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 2; } +def FalkorWr_1XYZB_0cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 0; } +def FalkorWr_1XYZB_1cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 1; } +def FalkorWr_1none_0cyc : SchedWriteRes<[]> { let Latency = 0; } + +def FalkorWr_1VXVY_0cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 0; } +def FalkorWr_1VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 1; } +def FalkorWr_1VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 2; } +def FalkorWr_1VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 3; } +def FalkorWr_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; } +def FalkorWr_VMUL32_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; } +def FalkorWr_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; } +def FalkorWr_FMUL32_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; } +def FalkorWr_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; } +def 
FalkorWr_FMUL64_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; } + +def FalkorWr_1LD_0cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 0; } +def FalkorWr_1ST_0cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 0; } +def FalkorWr_1ST_3cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 3; } + +def FalkorWr_1GTOV_0cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 0; } +def FalkorWr_1GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 1; } +def FalkorWr_1GTOV_4cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 4; } +def FalkorWr_1VTOG_1cyc : SchedWriteRes<[FalkorUnitVTOG]>{ let Latency = 1; } + +//===----------------------------------------------------------------------===// +// Define 2 micro-op types + +def FalkorWr_2VXVY_0cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 0; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 1; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 2; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 3; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 2; +} +def FalkorWr_VMUL32_2VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 5; + let NumMicroOps = 2; +} +def FalkorWr_FMUL32_2VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 5; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 6; + let NumMicroOps = 2; +} +def FalkorWr_FMUL64_2VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def FalkorWr_1LD_1VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 2; +} +def FalkorWr_1XYZ_1LD_4cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> { + let Latency = 4; + let NumMicroOps = 2; +} +def FalkorWr_2LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_5cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 5; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_2cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_4cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 4; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_10cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 10; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_12cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 12; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_14cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 14; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_21cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 21; + let NumMicroOps = 2; +} + +def FalkorWr_1GTOV_1VXVY_2cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitVXVY]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def FalkorWr_2GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitGTOV]> { + let Latency = 1; + let NumMicroOps = 2; +} + +def FalkorWr_1XYZ_1ST_4cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST]> { + let 
Latency = 4; + let NumMicroOps = 2; +} +def FalkorWr_1XYZ_1LD_5cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> { + let Latency = 5; + let NumMicroOps = 2; +} + +def FalkorWr_2XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitXYZ]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def FalkorWr_1Z_1XY_0cyc : SchedWriteRes<[FalkorUnitZ, FalkorUnitXY]> { + let Latency = 0; + let NumMicroOps = 2; +} + +def FalkorWr_1X_1Z_8cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> { + let Latency = 8; + let NumMicroOps = 2; + let ResourceCycles = [2, 8]; +} + +def FalkorWr_1X_1Z_11cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> { + let Latency = 11; + let NumMicroOps = 2; + let ResourceCycles = [2, 11]; +} + +def FalkorWr_1LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitZ]> { + let Latency = 3; + let NumMicroOps = 2; +} + +def FalkorWr_1LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 2; +} + +def FalkorWr_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitSD, FalkorUnitST]> { + let Latency = 0; + let NumMicroOps = 2; +} + +def FalkorWr_1VSD_1ST_0cyc: SchedWriteRes<[FalkorUnitVSD, FalkorUnitST]> { + let Latency = 0; + let NumMicroOps = 2; +} + +//===----------------------------------------------------------------------===// +// Define 3 micro-op types + +def FalkorWr_1ST_1SD_1LD_0cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD, + FalkorUnitLD]> { + let Latency = 0; + let NumMicroOps = 3; +} + +def FalkorWr_1ST_1SD_1LD_3cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD, + FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 3; +} + +def FalkorWr_3VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 3; + let NumMicroOps = 3; +} + +def FalkorWr_3VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 3; +} + +def FalkorWr_3VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 5; + let NumMicroOps = 3; +} + +def FalkorWr_3VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 6; + let NumMicroOps = 3; +} + +def FalkorWr_1LD_2VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 3; +} + +def FalkorWr_2LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 3; +} + +def FalkorWr_3LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 3; +} + +def FalkorWr_2LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitZ]> { + let Latency = 3; + let NumMicroOps = 3; +} + +def FalkorWr_1XYZ_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitSD, FalkorUnitST]> { + let Latency = 0; + let NumMicroOps = 3; +} +def FalkorWr_1XYZ_1VSD_1ST_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitVSD, FalkorUnitST]> { + let Latency = 0; + let NumMicroOps = 3; +} +//===----------------------------------------------------------------------===// +// Define 4 micro-op types + +def FalkorWr_2VX_2VY_14cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY, + FalkorUnitVX, FalkorUnitVY]> { + let Latency = 14; + let NumMicroOps = 4; +} + +def FalkorWr_2VX_2VY_20cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY, + FalkorUnitVX, FalkorUnitVY]> { + let Latency = 20; + let NumMicroOps = 4; +} + +def FalkorWr_2VX_2VY_21cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY, + FalkorUnitVX, FalkorUnitVY]> { + let Latency = 21; + let NumMicroOps = 4; +} + +def FalkorWr_2VX_2VY_24cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY, + FalkorUnitVX, FalkorUnitVY]> { + 
let Latency = 24; + let NumMicroOps = 4; +} + +def FalkorWr_4VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 2; + let NumMicroOps = 4; +} +def FalkorWr_4VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 3; + let NumMicroOps = 4; +} +def FalkorWr_4VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 4; +} +def FalkorWr_4VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 6; + let NumMicroOps = 4; +} + +def FalkorWr_4LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitLD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 4; +} + +def FalkorWr_1LD_3VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 4; +} + +def FalkorWr_2LD_2none_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 4; +} + +def FalkorWr_2LD_1ST_1SD_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitST, + FalkorUnitSD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 4; +} + +def FalkorWr_2VSD_2ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 4; +} + +//===----------------------------------------------------------------------===// +// Define 5 micro-op types + +def FalkorWr_1LD_4VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 5; +} +def FalkorWr_2LD_2VXVY_1none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 5; +} +def FalkorWr_5VXVY_7cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY]> { + let Latency = 7; + let NumMicroOps = 5; +} +def FalkorWr_1XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST, + FalkorUnitVSD, FalkorUnitST, + FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 5; +} +def FalkorWr_1VXVY_2ST_2VSD_0cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST, + FalkorUnitVSD, FalkorUnitST, + FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 5; +} +//===----------------------------------------------------------------------===// +// Define 6 micro-op types + +def FalkorWr_2LD_2VXVY_2none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 6; +} + +def FalkorWr_2XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST, + FalkorUnitVSD, FalkorUnitXYZ, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 6; +} + +def FalkorWr_2VXVY_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST, + FalkorUnitVSD, FalkorUnitVXVY, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 6; +} + +def FalkorWr_3VSD_3ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 6; +} + +//===----------------------------------------------------------------------===// +// Define 8 micro-op types + +def FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitLD, FalkorUnitLD, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let 
NumMicroOps = 8; +} + +def FalkorWr_4VSD_4ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 8; +} + +//===----------------------------------------------------------------------===// +// Define 9 micro-op types + +def FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, + FalkorUnitLD, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitLD, + FalkorUnitLD, FalkorUnitXYZ, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 9; +} + +def FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, + FalkorUnitLD, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitXYZ, + FalkorUnitLD, FalkorUnitLD, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 9; +} + +//===----------------------------------------------------------------------===// +// Define 10 micro-op types + +def FalkorWr_2VXVY_4ST_4VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST, + FalkorUnitVSD, FalkorUnitVXVY, + FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 10; +} + +//===----------------------------------------------------------------------===// +// Define 12 micro-op types + +def FalkorWr_4VXVY_4ST_4VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST, + FalkorUnitVSD, FalkorUnitVXVY, + FalkorUnitST, FalkorUnitVSD, + FalkorUnitVXVY, FalkorUnitST, + FalkorUnitVSD, FalkorUnitVXVY, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 12; +} + +// Forwarding logic is modeled for multiply add/accumulate and +// load/store base register increment. +// ----------------------------------------------------------------------------- +def FalkorReadIMA32 : SchedReadAdvance<3, [FalkorWr_IMUL32_1X_2cyc]>; +def FalkorReadIMA64 : SchedReadAdvance<4, [FalkorWr_IMUL64_1X_4cyc, FalkorWr_IMUL64_1X_5cyc]>; +def FalkorReadVMA : SchedReadAdvance<3, [FalkorWr_VMUL32_1VXVY_4cyc, FalkorWr_VMUL32_2VXVY_4cyc]>; +def FalkorReadFMA32 : SchedReadAdvance<1, [FalkorWr_FMUL32_1VXVY_5cyc, FalkorWr_FMUL32_2VXVY_5cyc]>; +def FalkorReadFMA64 : SchedReadAdvance<2, [FalkorWr_FMUL64_1VXVY_6cyc, FalkorWr_FMUL64_2VXVY_6cyc]>; + +def FalkorReadIncLd : SchedReadAdvance<1, [FalkorWr_LdInc_none_2cyc]>; +def FalkorReadIncSt : SchedReadAdvance<1, [FalkorWr_StInc_none_2cyc]>; + +// SchedPredicates and WriteVariants for Immediate Zero and LSLFast/ASRFast +// ----------------------------------------------------------------------------- +def FalkorImmZPred : SchedPredicate<[{MI->getOperand(1).isImm() && + MI->getOperand(1).getImm() == 0}]>; +def FalkorOp1ZrReg : SchedPredicate<[{MI->getOperand(1).getReg() == AArch64::WZR || + + MI->getOperand(1).getReg() == AArch64::XZR}]>; +def FalkorShiftExtFastPred : SchedPredicate<[{TII->isFalkorShiftExtFast(*MI)}]>; + +def FalkorWr_FMOV : SchedWriteVariant<[ + SchedVar<FalkorOp1ZrReg, [FalkorWr_1none_0cyc]>, + SchedVar<NoSchedPred, [FalkorWr_1GTOV_1cyc]>]>; + +def FalkorWr_MOVZ : SchedWriteVariant<[ + SchedVar<FalkorImmZPred, [FalkorWr_1none_0cyc]>, + SchedVar<NoSchedPred, [FalkorWr_1XYZB_0cyc]>]>; // imm fwd + + +def FalkorWr_ADDSUBsx : SchedWriteVariant<[ + SchedVar<FalkorShiftExtFastPred, [FalkorWr_1XYZ_1cyc]>, + SchedVar<NoSchedPred, [FalkorWr_2XYZ_2cyc]>]>; + +def FalkorWr_LDRro : SchedWriteVariant<[ + SchedVar<FalkorShiftExtFastPred, [FalkorWr_1LD_3cyc]>, + SchedVar<NoSchedPred, [FalkorWr_1XYZ_1LD_4cyc]>]>; + +def FalkorWr_LDRSro : 
SchedWriteVariant<[ + SchedVar<FalkorShiftExtFastPred, [FalkorWr_1LD_4cyc]>, + SchedVar<NoSchedPred, [FalkorWr_1XYZ_1LD_5cyc]>]>; + +def FalkorWr_ORRi : SchedWriteVariant<[ + SchedVar<FalkorOp1ZrReg, [FalkorWr_1XYZ_0cyc]>, // imm fwd + SchedVar<NoSchedPred, [FalkorWr_1XYZ_1cyc]>]>; + +def FalkorWr_PRFMro : SchedWriteVariant<[ + SchedVar<FalkorShiftExtFastPred, [FalkorWr_1ST_3cyc]>, + SchedVar<NoSchedPred, [FalkorWr_1XYZ_1ST_4cyc]>]>; + +def FalkorWr_STRVro : SchedWriteVariant<[ + SchedVar<FalkorShiftExtFastPred, [FalkorWr_1VSD_1ST_0cyc]>, + SchedVar<NoSchedPred, [FalkorWr_1XYZ_1VSD_1ST_0cyc]>]>; + +def FalkorWr_STRQro : SchedWriteVariant<[ + SchedVar<FalkorShiftExtFastPred, [FalkorWr_1XYZ_2ST_2VSD_0cyc]>, + SchedVar<NoSchedPred, [FalkorWr_2XYZ_2ST_2VSD_0cyc]>]>; + +def FalkorWr_STRro : SchedWriteVariant<[ + SchedVar<FalkorShiftExtFastPred, [FalkorWr_1SD_1ST_0cyc]>, + SchedVar<NoSchedPred, [FalkorWr_1XYZ_1SD_1ST_0cyc]>]>; + +//===----------------------------------------------------------------------===// +// Specialize the coarse model by associating instruction groups with the +// subtarget-defined types. As the modeled is refined, this will override most +// of the earlier mappings. + +// Miscellaneous +// ----------------------------------------------------------------------------- + +// FIXME: This could be better modeled by looking at the regclasses of the operands. +def : InstRW<[FalkorWr_1XYZ_1cyc], (instrs COPY)>; + +// SIMD Floating-point Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)v2f32$")>; + +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT))(v2f32|v2i32p)$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FAC(GE|GT)(32|64)$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|GE|GT)(32|64|v2f32|v2i32)$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v1i32|v1i64|v2i32)rz$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)v2f32$")>; + +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^F(MAX|MIN)(NM)?Vv4i32v$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FABD|FADD|FSUB)v2f32$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FADDP(v2i32p|v2i64p|v2f32)$")>; + +def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v1i32|v1i64|v2f32)$")>; +def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs FCVTXNv1i64)>; +def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVTZ(S|U)v2i32(_shift)?$")>; + +def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], + (instregex "^(FMUL|FMULX)(v2f32|(v1i32_indexed|v2i32_indexed))$")>; +def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], + (instrs FMULX32)>; + +def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], + (instregex "^(FMUL|FMULX)v1i64_indexed$")>; +def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], + (instrs FMULX64)>; + +def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(FABS|FNEG)(v2f64|v4f32)$")>; + +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2f64|v4f32|v2i64p)$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v2i64|v4i32)rz$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instrs FCVTLv4i16, FCVTLv2i32)>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32)$")>; + +def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instrs FDIVv2f32)>; +def : InstRW<[FalkorWr_1VX_1VY_12cyc],(instrs FSQRTv2f32)>; + +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex 
"^(FABD|FADD(P)?|FSUB)(v2f64|v4f32)$")>; + +def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v2f64|v4f32)$")>; +def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs FCVTLv8i16, FCVTLv4i32)>; +def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVTZ(S|U)(v2i64|v4i32)(_shift)?$")>; + +def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc], + (instregex "^(FMUL|FMULX)(v2f64|v4f32|v4i32_indexed)$")>; + +def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc], + (instregex "^(FMUL|FMULX)v2i64_indexed$")>; + +def : InstRW<[FalkorWr_3VXVY_4cyc], (instrs FCVTNv4i16, FCVTNv2i32, FCVTXNv2f32)>; +def : InstRW<[FalkorWr_3VXVY_5cyc], (instrs FCVTNv8i16, FCVTNv4i32, FCVTXNv4f32)>; + +def : InstRW<[FalkorWr_2VX_2VY_14cyc],(instrs FDIVv2f64)>; +def : InstRW<[FalkorWr_2VX_2VY_20cyc],(instrs FDIVv4f32)>; +def : InstRW<[FalkorWr_2VX_2VY_21cyc],(instrs FSQRTv2f64)>; +def : InstRW<[FalkorWr_2VX_2VY_24cyc],(instrs FSQRTv4f32)>; + +def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], + (instregex "^ML(A|S)(v8i8|v4i16|v2i32)(_indexed)?$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], + (instregex "^ML(A|S)(v16i8|v8i16|v4i32|v2i64)(_indexed)?$")>; + +def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, FalkorReadFMA32], + (instregex "^FML(A|S)(v2f32|(v1i32_indexed|v2i32_indexed))$")>; +def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, FalkorReadFMA64], + (instregex "^FML(A|S)v1i64_indexed$")>; +def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc, FalkorReadFMA32], + (instregex "^FML(A|S)(v4f32|v4i32_indexed)$")>; +def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc, FalkorReadFMA64], + (instregex "^FML(A|S)(v2f64|v2i64_indexed)$")>; + +// SIMD Integer Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^ADD(v1i64|v2i32|v4i16|v8i8)$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs ADDPv2i64p)>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(AND|ORR|ORN|BIC|EOR)v8i8$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIC|ORR)(v2i32|v4i16)$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^NEG(v1i64|v2i32|v4i16|v8i8)$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^SUB(v1i64|v2i32|v4i16|v8i8)$")>; + +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v2i32|v4i16|v8i8)(_v.*)?$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHLv1i64$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHR(v2i32|v4i16|v8i8)_shift$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHRd$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^((S|U)?(MAX|MIN)P?|ABS|ADDP|CM(EQ|GE|HS|GT|HI))(v1i64|v2i32|v4i16|v8i8)$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CM(EQ|GE|HS|GT|HI)(v1i64|v2i32|v4i16|v8i8)$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CM(EQ|LE|GE|GT|LT)(v1i64|v2i32|v4i16|v8i8)rz$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CMTST(v1i64|v2i32|v4i16|v8i8)$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs PMULv8i8)>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^SHL(v2i32|v4i16|v8i8)_shift$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^SHLd$")>; + +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQNEG(v2i32|v4i16|v8i8)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)R?SRA(d|(v2i32|v4i16|v8i8)_shift)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)(ABD|ADALP)(v8i8|v4i16|v2i32)(_v.*)?$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)ADDLVv4i16v$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex 
"^(S|U)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)QSHLU?(d|s|h|b|(v8i8|v4i16|v2i32)_shift)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)(QSHL|RSHL|QRSHL)(v1i8|v1i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN)(s|h|b)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)QSUB(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RHADD(v2i32|v4i16|v8i8)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RSHR(v2i32|v4i16|v8i8)_shift$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RSHRd$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^R?SHRN(v2i32|v4i16|v8i8)_shift$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(SU|US)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)?(MAX|MIN)V(v4i16v|v4i32v)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs ADDVv4i16v)>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^S(L|R)I(d|(v8i8|v4i16|v2i32)_shift)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQABS(v1i8|v1i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQNEG(v1i8|v1i16|v1i32|v1i64)$")>; + +def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)ADDLVv8i8v$")>; +def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)?(MAX|MIN)V(v8i8v|v8i16v)$")>; +def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs ADDVv8i8v)>; +def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc], + (instregex "^MUL(v2i32|v4i16|v8i8)(_indexed)?$")>; +def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc], + (instregex "^SQR?DMULH(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>; +def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc], + (instregex "^SQDMULL(i16|i32)$")>; +def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], + (instregex "^SQRDML(A|S)H(i16|i32|v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>; + +def : InstRW<[FalkorWr_1VXVY_5cyc], (instregex "^(S|U)?(MAX|MIN)Vv16i8v$")>; + +def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs ADDVv4i32v)>; + +def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs ADDVv8i16v)>; +def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(ADD|SUB)HNv.*$")>; +def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)ABA(v2i32|v4i16|v8i8)$")>; + +def : InstRW<[FalkorWr_2VXVY_5cyc], (instrs ADDVv16i8v)>; + +def : InstRW<[FalkorWr_2VXVY_6cyc], (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32)_shift?$")>; +def : InstRW<[FalkorWr_2VXVY_6cyc], (instregex "^R(ADD|SUB)HNv.*$")>; + +def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^ADD(v16i8|v8i16|v4i32|v2i64)$")>; +def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs ADDPv2i64)>; // sz==11 +def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(AND|ORR|ORN|BIC|EOR)v16i8$")>; +def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(BIC|ORR)(v8i16|v4i32)$")>; +def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(NEG|SUB)(v16i8|v8i16|v4i32|v2i64)$")>; + +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)ADDLv.*$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v16i8|v2i64|v4i32|v8i16)(_v.*)?$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)SHR(v16i8|v8i16|v4i32|v2i64)_shift$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)SUBLv.*$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex 
"^((S|U)?(MAX|MIN)P?|ABS)(v16i8|v2i64|v4i32|v8i16)$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^ADDP(v4i32|v8i16|v16i8)$")>; // sz!=11 +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^CM(EQ|GE|HS|GT|HI)(v16i8|v2i64|v4i32|v8i16)$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^CM(EQ|LE|GE|GT|LT)(v16i8|v2i64|v4i32|v8i16)rz$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(CMTST|PMUL)(v16i8|v2i64|v4i32|v8i16)$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^PMULL(v8i8|v16i8)$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^SHL(v16i8|v8i16|v4i32|v2i64)_shift$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>; + +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)R?SRA(v2i64|v4i32|v8i16|v16i8)_shift$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)ABD(v16i8|v8i16|v4i32|v2i64)$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)ABDLv.*$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)(ADALP|QADD)(v16i8|v8i16|v4i32|v2i64)(_v.*)?$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)QSHLU?(v2i64|v4i32|v8i16|v16i8)_shift$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)(QSHL|RSHL|QRSHL|QSUB|RHADD)(v16i8|v8i16|v4i32|v2i64)$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)RSHR(v2i64|v4i32|v8i16|v16i8)_shift$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^R?SHRN(v2i64|v4i32|v8i16|v16i8)_shift$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(SU|US)QADD(v16i8|v8i16|v4i32|v2i64)$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^PMULL(v1i64|v2i64)$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^S(L|R)I(v16i8|v8i16|v4i32|v2i64)_shift$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^SQ(ABS|NEG)(v16i8|v8i16|v4i32|v2i64)$")>; + +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], + (instregex "^(MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], + (instregex "^SQDMULLv.*$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], + (instregex "^SQRDML(A|S)H(v16i8|v8i16|v4i32)(_indexed)?$")>; + +def : InstRW<[FalkorWr_3VXVY_3cyc], (instregex "^(S|U)ADDLVv4i32v$")>; + +def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^(S|U)ADDLVv8i16v$")>; + +def : InstRW<[FalkorWr_3VXVY_6cyc], (instregex "^(S|U)ADDLVv16i8v$")>; + +def : InstRW<[FalkorWr_4VXVY_2cyc], (instregex "^(S|U)(ADD|SUB)Wv.*$")>; + +def : InstRW<[FalkorWr_4VXVY_3cyc], (instregex "^(S|U)ABALv.*$")>; + +def : InstRW<[FalkorWr_4VXVY_4cyc], (instregex "^(S|U)ABA(v16i8|v8i16|v4i32)$")>; + +def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], + (instregex "^SQD(MLAL|MLSL)(i16|i32|v1i32_indexed|v1i64_indexed)$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], + (instregex "^SQD(MLAL|MLSL)v[248].*$")>; + +// SIMD Load Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))_POST$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], (instrs LD2i64)>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, 
FalkorReadIncLd], + (instrs LD2i64_POST)>; + +def : InstRW<[FalkorWr_1LD_1VXVY_4cyc, FalkorReadIncLd], (instregex "^LD1i(8|16|32)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD1i(8|16|32)_POST$")>; + +def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], + (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD2Twov(8b|4h|2s)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], + (instregex "^LD2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD2Rv(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], + (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>; + +def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instregex "^LD1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd], + (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instregex "^LD2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd], + (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instregex "^LD2Rv(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd], + (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instrs LD3i64)>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd], + (instrs LD3i64_POST)>; +def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instrs LD4i64)>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd], + (instrs LD4i64_POST)>; + +def : InstRW<[FalkorWr_1LD_2VXVY_4cyc, FalkorReadIncLd], (instregex "^LD2i(8|16|32)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_2VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD2i(8|16|32)_POST$")>; + +def : InstRW<[FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], + (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD3Rv(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], + (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>; + +def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd], (instregex "^LD1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd], + (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd], (instrs LD3Threev2d)>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd], + (instrs LD3Threev2d_POST)>; +def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd], (instregex "^LD3Rv(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd], + (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[FalkorWr_1LD_3VXVY_4cyc, FalkorReadIncLd], (instregex "^LD3i(8|16|32)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD3i(8|16|32)_POST$")>; + +def : InstRW<[FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>; +def : 
InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], + (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], (instregex "^LD4Rv(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], + (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>; + +def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd], + (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd], (instrs LD4Fourv2d)>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd], + (instrs LD4Fourv2d_POST)>; +def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd], (instregex "^LD4Rv(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd], + (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[FalkorWr_1LD_4VXVY_4cyc, FalkorReadIncLd], (instregex "^LD4i(8|16|32)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_4VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD4i(8|16|32)_POST$")>; + +def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc, FalkorReadIncLd], + (instregex "^LD3Threev(8b|4h|2s)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_1none_4cyc, FalkorReadIncLd], + (instregex "^LD3Threev(8b|4h|2s)_POST$")>; + +def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc, FalkorReadIncLd], + (instregex "^LD4Fourv(8b|4h|2s)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_2none_4cyc, FalkorReadIncLd], + (instregex "^LD4Fourv(8b|4h|2s)_POST$")>; + +def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD3Threev(16b|8h|4s)$")>; + +def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD4Fourv(16b|8h|4s)$")>; + +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD3Threev(16b|8h|4s)_POST$")>; + +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc, FalkorReadIncLd], + (instregex "^LD4Fourv(16b|8h|4s)_POST$")>; + +// Arithmetic and Logical Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(CCMN|CCMP)(W|X)(r|i)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ADC(S)?(W|X)r$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ADD(S)?(W|X)r(r|i)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(CSEL|CSINC|CSINV|CSNEG)(W|X)r$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^AND(S)?(W|X)r(i|r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^BIC(S)?(W|X)r(r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EON(W|X)r(r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EOR(W|X)r(i|r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORN(W|X)r(r|s)$")>; +def : InstRW<[FalkorWr_ORRi], (instregex "^ORR(W|X)ri$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORR(W|X)r(r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^SBC(S)?(W|X)r$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^SUB(S)?(W|X)r(r|i)$")>; +def : InstRW<[FalkorWr_ADDSUBsx], (instregex "^ADD(S)?(W|X)r(s|x|x64)$")>; +def : InstRW<[FalkorWr_ADDSUBsx], (instregex "^SUB(S)?(W|X)r(s|x|x64)$")>; + +// SIMD Miscellaneous Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex 
"^DUP(v8i8|v4i16|v2i32)(gpr|lane)$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^DUP(v16i8|v8i16)(gpr|lane)$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^CPY(i8|i16|i32|i64)$")>; +def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^INSv(i8|i16)(gpr|lane)$")>; +def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^(S|U)MOVv.*$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v8i8$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs EXTv8i8)>; +def : InstRW<[FalkorWr_1VXVY_0cyc], (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)$")>; // imm fwd +def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs TBLv8i8One)>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs NOTv8i8)>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^REV(16|32|64)v.*$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(TRN1|TRN2|ZIP1|UZP1|UZP2|ZIP2|XTN)(v2i32|v2i64|v4i16|v4i32|v8i8|v8i16|v16i8)$")>; + +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(CLS|CLZ|CNT|RBIT)(v2i32|v4i16|v8i8)$")>; + +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "(S|U)QXTU?Nv.*$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FRECPEv1i32, FRECPEv1i64, FRSQRTEv1i32, FRSQRTEv1i64, FRECPEv2f32, FRSQRTEv2f32)>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FRECPXv1i32, FRECPXv1i64)>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs URECPEv2i32, URSQRTEv2i32)>; + +def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], + (instrs FRECPS32, FRSQRTS32, FRECPSv2f32, FRSQRTSv2f32)>; + +def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], + (instrs FRECPS64, FRSQRTS64)>; + +def : InstRW<[FalkorWr_1GTOV_1VXVY_2cyc], + (instregex "^INSv(i32|i64)(gpr|lane)$")>; +def : InstRW<[FalkorWr_2GTOV_1cyc], (instregex "^DUP(v4i32|v2i64)(gpr|lane)$")>; +def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v16i8$")>; +def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs EXTv16i8)>; +def : InstRW<[FalkorWr_2VXVY_0cyc], (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>; // imm fwd +def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs NOTv16i8)>; +def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs TBLv16i8One)>; + +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(CLS|CLZ|CNT|RBIT)(v4i32|v8i16|v16i8)$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs FRECPEv2f64, FRECPEv4f32, FRSQRTEv2f64, FRSQRTEv4f32)>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs URECPEv4i32, URSQRTEv4i32)>; + +def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs TBLv8i8Two)>; +def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^TBX(v8|v16)i8One$")>; + +def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc], + (instrs FRECPSv4f32, FRSQRTSv4f32)>; + +def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc], + (instrs FRECPSv2f64, FRSQRTSv2f64)>; + +def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^TBL(v8i8Three|v16i8Two)$")>; +def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^TBX(v8i8Two|v16i8Two)$")>; + +def : InstRW<[FalkorWr_4VXVY_6cyc], (instregex "^TBL(v8i8Four|v16i8Three)$")>; +def : InstRW<[FalkorWr_4VXVY_6cyc], (instregex "^TBX(v8i8Three|v16i8Three)$")>; + +def : InstRW<[FalkorWr_5VXVY_7cyc], (instrs TBLv16i8Four)>; +def : InstRW<[FalkorWr_5VXVY_7cyc], (instregex "^TBX(v8i8Four|v16i8Four)$")>; + +// SIMD Store Instructions +// ----------------------------------------------------------------------------- + +def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^STR(Q|D|S|H|B)ui$")>; +def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^STR(Q|D|S|H|B)(post|pre)$")>; +def : InstRW<[FalkorWr_STRVro, ReadDefault, FalkorReadIncSt], + (instregex "^STR(D|S|H|B)ro(W|X)$")>; 
+def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STPQi$")>; +def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STPQ(post|pre)$")>; +def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STP(D|S)(i)$")>; +def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STP(D|S)(post|pre)$")>; +def : InstRW<[FalkorWr_STRQro, ReadDefault, FalkorReadIncSt], + (instregex "^STRQro(W|X)$")>; +def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^STUR(Q|D|S|B|H)i$")>; +def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instrs STNPDi, STNPSi)>; +def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instrs STNPQi)>; + +def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1(One(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64)|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>; +def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1(One(v8b|v4h|v2s|v1d)_POST|(i8|i16|i32|i64)_POST)$")>; +def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST2(Two(v8b|v4h|v2s)|(i8|i16|i32|i64))$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST2(Two(v8b|v4h|v2s)|(i8|i16|i32|i64))_POST$")>; + +def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>; +def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST2Two(v16b|v8h|v4s|v2d)$")>; +def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST3(i8|i16|i32|i64)$")>; +def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST4(i8|i16|i32|i64)$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))_POST$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST2Two(v16b|v8h|v4s|v2d)_POST$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST3(i8|i16|i32|i64)_POST$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST4(i8|i16|i32|i64)_POST$")>; + +def : InstRW<[FalkorWr_1VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST3Three(v8b|v4h|v2s)$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). 
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST3Three(v8b|v4h|v2s)_POST$")>; + +def : InstRW<[FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1Three(v16b|v8h|v4s|v2d)$")>; +def : InstRW<[FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt], + (instrs ST3Threev2d)>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1Three(v16b|v8h|v4s|v2d)_POST$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt], + (instrs ST3Threev2d_POST)>; + +def : InstRW<[FalkorWr_2VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST4Four(v8b|v4h|v2s)$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST4Four(v8b|v4h|v2s)_POST$")>; + +def : InstRW<[FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1Four(v16b|v8h|v4s|v2d)$")>; +def : InstRW<[FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt], + (instrs ST4Fourv2d)>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST1Four(v16b|v8h|v4s|v2d)_POST$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt], + (instrs ST4Fourv2d_POST)>; + +def : InstRW<[FalkorWr_2VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST3Three(v16b|v8h|v4s)$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST3Three(v16b|v8h|v4s)_POST$")>; + +def : InstRW<[FalkorWr_4VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST4Four(v16b|v8h|v4s)$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). 
+def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^ST4Four(v16b|v8h|v4s)_POST$")>; + +// Branch Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_1none_0cyc], (instrs B, TCRETURNdi)>; +def : InstRW<[FalkorWr_1Z_0cyc], (instregex "^(BR|RET|(CBZ|CBNZ|TBZ|TBNZ)(W|X))$")>; +def : InstRW<[FalkorWr_1Z_0cyc], (instrs RET_ReallyLR, TCRETURNri)>; +def : InstRW<[FalkorWr_1ZB_0cyc], (instrs Bcc)>; +def : InstRW<[FalkorWr_1XYZB_0cyc], (instrs BL)>; +def : InstRW<[FalkorWr_1Z_1XY_0cyc], (instrs BLR)>; + +// Cryptography Extensions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs SHA1Hrr)>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs AESIMCrr, AESMCrr)>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs AESDrr, AESErr)>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instrs SHA1SU0rrr, SHA1SU1rr, SHA256SU0rr)>; +def : InstRW<[FalkorWr_1VX_1VY_4cyc], (instregex "^SHA1(C|M|P)rrr$")>; +def : InstRW<[FalkorWr_1VX_1VY_5cyc], (instrs SHA256H2rrr, SHA256Hrrr)>; +def : InstRW<[FalkorWr_4VXVY_3cyc], (instrs SHA256SU1rrr)>; + +// FP Load Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LDR((Q|D|S|H|B)ui|(Q|D|S)l)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LDR(Q|D|S|H|B)(post|pre)$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LDUR(Q|D|S|H|B)i$")>; +def : InstRW<[FalkorWr_LDRro, FalkorReadIncLd], + (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>; +def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instrs LDNPQi)>; +def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instrs LDPQi)>; +def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instregex "LDNP(D|S)i$")>; +def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instregex "LDP(D|S)i$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instregex "LDP(D|S)(pre|post)$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instregex "^LDPQ(pre|post)$")>; + +// FP Data Processing Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCCMP(E)?(S|D)rr$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCMP(E)?(S|D)r(r|i)$")>; +def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVT(A|M|N|P|Z)(S|U)U(W|X)(S|D)r$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)(S|D)r$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCSEL(S|D)rrr$")>; + +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?(S|D)rr$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?Pv2i(32|64)p$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs FCVTSHr, FCVTDHr)>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(S|D)r$")>; + +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FABD(32|64)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FADD|FSUB)(S|D)rr$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FCVTHSr, FCVTHDr)>; + +def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs FCVTSDr, FCVTDSr)>; + +def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], + (instregex "^F(N)?MULSrr$")>; 
+ +def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], + (instregex "^F(N)?MULDrr$")>; + +def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instrs FDIVSrr)>; +def : InstRW<[FalkorWr_1VX_1VY_14cyc],(instrs FDIVDrr)>; +def : InstRW<[FalkorWr_1VX_1VY_12cyc],(instrs FSQRTSr)>; +def : InstRW<[FalkorWr_1VX_1VY_21cyc],(instrs FSQRTDr)>; + +def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, ReadDefault, ReadDefault, FalkorReadFMA32], + (instregex "^F(N)?M(ADD|SUB)Srrr$")>; +def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, ReadDefault, ReadDefault, FalkorReadFMA64], + (instregex "^F(N)?M(ADD|SUB)Drrr$")>; + +// FP Miscellaneous Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_FMOV], (instregex "^FMOV(WS|XD|XDHigh)r$")>; +def : InstRW<[FalkorWr_1GTOV_0cyc], (instregex "^FMOV(S|D)i$")>; // imm fwd +def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)S(W|X)(D|S)ri$")>; +def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)(d|s)$")>; +def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FMOV(SW|DX|DXHigh)r$")>; +def : InstRW<[FalkorWr_1VXVY_0cyc], (instregex "^FMOV(Sr|Dr|v.*_ns)$")>; // imm fwd +// FIXME: We are currently generating movi v0.2d, #0 for these, which is worse than fmov wzr/xzr +def : InstRW<[FalkorWr_2VXVY_0cyc], (instrs FMOVD0, FMOVS0)>; // imm fwd + +def : InstRW<[FalkorWr_1GTOV_4cyc], (instregex "^(S|U)CVTF(S|U)(W|X)(D|S)ri$")>; +def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)CVTF(v1i32|v2i32|v1i64|v2f32|d|s)(_shift)?")>; + +def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)CVTF(v2i64|v4i32|v2f64|v4f32)(_shift)?")>; + +// Load Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFMui, PRFMl)>; +def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFUMi)>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instregex "^LDNP(W|X)i$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instregex "^LDP(W|X)i$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instregex "^LDP(W|X)(post|pre)$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LDR(BB|HH|W|X)ui$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LDR(BB|HH|W|X)(post|pre)$")>; +def : InstRW<[FalkorWr_LDRro, FalkorReadIncLd], + (instregex "^LDR(BB|HH|W|X)ro(W|X)$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LDR(W|X)l$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LDTR(B|H|W|X)i$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^LDUR(BB|HH|W|X)i$")>; +def : InstRW<[FalkorWr_PRFMro], (instregex "^PRFMro(W|X)$")>; +def : InstRW<[FalkorWr_1LD_4cyc, FalkorWr_none_4cyc, FalkorReadIncLd], + (instrs LDPSWi)>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_4cyc, FalkorWr_none_4cyc, FalkorReadIncLd], + (instregex "^LDPSW(post|pre)$")>; +def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd], + (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>; +def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_4cyc, FalkorReadIncLd], + (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>; +def : InstRW<[FalkorWr_LDRSro, FalkorReadIncLd], + (instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>; +def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd], + (instrs LDRSWl)>; +def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd], + (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>; +def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd], + 
(instregex "^LDURS(BW|BX|HW|HX|W)i$")>; + +// Miscellaneous Data-Processing Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(S|U)?BFM(W|X)ri$")>; +def : InstRW<[FalkorWr_1X_2cyc], (instregex "^CRC32.*$")>; +def : InstRW<[FalkorWr_1XYZ_2cyc], (instregex "^(CLS|CLZ|RBIT|REV|REV16|REV32)(W|X)r$")>; +def : InstRW<[FalkorWr_2XYZ_2cyc], (instregex "^EXTR(W|X)rri$")>; + +// Divide and Multiply Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_IMUL64_1X_4cyc, ReadDefault, ReadDefault, FalkorReadIMA64], + (instregex "^(S|U)M(ADD|SUB)Lrrr$")>; +def : InstRW<[FalkorWr_IMUL32_1X_2cyc, ReadDefault, ReadDefault, FalkorReadIMA32], + (instregex "^M(ADD|SUB)Wrrr$")>; + +def : InstRW<[FalkorWr_IMUL64_1X_5cyc], (instregex "^(S|U)MULHrr$")>; +def : InstRW<[FalkorWr_IMUL64_1X_5cyc, ReadDefault, ReadDefault, FalkorReadIMA64], + (instregex "^M(ADD|SUB)Xrrr$")>; + +def : InstRW<[FalkorWr_1X_1Z_8cyc], (instregex "^(S|U)DIVWr$")>; +def : InstRW<[FalkorWr_1X_1Z_11cyc], (instregex "^(S|U)DIVXr$")>; + +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], + (instregex "^(S|U)MULLv.*$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], + (instregex "^(S|U)(MLAL|MLSL)v.*$")>; + +// Move and Shift Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(LSLV|LSRV|ASRV|RORV)(W|X)r$")>; +def : InstRW<[FalkorWr_1XYZ_0cyc], (instregex "^MOVK(W|X)i$")>; // imm fwd +def : InstRW<[FalkorWr_1XYZB_0cyc], (instregex "^ADRP?$")>; // imm fwd +def : InstRW<[FalkorWr_1XYZB_0cyc], (instregex "^MOVN(W|X)i$")>; // imm fwd +def : InstRW<[FalkorWr_MOVZ], (instregex "^MOVZ(W|X)i$")>; +def : InstRW<[FalkorWr_1XYZ_0cyc], (instrs MOVi32imm, MOVi64imm)>; // imm fwd (approximation) +def : InstRW<[WriteSequence<[FalkorWr_1XYZ_1cyc, FalkorWr_1XYZ_1cyc]>], + (instrs MOVaddr, MOVaddrBA, MOVaddrCP, MOVaddrEXT, MOVaddrJT, MOVaddrTLS)>; +def : InstRW<[WriteSequence<[FalkorWr_1LD_3cyc, FalkorWr_1XYZ_1cyc]>], + (instrs LOADgot)>; + +// Other Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_1LD_0cyc], (instrs CLREX, DMB, DSB)>; +def : InstRW<[FalkorWr_1none_0cyc], (instrs BRK, DCPS1, DCPS2, DCPS3, HINT, HLT, HVC, ISB, SMC, SVC)>; +def : InstRW<[FalkorWr_1ST_0cyc], (instrs SYSxt, SYSLxt)>; +def : InstRW<[FalkorWr_1Z_0cyc], (instrs MSRpstateImm1, MSRpstateImm4)>; + +def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], + (instregex "^(LDAR(B|H|W|X)|LDAXR(B|H|W|X)|LDXR(B|H|W|X))$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd], + (instregex "^(LDAXP(W|X)|LDXP(W|X))$")>; +def : InstRW<[FalkorWr_1LD_3cyc], (instrs MRS, MOVbaseTLS)>; + +def : InstRW<[FalkorWr_1LD_1Z_3cyc], (instrs DRPS)>; + +def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs MSR)>; +def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instrs STNPWi, STNPXi)>; +def : InstRW<[FalkorWr_2LD_1Z_3cyc], (instrs ERET)>; + +def : InstRW<[FalkorWr_1ST_1SD_1LD_3cyc], (instregex "^LDC.*$")>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^STLR(B|H|W|X)$")>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STXP(W|X)$")>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STXR(B|H|W|X)$")>; + +def : 
InstRW<[FalkorWr_2LD_1ST_1SD_3cyc, ReadDefault, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STLXP(W|X)$")>; +def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STLXR(B|H|W|X)$")>; + +// Store Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STP(W|X)i$")>; +def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt], + (instregex "^STP(W|X)(post|pre)$")>; +def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^STR(BB|HH|W|X)ui$")>; +def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^STR(BB|HH|W|X)(post|pre)$")>; +def : InstRW<[FalkorWr_STRro, ReadDefault, FalkorReadIncSt], + (instregex "^STR(BB|HH|W|X)ro(W|X)$")>; +def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^STTR(B|H|W|X)i$")>; +def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt], + (instregex "^STUR(BB|HH|W|X)i$")>; + diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedKryo.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedKryo.td new file mode 100644 index 000000000..68de3e077 --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedKryo.td @@ -0,0 +1,138 @@ +//==- AArch64SchedKryo.td - Qualcomm Kryo Scheduling Defs ---*- tablegen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for Qualcomm Kryo to support +// instruction scheduling and other instruction cost heuristics. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// The issue width is set to five, matching the five issue queues for expanded +// uops. Now, the latency spreadsheet has information based on fragmented uops, +// but these do not actually take up an issue queue. + +def KryoModel : SchedMachineModel { + let IssueWidth = 5; // 5-wide issue for expanded uops + let MicroOpBufferSize = 128; // Out-of-order with temporary unified issue buffer + let LoadLatency = 4; // Optimistic load latency + let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch + + // Enable partial & runtime unrolling. The magic number is chosen based on + // experiments and benchmarking data. + let LoopMicroOpBufferSize = 16; + let CompleteModel = 1; + + list<Predicate> UnsupportedFeatures = [HasSVE]; + + // FIXME: Remove when all errors have been fixed. + let FullInstRWOverlapCheck = 0; +} + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available on Kryo. 
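(Reader's note, not part of the diff: each WriteRes entry later in this file reserves one or more of the Kryo units defined just below and states a result latency. For instance, the existing mapping

    def : WriteRes<WriteIM32, [KryoUnitX]> { let Latency = 5; }

says a 32-bit multiply occupies one X unit and its result is ready after 5 cycles, while grouped resources such as KryoUnitXY let a micro-op issue on any member unit of the group.)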
+ +let SchedModel = KryoModel in { + def KryoUnitXA : ProcResource<1>; // Type X(A) micro-ops + def KryoUnitXB : ProcResource<1>; // Type X(B) micro-ops + def KryoUnitYA : ProcResource<1>; // Type Y(A) micro-ops + def KryoUnitYB : ProcResource<1>; // Type Y(B) micro-ops + def KryoUnitX : ProcResGroup<[KryoUnitXA, // Type X micro-ops + KryoUnitXB]>; + def KryoUnitY : ProcResGroup<[KryoUnitYA, // Type Y micro-ops + KryoUnitYB]>; + def KryoUnitXY : ProcResGroup<[KryoUnitXA, // Type XY micro-ops + KryoUnitXB, + KryoUnitYA, + KryoUnitYB]>; + def KryoUnitLSA : ProcResource<1>; // Type LS(A) micro-ops + def KryoUnitLSB : ProcResource<1>; // Type LS(B) micro-ops + def KryoUnitLS : ProcResGroup<[KryoUnitLSA, // Type LS micro-ops + KryoUnitLSB]>; +} + +let SchedModel = KryoModel in { + +//===----------------------------------------------------------------------===// +// Map the target-defined scheduler read/write resources and latency for +// Kryo. + +def : WriteRes<WriteImm, [KryoUnitXY]> { let Latency = 1; } +def : WriteRes<WriteI, [KryoUnitXY]> { let Latency = 1; } +def : WriteRes<WriteISReg, [KryoUnitXY, KryoUnitXY]> + { let Latency = 2; let NumMicroOps = 2; } +def : WriteRes<WriteIEReg, [KryoUnitXY, KryoUnitXY]> + { let Latency = 2; let NumMicroOps = 2; } +def : WriteRes<WriteExtr, [KryoUnitXY, KryoUnitX]> + { let Latency = 2; let NumMicroOps = 2; } +def : WriteRes<WriteIS, [KryoUnitXY]> { let Latency = 2; } +def : WriteRes<WriteID32, [KryoUnitXA, KryoUnitY]> + { let Latency = 8; let NumMicroOps = 1; } // Fragment -1 +def : WriteRes<WriteID64, [KryoUnitXA, KryoUnitY]> + { let Latency = 8; let NumMicroOps = 1; } // Fragment -1 +def : WriteRes<WriteIM32, [KryoUnitX]> { let Latency = 5; } +def : WriteRes<WriteIM64, [KryoUnitX]> { let Latency = 5; } +def : WriteRes<WriteBr, [KryoUnitXY]> { let Latency = 1; } +def : WriteRes<WriteBrReg, [KryoUnitXY]> { let Latency = 1; } +def : WriteRes<WriteLD, [KryoUnitLS]> { let Latency = 4; } +def : WriteRes<WriteST, [KryoUnitLS]> { let Latency = 4; } +def : WriteRes<WriteSTP, [KryoUnitLS]> { let Latency = 4; } +def : WriteRes<WriteAdr, [KryoUnitXY]> { let Latency = 6; } +def : WriteRes<WriteLDIdx, [KryoUnitLS]> { let Latency = 4; } +def : WriteRes<WriteSTIdx, [KryoUnitLS]> { let Latency = 4; } +def : WriteRes<WriteF, [KryoUnitXY, KryoUnitXY]> + { let Latency = 3; let NumMicroOps = 2; } +def : WriteRes<WriteFCmp, [KryoUnitXY]> { let Latency = 2; } +def : WriteRes<WriteFCvt, [KryoUnitX]> { let Latency = 4; } +def : WriteRes<WriteFCopy, [KryoUnitXY]> { let Latency = 6; } +def : WriteRes<WriteFImm, [KryoUnitXY]> { let Latency = 6; } +def : WriteRes<WriteFMul, [KryoUnitX, KryoUnitX]> + { let Latency = 6; let NumMicroOps = 2; } +def : WriteRes<WriteFDiv, [KryoUnitXA, KryoUnitY]> + { let Latency = 12; let NumMicroOps = 2; } // Fragment -1 / NoRSV +1 +def : WriteRes<WriteV, [KryoUnitXY]> { let Latency = 6; } +def : WriteRes<WriteVLD, [KryoUnitLS]> { let Latency = 4; } +def : WriteRes<WriteVST, [KryoUnitLS]> { let Latency = 4; } + +def : WriteRes<WriteSys, []> { let Latency = 1; } +def : WriteRes<WriteBarrier, []> { let Latency = 1; } +def : WriteRes<WriteHint, []> { let Latency = 1; } + +def : WriteRes<WriteLDHi, []> { let Latency = 4; } + +def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } + +// No forwarding logic is modelled yet.
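(Illustrative sketch, not part of the diff: the zero-cycle ReadAdvance entries that follow are how the model records the absence of forwarding. If operand forwarding were characterised, it would be expressed by advancing a read against selected producing writes, for example with hypothetical values such as

    def : ReadAdvance<ReadIMA, 2, [WriteIM32, WriteIM64]>;

which would let a multiply-accumulate consume an in-flight multiply result two cycles earlier than the producer's nominal latency.)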
+def : ReadAdvance<ReadI, 0>; +def : ReadAdvance<ReadISReg, 0>; +def : ReadAdvance<ReadIEReg, 0>; +def : ReadAdvance<ReadIM, 0>; +def : ReadAdvance<ReadIMA, 0>; +def : ReadAdvance<ReadID, 0>; +def : ReadAdvance<ReadExtrHi, 0>; +def : ReadAdvance<ReadAdrBase, 0>; +def : ReadAdvance<ReadVLD, 0>; + + +//===----------------------------------------------------------------------===// +// Specialize the coarse model by associating instruction groups with the +// subtarget-defined types. As the model is refined, this will override most +// of the above SchedWriteRes and SchedAlias mappings. + +// Miscellaneous +// ----------------------------------------------------------------------------- + +def : InstRW<[WriteI], (instrs COPY)>; + + +// Detailed Refinements +// ----------------------------------------------------------------------------- +include "AArch64SchedKryoDetails.td" + + +} // SchedModel = KryoModel diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedKryoDetails.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedKryoDetails.td new file mode 100644 index 000000000..cf4cdabb8 --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedKryoDetails.td @@ -0,0 +1,2378 @@ +//=- AArch64SchedKryoDetails.td - QC Kryo Scheduling Defs ----*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the uop and latency details for the machine model for the +// Qualcomm Kryo subtarget. +// +//===----------------------------------------------------------------------===// + +def KryoWrite_3cyc_X_noRSV_138ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_X_noRSV_138ln], + (instregex "(S|U)R?SRA(d|(v2i32|v4i16|v8i8)_shift)")>; + +def KryoWrite_3cyc_X_X_139ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_X_X_139ln], + (instregex "(S|U)R?SRA(v2i64|v4i32|v8i16|v16i8)_shift")>; + +def KryoWrite_4cyc_XY_XY_noRSV_172ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 4; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_4cyc_XY_XY_noRSV_172ln], + (instregex "(S|U)ABA(v8i8|v4i16|v2i32)")>; +def KryoWrite_4cyc_XY_XY_XY_XY_178ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitXY, KryoUnitXY]> { + let Latency = 4; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_4cyc_XY_XY_XY_XY_178ln], + (instregex "(S|U)ABA(v16i8|v8i16|v4i32)")>; +def KryoWrite_3cyc_XY_XY_XY_XY_177ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitXY, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_XY_XY_177ln], + (instregex "(S|U)ABALv.*")>; +def KryoWrite_3cyc_XY_XY_166ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_166ln], + (instregex "(S|U)(ABD|QSUB|RHADD)(v16i8|v8i16|v4i32|v2i64)")>; +def KryoWrite_3cyc_XY_noRSV_159ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_159ln], + (instregex "(S|U)(ABD|RHADD)(v8i8|v4i16|v2i32)")>; +def KryoWrite_3cyc_XY_XY_165ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_165ln], + (instregex "(S|U)ABDLv.*")>; +def 
KryoWrite_3cyc_X_noRSV_154ln : + SchedWriteRes<[KryoUnitX]> { +let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_X_noRSV_154ln], + (instregex "(S|U)ADALP(v8i8|v4i16|v2i32)_v.*")>; +def KryoWrite_3cyc_X_X_155ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_X_X_155ln], + (instregex "(S|U)ADALP(v16i8|v8i16|v4i32)_v.*")>; +def KryoWrite_2cyc_XY_XY_151ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_151ln], + (instregex "(S|U)(ADD|SUB)Lv.*")>; +def KryoWrite_2cyc_XY_noRSV_148ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_148ln], + (instregex "((S|U)ADDLP|ABS)(v2i32|v4i16|v8i8)(_v.*)?")>; +def KryoWrite_2cyc_XY_XY_150ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_150ln], + (instregex "((S|U)ADDLP|ABS)(v2i64|v4i32|v8i16|v16i8)(_v.*)?")>; +def KryoWrite_3cyc_XY_XY_XY_noRSV_179ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_XY_noRSV_179ln], + (instrs SADDLVv4i32v, UADDLVv4i32v)>; +def KryoWrite_5cyc_XY_XY_XY_noRSV_180ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitXY]> { + let Latency = 5; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_5cyc_XY_XY_XY_noRSV_180ln], + (instrs SADDLVv8i16v, UADDLVv8i16v)>; +def KryoWrite_6cyc_XY_XY_X_noRSV_181ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitX]> { + let Latency = 6; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_6cyc_XY_XY_X_noRSV_181ln], + (instrs SADDLVv16i8v, UADDLVv16i8v)>; +def KryoWrite_3cyc_XY_noRSV_158ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_158ln], + (instrs SADDLVv4i16v, UADDLVv4i16v, ADDVv4i16v)>; +def KryoWrite_4cyc_X_noRSV_169ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_noRSV_169ln], + (instrs SADDLVv8i8v, UADDLVv8i8v, ADDVv8i8v)>; +def KryoWrite_2cyc_XY_XY_XY_XY_176ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_XY_XY_176ln], + (instregex "(S|U)(ADDW|SUBW)v.*")>; +def KryoWrite_4cyc_X_noRSV_40ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_noRSV_40ln], + (instregex "(S|U)CVTFS(W|X)(D|S)ri")>; +def KryoWrite_4cyc_X_noRSV_97ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_noRSV_97ln], + (instregex "(S|U)CVTFU(W|X)(D|S)ri")>; +def KryoWrite_4cyc_X_noRSV_110ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_noRSV_110ln], + (instregex "(S|U)CVTF(v1i32|v2i32|v1i64|v2f32|d|s)(_shift)?")>; +def KryoWrite_4cyc_X_X_114ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_X_114ln], + (instregex "(S|U)CVTF(v2i64|v4i32|v2f64|v4f32)(_shift)?")>; +def KryoWrite_1cyc_XA_Y_98ln : + SchedWriteRes<[KryoUnitXA, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XA_Y_98ln], + (instregex "(S|U)DIV(_Int)?(W|X)r")>; +def KryoWrite_2cyc_XY_XY_152ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; 
let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_152ln], + (instregex "(S|U)H(ADD|SUB)(v16i8|v8i16|v4i32)")>; +def KryoWrite_2cyc_XY_noRSV_149ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_149ln], + (instregex "((S|U)H(ADD|SUB)|ADDP)(v8i8|v4i16|v2i32)")>; +def KryoWrite_4cyc_X_70ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_4cyc_X_70ln], + (instregex "(S|U)(MADDL|MSUBL)rrr")>; +def KryoWrite_4cyc_X_X_191ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_X_191ln], + (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>; +def KryoWrite_1cyc_XY_195ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_XY_195ln], + (instregex "(S|U)MOVv.*")>; +def KryoWrite_5cyc_X_71ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 5; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_5cyc_X_71ln], + (instrs SMULHrr, UMULHrr)>; +def KryoWrite_3cyc_XY_noRSV_186ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_186ln], + (instregex "^(S|U)QADD(v8i8|v4i16|v2i32)")>; +def KryoWrite_3cyc_XY_XY_187ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_187ln], + (instregex "^(S|U)QADD(v16i8|v8i16|v4i32|v2i64)")>; +def KryoWrite_3cyc_XY_noRSV_69ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_69ln], + (instregex "(S|U|SU|US)QADD(v1i8|v1i16|v2i16|v1i32|v1i64)")>; +def KryoWrite_3cyc_XY_noRSV_248ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_248ln], + (instregex "(S|U)QSHLU?(d|s|h|b|(v8i8|v4i16|v2i32)_shift)$")>; +def KryoWrite_3cyc_XY_XY_250ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_250ln], + (instregex "(S|U)(QSHLU?|RSHR)(v16i8|v8i16|v4i32|v2i64)_shift$")>; +def KryoWrite_3cyc_XY_noRSV_246ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_246ln], + (instregex "(S|U)(QSHL|RSHL|QRSHL)(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32)$")>; +def KryoWrite_3cyc_XY_XY_251ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_251ln], + (instregex "(S|U)(QSHL|RSHL|QRSHL)(v16i8|v8i16|v4i32|v2i64)$")>; +def KryoWrite_6cyc_XY_X_238ln : + SchedWriteRes<[KryoUnitXY, KryoUnitX]> { + let Latency = 6; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_6cyc_XY_X_238ln], + (instregex "((S|U)QR?SHRN|SQR?SHRUN)(v16i8|v8i16|v4i32)_shift$")>; +def KryoWrite_3cyc_XY_noRSV_249ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_249ln], + (instregex "((S|U)QR?SHRN|SQR?SHRUN)(s|h|b)?")>; +def KryoWrite_6cyc_XY_X_noRSV_252ln : + SchedWriteRes<[KryoUnitXY, KryoUnitX]> { + let Latency = 6; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_6cyc_XY_X_noRSV_252ln], + (instregex "((S|U)QR?SHRN|SQR?SHRUN)(v8i8|v4i16|v2i32)_shift?")>; +def KryoWrite_3cyc_XY_noRSV_161ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_161ln], + (instregex 
"(S|U)QSUB(v8i8|v4i16|v2i32|v1i64|v1i32|v1i16|v1i8)")>; +def KryoWrite_3cyc_XY_noRSV_163ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_163ln], + (instregex "(S|U)QXTU?N(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)")>; +def KryoWrite_3cyc_XY_noRSV_162ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_162ln], + (instregex "(S|U)QXTU?N(v1i8|v1i16|v1i32)")>; +def KryoWrite_3cyc_XY_noRSV_247ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_247ln], + (instregex "(S|U)RSHR(d|(v8i8|v4i16|v2i32)_shift)$")>; +def KryoWrite_2cyc_XY_noRSV_239ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_239ln], + (instregex "(S|U)SHL(d|v8i8|v4i16|v2i32|v1i64)$")>; +def KryoWrite_2cyc_XY_XY_243ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_243ln], + (instregex "(S|U)SHL(v16i8|v8i16|v4i32|v2i64)$")>; +def KryoWrite_2cyc_XY_XY_241ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_241ln], + (instregex "(S|U)?SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>; +def KryoWrite_2cyc_XY_noRSV_240ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_240ln], + (instregex "((S|U)SHR|SHL)(d|(v8i8|v4i16|v2i32)_shift)$")>; +def KryoWrite_2cyc_XY_XY_242ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_242ln], + (instregex "((S|U)SHR|SHL)(v16i8|v8i16|v4i32|v2i64)_shift$")>; +def KryoWrite_2cyc_XY_XY_183ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_183ln], + (instregex "(S|U)(MAX|MIN)P?(v16i8|v8i16|v4i32)")>; +def KryoWrite_2cyc_XY_noRSV_182ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_182ln], + (instregex "(S|U)(MAX|MIN)P?(v8i8|v4i16|v2i32)")>; +def KryoWrite_3cyc_XY_noRSV_184ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_184ln], + (instregex "(S|U)(MAX|MIN)V(v4i16v|v8i8v|v4i32)")>; +def KryoWrite_4cyc_X_noRSV_185ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_noRSV_185ln], + (instregex "(S|U)(MAX|MIN)V(v16i8v|v8i16v)")>; +def KryoWrite_2cyc_XY_noRSV_67ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_67ln], + (instrs ABSv1i64)>; +def KryoWrite_1cyc_XY_63ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_XY_63ln, ReadI, ReadI], + (instregex "ADC.*")>; +def KryoWrite_1cyc_XY_63_1ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_XY_63_1ln], + (instregex "ADR.*")>; +def KryoWrite_1cyc_XY_62ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_XY_62ln, ReadI], + (instregex "ADDS?(W|X)ri")>; +def KryoWrite_2cyc_XY_XY_64ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_64ln, 
ReadI, ReadI], + (instregex "ADDS?(W|X)r(r|s|x)(64)?")>; +def KryoWrite_1cyc_XY_noRSV_65ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_65ln], + (instrs ADDv1i64)>; +def KryoWrite_1cyc_XY_noRSV_144ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_144ln], + (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>; +def KryoWrite_1cyc_XY_XY_146ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_XY_146ln], + (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>; +def KryoWrite_4cyc_XY_X_noRSV_171ln : + SchedWriteRes<[KryoUnitXY, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_4cyc_XY_X_noRSV_171ln], + (instregex "(ADD|SUB)HNv.*")>; +def KryoWrite_1cyc_XY_noRSV_66ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_66ln], + (instrs ADDPv2i64p)>; +def KryoWrite_2cyc_XY_XY_153ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_153ln], + (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>; +def KryoWrite_3cyc_XY_XY_noRSV_170ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_noRSV_170ln], + (instrs ADDVv4i32v)>; +def KryoWrite_4cyc_XY_XY_noRSV_173ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 4; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_4cyc_XY_XY_noRSV_173ln], + (instrs ADDVv8i16v)>; +def KryoWrite_5cyc_XY_X_noRSV_174ln : + SchedWriteRes<[KryoUnitXY, KryoUnitX]> { + let Latency = 5; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_5cyc_XY_X_noRSV_174ln], + (instrs ADDVv16i8v)>; +def KryoWrite_3cyc_XY_XY_X_X_27ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitX, KryoUnitX]> { + let Latency = 3; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_X_X_27ln], + (instrs AESDrr, AESErr)>; +def KryoWrite_2cyc_X_X_22ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_X_X_22ln], + (instrs AESIMCrr, AESMCrr)>; +def KryoWrite_1cyc_XY_noRSV_76ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_76ln], + (instregex "((AND|ORN|EOR|EON)S?(Wr[rsi]|v8i8|v4i16|v2i32)|(ORR|BIC)S?(Wr[rs]|v8i8|v4i16|v2i32))")>; +def KryoWrite_1cyc_XY_XY_79ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_XY_79ln], + (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>; +def KryoWrite_1cyc_X_72ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_X_72ln], + (instregex "(S|U)?BFM.*")>; +def KryoWrite_1cyc_XY_noRSV_77ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_77ln], + (instregex "(BIC|ORR)S?Wri")>; +def KryoWrite_1cyc_XY_XY_78ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_XY_78ln], + (instregex "(BIC|ORR)S?Xri")>; +def KryoWrite_1cyc_X_noRSV_74ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_X_noRSV_74ln], + (instrs BIFv8i8, BITv8i8, BSLv8i8)>; 
+def KryoWrite_1cyc_X_X_75ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_X_X_75ln], + (instrs BIFv16i8, BITv16i8, BSLv16i8)>; +def KryoWrite_0cyc_noRSV_11ln : + SchedWriteRes<[]> { + let Latency = 0; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_0cyc_noRSV_11ln], + (instrs BRK, DCPS1, DCPS2, DCPS3, HLT, HVC, ISB, HINT, SMC, SVC)>; +def KryoWrite_0cyc_XY_16ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 0; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_0cyc_XY_16ln, ReadI], + (instregex "(CCMN|CCMP)(W|X)i")>; +def KryoWrite_0cyc_XY_16_1ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 0; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_0cyc_XY_16_1ln, ReadI, ReadI], + (instregex "(CCMN|CCMP)(W|X)r")>; +def KryoWrite_2cyc_XY_3ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_2cyc_XY_3ln, ReadI], + (instregex "(CLS|CLZ)(W|X)r")>; +def KryoWrite_2cyc_XY_noRSV_7ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_7ln], + (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>; +def KryoWrite_2cyc_XY_XY_8ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_8ln], + (instregex "(CLS|CLZ|CNT)(v2i32|v4i16|v8i8)")>; +def KryoWrite_2cyc_XY_noRSV_80ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_80ln], + (instregex "CM(EQ|GE|HS|GT|HI|TST)(v8i8|v4i16|v2i32|v1i64)$")>; +def KryoWrite_2cyc_XY_XY_83ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_83ln], + (instregex "CM(EQ|GE|HS|GT|HI|TST)(v16i8|v8i16|v4i32|v2i64)$")>; +def KryoWrite_2cyc_XY_noRSV_81ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_81ln], + (instregex "CM(EQ|LE|GE|GT|LT)(v8i8|v4i16|v2i32|v1i64)rz$")>; +def KryoWrite_2cyc_XY_XY_82ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_82ln], + (instregex "CM(EQ|LE|GE|GT|LT)(v16i8|v8i16|v4i32|v2i64)rz$")>; +def KryoWrite_3cyc_XY_4ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_3cyc_XY_4ln, ReadI, ReadISReg], + (instregex "CRC32.*")>; +def KryoWrite_1cyc_XY_20ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_XY_20ln, ReadI, ReadI], + (instregex "CSEL(W|X)r")>; +def KryoWrite_1cyc_X_17ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_X_17ln, ReadI, ReadI], + (instregex "(CSINC|CSNEG)(W|X)r")>; +def KryoWrite_1cyc_XY_18ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_XY_18ln, ReadI, ReadI], + (instregex "(CSINV)(W|X)r")>; +def KryoWrite_3cyc_LS_X_13ln : + SchedWriteRes<[KryoUnitLS, KryoUnitX]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_X_13ln], + (instrs DRPS)>; +def KryoWrite_0cyc_LS_10ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 0; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_0cyc_LS_10ln], + (instrs DSB, DMB, CLREX)>; +def KryoWrite_1cyc_X_noRSV_196ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 1; let NumMicroOps = 2; +} +def : 
InstRW<[KryoWrite_1cyc_X_noRSV_196ln], + (instregex "DUP(v8i8|v4i16|v2i32)(gpr|lane)")>; +def KryoWrite_1cyc_X_X_197ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_X_X_197ln], + (instregex "DUP(v16i8|v8i16|v4i32|v2i64)(gpr|lane)")>; +def KryoWrite_3cyc_LS_LS_X_15ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitX]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_X_15ln], + (instrs ERET)>; +def KryoWrite_1cyc_X_noRSV_207ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_X_noRSV_207ln], + (instrs EXTv8i8)>; +def KryoWrite_1cyc_X_X_212ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_X_X_212ln], + (instrs EXTv16i8)>; +def KryoWrite_2cyc_XY_X_136ln : + SchedWriteRes<[KryoUnitXY, KryoUnitX]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_X_136ln], + (instrs EXTRWrri, EXTRXrri)>; +def KryoWrite_2cyc_XY_noRSV_35ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_35ln], + (instregex "F(MAX|MIN)(NM)?P?(D|S)rr")>; +def KryoWrite_2cyc_XY_XY_106ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_106ln], + (instregex "(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2i64p|v2f64|v4f32)")>; +def KryoWrite_2cyc_XY_noRSV_104ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_104ln], + (instregex "(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2f32|v2i32p)")>; +def KryoWrite_3cyc_XY_noRSV_107ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_107ln], + (instregex "F(MAX|MIN)(NM)?Vv4i32v")>; +def KryoWrite_3cyc_XY_noRSV_101ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_101ln], + (instregex "FABD(32|64|v2f32)")>; +def KryoWrite_3cyc_XY_XY_103ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_103ln], + (instregex "(FABD|FADD|FSUB|FADDP)(v4f32|v2f64)")>; +def KryoWrite_1cyc_XY_noRSV_48ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_48ln], + (instregex "F(ABS|NEG)(D|S)r")>; +def KryoWrite_1cyc_XY_noRSV_124ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_124ln], + (instregex "F(ABS|NEG)v2f32")>; +def KryoWrite_1cyc_XY_XY_125ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_XY_125ln], + (instregex "F(ABS|NEG)(v2f64|v4f32)")>; +def KryoWrite_2cyc_XY_noRSV_33ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_33ln], + (instregex "(FAC(GE|GT)|FCM(EQ|GE|GT))(32|64)")>; +def KryoWrite_3cyc_XY_noRSV_30ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_30ln], + (instregex "(FADD|FSUB)(D|S)rr")>; +def KryoWrite_3cyc_XY_noRSV_100ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_100ln], + (instregex "(FADD|FSUB|FADDP)v2f32")>; +def 
KryoWrite_3cyc_XY_noRSV_29ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_29ln], + (instregex "FADDP(v2i32p|v2i64p)")>; +def KryoWrite_0cyc_XY_31ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 0; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_0cyc_XY_31ln], + (instregex "FCCMPE?(D|S)rr")>; +def KryoWrite_2cyc_XY_noRSV_34ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_34ln], + (instregex "FCM(EQ|LE|GE|GT|LT)(v1i32|v1i64)rz")>; +def KryoWrite_2cyc_XY_XY_36ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_36ln], + (instregex "FCM(EQ|LE|GE|GT|LT)(v2i64|v4i32)rz")>; +def KryoWrite_2cyc_XY_noRSV_105ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_105ln], + (instregex "FCM(EQ|LE|GE|GT|LT)v2i32rz")>; +def KryoWrite_0cyc_XY_32ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 0; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_0cyc_XY_32ln], + (instregex "FCMPE?(D|S)r(r|i)")>; +def KryoWrite_1cyc_XY_noRSV_49ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_49ln], + (instrs FCSELDrrr, FCSELSrrr)>; +def KryoWrite_4cyc_X_noRSV_41ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_noRSV_41ln], + (instrs FCVTDHr, FCVTDSr, FCVTHDr, FCVTHSr, FCVTSDr, FCVTSHr)>; +def KryoWrite_4cyc_X_38ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_4cyc_X_38ln], + (instregex "FCVT(((A|N|M|P)(S|U)(S|U)|Z(S|U)_Int(S|U))(W|X)(D|S)ri?|Z(S|U)(d|s))$")>; +def KryoWrite_4cyc_X_noRSV_113ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_noRSV_113ln], + (instregex "FCVT((A|N|M|P)(S|U)|Z(S|U)_Int)(v1i32|v1i64|v2f32)$")>; +def KryoWrite_4cyc_X_X_117ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_X_117ln], + (instregex "FCVT((A|N|M|P)(S|U)|Z(S|U)_Int)(v4f32|v2f64)$")>; +def KryoWrite_5cyc_X_X_XY_noRSV_119ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitXY]> { + let Latency = 5; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_5cyc_X_X_XY_noRSV_119ln], + (instregex "FCVTX?N(v2f32|v4f32|v2i32|v4i16|v4i32|v8i16)$")>; +def KryoWrite_4cyc_X_X_116ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_X_116ln], + (instregex "FCVTL(v2i32|v4i16|v4i32|v8i16)$")>; +def KryoWrite_4cyc_X_noRSV_112ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_noRSV_112ln], + (instrs FCVTXNv1i64)>; +def KryoWrite_4cyc_X_37ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_4cyc_X_37ln], + (instregex "FCVTZ(S|U)(S|U)(W|X)(D|S)ri?$")>; +def KryoWrite_4cyc_X_noRSV_111ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_noRSV_111ln], + (instregex "FCVTZ(S|U)(v2f32|v1i32|v1i64|v2i32(_shift)?)$")>; +def KryoWrite_4cyc_X_X_115ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_X_115ln], + (instregex "FCVTZ(S|U)(v2f64|v4f32|(v2i64|v4i32)(_shift)?)$")>; +def 
KryoWrite_10cyc_XA_Y_noRSV_43ln : + SchedWriteRes<[KryoUnitXA, KryoUnitY]> { + let Latency = 10; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_10cyc_XA_Y_noRSV_43ln], + (instrs FDIVSrr)>; +def KryoWrite_14cyc_XA_Y_noRSV_43ln : + SchedWriteRes<[KryoUnitXA, KryoUnitY]> { + let Latency = 14; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_14cyc_XA_Y_noRSV_43ln], + (instrs FDIVDrr)>; +def KryoWrite_10cyc_XA_Y_noRSV_121ln : + SchedWriteRes<[KryoUnitXA, KryoUnitY]> { + let Latency = 10; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_10cyc_XA_Y_noRSV_121ln], + (instrs FDIVv2f32)>; +def KryoWrite_14cyc_XA_Y_XA_Y_123ln : + SchedWriteRes<[KryoUnitXA, KryoUnitY, KryoUnitXA, KryoUnitY]> { + let Latency = 14; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_14cyc_XA_Y_XA_Y_123ln], + (instrs FDIVv2f64, FDIVv4f32)>; +def KryoWrite_5cyc_X_noRSV_55ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 5; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_5cyc_X_noRSV_55ln], + (instregex "FN?M(ADD|SUB)Srrr")>; +def KryoWrite_6cyc_X_noRSV_57ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 6; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_6cyc_X_noRSV_57ln], + (instregex "FN?M(ADD|SUB)Drrr")>; +def KryoWrite_5cyc_X_noRSV_51ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 5; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_5cyc_X_noRSV_51ln], + (instrs FMLAv2f32, FMLSv2f32, FMLAv1i32_indexed, FMLSv1i32_indexed)>; +def KryoWrite_5cyc_X_X_56ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 5; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_5cyc_X_X_56ln], + (instrs FMLAv4f32, FMLSv4f32)>; +def KryoWrite_6cyc_X_X_61ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 6; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_6cyc_X_X_61ln], + (instrs FMLAv2f64, FMLSv2f64)>; +def KryoWrite_5cyc_X_noRSV_128ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 5; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_5cyc_X_noRSV_128ln], + (instrs FMLAv2i32_indexed, FMLSv2i32_indexed)>; +def KryoWrite_5cyc_X_X_131ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 5; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_5cyc_X_X_131ln], + (instrs FMLAv4i32_indexed, FMLSv4i32_indexed)>; +def KryoWrite_6cyc_X_X_134ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 6; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_6cyc_X_X_134ln], + (instrs FMLAv2i64_indexed, FMLSv2i64_indexed)>; +def KryoWrite_6cyc_X_noRSV_60ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 6; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_6cyc_X_noRSV_60ln], + (instrs FMLAv1i64_indexed, FMLSv1i64_indexed, FMULv1i64_indexed, FMULXv1i64_indexed)>; +def KryoWrite_1cyc_XY_45ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_XY_45ln], + (instregex "FMOV(XDHigh|DXHigh|DX)r")>; +def KryoWrite_1cyc_XY_noRSV_47ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_47ln], + (instregex "FMOV(Di|Dr|Si|Sr|SWr|WSr|XDr|v.*_ns)")>; +def KryoWrite_5cyc_X_noRSV_53ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 5; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_5cyc_X_noRSV_53ln], + (instrs FMULv1i32_indexed, FMULXv1i32_indexed)>; +def KryoWrite_5cyc_X_noRSV_127ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 5; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_5cyc_X_noRSV_127ln], + (instrs FMULv2f32, FMULXv2f32, FMULv2i32_indexed, FMULXv2i32_indexed)>; +def KryoWrite_5cyc_X_X_130ln : + 
SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 5; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_5cyc_X_X_130ln], + (instrs FMULv4f32, FMULXv4f32, FMULv4i32_indexed, FMULXv4i32_indexed)>; +def KryoWrite_6cyc_X_X_133ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 6; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_6cyc_X_X_133ln], + (instrs FMULv2f64, FMULXv2f64, FMULv2i64_indexed, FMULXv2i64_indexed)>; +def KryoWrite_5cyc_X_noRSV_54ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 5; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_5cyc_X_noRSV_54ln], + (instrs FMULSrr, FNMULSrr, FMULX32)>; +def KryoWrite_6cyc_X_noRSV_59ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 6; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_6cyc_X_noRSV_59ln], + (instrs FMULDrr, FNMULDrr, FMULX64)>; +def KryoWrite_3cyc_XY_noRSV_28ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_28ln], + (instrs FRECPEv1i32, FRECPEv1i64, FRSQRTEv1i32, FRSQRTEv1i64 )>; +def KryoWrite_3cyc_XY_noRSV_99ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_99ln], + (instrs FRECPEv2f32, FRSQRTEv2f32)>; +def KryoWrite_3cyc_XY_XY_102ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_102ln], + (instrs FRECPEv2f64, FRECPEv4f32, FRSQRTEv2f64, FRSQRTEv4f32)>; +def KryoWrite_5cyc_X_noRSV_52ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 5; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_5cyc_X_noRSV_52ln], + (instrs FRECPS32, FRSQRTS32)>; +def KryoWrite_6cyc_X_noRSV_58ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 6; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_6cyc_X_noRSV_58ln], + (instrs FRECPS64, FRSQRTS64)>; +def KryoWrite_5cyc_X_noRSV_126ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 5; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_5cyc_X_noRSV_126ln], + (instrs FRECPSv2f32, FRSQRTSv2f32)>; +def KryoWrite_5cyc_X_X_129ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 5; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_5cyc_X_X_129ln], + (instrs FRECPSv4f32, FRSQRTSv4f32)>; +def KryoWrite_6cyc_X_X_132ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 6; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_6cyc_X_X_132ln], + (instrs FRECPSv2f64, FRSQRTSv2f64)>; +def KryoWrite_3cyc_XY_noRSV_50ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_50ln], + (instrs FRECPXv1i32, FRECPXv1i64)>; +def KryoWrite_2cyc_XY_noRSV_39ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_39ln], + (instregex "FRINT(A|I|M|N|P|X|Z)(S|D)r")>; +def KryoWrite_2cyc_XY_noRSV_108ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_108ln], + (instregex "FRINT(A|I|M|N|P|X|Z)v2f32")>; +def KryoWrite_2cyc_XY_XY_109ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_109ln], + (instregex "FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32)")>; +def KryoWrite_12cyc_XA_Y_noRSV_42ln : + SchedWriteRes<[KryoUnitXA, KryoUnitY]> { + let Latency = 12; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_12cyc_XA_Y_noRSV_42ln], + (instrs FSQRTSr)>; +def KryoWrite_21cyc_XA_Y_noRSV_42ln : + SchedWriteRes<[KryoUnitXA, KryoUnitY]> { + let Latency = 21; let 
NumMicroOps = 3; +} +def : InstRW<[KryoWrite_21cyc_XA_Y_noRSV_42ln], + (instrs FSQRTDr)>; +def KryoWrite_12cyc_XA_Y_noRSV_120ln : + SchedWriteRes<[KryoUnitXA, KryoUnitY]> { + let Latency = 12; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_12cyc_XA_Y_noRSV_120ln], + (instrs FSQRTv2f32)>; +def KryoWrite_21cyc_XA_Y_XA_Y_122ln : + SchedWriteRes<[KryoUnitXA, KryoUnitY, KryoUnitXA, KryoUnitY]> { + let Latency = 21; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_21cyc_XA_Y_XA_Y_122ln], + (instrs FSQRTv4f32)>; +def KryoWrite_36cyc_XA_Y_XA_Y_122ln : + SchedWriteRes<[KryoUnitXA, KryoUnitY, KryoUnitXA, KryoUnitY]> { + let Latency = 36; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_36cyc_XA_Y_XA_Y_122ln], + (instrs FSQRTv2f64)>; +def KryoWrite_1cyc_X_201ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_X_201ln], + (instregex "INSv.*")>; +def KryoWrite_3cyc_LS_255ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_3cyc_LS_255ln], + (instregex "LD1(One(v16b|v8h|v4s|v2d)|i64)$")>; +def KryoWrite_4cyc_LS_X_270ln : + SchedWriteRes<[KryoUnitLS, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_LS_X_270ln], + (instregex "LD1(i8|i16|i32)$")>; +def KryoWrite_3cyc_LS_noRSV_285ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_noRSV_285ln], + (instregex "LD1One(v8b|v4h|v2s|v1d)$")>; +def KryoWrite_3cyc_LS_XY_289ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_289ln, WriteAdr], + (instregex "LD1(One(v16b|v8h|v4s|v2d)|i64)_POST$")>; +def KryoWrite_4cyc_LS_XY_X_298ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_4cyc_LS_XY_X_298ln, WriteAdr], + (instregex "LD1(i8|i16|i32)_POST$")>; +def KryoWrite_3cyc_LS_LS_LS_308ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_LS_308ln], + (instregex "LD1Three(v16b|v8h|v4s|v2d)$")>; +def KryoWrite_3cyc_LS_XY_noRSV_317ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_noRSV_317ln, WriteAdr], + (instregex "LD1One(v8b|v4h|v2s|v1d)_POST$")>; +def KryoWrite_3cyc_LS_LS_LS_LS_328ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_LS_LS_328ln, WriteAdr], + (instregex "LD1Four(v16b|v8h|v4s|v2d)_POST$")>; +def KryoWrite_3cyc_LS_XY_LS_LS_332ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_LS_LS_332ln, WriteAdr], + (instregex "LD1Three(v16b|v8h|v4s|v2d)_POST$")>; +def KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_348ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 5; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_348ln], + (instregex "LD1Three(v8b|v4h|v2s|v1d)$")>; +def KryoWrite_3cyc_LS_XY_LS_LS_LS_351ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 5; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_LS_LS_LS_351ln], + (instregex "LD1Four(v16b|v8h|v4s|v2d)$")>; +def KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_noRSV_358ln : + SchedWriteRes<[KryoUnitLS, 
KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 6; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_noRSV_358ln], + (instregex "LD1Four(v8b|v4h|v2s|v1d)$")>; +def KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_360ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 6; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_360ln, WriteAdr], + (instregex "LD1Three(v8b|v4h|v2s|v1d)_POST$")>; +def KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_noRSV_368ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 7; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_noRSV_368ln, WriteAdr], + (instregex "LD1Four(v8b|v4h|v2s|v1d)_POST$")>; +def KryoWrite_3cyc_LS_LS_281ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_281ln], + (instregex "LD(1|2)Two(v16b|v8h|v4s|v2d)$")>; +def KryoWrite_3cyc_LS_noRSV_noRSV_311ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_noRSV_noRSV_311ln], + (instregex "LD(1|2)Two(v8b|v4h|v2s|v1d)$")>; +def KryoWrite_3cyc_LS_XY_LS_313ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_LS_313ln, WriteAdr], + (instregex "LD(1|2)Two(v16b|v8h|v4s|v2d)_POST$")>; +def KryoWrite_3cyc_LS_XY_noRSV_noRSV_334ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_noRSV_noRSV_334ln, WriteAdr], + (instregex "LD(1|2)Two(v8b|v4h|v2s|v1d)_POST$")>; +def KryoWrite_3cyc_LS_256ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_3cyc_LS_256ln], + (instregex "LD1R(v16b|v8h|v4s|v2d)$")>; +def KryoWrite_3cyc_LS_noRSV_286ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_noRSV_286ln], + (instregex "LD1R(v8b|v4h|v2s|v1d)$")>; +def KryoWrite_3cyc_LS_XY_290ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_290ln, WriteAdr], + (instregex "LD1R(v16b|v8h|v4s|v2d)_POST$")>; +def KryoWrite_3cyc_LS_XY_noRSV_318ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_noRSV_318ln, WriteAdr], + (instregex "LD1R(v8b|v4h|v2s|v1d)_POST$")>; +def KryoWrite_3cyc_LS_257ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_3cyc_LS_257ln], + (instregex "LD2i64$")>; +def KryoWrite_3cyc_LS_XY_291ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_291ln, WriteAdr], + (instregex "LD2i64_POST$")>; +def KryoWrite_4cyc_LS_X_X_296ln : + SchedWriteRes<[KryoUnitLS, KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_4cyc_LS_X_X_296ln], + (instregex "LD2(i8|i16|i32)$")>; +def KryoWrite_4cyc_LS_XY_X_X_321ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_4cyc_LS_XY_X_X_321ln, WriteAdr], + (instregex "LD2(i8|i16|i32)_POST$")>; +def KryoWrite_3cyc_LS_LS_282ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_282ln], + (instregex "LD2R(v16b|v8h|v4s|v2d)$")>; 
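+
+// Note on the pattern used throughout this file: each KryoWrite_* record is a
+// SchedWriteRes naming the Kryo pipeline units it occupies, its Latency and
+// its NumMicroOps, and the anonymous InstRW def that follows binds it to
+// instructions, either by exact opcode (instrs) or by a regular expression
+// over opcode names (instregex). The record names encode the same data, e.g.
+// KryoWrite_3cyc_LS_XY_LS_314ln is a 3-cycle write occupying the LS, XY and
+// LS units. Names containing "noRSV" declare more micro-ops than listed
+// units, presumably micro-ops that reserve no pipeline unit of their own.
+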
+def KryoWrite_3cyc_LS_noRSV_noRSV_312ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_noRSV_noRSV_312ln], + (instregex "LD2R(v8b|v4h|v2s|v1d)$")>; +def KryoWrite_3cyc_LS_XY_LS_314ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_LS_314ln, WriteAdr], + (instregex "LD2R(v16b|v8h|v4s|v2d)_POST$")>; +def KryoWrite_3cyc_LS_XY_noRSV_noRSV_335ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_noRSV_noRSV_335ln, WriteAdr], + (instregex "LD2R(v8b|v4h|v2s|v1d)_POST$")>; +def KryoWrite_3cyc_LS_LS_283ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_283ln], + (instregex "LD3i64$")>; +def KryoWrite_3cyc_LS_LS_LS_309ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_LS_309ln], + (instregex "LD3Threev2d$")>; +def KryoWrite_3cyc_LS_XY_LS_315ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_LS_315ln, WriteAdr], + (instregex "LD3i64_POST$")>; +def KryoWrite_4cyc_LS_X_X_X_320ln : + SchedWriteRes<[KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_4cyc_LS_X_X_X_320ln], + (instregex "LD3(i8|i16|i32)$")>; +def KryoWrite_3cyc_LS_XY_LS_LS_331ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_LS_LS_331ln, WriteAdr], + (instregex "LD3Threev2d_POST$")>; +def KryoWrite_4cyc_LS_XY_X_X_X_338ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitX, KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 5; +} +def : InstRW<[KryoWrite_4cyc_LS_XY_X_X_X_338ln, WriteAdr], + (instregex "LD3(i8|i16|i32)_POST$")>; +def KryoWrite_4cyc_LS_LS_X_X_X_noRSV_noRSV_noRSV_373ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 8; +} +def : InstRW<[KryoWrite_4cyc_LS_LS_X_X_X_noRSV_noRSV_noRSV_373ln], + (instregex "LD3Three(v8b|v4h|v2s)$")>; +def KryoWrite_4cyc_LS_XY_LS_X_X_X_noRSV_noRSV_noRSV_380ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitX, KryoUnitX, + KryoUnitX]> { + let Latency = 4; let NumMicroOps = 9; +} +def : InstRW<[KryoWrite_4cyc_LS_XY_LS_X_X_X_noRSV_noRSV_noRSV_380ln, WriteAdr], + (instregex "LD3Three(v8b|v4h|v2s)_POST$")>; +def KryoWrite_4cyc_LS_LS_X_X_X_LS_LS_X_X_X_381ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 10; +} +def : InstRW<[KryoWrite_4cyc_LS_LS_X_X_X_LS_LS_X_X_X_381ln], + (instregex "LD3Three(v16b|v8h|v4s)$")>; +def KryoWrite_4cyc_LS_LS_X_X_X_LS_XY_LS_X_X_X_383ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitX, KryoUnitX, + KryoUnitX]> { + let Latency = 4; let NumMicroOps = 11; +} +def : InstRW<[KryoWrite_4cyc_LS_LS_X_X_X_LS_XY_LS_X_X_X_383ln, WriteAdr], + (instregex "LD3Three(v16b|v8h|v4s)_POST$")>; +def KryoWrite_3cyc_LS_LS_LS_310ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 3; +} +def : 
InstRW<[KryoWrite_3cyc_LS_LS_LS_310ln], + (instregex "LD3R(v16b|v8h|v4s|v2d)$")>; +def KryoWrite_3cyc_LS_XY_LS_LS_333ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_LS_LS_333ln, WriteAdr], + (instregex "LD3R(v16b|v8h|v4s|v2d)_POST$")>; +def KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_349ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 5; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_349ln], + (instregex "LD3R(v8b|v4h|v2s|v1d)$")>; +def KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_361ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 6; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_361ln, WriteAdr], + (instregex "LD3R(v8b|v4h|v2s|v1d)_POST$")>; +def KryoWrite_3cyc_LS_LS_284ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_284ln], + (instregex "LD4i64$")>; +def KryoWrite_3cyc_LS_XY_LS_316ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_LS_316ln, WriteAdr], + (instregex "LD4i64_POST$")>; +def KryoWrite_3cyc_LS_LS_LS_LS_329ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_LS_LS_329ln], + (instregex "LD4Four(v2d)$")>; +def KryoWrite_4cyc_LS_X_X_X_X_337ln : + SchedWriteRes<[KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 5; +} +def : InstRW<[KryoWrite_4cyc_LS_X_X_X_X_337ln], + (instregex "LD4(i8|i16|i32)$")>; +def KryoWrite_3cyc_LS_XY_LS_LS_LS_350ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 5; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_LS_LS_LS_350ln, WriteAdr], + (instregex "LD4Four(v2d)_POST$")>; +def KryoWrite_4cyc_LS_XY_X_X_X_X_355ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitX]> { + let Latency = 4; let NumMicroOps = 6; +} +def : InstRW<[KryoWrite_4cyc_LS_XY_X_X_X_X_355ln, WriteAdr], + (instregex "LD4(i8|i16|i32)_POST$")>; +def KryoWrite_4cyc_LS_LS_X_X_X_X_noRSV_noRSV_noRSV_noRSV_382ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitX]> { + let Latency = 4; let NumMicroOps = 10; +} +def : InstRW<[KryoWrite_4cyc_LS_LS_X_X_X_X_noRSV_noRSV_noRSV_noRSV_382ln], + (instregex "LD4Four(v8b|v4h|v2s)$")>; +def KryoWrite_4cyc_LS_XY_LS_X_X_X_X_noRSV_noRSV_noRSV_noRSV_384ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitX, KryoUnitX, + KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 11; +} +def : InstRW<[KryoWrite_4cyc_LS_XY_LS_X_X_X_X_noRSV_noRSV_noRSV_noRSV_384ln, WriteAdr], + (instregex "LD4Four(v8b|v4h|v2s)_POST$")>; +def KryoWrite_4cyc_LS_LS_X_X_X_X_LS_LS_X_X_X_X_386ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitX, KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX, + KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 12; +} +def : InstRW<[KryoWrite_4cyc_LS_LS_X_X_X_X_LS_LS_X_X_X_X_386ln], + (instregex "LD4Four(v16b|v8h|v4s)$")>; +def KryoWrite_4cyc_LS_LS_X_X_X_X_LS_XY_LS_X_X_X_X_389ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitX, KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitX, + KryoUnitX, KryoUnitX, KryoUnitX]> { + 
let Latency = 4; let NumMicroOps = 13; +} +def : InstRW<[KryoWrite_4cyc_LS_LS_X_X_X_X_LS_XY_LS_X_X_X_X_389ln, WriteAdr], + (instregex "LD4Four(v16b|v8h|v4s)_POST$")>; +def KryoWrite_3cyc_LS_LS_LS_LS_330ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_LS_LS_330ln], + (instregex "LD4R(v16b|v8h|v4s|v2d)$")>; +def KryoWrite_3cyc_LS_XY_LS_LS_LS_352ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS, KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 5; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_LS_LS_LS_352ln, WriteAdr], + (instregex "LD4R(v16b|v8h|v4s|v2d)_POST$")>; +def KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_noRSV_359ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 6; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_noRSV_noRSV_noRSV_noRSV_359ln], + (instregex "LD4R(v8b|v4h|v2s|v1d)$")>; +def KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_noRSV_369ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 7; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_LS_noRSV_noRSV_noRSV_noRSV_369ln, WriteAdr], + (instregex "LD4R(v8b|v4h|v2s|v1d)_POST$")>; +def KryoWrite_3cyc_LS_LS_400ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_400ln], + (instregex "LDAX?R(B|H|W|X)")>; +def : InstRW<[KryoWrite_3cyc_LS_LS_400ln, WriteLDHi], + (instregex "LDAXP(W|X)")>; +def KryoWrite_3cyc_LS_LS_401ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_401ln, WriteLDHi], + (instrs LDNPQi)>; +def KryoWrite_3cyc_LS_noRSV_noRSV_408ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_noRSV_noRSV_408ln, WriteLDHi], + (instrs LDNPDi, LDNPSi)>; +def KryoWrite_3cyc_LS_394ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_3cyc_LS_394ln, WriteLDHi], + (instrs LDNPWi, LDNPXi)>; +def KryoWrite_3cyc_LS_LS_402ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_402ln, WriteLDHi], + (instrs LDPQi)>; +def KryoWrite_3cyc_LS_noRSV_noRSV_409ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_noRSV_noRSV_409ln, WriteLDHi], + (instrs LDPDi, LDPSi)>; +def KryoWrite_3cyc_LS_XY_LS_410ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY, KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_LS_410ln, WriteLDHi, WriteAdr], + (instregex "LDPQ(post|pre)")>; +def KryoWrite_3cyc_LS_XY_noRSV_noRSV_411ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_noRSV_noRSV_411ln, WriteLDHi, WriteAdr], + (instregex "LDP(D|S)(post|pre)")>; +def KryoWrite_3cyc_LS_393ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_3cyc_LS_393ln, WriteLDHi], + (instrs LDPWi, LDPXi)>; +def KryoWrite_3cyc_LS_XY_403ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_403ln, WriteLDHi, WriteAdr], + (instregex "LDP(W|X)(post|pre)")>; +def KryoWrite_4cyc_LS_395ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 4; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_4cyc_LS_395ln, WriteLDHi], 
+ (instrs LDPSWi)>; +def KryoWrite_4cyc_LS_XY_405ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_LS_XY_405ln, WriteLDHi, WriteAdr], + (instrs LDPSWpost, LDPSWpre)>; +def KryoWrite_3cyc_LS_264ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_3cyc_LS_264ln], + (instrs LDRQui, LDRQl)>; +def KryoWrite_4cyc_X_LS_271ln : + SchedWriteRes<[KryoUnitX, KryoUnitLS]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_LS_271ln], + (instrs LDRQroW, LDRQroX)>; +def KryoWrite_3cyc_LS_noRSV_287ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_noRSV_287ln], + (instregex "LDR((D|S)l|(D|S|H|B)ui)")>; +def KryoWrite_3cyc_LS_XY_293ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_293ln, WriteAdr], + (instrs LDRQpost, LDRQpre)>; +def KryoWrite_4cyc_X_LS_noRSV_297ln : + SchedWriteRes<[KryoUnitX, KryoUnitLS]> { + let Latency = 4; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_4cyc_X_LS_noRSV_297ln], + (instregex "LDR(D|S|H|B)ro(W|X)")>; +def KryoWrite_3cyc_LS_XY_noRSV_319ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_noRSV_319ln, WriteAdr], + (instregex "LDR(D|S|H|B)(post|pre)")>; +def KryoWrite_3cyc_LS_261ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_3cyc_LS_261ln], + (instregex "LDR(BB|HH|W|X)ui")>; +def KryoWrite_3cyc_LS_XY_292ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_XY_292ln, WriteAdr], + (instregex "LDR(BB|HH|W|X)(post|pre)")>; +def KryoWrite_4cyc_X_LS_272ln : + SchedWriteRes<[KryoUnitX, KryoUnitLS]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_LS_272ln], + (instregex "(LDR(BB|HH|W|X)ro(W|X)|PRFMro(W|X))")>; +def KryoWrite_3cyc_LS_262ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_3cyc_LS_262ln], + (instrs LDRWl, LDRXl)>; +def KryoWrite_4cyc_LS_268ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 4; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_4cyc_LS_268ln], + (instregex "LDRS(BW|BX|HW|HX|W)ui")>; +def KryoWrite_5cyc_X_LS_273ln : + SchedWriteRes<[KryoUnitX, KryoUnitLS]> { + let Latency = 5; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_5cyc_X_LS_273ln], + (instregex "LDRS(BW|BX|HW|HX|W)ro(W|X)")>; +def KryoWrite_4cyc_LS_XY_294ln : + SchedWriteRes<[KryoUnitLS, KryoUnitXY]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_LS_XY_294ln, WriteAdr], + (instregex "LDRS(BW|BX|HW|HX|W)(post|pre)")>; +def KryoWrite_4cyc_LS_269ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 4; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_4cyc_LS_269ln], + (instrs LDRSWl)>; +def KryoWrite_3cyc_LS_260ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_3cyc_LS_260ln], + (instregex "LDTR(B|H|W|X)i")>; +def KryoWrite_4cyc_LS_267ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 4; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_4cyc_LS_267ln], + (instregex "LDTRS(BW|BX|HW|HX|W)i")>; +def KryoWrite_3cyc_LS_263ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_3cyc_LS_263ln], + (instrs 
LDURQi)>; +def KryoWrite_3cyc_LS_noRSV_288ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_noRSV_288ln], + (instregex "LDUR(D|S|H|B)i")>; +def KryoWrite_3cyc_LS_259ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_3cyc_LS_259ln], + (instregex "LDUR(BB|HH|W|X)i")>; +def KryoWrite_4cyc_LS_266ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 4; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_4cyc_LS_266ln], + (instregex "LDURS(B|H)?(W|X)i")>; +def KryoWrite_3cyc_LS_258ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_3cyc_LS_258ln, WriteLDHi], + (instregex "LDXP(W|X)")>; +def KryoWrite_3cyc_LS_258_1ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 3; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_3cyc_LS_258_1ln], + (instregex "LDXR(B|H|W|X)")>; +def KryoWrite_2cyc_XY_XY_137ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_137ln], + (instrs LSLVWr, LSLVXr)>; +def KryoWrite_1cyc_XY_135ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_XY_135ln], + (instregex "(LS|AS|RO)RV(W|X)r")>; +def KryoWrite_4cyc_X_84ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_4cyc_X_84ln], + (instrs MADDWrrr, MSUBWrrr)>; +def KryoWrite_5cyc_X_85ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 5; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_5cyc_X_85ln], + (instrs MADDXrrr, MSUBXrrr)>; +def KryoWrite_4cyc_X_noRSV_188ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_noRSV_188ln], + (instregex "(MLA|MLS|MUL)(v8i8|v4i16|v2i32)(_indexed)?")>; +def KryoWrite_4cyc_X_X_192ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_X_192ln], + (instregex "(MLA|MLS|MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?")>; +def KryoWrite_1cyc_XY_noRSV_198ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_198ln], + (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)")>; +def KryoWrite_1cyc_XY_XY_199ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_XY_199ln], + (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)")>; +def KryoWrite_1cyc_X_89ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_X_89ln], + (instrs MOVKWi, MOVKXi)>; +def KryoWrite_1cyc_XY_91ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_XY_91ln], + (instrs MOVNWi, MOVNXi)>; +def KryoWrite_1cyc_XY_90ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_XY_90ln], + (instrs MOVZWi, MOVZXi)>; +def KryoWrite_2cyc_XY_93ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_2cyc_XY_93ln], + (instrs MRS)>; +def KryoWrite_0cyc_X_87ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 0; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_0cyc_X_87ln], + (instrs MSRpstateImm4)>; +def : InstRW<[KryoWrite_0cyc_X_87ln], + (instrs MSRpstateImm1)>; +def KryoWrite_0cyc_XY_88ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 0; 
let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_0cyc_XY_88ln], + (instrs MSR)>; +def KryoWrite_1cyc_XY_noRSV_143ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_143ln], + (instregex "NEG(v8i8|v4i16|v2i32|v1i64)")>; +def KryoWrite_1cyc_XY_XY_145ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_XY_145ln], + (instregex "NEG(v16i8|v8i16|v4i32|v2i64)")>; +def KryoWrite_1cyc_XY_noRSV_193ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_193ln], + (instrs NOTv8i8)>; +def KryoWrite_1cyc_XY_XY_194ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_XY_194ln], + (instrs NOTv16i8)>; +def KryoWrite_2cyc_XY_noRSV_234ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_234ln], + (instrs PMULv8i8)>; +def KryoWrite_2cyc_XY_XY_236ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_236ln], + (instrs PMULv16i8)>; +def KryoWrite_2cyc_XY_XY_235ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_235ln], + (instrs PMULLv8i8, PMULLv16i8)>; +def KryoWrite_3cyc_XY_XY_237ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_237ln], + (instrs PMULLv1i64, PMULLv2i64)>; +def KryoWrite_0cyc_LS_254ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 0; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_0cyc_LS_254ln], + (instrs PRFMl, PRFMui)>; +def KryoWrite_0cyc_LS_253ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 0; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_0cyc_LS_253ln], + (instrs PRFUMi)>; +def KryoWrite_6cyc_XY_X_noRSV_175ln : + SchedWriteRes<[KryoUnitXY, KryoUnitX]> { + let Latency = 6; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_6cyc_XY_X_noRSV_175ln], + (instregex "R(ADD|SUB)HNv.*")>; +def KryoWrite_2cyc_XY_204ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_2cyc_XY_204ln], + (instrs RBITWr, RBITXr)>; +def KryoWrite_2cyc_XY_noRSV_218ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_noRSV_218ln], + (instrs RBITv8i8)>; +def KryoWrite_2cyc_XY_XY_219ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_219ln], + (instrs RBITv16i8)>; +def KryoWrite_1cyc_X_202ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_X_202ln], + (instregex "REV(16|32)?(W|X)r")>; +def KryoWrite_1cyc_XY_noRSV_214ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_214ln], + (instregex "REV(16|32|64)(v8i8|v4i16|v2i32)")>; +def KryoWrite_1cyc_XY_XY_216ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_XY_216ln], + (instregex "REV(16|32|64)(v16i8|v8i16|v4i32)")>; +def KryoWrite_3cyc_X_noRSV_244ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_X_noRSV_244ln], + (instregex "S(L|R)I(d|(v8i8|v4i16|v2i32)_shift)")>; +def 
KryoWrite_3cyc_X_X_245ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_X_X_245ln], + (instregex "S(L|R)I(v16i8|v8i16|v4i32|v2i64)_shift")>; +def KryoWrite_1cyc_XY_2ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_XY_2ln, ReadI, ReadI], + (instregex "SBCS?(W|X)r")>; +def KryoWrite_2cyc_XA_XA_XA_24ln : + SchedWriteRes<[KryoUnitXA, KryoUnitXA, KryoUnitXA]> { + let Latency = 2; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_2cyc_XA_XA_XA_24ln], + (instrs SHA1Crrr, SHA1Mrrr, SHA1Prrr)>; +def KryoWrite_1cyc_XY_noRSV_21ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_21ln], + (instrs SHA1Hrr)>; +def KryoWrite_2cyc_X_X_23ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_X_X_23ln], + (instrs SHA1SU0rrr, SHA1SU1rr, SHA256SU0rr)>; +def KryoWrite_4cyc_XA_XA_XA_25ln : + SchedWriteRes<[KryoUnitXA, KryoUnitXA, KryoUnitXA]> { + let Latency = 4; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_4cyc_XA_XA_XA_25ln], + (instrs SHA256Hrrr, SHA256H2rrr)>; +def KryoWrite_3cyc_XY_XY_X_X_26ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY, KryoUnitX, KryoUnitX]> { + let Latency = 3; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_X_X_26ln], + (instrs SHA256SU1rrr)>; +def KryoWrite_4cyc_X_noRSV_189ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_noRSV_189ln], + (instregex "SQR?DMULH(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?")>; +def KryoWrite_3cyc_XY_noRSV_68ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_68ln], + (instregex "SQ(ABS|NEG)(v1i8|v1i16|v1i32|v1i64)")>; +def KryoWrite_3cyc_XY_noRSV_157ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_157ln], + (instregex "SQ(ABS|NEG)(v8i8|v4i16|v2i32)")>; +def KryoWrite_3cyc_XY_XY_164ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_164ln], + (instregex "SQ(ABS|NEG)(v16i8|v8i16|v4i32|v2i64)")>; +def KryoWrite_4cyc_X_noRSV_190ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 4; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_4cyc_X_noRSV_190ln], + (instregex "SQD(MLAL|MLSL|MULL)(i16|i32)")>; +def KryoWrite_0cyc_LS_Y_274ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_274ln], + (instregex "ST1(One(v8b|v4h|v2s|v1d|v16b|v8h|v4s|v2d)|(i8|i16|i32|i64)|Two(v8b|v4h|v2s|v1d))$")>; +def KryoWrite_1cyc_LS_Y_X_301ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitX]> { + let Latency = 1; let NumMicroOps = 3; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_X_301ln], + (instregex "ST1(One(v8b|v4h|v2s|v1d|v16b|v8h|v4s|v2d)|(i8|i16|i32|i64)|Two(v8b|v4h|v2s|v1d))_POST$")>; +def KryoWrite_1cyc_LS_Y_XY_305ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 3; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_XY_305ln], + (instregex "ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>; +def KryoWrite_0cyc_LS_Y_LS_Y_323ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 4; +} +def : InstRW<[WriteAdr, KryoWrite_0cyc_LS_Y_LS_Y_323ln], + (instregex 
"ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))_POST$")>; +def KryoWrite_1cyc_LS_Y_XY_LS_Y_345ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitLS, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 5; +} +def : InstRW<[KryoWrite_1cyc_LS_Y_XY_LS_Y_345ln], + (instregex "ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>; +def KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_356ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY, KryoUnitLS, + KryoUnitY]> { + let Latency = 0; let NumMicroOps = 6; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_356ln], + (instregex "ST1Three(v16b|v8h|v4s|v2d)$")>; +def KryoWrite_1cyc_LS_Y_XY_LS_Y_LS_Y_366ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitLS, KryoUnitY, + KryoUnitLS, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 7; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_XY_LS_Y_LS_Y_366ln], + (instregex "ST1Three(v16b|v8h|v4s|v2d)_POST$")>; +def KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_LS_Y_371ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY, KryoUnitLS, + KryoUnitY, KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 8; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_LS_Y_371ln], + (instregex "ST1Four(v16b|v8h|v4s|v2d)$")>; +def KryoWrite_0cyc_LS_Y_LS_Y_XY_LS_Y_LS_Y_377ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY, KryoUnitXY, + KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 9; +} +def : InstRW<[WriteAdr, KryoWrite_0cyc_LS_Y_LS_Y_XY_LS_Y_LS_Y_377ln], + (instregex "ST1Four(v16b|v8h|v4s|v2d)_POST$")>; +def KryoWrite_0cyc_LS_Y_275ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_275ln], + (instregex "ST2(Two(v8b|v4h|v2s|v1d|v16b|v8h|v4s|v2d)|(i8|i16|i32|i64))$")>; +def KryoWrite_1cyc_LS_Y_XY_306ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 3; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_XY_306ln], + (instregex "ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))_POST$")>; +def KryoWrite_0cyc_LS_Y_LS_Y_322ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_322ln], + (instregex "ST2Two(v16b|v8h|v4s|v2d)$")>; +def KryoWrite_1cyc_LS_Y_XY_LS_Y_344ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitLS, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 5; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_XY_LS_Y_344ln], + (instregex "ST2Two(v16b|v8h|v4s|v2d)_POST$")>; +def KryoWrite_0cyc_LS_Y_LS_Y_324ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_324ln], + (instregex "ST3(Threev1d|(i8|i16|i32|i64))$")>; +def KryoWrite_1cyc_LS_Y_XY_LS_Y_346ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitLS, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 5; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_XY_LS_Y_346ln], + (instregex "ST3(Threev1d|(i8|i16|i32|i64))_POST$")>; +def KryoWrite_1cyc_X_X_LS_Y_LS_Y_353ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitLS, + KryoUnitY]> { + let Latency = 1; let NumMicroOps = 6; +} +def : InstRW<[KryoWrite_1cyc_X_X_LS_Y_LS_Y_353ln], + (instregex "ST3Three(v8b|v4h|v2s)$")>; +def KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_357ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY, KryoUnitLS, + KryoUnitY]> { + let Latency = 0; let 
NumMicroOps = 6; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_357ln], + (instregex "ST3Threev2d$")>; +def KryoWrite_1cyc_X_X_LS_Y_XY_LS_Y_363ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitXY, + KryoUnitLS, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 7; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_X_X_LS_Y_XY_LS_Y_363ln], + (instregex "ST3Three(v8b|v4h|v2s)_POST$")>; +def KryoWrite_1cyc_LS_Y_XY_LS_Y_LS_Y_367ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitLS, KryoUnitY, + KryoUnitLS, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 7; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_XY_LS_Y_LS_Y_367ln], + (instregex "ST3Threev2d_POST$")>; +def KryoWrite_1cyc_X_X_LS_Y_LS_Y_X_X_LS_Y_LS_Y_385ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitLS, + KryoUnitY, KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, + KryoUnitLS, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 12; +} +def : InstRW<[KryoWrite_1cyc_X_X_LS_Y_LS_Y_X_X_LS_Y_LS_Y_385ln], + (instregex "ST3Three(v16b|v8h|v4s)$")>; +def KryoWrite_1cyc_X_X_LS_Y_LS_Y_X_X_LS_Y_XY_LS_Y_388ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitLS, + KryoUnitY, KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, + KryoUnitXY, KryoUnitLS, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 13; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_X_X_LS_Y_LS_Y_X_X_LS_Y_XY_LS_Y_388ln], + (instregex "ST3Three(v16b|v8h|v4s)_POST$")>; +def KryoWrite_0cyc_LS_Y_LS_Y_325ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_325ln], + (instregex "ST4(Fourv1d|(i8|i16|i32|i64))$")>; +def KryoWrite_1cyc_LS_Y_XY_LS_Y_347ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitLS, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 5; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_XY_LS_Y_347ln], + (instregex "ST4(Fourv1d|(i8|i16|i32|i64))_POST$")>; +def KryoWrite_1cyc_X_X_LS_Y_X_X_LS_Y_370ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitX, + KryoUnitX, KryoUnitLS, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 8; +} +def : InstRW<[KryoWrite_1cyc_X_X_LS_Y_X_X_LS_Y_370ln], + (instregex "ST4Four(v8b|v4h|v2s)$")>; +def KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_LS_Y_372ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY, KryoUnitLS, + KryoUnitY, KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 8; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_LS_Y_LS_Y_372ln], + (instregex "ST4Fourv2d$")>; +def KryoWrite_1cyc_X_X_LS_Y_XY_X_X_LS_Y_375ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitXY, + KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 9; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_X_X_LS_Y_XY_X_X_LS_Y_375ln], + (instregex "ST4Four(v8b|v4h|v2s)_POST$")>; +def KryoWrite_0cyc_LS_Y_LS_Y_XY_LS_Y_LS_Y_379ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY, KryoUnitXY, + KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 9; +} +def : InstRW<[WriteAdr, KryoWrite_0cyc_LS_Y_LS_Y_XY_LS_Y_LS_Y_379ln], + (instregex "ST4Fourv2d_POST$")>; +def KryoWrite_1cyc_X_X_LS_Y_X_X_LS_Y_X_X_LS_Y_X_X_LS_Y_390ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitX, + KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitX, KryoUnitX, + KryoUnitLS, KryoUnitY, KryoUnitX, KryoUnitX, KryoUnitLS, + KryoUnitY]> { + let Latency = 1; let NumMicroOps = 16; +} +def 
: InstRW<[KryoWrite_1cyc_X_X_LS_Y_X_X_LS_Y_X_X_LS_Y_X_X_LS_Y_390ln], + (instregex "ST4Four(v16b|v8h|v4s)$")>; +def KryoWrite_1cyc_X_X_LS_Y_X_X_LS_Y_X_X_LS_Y_XY_X_X_LS_Y_392ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitX, + KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitX, KryoUnitX, + KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitX, KryoUnitX, + KryoUnitLS, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 17; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_X_X_LS_Y_X_X_LS_Y_X_X_LS_Y_XY_X_X_LS_Y_392ln], + (instregex "ST4Four(v16b|v8h|v4s)_POST$")>; +def KryoWrite_0cyc_LS_LS_Y_299ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_0cyc_LS_LS_Y_299ln], + (instregex "STLR(B|H|W|X)")>; +def KryoWrite_3cyc_LS_LS_Y_307ln : + SchedWriteRes<[KryoUnitLS, KryoUnitLS, KryoUnitY]> { + let Latency = 3; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_3cyc_LS_LS_Y_307ln], + (instregex "STLX(P(W|X)|R(B|H|W|X))")>; +def KryoWrite_0cyc_LS_Y_276ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_276ln], + (instrs STNPDi, STNPSi)>; +def KryoWrite_0cyc_LS_Y_LS_Y_326ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_326ln], + (instrs STNPQi)>; +def KryoWrite_0cyc_LS_Y_280ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_280ln], + (instrs STNPWi, STNPXi)>; +def KryoWrite_0cyc_LS_Y_277ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_277ln], + (instregex "STP(D|S)i")>; +def KryoWrite_1cyc_LS_Y_X_303ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitX]> { + let Latency = 1; let NumMicroOps = 3; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_X_303ln], + (instregex "STP(D|S)(post|pre)")>; +def KryoWrite_0cyc_LS_Y_LS_Y_327ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_LS_Y_327ln], + (instrs STPQi)>; +def KryoWrite_1cyc_LS_Y_X_LS_Y_343ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitX, KryoUnitLS, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 5; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_X_LS_Y_343ln], + (instrs STPQpost, STPQpre)>; +def KryoWrite_0cyc_LS_Y_279ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_279ln], + (instregex "STP(W|X)i")>; +def KryoWrite_1cyc_LS_X_Y_300ln : + SchedWriteRes<[KryoUnitLS, KryoUnitX, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 3; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_X_Y_300ln], + (instregex "STP(W|X)(post|pre)")>; +def KryoWrite_0cyc_LS_Y_278ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_278ln], + (instregex "STR(Q|D|S|H|B)ui")>; +def KryoWrite_1cyc_X_LS_Y_295ln : + SchedWriteRes<[KryoUnitX, KryoUnitLS, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_1cyc_X_LS_Y_295ln], + (instregex "STR(D|S|H|B)ro(W|X)")>; +def KryoWrite_1cyc_LS_Y_X_304ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY, KryoUnitX]> { + let Latency = 1; let NumMicroOps = 3; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_Y_X_304ln], + (instregex "STR(Q|D|S|H|B)(post|pre)")>; +def 
KryoWrite_2cyc_X_LS_Y_XY_LS_Y_354ln : + SchedWriteRes<[KryoUnitX, KryoUnitLS, KryoUnitY, KryoUnitXY, KryoUnitLS, + KryoUnitY]> { + let Latency = 2; let NumMicroOps = 6; +} +def : InstRW<[KryoWrite_2cyc_X_LS_Y_XY_LS_Y_354ln], + (instregex "STRQro(W|X)")>; +def KryoWrite_0cyc_LS_Y_399ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_399ln], + (instregex "STR(BB|HH|W|X)ui")>; +def KryoWrite_1cyc_X_LS_Y_406ln : + SchedWriteRes<[KryoUnitX, KryoUnitLS, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_1cyc_X_LS_Y_406ln], + (instregex "STR(BB|HH|W|X)ro(W|X)")>; +def KryoWrite_1cyc_LS_X_Y_407ln : + SchedWriteRes<[KryoUnitLS, KryoUnitX, KryoUnitY]> { + let Latency = 1; let NumMicroOps = 3; +} +def : InstRW<[WriteAdr, KryoWrite_1cyc_LS_X_Y_407ln], + (instregex "STR(BB|HH|W|X)(post|pre)")>; +def KryoWrite_0cyc_LS_Y_398ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_398ln], + (instregex "STTR(B|H|W|X)i")>; +def KryoWrite_0cyc_LS_Y_396ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_396ln], + (instregex "STUR(Q|D|S|H|B)i")>; +def KryoWrite_0cyc_LS_Y_397ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY]> { + let Latency = 0; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_0cyc_LS_Y_397ln], + (instregex "STUR(BB|HH|W|X)i")>; +def KryoWrite_3cyc_LS_Y_404ln : + SchedWriteRes<[KryoUnitLS, KryoUnitY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_LS_Y_404ln], + (instregex "STX(P(W|X)|R(B|H|W|X))")>; +def KryoWrite_3cyc_XY_noRSV_160ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_160ln], + (instregex "^(SU|US)QADD(v8i8|v4i16|v2i32)")>; +def KryoWrite_3cyc_XY_XY_167ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_167ln], + (instregex "^(SU|US)QADD(v16i8|v8i16|v4i32|v2i64)")>; +def KryoWrite_1cyc_XY_1ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_1cyc_XY_1ln, ReadI], + (instregex "SUBS?(W|X)ri")>; +def KryoWrite_2cyc_XY_XY_5ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_5ln, ReadI, ReadIEReg], + (instregex "SUBS?(W|X)rx")>; +def KryoWrite_2cyc_XY_XY_5_1ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 2; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_2cyc_XY_XY_5_1ln, ReadI, ReadISReg], + (instregex "SUBS?(W|X)rs")>; +def KryoWrite_1cyc_XY_noRSV_6ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_6ln, ReadI, ReadI], + (instregex "SUBS?(W|X)rr")>; +def KryoWrite_0cyc_LS_9ln : + SchedWriteRes<[KryoUnitLS]> { + let Latency = 0; let NumMicroOps = 1; +} +def : InstRW<[KryoWrite_0cyc_LS_9ln], + (instregex "SYSL?xt")>; +def KryoWrite_1cyc_X_noRSV_205ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_X_noRSV_205ln], + (instrs TBLv8i8One)>; +def KryoWrite_1cyc_X_X_208ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_X_X_208ln], + (instrs TBLv16i8One)>; +def KryoWrite_2cyc_X_X_X_noRSV_222ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX]> { + 
let Latency = 2; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_2cyc_X_X_X_noRSV_222ln], + (instrs TBLv8i8Two)>; +def KryoWrite_2cyc_X_X_X_X_X_X_224ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitX]> { + let Latency = 2; let NumMicroOps = 6; +} +def : InstRW<[KryoWrite_2cyc_X_X_X_X_X_X_224ln], + (instrs TBLv16i8Two)>; +def KryoWrite_3cyc_X_X_X_X_X_noRSV_225ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX]> { + let Latency = 3; let NumMicroOps = 6; +} +def : InstRW<[KryoWrite_3cyc_X_X_X_X_X_noRSV_225ln], + (instrs TBLv8i8Three)>; +def KryoWrite_3cyc_X_X_X_X_X_X_X_noRSV_228ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitX, KryoUnitX]> { + let Latency = 3; let NumMicroOps = 8; +} +def : InstRW<[KryoWrite_3cyc_X_X_X_X_X_X_X_noRSV_228ln], + (instrs TBLv8i8Four)>; +def KryoWrite_4cyc_X_X_X_X_X_X_X_X_XY_X_X_230ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitXY, KryoUnitX, + KryoUnitX]> { + let Latency = 4; let NumMicroOps = 11; +} +def : InstRW<[KryoWrite_4cyc_X_X_X_X_X_X_X_X_XY_X_X_230ln], + (instrs TBLv16i8Three)>; +def KryoWrite_4cyc_X_X_X_X_X_X_X_X_X_X_XY_X_X_X_X_232ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitXY, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 15; +} +def : InstRW<[KryoWrite_4cyc_X_X_X_X_X_X_X_X_X_X_XY_X_X_X_X_232ln], + (instrs TBLv16i8Four)>; +def KryoWrite_2cyc_X_X_noRSV_220ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 2; let NumMicroOps = 3; +} +def : InstRW<[KryoWrite_2cyc_X_X_noRSV_220ln], + (instrs TBXv8i8One)>; +def KryoWrite_2cyc_X_X_X_X_221ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX]> { + let Latency = 2; let NumMicroOps = 4; +} +def : InstRW<[KryoWrite_2cyc_X_X_X_X_221ln], + (instrs TBXv16i8One)>; +def KryoWrite_3cyc_X_X_X_X_noRSV_223ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX]> { + let Latency = 3; let NumMicroOps = 5; +} +def : InstRW<[KryoWrite_3cyc_X_X_X_X_noRSV_223ln], + (instrs TBXv8i8Two)>; +def KryoWrite_4cyc_X_X_X_X_X_X_noRSV_226ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitX]> { + let Latency = 4; let NumMicroOps = 7; +} +def : InstRW<[KryoWrite_4cyc_X_X_X_X_X_X_noRSV_226ln], + (instrs TBXv8i8Three)>; +def KryoWrite_3cyc_X_X_X_X_X_X_X_X_227ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitX, KryoUnitX, KryoUnitX]> { + let Latency = 3; let NumMicroOps = 8; +} +def : InstRW<[KryoWrite_3cyc_X_X_X_X_X_X_X_X_227ln], + (instrs TBXv16i8Two)>; +def KryoWrite_4cyc_X_X_X_X_X_X_X_X_noRSV_229ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitX, KryoUnitX, KryoUnitX]> { + let Latency = 4; let NumMicroOps = 9; +} +def : InstRW<[KryoWrite_4cyc_X_X_X_X_X_X_X_X_noRSV_229ln], + (instrs TBXv8i8Four)>; +def KryoWrite_5cyc_X_X_X_X_X_X_X_X_X_XY_X_X_X_231ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitXY, + KryoUnitX, KryoUnitX, KryoUnitX]> { + let Latency = 5; let NumMicroOps = 13; +} +def : InstRW<[KryoWrite_5cyc_X_X_X_X_X_X_X_X_X_XY_X_X_X_231ln], + (instrs TBXv16i8Three)>; +def KryoWrite_5cyc_X_X_X_X_X_X_X_X_X_X_X_XY_X_X_X_X_X_233ln : + SchedWriteRes<[KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, + 
KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitX, KryoUnitXY, KryoUnitX, KryoUnitX, KryoUnitX, + KryoUnitX, KryoUnitX]> { + let Latency = 5; let NumMicroOps = 17; +} +def : InstRW<[KryoWrite_5cyc_X_X_X_X_X_X_X_X_X_X_X_XY_X_X_X_X_X_233ln], + (instrs TBXv16i8Four)>; +def KryoWrite_1cyc_XY_XY_217ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_XY_217ln], + (instregex "((TRN1|TRN2|ZIP1|UZP1|UZP2)v2i64|ZIP2(v2i64|v4i32|v8i16|v16i8))")>; +def KryoWrite_1cyc_X_X_211ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_X_X_211ln], + (instregex "(TRN1|TRN2)(v4i32|v8i16|v16i8)")>; +def KryoWrite_1cyc_X_XY_213ln : + SchedWriteRes<[KryoUnitX, KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_X_XY_213ln], + (instregex "(TRN1|TRN2)(v2i32|v4i16|v8i8)")>; +def KryoWrite_3cyc_XY_noRSV_156ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_noRSV_156ln], + (instrs URECPEv2i32, URSQRTEv2i32)>; +def KryoWrite_3cyc_XY_XY_168ln : + SchedWriteRes<[KryoUnitXY, KryoUnitXY]> { + let Latency = 3; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_3cyc_XY_XY_168ln], + (instrs URECPEv4i32, URSQRTEv4i32)>; +def KryoWrite_1cyc_X_X_210ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_X_X_210ln], + (instregex "(UZP1|UZP2)(v4i32|v8i16|v16i8)")>; +def KryoWrite_1cyc_X_noRSV_206ln : + SchedWriteRes<[KryoUnitX]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_X_noRSV_206ln], + (instregex "(UZP1|UZP2|ZIP1|ZIP2)(v2i32|v4i16|v8i8)")>; +def KryoWrite_1cyc_XY_noRSV_215ln : + SchedWriteRes<[KryoUnitXY]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_XY_noRSV_215ln], + (instregex "XTNv.*")>; +def KryoWrite_1cyc_X_X_209ln : + SchedWriteRes<[KryoUnitX, KryoUnitX]> { + let Latency = 1; let NumMicroOps = 2; +} +def : InstRW<[KryoWrite_1cyc_X_X_209ln], + (instregex "ZIP1(v4i32|v8i16|v16i8)")>; diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedThunderX.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedThunderX.td new file mode 100644 index 000000000..fbbd3850d --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedThunderX.td @@ -0,0 +1,357 @@ +//==- AArch64SchedThunderX.td - Cavium ThunderX T8X Scheduling Definitions -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the ARM ThunderX T8X +// (T88, T81, T83) processors. +// Loosely based on Cortex-A53 which is somewhat similar. +// +//===----------------------------------------------------------------------===// + +// ===---------------------------------------------------------------------===// +// The following definitions describe the simpler per-operand machine model. +// This works with MachineScheduler. See llvm/MC/MCSchedule.h for details. + +// Cavium ThunderX T8X scheduling machine model. +def ThunderXT8XModel : SchedMachineModel { + let IssueWidth = 2; // 2 micro-ops dispatched per cycle. + let MicroOpBufferSize = 0; // ThunderX T88/T81/T83 are in-order. 
+ let LoadLatency = 3; // Optimistic load latency. + let MispredictPenalty = 8; // Branch mispredict penalty. + let PostRAScheduler = 1; // Use PostRA scheduler. + let CompleteModel = 1; + + list<Predicate> UnsupportedFeatures = [HasSVE]; + + // FIXME: Remove when all errors have been fixed. + let FullInstRWOverlapCheck = 0; +} + +// Modeling each pipeline with BufferSize == 0 since T8X is in-order. +def THXT8XUnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU +def THXT8XUnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC +def THXT8XUnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division +def THXT8XUnitLdSt : ProcResource<1> { let BufferSize = 0; } // Load/Store +def THXT8XUnitBr : ProcResource<1> { let BufferSize = 0; } // Branch +def THXT8XUnitFPALU : ProcResource<1> { let BufferSize = 0; } // FP ALU +def THXT8XUnitFPMDS : ProcResource<1> { let BufferSize = 0; } // FP Mul/Div/Sqrt + +//===----------------------------------------------------------------------===// +// Subtarget-specific SchedWrite types mapping the ProcResources and +// latencies. + +let SchedModel = ThunderXT8XModel in { + +// ALU +def : WriteRes<WriteImm, [THXT8XUnitALU]> { let Latency = 1; } +def : WriteRes<WriteI, [THXT8XUnitALU]> { let Latency = 1; } +def : WriteRes<WriteISReg, [THXT8XUnitALU]> { let Latency = 2; } +def : WriteRes<WriteIEReg, [THXT8XUnitALU]> { let Latency = 2; } +def : WriteRes<WriteIS, [THXT8XUnitALU]> { let Latency = 2; } +def : WriteRes<WriteExtr, [THXT8XUnitALU]> { let Latency = 2; } + +// MAC +def : WriteRes<WriteIM32, [THXT8XUnitMAC]> { + let Latency = 4; + let ResourceCycles = [1]; +} + +def : WriteRes<WriteIM64, [THXT8XUnitMAC]> { + let Latency = 4; + let ResourceCycles = [1]; +} + +// Div +def : WriteRes<WriteID32, [THXT8XUnitDiv]> { + let Latency = 12; + let ResourceCycles = [6]; +} + +def : WriteRes<WriteID64, [THXT8XUnitDiv]> { + let Latency = 14; + let ResourceCycles = [8]; +} + +// Load +def : WriteRes<WriteLD, [THXT8XUnitLdSt]> { let Latency = 3; } +def : WriteRes<WriteLDIdx, [THXT8XUnitLdSt]> { let Latency = 3; } +def : WriteRes<WriteLDHi, [THXT8XUnitLdSt]> { let Latency = 3; } + +// Vector Load +def : WriteRes<WriteVLD, [THXT8XUnitLdSt]> { + let Latency = 8; + let ResourceCycles = [3]; +} + +def THXT8XWriteVLD1 : SchedWriteRes<[THXT8XUnitLdSt]> { + let Latency = 6; + let ResourceCycles = [1]; +} + +def THXT8XWriteVLD2 : SchedWriteRes<[THXT8XUnitLdSt]> { + let Latency = 11; + let ResourceCycles = [7]; +} + +def THXT8XWriteVLD3 : SchedWriteRes<[THXT8XUnitLdSt]> { + let Latency = 12; + let ResourceCycles = [8]; +} + +def THXT8XWriteVLD4 : SchedWriteRes<[THXT8XUnitLdSt]> { + let Latency = 13; + let ResourceCycles = [9]; +} + +def THXT8XWriteVLD5 : SchedWriteRes<[THXT8XUnitLdSt]> { + let Latency = 13; + let ResourceCycles = [9]; +} + +// Pre/Post Indexing +def : WriteRes<WriteAdr, []> { let Latency = 0; } + +// Store +def : WriteRes<WriteST, [THXT8XUnitLdSt]> { let Latency = 1; } +def : WriteRes<WriteSTP, [THXT8XUnitLdSt]> { let Latency = 1; } +def : WriteRes<WriteSTIdx, [THXT8XUnitLdSt]> { let Latency = 1; } +def : WriteRes<WriteSTX, [THXT8XUnitLdSt]> { let Latency = 1; } + +// Vector Store +def : WriteRes<WriteVST, [THXT8XUnitLdSt]>; +def THXT8XWriteVST1 : SchedWriteRes<[THXT8XUnitLdSt]>; + +def THXT8XWriteVST2 : SchedWriteRes<[THXT8XUnitLdSt]> { + let Latency = 10; + let ResourceCycles = [9]; +} + +def THXT8XWriteVST3 : SchedWriteRes<[THXT8XUnitLdSt]> { + let Latency = 11; + let ResourceCycles = [10]; +} + +def : WriteRes<WriteAtomic, []> { let Unsupported 
= 1; } + +// Branch +def : WriteRes<WriteBr, [THXT8XUnitBr]>; +def THXT8XWriteBR : SchedWriteRes<[THXT8XUnitBr]>; +def : WriteRes<WriteBrReg, [THXT8XUnitBr]>; +def THXT8XWriteBRR : SchedWriteRes<[THXT8XUnitBr]>; +def THXT8XWriteRET : SchedWriteRes<[THXT8XUnitALU]>; +def : WriteRes<WriteSys, [THXT8XUnitBr]>; +def : WriteRes<WriteBarrier, [THXT8XUnitBr]>; +def : WriteRes<WriteHint, [THXT8XUnitBr]>; + +// FP ALU +def : WriteRes<WriteF, [THXT8XUnitFPALU]> { let Latency = 6; } +def : WriteRes<WriteFCmp, [THXT8XUnitFPALU]> { let Latency = 6; } +def : WriteRes<WriteFCvt, [THXT8XUnitFPALU]> { let Latency = 6; } +def : WriteRes<WriteFCopy, [THXT8XUnitFPALU]> { let Latency = 6; } +def : WriteRes<WriteFImm, [THXT8XUnitFPALU]> { let Latency = 6; } +def : WriteRes<WriteV, [THXT8XUnitFPALU]> { let Latency = 6; } + +// FP Mul, Div, Sqrt +def : WriteRes<WriteFMul, [THXT8XUnitFPMDS]> { let Latency = 6; } +def : WriteRes<WriteFDiv, [THXT8XUnitFPMDS]> { + let Latency = 22; + let ResourceCycles = [19]; +} + +def THXT8XWriteFMAC : SchedWriteRes<[THXT8XUnitFPMDS]> { let Latency = 10; } + +def THXT8XWriteFDivSP : SchedWriteRes<[THXT8XUnitFPMDS]> { + let Latency = 12; + let ResourceCycles = [9]; +} + +def THXT8XWriteFDivDP : SchedWriteRes<[THXT8XUnitFPMDS]> { + let Latency = 22; + let ResourceCycles = [19]; +} + +def THXT8XWriteFSqrtSP : SchedWriteRes<[THXT8XUnitFPMDS]> { + let Latency = 17; + let ResourceCycles = [14]; +} + +def THXT8XWriteFSqrtDP : SchedWriteRes<[THXT8XUnitFPMDS]> { + let Latency = 31; + let ResourceCycles = [28]; +} + +//===----------------------------------------------------------------------===// +// Subtarget-specific SchedRead types. + +// No forwarding for these reads. +def : ReadAdvance<ReadExtrHi, 1>; +def : ReadAdvance<ReadAdrBase, 2>; +def : ReadAdvance<ReadVLD, 2>; + +// FIXME: This needs more targeted benchmarking. +// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable +// operands are needed one cycle later if and only if they are to be +// shifted. Otherwise, they too are needed two cycles later. This same +// ReadAdvance applies to Extended registers as well, even though there is +// a separate SchedPredicate for them. +def : ReadAdvance<ReadI, 2, [WriteImm, WriteI, + WriteISReg, WriteIEReg, WriteIS, + WriteID32, WriteID64, + WriteIM32, WriteIM64]>; +def THXT8XReadShifted : SchedReadAdvance<1, [WriteImm, WriteI, + WriteISReg, WriteIEReg, WriteIS, + WriteID32, WriteID64, + WriteIM32, WriteIM64]>; +def THXT8XReadNotShifted : SchedReadAdvance<2, [WriteImm, WriteI, + WriteISReg, WriteIEReg, WriteIS, + WriteID32, WriteID64, + WriteIM32, WriteIM64]>; +def THXT8XReadISReg : SchedReadVariant<[ + SchedVar<RegShiftedPred, [THXT8XReadShifted]>, + SchedVar<NoSchedPred, [THXT8XReadNotShifted]>]>; +def : SchedAlias<ReadISReg, THXT8XReadISReg>; + +def THXT8XReadIEReg : SchedReadVariant<[ + SchedVar<RegExtendedPred, [THXT8XReadShifted]>, + SchedVar<NoSchedPred, [THXT8XReadNotShifted]>]>; +def : SchedAlias<ReadIEReg, THXT8XReadIEReg>; + +// MAC - Operands are generally needed one cycle later in the MAC pipe. +// Accumulator operands are needed two cycles later. 
+def : ReadAdvance<ReadIM, 1, [WriteImm,WriteI, + WriteISReg, WriteIEReg, WriteIS, + WriteID32, WriteID64, + WriteIM32, WriteIM64]>; +def : ReadAdvance<ReadIMA, 2, [WriteImm, WriteI, + WriteISReg, WriteIEReg, WriteIS, + WriteID32, WriteID64, + WriteIM32, WriteIM64]>; + +// Div +def : ReadAdvance<ReadID, 1, [WriteImm, WriteI, + WriteISReg, WriteIEReg, WriteIS, + WriteID32, WriteID64, + WriteIM32, WriteIM64]>; + +//===----------------------------------------------------------------------===// +// Subtarget-specific InstRW. + +//--- +// Branch +//--- +def : InstRW<[THXT8XWriteBR], (instregex "^B$")>; +def : InstRW<[THXT8XWriteBR], (instregex "^BL$")>; +def : InstRW<[THXT8XWriteBR], (instregex "^B..$")>; +def : InstRW<[THXT8XWriteBR], (instregex "^CBNZ")>; +def : InstRW<[THXT8XWriteBR], (instregex "^CBZ")>; +def : InstRW<[THXT8XWriteBR], (instregex "^TBNZ")>; +def : InstRW<[THXT8XWriteBR], (instregex "^TBZ")>; +def : InstRW<[THXT8XWriteBRR], (instregex "^BR$")>; +def : InstRW<[THXT8XWriteBRR], (instregex "^BLR$")>; + +//--- +// Ret +//--- +def : InstRW<[THXT8XWriteRET], (instregex "^RET$")>; + +//--- +// Miscellaneous +//--- +def : InstRW<[WriteI], (instrs COPY)>; + +//--- +// Vector Loads +//--- +def : InstRW<[THXT8XWriteVLD1], (instregex "LD1i(8|16|32|64)$")>; +def : InstRW<[THXT8XWriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>; +def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[THXT8XWriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +def : InstRW<[THXT8XWriteVLD1], (instregex "LD2i(8|16|32|64)$")>; +def : InstRW<[THXT8XWriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>; +def : InstRW<[THXT8XWriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD2i(8|16|32|64)(_POST)?$")>; +def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>; +def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>; +def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>; + +def : InstRW<[THXT8XWriteVLD2], (instregex "LD3i(8|16|32|64)$")>; +def : InstRW<[THXT8XWriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVLD4], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[THXT8XWriteVLD3], (instregex "LD3Threev(2d)$")>; +def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>; +def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[THXT8XWriteVLD3, WriteAdr], 
(instregex "LD3Threev(2d)_POST$")>; + +def : InstRW<[THXT8XWriteVLD2], (instregex "LD4i(8|16|32|64)$")>; +def : InstRW<[THXT8XWriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVLD5], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[THXT8XWriteVLD4], (instregex "LD4Fourv(2d)$")>; +def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>; +def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[THXT8XWriteVLD5, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>; + +//--- +// Vector Stores +//--- +def : InstRW<[THXT8XWriteVST1], (instregex "ST1i(8|16|32|64)$")>; +def : InstRW<[THXT8XWriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVST2], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>; +def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +def : InstRW<[THXT8XWriteVST1], (instregex "ST2i(8|16|32|64)$")>; +def : InstRW<[THXT8XWriteVST1], (instregex "ST2Twov(8b|4h|2s)$")>; +def : InstRW<[THXT8XWriteVST2], (instregex "ST2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>; +def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[THXT8XWriteVST2], (instregex "ST3i(8|16|32|64)$")>; +def : InstRW<[THXT8XWriteVST3], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[THXT8XWriteVST2], (instregex "ST3Threev(2d)$")>; +def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>; +def : InstRW<[THXT8XWriteVST3, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST3Threev(2d)_POST$")>; + +def : InstRW<[THXT8XWriteVST2], (instregex "ST4i(8|16|32|64)$")>; +def : InstRW<[THXT8XWriteVST3], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[THXT8XWriteVST2], (instregex "ST4Fourv(2d)$")>; +def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>; +def : InstRW<[THXT8XWriteVST3, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>; + +//--- +// Floating Point MAC, DIV, SQRT +//--- +def : InstRW<[THXT8XWriteFMAC], (instregex "^FN?M(ADD|SUB).*")>; +def : InstRW<[THXT8XWriteFMAC], (instregex "^FML(A|S).*")>; +def : InstRW<[THXT8XWriteFDivSP], (instrs FDIVSrr)>; +def : InstRW<[THXT8XWriteFDivDP], (instrs FDIVDrr)>; +def : InstRW<[THXT8XWriteFDivSP], (instregex "^FDIVv.*32$")>; +def : InstRW<[THXT8XWriteFDivDP], (instregex "^FDIVv.*64$")>; +def : InstRW<[THXT8XWriteFSqrtSP], (instregex "^.*SQRT.*32$")>; +def : InstRW<[THXT8XWriteFSqrtDP], (instregex "^.*SQRT.*64$")>; 
+ +} diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SchedThunderX2T99.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedThunderX2T99.td new file mode 100644 index 000000000..bee3392b6 --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SchedThunderX2T99.td @@ -0,0 +1,1880 @@ +//=- AArch64SchedThunderX2T99.td - Cavium ThunderX T99 ---*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the scheduling model for Cavium ThunderX2T99 +// processors. +// Based on Broadcom Vulcan. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// 2. Pipeline Description. + +def ThunderX2T99Model : SchedMachineModel { + let IssueWidth = 4; // 4 micro-ops dispatched at a time. + let MicroOpBufferSize = 180; // 180 entries in micro-op re-order buffer. + let LoadLatency = 4; // Optimistic load latency. + let MispredictPenalty = 12; // Extra cycles for mispredicted branch. + // Determined via a mix of micro-arch details and experimentation. + let LoopMicroOpBufferSize = 128; + let PostRAScheduler = 1; // Using PostRA sched. + let CompleteModel = 1; + + list<Predicate> UnsupportedFeatures = [HasSVE]; + + // FIXME: Remove when all errors have been fixed. + let FullInstRWOverlapCheck = 0; +} + +let SchedModel = ThunderX2T99Model in { + +// Define the issue ports. + +// Port 0: ALU, FP/SIMD. +def THX2T99P0 : ProcResource<1>; + +// Port 1: ALU, FP/SIMD, integer mul/div. +def THX2T99P1 : ProcResource<1>; + +// Port 2: ALU, Branch. +def THX2T99P2 : ProcResource<1>; + +// Port 3: Store data. +def THX2T99P3 : ProcResource<1>; + +// Port 4: Load/store. +def THX2T99P4 : ProcResource<1>; + +// Port 5: Load/store. +def THX2T99P5 : ProcResource<1>; + +// Define groups for the functional units on each issue port. Each group +// created will be used by a WriteRes later on. +// +// NOTE: Some groups only contain one member. This is a way to create names for +// the various functional units that share a single issue port. For example, +// "THX2T99I1" for ALU ops on port 1 and "THX2T99F1" for FP ops on port 1. + +// Integer divide and multiply micro-ops only on port 1. +def THX2T99I1 : ProcResGroup<[THX2T99P1]>; + +// Branch micro-ops only on port 2. +def THX2T99I2 : ProcResGroup<[THX2T99P2]>; + +// ALU micro-ops on ports 0, 1, and 2. +def THX2T99I012 : ProcResGroup<[THX2T99P0, THX2T99P1, THX2T99P2]>; + +// Crypto FP/SIMD micro-ops only on port 1. +def THX2T99F1 : ProcResGroup<[THX2T99P1]>; + +// FP/SIMD micro-ops on ports 0 and 1. +def THX2T99F01 : ProcResGroup<[THX2T99P0, THX2T99P1]>; + +// Store data micro-ops only on port 3. +def THX2T99SD : ProcResGroup<[THX2T99P3]>; + +// Load/store micro-ops on ports 4 and 5. +def THX2T99LS01 : ProcResGroup<[THX2T99P4, THX2T99P5]>; + +// 60 entry unified scheduler. +def THX2T99Any : ProcResGroup<[THX2T99P0, THX2T99P1, THX2T99P2, + THX2T99P3, THX2T99P4, THX2T99P5]> { + let BufferSize = 60; +} + +// Define commonly used write types for InstRW specializations. +// All definitions follow the format: THX2T99Write_<NumCycles>Cyc_<Resources>. + +// 3 cycles on I1. +def THX2T99Write_3Cyc_I1 : SchedWriteRes<[THX2T99I1]> { + let Latency = 3; + let NumMicroOps = 2; +} + +// 1 cycles on I2. 
+def THX2T99Write_1Cyc_I2 : SchedWriteRes<[THX2T99I2]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// 4 cycles on I1. +def THX2T99Write_4Cyc_I1 : SchedWriteRes<[THX2T99I1]> { + let Latency = 4; + let NumMicroOps = 2; +} + +// 23 cycles on I1. +def THX2T99Write_23Cyc_I1 : SchedWriteRes<[THX2T99I1]> { + let Latency = 23; + let ResourceCycles = [13, 23]; + let NumMicroOps = 4; +} + +// 39 cycles on I1. +def THX2T99Write_39Cyc_I1 : SchedWriteRes<[THX2T99I1]> { + let Latency = 39; + let ResourceCycles = [13, 39]; + let NumMicroOps = 4; +} + +// 1 cycle on I0, I1, or I2. +def THX2T99Write_1Cyc_I012 : SchedWriteRes<[THX2T99I012]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// 2 cycles on I0, I1, or I2. +def THX2T99Write_2Cyc_I012 : SchedWriteRes<[THX2T99I012]> { + let Latency = 2; + let NumMicroOps = 2; +} + +// 4 cycles on I0, I1, or I2. +def THX2T99Write_4Cyc_I012 : SchedWriteRes<[THX2T99I012]> { + let Latency = 2; + let NumMicroOps = 3; +} + +// 5 cycles on I0, I1, or I2. +def THX2T99Write_5Cyc_I012 : SchedWriteRes<[THX2T99I012]> { + let Latency = 2; + let NumMicroOps = 3; +} + +// 5 cycles on F1. +def THX2T99Write_5Cyc_F1 : SchedWriteRes<[THX2T99F1]> { + let Latency = 5; + let NumMicroOps = 2; +} + +// 7 cycles on F1. +def THX2T99Write_7Cyc_F1 : SchedWriteRes<[THX2T99F1]> { + let Latency = 7; + let NumMicroOps = 2; +} + +// 4 cycles on F0 or F1. +def THX2T99Write_4Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 4; + let NumMicroOps = 2; +} + +// 5 cycles on F0 or F1. +def THX2T99Write_5Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 5; + let NumMicroOps = 2; +} + +// 6 cycles on F0 or F1. +def THX2T99Write_6Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 6; + let NumMicroOps = 3; +} + +// 7 cycles on F0 or F1. +def THX2T99Write_7Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 7; + let NumMicroOps = 3; +} + +// 8 cycles on F0 or F1. +def THX2T99Write_8Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 8; + let NumMicroOps = 3; +} + +// 10 cycles on F0 or F1. +def THX2T99Write_10Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 10; + let NumMicroOps = 3; +} + +// 16 cycles on F0 or F1. +def THX2T99Write_16Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 16; + let NumMicroOps = 3; + let ResourceCycles = [8]; +} + +// 23 cycles on F0 or F1. +def THX2T99Write_23Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 23; + let NumMicroOps = 3; + let ResourceCycles = [11]; +} + +// 1 cycles on LS0 or LS1. +def THX2T99Write_1Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { + let Latency = 0; +} + +// 1 cycles on LS0 or LS1 and I0, I1, or I2. +def THX2T99Write_1Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 0; + let NumMicroOps = 2; +} + +// 1 cycles on LS0 or LS1 and 2 of I0, I1, or I2. +def THX2T99Write_1Cyc_LS01_I012_I012 : + SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> { + let Latency = 0; + let NumMicroOps = 3; +} + +// 2 cycles on LS0 or LS1. +def THX2T99Write_2Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// 4 cycles on LS0 or LS1. +def THX2T99Write_4Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { + let Latency = 4; + let NumMicroOps = 4; +} + +// 5 cycles on LS0 or LS1. +def THX2T99Write_5Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { + let Latency = 5; + let NumMicroOps = 3; +} + +// 6 cycles on LS0 or LS1. +def THX2T99Write_6Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { + let Latency = 6; + let NumMicroOps = 3; +} + +// 4 cycles on LS0 or LS1 and I0, I1, or I2. 
+def THX2T99Write_4Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 4; + let NumMicroOps = 3; +} + +// 4 cycles on LS0 or LS1 and 2 of I0, I1, or I2. +def THX2T99Write_4Cyc_LS01_I012_I012 : + SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> { + let Latency = 4; + let NumMicroOps = 3; +} + +// 5 cycles on LS0 or LS1 and I0, I1, or I2. +def THX2T99Write_5Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 5; + let NumMicroOps = 3; +} + +// 5 cycles on LS0 or LS1 and 2 of I0, I1, or I2. +def THX2T99Write_5Cyc_LS01_I012_I012 : + SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> { + let Latency = 5; + let NumMicroOps = 3; +} + +// 6 cycles on LS0 or LS1 and I0, I1, or I2. +def THX2T99Write_6Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 6; + let NumMicroOps = 4; +} + +// 6 cycles on LS0 or LS1 and 2 of I0, I1, or I2. +def THX2T99Write_6Cyc_LS01_I012_I012 : + SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> { + let Latency = 6; + let NumMicroOps = 3; +} + +// 1 cycles on LS0 or LS1 and F0 or F1. +def THX2T99Write_1Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// 5 cycles on LS0 or LS1 and F0 or F1. +def THX2T99Write_5Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { + let Latency = 5; + let NumMicroOps = 3; +} + +// 6 cycles on LS0 or LS1 and F0 or F1. +def THX2T99Write_6Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { + let Latency = 6; + let NumMicroOps = 3; +} + +// 7 cycles on LS0 or LS1 and F0 or F1. +def THX2T99Write_7Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { + let Latency = 7; + let NumMicroOps = 3; +} + +// 8 cycles on LS0 or LS1 and F0 or F1. +def THX2T99Write_8Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { + let Latency = 8; + let NumMicroOps = 3; +} + +// 8 cycles on LS0 or LS1 and I0, I1, or I2. +def THX2T99Write_8Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 8; + let NumMicroOps = 4; +} + +// 12 cycles on LS0 or LS1 and I0, I1, or I2. +def THX2T99Write_12Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 12; + let NumMicroOps = 6; +} + +// 16 cycles on LS0 or LS1 and I0, I1, or I2. +def THX2T99Write_16Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 16; + let NumMicroOps = 8; +} + +// 24 cycles on LS0 or LS1 and I0, I1, or I2. +def THX2T99Write_24Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 24; + let NumMicroOps = 12; +} + +// 32 cycles on LS0 or LS1 and I0, I1, or I2. +def THX2T99Write_32Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 32; + let NumMicroOps = 16; +} + +// Define commonly used read types. + +// No forwarding is provided for these types. +def : ReadAdvance<ReadI, 0>; +def : ReadAdvance<ReadISReg, 0>; +def : ReadAdvance<ReadIEReg, 0>; +def : ReadAdvance<ReadIM, 0>; +def : ReadAdvance<ReadIMA, 0>; +def : ReadAdvance<ReadID, 0>; +def : ReadAdvance<ReadExtrHi, 0>; +def : ReadAdvance<ReadAdrBase, 0>; +def : ReadAdvance<ReadVLD, 0>; + +//===----------------------------------------------------------------------===// +// 3. Instruction Tables. 
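+
+// Editor note (illustrative, not from the upstream file): the tables in this
+// section either give the default resources and latency for a generic
+// SchedWrite type, e.g.
+//   def : WriteRes<WriteBr, [THX2T99I2]> { let Latency = 1; let NumMicroOps = 2; }
+// or override the mapping for specific opcodes with InstRW, e.g.
+//   def : InstRW<[THX2T99Write_1Cyc_I2], (instrs B, BL, BR, BLR)>;
+// An InstRW entry takes precedence over the default SchedWrite of the
+// instructions it matches.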
+ +//--- +// 3.1 Branch Instructions +//--- + +// Branch, immed +// Branch and link, immed +// Compare and branch +def : WriteRes<WriteBr, [THX2T99I2]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// Branch, register +// Branch and link, register != LR +// Branch and link, register = LR +def : WriteRes<WriteBrReg, [THX2T99I2]> { + let Latency = 1; + let NumMicroOps = 2; +} + +def : WriteRes<WriteSys, []> { let Latency = 1; } +def : WriteRes<WriteBarrier, []> { let Latency = 1; } +def : WriteRes<WriteHint, []> { let Latency = 1; } + +def : WriteRes<WriteAtomic, []> { + let Latency = 4; + let NumMicroOps = 2; +} + +//--- +// Branch +//--- +def : InstRW<[THX2T99Write_1Cyc_I2], (instrs B, BL, BR, BLR)>; +def : InstRW<[THX2T99Write_1Cyc_I2], (instrs RET)>; +def : InstRW<[THX2T99Write_1Cyc_I2], (instregex "^B..$")>; +def : InstRW<[THX2T99Write_1Cyc_I2], + (instregex "^CBZ", "^CBNZ", "^TBZ", "^TBNZ")>; + +//--- +// 3.2 Arithmetic and Logical Instructions +// 3.3 Move and Shift Instructions +//--- + + +// ALU, basic +// Conditional compare +// Conditional select +// Address generation +def : WriteRes<WriteI, [THX2T99I012]> { + let Latency = 1; + let ResourceCycles = [1]; + let NumMicroOps = 2; +} + +def : InstRW<[WriteI], + (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", + "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", + "ADC(W|X)r", + "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", + "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", + "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", + "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r", + "SBCS(W|X)r", "CCMN(W|X)(i|r)", + "CCMP(W|X)(i|r)", "CSEL(W|X)r", + "CSINC(W|X)r", "CSINV(W|X)r", + "CSNEG(W|X)r")>; + +def : InstRW<[WriteI], (instrs COPY)>; + +// ALU, extend and/or shift +def : WriteRes<WriteISReg, [THX2T99I012]> { + let Latency = 2; + let ResourceCycles = [2]; + let NumMicroOps = 2; +} + +def : InstRW<[WriteISReg], + (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", + "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", + "ADC(W|X)r", + "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", + "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", + "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", + "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r", + "SBCS(W|X)r", "CCMN(W|X)(i|r)", + "CCMP(W|X)(i|r)", "CSEL(W|X)r", + "CSINC(W|X)r", "CSINV(W|X)r", + "CSNEG(W|X)r")>; + +def : WriteRes<WriteIEReg, [THX2T99I012]> { + let Latency = 1; + let ResourceCycles = [1]; + let NumMicroOps = 2; +} + +def : InstRW<[WriteIEReg], + (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", + "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", + "ADC(W|X)r", + "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", + "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", + "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", + "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r", + "SBCS(W|X)r", "CCMN(W|X)(i|r)", + "CCMP(W|X)(i|r)", "CSEL(W|X)r", + "CSINC(W|X)r", "CSINV(W|X)r", + "CSNEG(W|X)r")>; + +// Move immed +def : WriteRes<WriteImm, [THX2T99I012]> { + let Latency = 1; + let NumMicroOps = 2; +} + +def : InstRW<[THX2T99Write_1Cyc_I012], + (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>; + +def : InstRW<[THX2T99Write_1Cyc_I012], + (instrs ASRVWr, ASRVXr, LSLVWr, LSLVXr, RORVWr, RORVXr)>; + +// Variable shift +def : WriteRes<WriteIS, [THX2T99I012]> { + let Latency = 1; + let NumMicroOps = 2; +} + +//--- +// 3.4 Divide and Multiply Instructions +//--- + +// Divide, W-form +// Latency range of 13-23/13-39. 
+def : WriteRes<WriteID32, [THX2T99I1]> { + let Latency = 39; + let ResourceCycles = [39]; + let NumMicroOps = 4; +} + +// Divide, X-form +def : WriteRes<WriteID64, [THX2T99I1]> { + let Latency = 23; + let ResourceCycles = [23]; + let NumMicroOps = 4; +} + +// Multiply accumulate, W-form +def : WriteRes<WriteIM32, [THX2T99I012]> { + let Latency = 5; + let NumMicroOps = 3; +} + +// Multiply accumulate, X-form +def : WriteRes<WriteIM64, [THX2T99I012]> { + let Latency = 5; + let NumMicroOps = 3; +} + +//def : InstRW<[WriteIM32, ReadIM, ReadIM, ReadIMA, THX2T99Write_5Cyc_I012], +// (instrs MADDWrrr, MSUBWrrr)>; +def : InstRW<[WriteIM32], (instrs MADDWrrr, MSUBWrrr)>; +def : InstRW<[WriteIM32], (instrs MADDXrrr, MSUBXrrr)>; +def : InstRW<[THX2T99Write_5Cyc_I012], + (instregex "(S|U)(MADDL|MSUBL)rrr")>; + +def : InstRW<[WriteID32], (instrs SDIVWr, UDIVWr)>; +def : InstRW<[WriteID64], (instrs SDIVXr, UDIVXr)>; + +// Bitfield extract, two reg +def : WriteRes<WriteExtr, [THX2T99I012]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// Multiply high +def : InstRW<[THX2T99Write_4Cyc_I1], (instrs SMULHrr, UMULHrr)>; + +// Miscellaneous Data-Processing Instructions +// Bitfield extract +def : InstRW<[THX2T99Write_1Cyc_I012], (instrs EXTRWrri, EXTRXrri)>; + +// Bitifield move - basic +def : InstRW<[THX2T99Write_1Cyc_I012], + (instrs SBFMWri, SBFMXri, UBFMWri, UBFMXri)>; + +// Bitfield move, insert +def : InstRW<[THX2T99Write_1Cyc_I012], (instregex "^BFM")>; +def : InstRW<[THX2T99Write_1Cyc_I012], (instregex "(S|U)?BFM.*")>; + +// Count leading +def : InstRW<[THX2T99Write_3Cyc_I1], (instregex "^CLS(W|X)r$", + "^CLZ(W|X)r$")>; + +// Reverse bits +def : InstRW<[THX2T99Write_1Cyc_I012], (instrs RBITWr, RBITXr)>; + +// Cryptography Extensions +def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AES[DE]")>; +def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AESI?MC")>; +def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^PMULL")>; +def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA1SU0")>; +def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA1(H|SU1)")>; +def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA1[CMP]")>; +def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA256SU0")>; +def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA256(H|H2|SU1)")>; + +// CRC Instructions +// def : InstRW<[THX2T99Write_4Cyc_I1], (instregex "^CRC32", "^CRC32C")>; +def : InstRW<[THX2T99Write_4Cyc_I1], + (instrs CRC32Brr, CRC32Hrr, CRC32Wrr, CRC32Xrr)>; + +def : InstRW<[THX2T99Write_4Cyc_I1], + (instrs CRC32CBrr, CRC32CHrr, CRC32CWrr, CRC32CXrr)>; + +// Reverse bits/bytes +// NOTE: Handled by WriteI. + +//--- +// 3.6 Load Instructions +// 3.10 FP Load Instructions +//--- + +// Load register, literal +// Load register, unscaled immed +// Load register, immed unprivileged +// Load register, unsigned immed +def : WriteRes<WriteLD, [THX2T99LS01]> { + let Latency = 4; + let NumMicroOps = 4; +} + +// Load register, immed post-index +// NOTE: Handled by WriteLD, WriteI. +// Load register, immed pre-index +// NOTE: Handled by WriteLD, WriteAdr. +def : WriteRes<WriteAdr, [THX2T99I012]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// Load pair, immed offset, normal +// Load pair, immed offset, signed words, base != SP +// Load pair, immed offset signed words, base = SP +// LDP only breaks into *one* LS micro-op. Thus +// the resources are handled by WriteLD. 
+def : WriteRes<WriteLDHi, []> { + let Latency = 5; + let NumMicroOps = 5; +} + +// Load register offset, basic +// Load register, register offset, scale by 4/8 +// Load register, register offset, scale by 2 +// Load register offset, extend +// Load register, register offset, extend, scale by 4/8 +// Load register, register offset, extend, scale by 2 +def THX2T99WriteLDIdx : SchedWriteVariant<[ + SchedVar<ScaledIdxPred, [THX2T99Write_6Cyc_LS01_I012_I012]>, + SchedVar<NoSchedPred, [THX2T99Write_5Cyc_LS01_I012]>]>; +def : SchedAlias<WriteLDIdx, THX2T99WriteLDIdx>; + +def THX2T99ReadAdrBase : SchedReadVariant<[ + SchedVar<ScaledIdxPred, [ReadDefault]>, + SchedVar<NoSchedPred, [ReadDefault]>]>; +def : SchedAlias<ReadAdrBase, THX2T99ReadAdrBase>; + +// Load pair, immed pre-index, normal +// Load pair, immed pre-index, signed words +// Load pair, immed post-index, normal +// Load pair, immed post-index, signed words +// NOTE: Handled by WriteLD, WriteLDHi, WriteAdr. + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPDi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPQi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPSi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPWi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPXi)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPDi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPQi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPSi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPSWi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPWi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPXi)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRBui)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRDui)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRHui)>; +def : InstRW<[THX2T99Write_5Cyc_LS01], (instrs LDRQui)>; +def : InstRW<[THX2T99Write_5Cyc_LS01], (instrs LDRSui)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRDl)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRQl)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRWl)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRXl)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRBi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRHi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRWi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRXi)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSBWi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSBXi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSHWi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSHXi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSWi)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPDpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPQpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPSpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPWpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPWpre)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRBpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRDpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs 
LDRHpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRQpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRSpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRWpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRXpre)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBWpre)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBXpre)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBWpost)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBXpost)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHWpre)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHXpre)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHWpost)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHXpost)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRBBpre)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRBBpost)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRHHpre)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRHHpost)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPDpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPQpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPSpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPWpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPXpost)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRBpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRDpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRHpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRQpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRSpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRWpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRXpost)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPDpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPQpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPSpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPWpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPXpre)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRBpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRDpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRHpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRQpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRSpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRWpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRXpre)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPDpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPQpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPSpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + 
(instrs LDPWpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPXpost)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRBpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRDpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRHpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRQpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRSpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRWpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRXpost)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRBroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRDroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHHroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRQroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHWroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHXroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRWroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRXroW)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRBroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRDroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHHroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRQroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHWroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHXroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRWroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRXroX)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRBroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRBroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRDroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRHroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRHHroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRQroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRSroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRSHWroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRSHXroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRWroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRXroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRBroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRDroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRHroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRHHroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + 
(instrs LDRQroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRSroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRSHWroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRSHXroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRWroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRXroX)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURBi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURBBi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURDi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURHi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURHHi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURQi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURXi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSBWi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSBXi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSHWi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSHXi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSWi)>; + +//--- +// Prefetch +//--- +def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMl)>; +def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFUMi)>; +def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMui)>; +def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMroW)>; +def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMroX)>; + +//-- +// 3.7 Store Instructions +// 3.11 FP Store Instructions +//-- + +// Store register, unscaled immed +// Store register, immed unprivileged +// Store register, unsigned immed +def : WriteRes<WriteST, [THX2T99LS01, THX2T99SD]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// Store register, immed post-index +// NOTE: Handled by WriteAdr, WriteST, ReadAdrBase + +// Store register, immed pre-index +// NOTE: Handled by WriteAdr, WriteST + +// Store register, register offset, basic +// Store register, register offset, scaled by 4/8 +// Store register, register offset, scaled by 2 +// Store register, register offset, extend +// Store register, register offset, extend, scale by 4/8 +// Store register, register offset, extend, scale by 1 +def : WriteRes<WriteSTIdx, [THX2T99LS01, THX2T99SD, THX2T99I012]> { + let Latency = 1; + let NumMicroOps = 3; +} + +// Store pair, immed offset, W-form +// Store pair, immed offset, X-form +def : WriteRes<WriteSTP, [THX2T99LS01, THX2T99SD]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// Store pair, immed post-index, W-form +// Store pair, immed post-index, X-form +// Store pair, immed pre-index, W-form +// Store pair, immed pre-index, X-form +// NOTE: Handled by WriteAdr, WriteSTP. 
+ +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURBi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURBBi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURDi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURHi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURHHi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURQi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURSi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURWi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURXi)>; + +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRBi)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRHi)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRWi)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRXi)>; + +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPDi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPQi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPXi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPWi)>; + +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPDi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPQi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPXi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPWi)>; + +def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRBui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRBui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRDui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRDui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRHui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRHui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRQui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRQui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRXui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRXui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRWui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRWui)>; + +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STPDpre, STPDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STPDpre, STPDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STPDpre, STPDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STPDpre, STPDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STPQpre, STPQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STPQpre, STPQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STPQpre, STPQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STPQpre, STPQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STPSpre, STPSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STPSpre, STPSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STPSpre, STPSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STPSpre, STPSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STPWpre, STPWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STPWpre, STPWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STPWpre, STPWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs 
STPWpre, STPWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STPXpre, STPXpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STPXpre, STPXpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STPXpre, STPXpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STPXpre, STPXpost)>; + +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRBpre, STRBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRBpre, STRBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRBpre, STRBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRBpre, STRBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRBBpre, STRBBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRBBpre, STRBBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRBBpre, STRBBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRBBpre, STRBBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRDpre, STRDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRDpre, STRDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRDpre, STRDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRDpre, STRDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRHpre, STRHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRHpre, STRHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRHpre, STRHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRHpre, STRHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRHHpre, STRHHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRHHpre, STRHHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRHHpre, STRHHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRHHpre, STRHHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRQpre, STRQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRQpre, STRQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRQpre, STRQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRQpre, STRQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRSpre, STRSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRSpre, STRSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRSpre, STRSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRSpre, STRSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRWpre, STRWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRWpre, STRWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRWpre, STRWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRWpre, STRWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRXpre, STRXpost)>; +def : 
InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRXpre, STRXpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRXpre, STRXpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRXpre, STRXpost)>; + +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRBroW, STRBroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRBroW, STRBroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRBBroW, STRBBroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRBBroW, STRBBroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRDroW, STRDroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRDroW, STRDroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRHroW, STRHroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRHroW, STRHroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRHHroW, STRHHroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRHHroW, STRHHroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRQroW, STRQroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRQroW, STRQroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRSroW, STRSroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRSroW, STRSroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRWroW, STRWroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRWroW, STRWroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRXroW, STRXroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRXroW, STRXroX)>; + +//--- +// 3.8 FP Data Processing Instructions +//--- + +// FP absolute value +// FP min/max +// FP negate +def : WriteRes<WriteF, [THX2T99F01]> { + let Latency = 5; + let NumMicroOps = 2; +} + +// FP arithmetic +def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADD", "^FSUB")>; + +// FP compare +def : WriteRes<WriteFCmp, [THX2T99F01]> { + let Latency = 5; + let NumMicroOps = 2; +} + +// FP Mul, Div, Sqrt +def : WriteRes<WriteFDiv, [THX2T99F01]> { + let Latency = 22; + let ResourceCycles = [19]; +} + +def THX2T99XWriteFDiv : SchedWriteRes<[THX2T99F01]> { + let Latency = 16; + let ResourceCycles = [8]; + let NumMicroOps = 4; +} + +def THX2T99XWriteFDivSP : SchedWriteRes<[THX2T99F01]> { + let Latency = 16; + let ResourceCycles = [8]; + let NumMicroOps = 4; +} + +def THX2T99XWriteFDivDP : SchedWriteRes<[THX2T99F01]> { + let Latency = 23; + let ResourceCycles = [12]; + let NumMicroOps = 4; +} + +def THX2T99XWriteFSqrtSP : SchedWriteRes<[THX2T99F01]> { + let Latency = 16; + let ResourceCycles = [8]; + let NumMicroOps = 4; +} + +def THX2T99XWriteFSqrtDP : SchedWriteRes<[THX2T99F01]> { + let Latency = 23; + let ResourceCycles = [12]; + let NumMicroOps = 4; +} + +// FP divide, S-form +// FP square root, S-form +def : InstRW<[THX2T99XWriteFDivSP], (instrs FDIVSrr)>; +def : InstRW<[THX2T99XWriteFSqrtSP], (instrs FSQRTSr)>; +def : InstRW<[THX2T99XWriteFDivSP], (instregex "^FDIVv.*32$")>; +def : InstRW<[THX2T99XWriteFSqrtSP], 
(instregex "^.*SQRT.*32$")>; +def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "^FDIVSrr", "^FSQRTSr")>; + +// FP divide, D-form +// FP square root, D-form +def : InstRW<[THX2T99XWriteFDivDP], (instrs FDIVDrr)>; +def : InstRW<[THX2T99XWriteFSqrtDP], (instrs FSQRTDr)>; +def : InstRW<[THX2T99XWriteFDivDP], (instregex "^FDIVv.*64$")>; +def : InstRW<[THX2T99XWriteFSqrtDP], (instregex "^.*SQRT.*64$")>; +def : InstRW<[THX2T99Write_23Cyc_F01], (instregex "^FDIVDrr", "^FSQRTDr")>; + +// FP multiply +// FP multiply accumulate +def : WriteRes<WriteFMul, [THX2T99F01]> { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 3; +} + +def THX2T99XWriteFMul : SchedWriteRes<[THX2T99F01]> { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 3; +} + +def THX2T99XWriteFMulAcc : SchedWriteRes<[THX2T99F01]> { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 3; +} + +def : InstRW<[THX2T99XWriteFMul], (instregex "^FMUL", "^FNMUL")>; +def : InstRW<[THX2T99XWriteFMulAcc], + (instregex "^FMADD", "^FMSUB", "^FNMADD", "^FNMSUB")>; + +// FP round to integral +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>; + +// FP select +def : InstRW<[THX2T99Write_4Cyc_F01], (instregex "^FCSEL")>; + +//--- +// 3.9 FP Miscellaneous Instructions +//--- + +// FP convert, from vec to vec reg +// FP convert, from gen to vec reg +// FP convert, from vec to gen reg +def : WriteRes<WriteFCvt, [THX2T99F01]> { + let Latency = 7; + let NumMicroOps = 3; +} + +// FP move, immed +// FP move, register +def : WriteRes<WriteFImm, [THX2T99F01]> { + let Latency = 4; + let NumMicroOps = 2; +} + +// FP transfer, from gen to vec reg +// FP transfer, from vec to gen reg +def : WriteRes<WriteFCopy, [THX2T99F01]> { + let Latency = 4; + let NumMicroOps = 2; +} + +def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>; + +//--- +// 3.12 ASIMD Integer Instructions +//--- + +// ASIMD absolute diff, D-form +// ASIMD absolute diff, Q-form +// ASIMD absolute diff accum, D-form +// ASIMD absolute diff accum, Q-form +// ASIMD absolute diff accum long +// ASIMD absolute diff long +// ASIMD arith, basic +// ASIMD arith, complex +// ASIMD compare +// ASIMD logical (AND, BIC, EOR) +// ASIMD max/min, basic +// ASIMD max/min, reduce, 4H/4S +// ASIMD max/min, reduce, 8B/8H +// ASIMD max/min, reduce, 16B +// ASIMD multiply, D-form +// ASIMD multiply, Q-form +// ASIMD multiply accumulate long +// ASIMD multiply accumulate saturating long +// ASIMD multiply long +// ASIMD pairwise add and accumulate +// ASIMD shift accumulate +// ASIMD shift by immed, basic +// ASIMD shift by immed and insert, basic, D-form +// ASIMD shift by immed and insert, basic, Q-form +// ASIMD shift by immed, complex +// ASIMD shift by register, basic, D-form +// ASIMD shift by register, basic, Q-form +// ASIMD shift by register, complex, D-form +// ASIMD shift by register, complex, Q-form +def : WriteRes<WriteV, [THX2T99F01]> { + let Latency = 7; + let NumMicroOps = 4; + let ResourceCycles = [4]; +} + +// ASIMD arith, reduce, 4H/4S +// ASIMD arith, reduce, 8B/8H +// ASIMD arith, reduce, 16B + +// ASIMD logical (MVN (alias for NOT), ORN, ORR) +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>; + +// ASIMD arith, reduce +def : InstRW<[THX2T99Write_10Cyc_F01], + (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>; + +// ASIMD polynomial (8x8) multiply long +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^(S|U|SQD)MULL")>; +def : 
InstRW<[THX2T99Write_7Cyc_F01], + (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>; +def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^PMULL(v8i8|v16i8)")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^PMULL(v1i64|v2i64)")>; + +// ASIMD absolute diff accum, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>; +// ASIMD absolute diff accum, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>; +// ASIMD absolute diff accum long +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU]ABAL")>; +// ASIMD arith, reduce, 4H/4S +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>; +// ASIMD arith, reduce, 8B +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>; +// ASIMD arith, reduce, 16B/16H +def : InstRW<[THX2T99Write_10Cyc_F01], + (instregex "^[SU]?ADDL?Vv16i8v$")>; +// ASIMD max/min, reduce, 4H/4S +def : InstRW<[THX2T99Write_10Cyc_F01], + (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>; +// ASIMD max/min, reduce, 8B/8H +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>; +// ASIMD max/min, reduce, 16B/16H +def : InstRW<[THX2T99Write_10Cyc_F01], + (instregex "^[SU](MIN|MAX)Vv16i8v$")>; +// ASIMD multiply, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^(P?MUL|SQR?DMULH)" # + "(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)" # + "(_indexed)?$")>; +// ASIMD multiply, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^(P?MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>; +// ASIMD multiply accumulate, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>; +// ASIMD multiply accumulate, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>; +// ASIMD shift accumulate +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "SRSRAv","SSRAv","URSRAv","USRAv")>; + +// ASIMD shift by immed, basic +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "RSHRNv","SHRNv", "SQRSHRNv","SQRSHRUNv", + "SQSHRNv","SQSHRUNv", "UQRSHRNv", + "UQSHRNv","SQXTNv","SQXTUNv","UQXTNv")>; +// ASIMD shift by immed, complex +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^[SU]?(Q|R){1,2}SHR")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SQSHLU")>; +// ASIMD shift by register, basic, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>; +// ASIMD shift by register, complex, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU][QR]{1,2}SHL" # + "(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>; +// ASIMD shift by register, complex, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>; + +// ASIMD Arithmetic +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "(ADD|SUB)HNv.*")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "(RADD|RSUB)HNv.*")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^SQADD", "^SQNEG", "^SQSUB", "^SRHADD", + "^SUQADD", "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>; +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|" # + "(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>; +def : 
InstRW<[THX2T99Write_5Cyc_F01], + (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SADALP","^UADALP")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SADDLPv","^UADDLPv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SADDLV","^UADDLV")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^ADDVv","^SMAXVv","^UMAXVv","^SMINVv","^UMINVv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^SABAv","^UABAv","^SABALv","^UABALv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^SQADDv","^SQSUBv","^UQADDv","^UQSUBv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SUQADDv","^USQADDv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^ADDHNv","^RADDHNv", "^RSUBHNv", + "^SQABS", "^SQADD", "^SQNEG", "^SQSUB", + "^SRHADD", "^SUBHNv", "^SUQADD", + "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^CMEQv","^CMGEv","^CMGTv", + "^CMLEv","^CMLTv", "^CMHIv","^CMHSv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^SMAXv","^SMINv","^UMAXv","^UMINv", + "^SMAXPv","^SMINPv","^UMAXPv","^UMINPv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^SABDv","^UABDv", "^SABDLv","^UABDLv")>; + +//--- +// 3.13 ASIMD Floating-point Instructions +//--- + +// ASIMD FP absolute value +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FABSv")>; + +// ASIMD FP arith, normal, D-form +// ASIMD FP arith, normal, Q-form +def : InstRW<[THX2T99Write_6Cyc_F01], + (instregex "^FABDv", "^FADDv", "^FSUBv")>; + +// ASIMD FP arith,pairwise, D-form +// ASIMD FP arith, pairwise, Q-form +def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADDPv")>; + +// ASIMD FP compare, D-form +// ASIMD FP compare, Q-form +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FACGEv", "^FACGTv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv", + "^FCMGTv", "^FCMLEv", + "^FCMLTv")>; + +// ASIMD FP round, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^FRINT[AIMNPXZ](v2f32)")>; +// ASIMD FP round, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>; + +// ASIMD FP convert, long +// ASIMD FP convert, narrow +// ASIMD FP convert, other, D-form +// ASIMD FP convert, other, Q-form +// NOTE: Handled by WriteV. 
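+
+// Any FP conversion not matched by a more specific InstRW override below
+// falls back to the subtarget default WriteRes for WriteV defined in
+// section 3.12 above (THX2T99F01, Latency = 7, NumMicroOps = 4,
+// ResourceCycles = [4]). As an illustrative sketch only (the write name and
+// regex here are made up, and the defs are left commented out so the
+// generated tables are unchanged), an explicit override would follow the
+// same SchedWriteRes/InstRW pattern used throughout this model:
+//
+//   def THX2T99XWriteFCvtExample : SchedWriteRes<[THX2T99F01]> {
+//     let Latency = 7;
+//     let NumMicroOps = 4;
+//     let ResourceCycles = [4];
+//   }
+//   def : InstRW<[THX2T99XWriteFCvtExample], (instregex "^FCVT.*v")>;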
+ +// ASIMD FP convert, long and narrow +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^FCVT(L|N|XN)v")>; +// ASIMD FP convert, other, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>; +// ASIMD FP convert, other, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>; + +// ASIMD FP divide, D-form, F32 +def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv2f32)>; +def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "FDIVv2f32")>; + +// ASIMD FP divide, Q-form, F32 +def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv4f32)>; +def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "FDIVv4f32")>; + +// ASIMD FP divide, Q-form, F64 +def : InstRW<[THX2T99Write_23Cyc_F01], (instrs FDIVv2f64)>; +def : InstRW<[THX2T99Write_23Cyc_F01], (instregex "FDIVv2f64")>; + +// ASIMD FP max/min, normal, D-form +// ASIMD FP max/min, normal, Q-form +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXv", "^FMAXNMv", + "^FMINv", "^FMINNMv")>; + +// ASIMD FP max/min, pairwise, D-form +// ASIMD FP max/min, pairwise, Q-form +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXPv", "^FMAXNMPv", + "^FMINPv", "^FMINNMPv")>; + +// ASIMD FP max/min, reduce +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXVv", "^FMAXNMVv", + "^FMINVv", "^FMINNMVv")>; + +// ASIMD FP multiply, D-form, FZ +// ASIMD FP multiply, D-form, no FZ +// ASIMD FP multiply, Q-form, FZ +// ASIMD FP multiply, Q-form, no FZ +def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMULv", "^FMULXv")>; +def : InstRW<[THX2T99Write_6Cyc_F01], + (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>; +def : InstRW<[THX2T99Write_6Cyc_F01], + (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>; + +// ASIMD FP multiply accumulate, Dform, FZ +// ASIMD FP multiply accumulate, Dform, no FZ +// ASIMD FP multiply accumulate, Qform, FZ +// ASIMD FP multiply accumulate, Qform, no FZ +def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMLAv", "^FMLSv")>; +def : InstRW<[THX2T99Write_6Cyc_F01], + (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>; +def : InstRW<[THX2T99Write_6Cyc_F01], + (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>; + +// ASIMD FP negate +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FNEGv")>; + +//-- +// 3.14 ASIMD Miscellaneous Instructions +//-- + +// ASIMD bit reverse +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^RBITv")>; + +// ASIMD bitwise insert, D-form +// ASIMD bitwise insert, Q-form +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^BIFv", "^BITv", "^BSLv")>; + +// ASIMD count, D-form +// ASIMD count, Q-form +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^CLSv", "^CLZv", "^CNTv")>; + +// ASIMD duplicate, gen reg +// ASIMD duplicate, element +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^CPY")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv.+gpr")>; + +// ASIMD extract +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^EXTv")>; + +// ASIMD extract narrow +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^XTNv")>; + +// ASIMD extract narrow, saturating +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^SQXTNv", "^SQXTUNv", "^UQXTNv")>; + +// ASIMD insert, element to element +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>; + +// ASIMD transfer, element to gen reg +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^[SU]MOVv")>; + +// ASIMD move, integer immed +def : InstRW<[THX2T99Write_5Cyc_F01], 
(instregex "^MOVIv")>; + +// ASIMD move, FP immed +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMOVv")>; + +// ASIMD table lookup, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8One")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Two")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Three")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Four")>; + +// ASIMD table lookup, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8One")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Two")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Three")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Four")>; + +// ASIMD transpose +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^TRN1", "^TRN2")>; + +// ASIMD unzip/zip +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>; + +// ASIMD reciprocal estimate, D-form +// ASIMD reciprocal estimate, Q-form +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^FRECPEv", "^FRECPXv", "^URECPEv", + "^FRSQRTEv", "^URSQRTEv")>; + +// ASIMD reciprocal step, D-form, FZ +// ASIMD reciprocal step, D-form, no FZ +// ASIMD reciprocal step, Q-form, FZ +// ASIMD reciprocal step, Q-form, no FZ +def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FRECPSv", "^FRSQRTSv")>; + +// ASIMD reverse +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^REV16v", "^REV32v", "^REV64v")>; + +// ASIMD table lookup, D-form +// ASIMD table lookup, Q-form +def : InstRW<[THX2T99Write_8Cyc_F01], (instregex "^TBLv", "^TBXv")>; + +// ASIMD transfer, element to word or word +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^[SU]MOVv")>; + +// ASIMD transfer, element to gen reg +def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "(S|U)MOVv.*")>; + +// ASIMD transfer gen reg to element +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>; + +// ASIMD transpose +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^TRN1v", "^TRN2v", + "^UZP1v", "^UZP2v")>; + +// ASIMD unzip/zip +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^ZIP1v", "^ZIP2v")>; + +//-- +// 3.15 ASIMD Load Instructions +//-- + +// ASIMD load, 1 element, multiple, 1 reg, D-form +// ASIMD load, 1 element, multiple, 1 reg, Q-form +def : InstRW<[THX2T99Write_4Cyc_LS01], + (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr], + (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 1 element, multiple, 2 reg, D-form +// ASIMD load, 1 element, multiple, 2 reg, Q-form +def : InstRW<[THX2T99Write_4Cyc_LS01], + (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr], + (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 1 element, multiple, 3 reg, D-form +// ASIMD load, 1 element, multiple, 3 reg, Q-form +def : InstRW<[THX2T99Write_5Cyc_LS01], + (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_5Cyc_LS01, WriteAdr], + (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 1 element, multiple, 4 reg, D-form +// ASIMD load, 1 element, multiple, 4 reg, Q-form +def : InstRW<[THX2T99Write_6Cyc_LS01], + (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_6Cyc_LS01, WriteAdr], + (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 1 element, one lane, B/H/S +// ASIMD load, 1 element, one lane, D +def : 
InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD1i(8|16|32|64)$")>; +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], + (instregex "^LD1i(8|16|32|64)_POST$")>; + +// ASIMD load, 1 element, all lanes, D-form, B/H/S +// ASIMD load, 1 element, all lanes, D-form, D +// ASIMD load, 1 element, all lanes, Q-form +def : InstRW<[THX2T99Write_5Cyc_LS01_F01], + (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], + (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 2 element, multiple, D-form, B/H/S +// ASIMD load, 2 element, multiple, Q-form, D +def : InstRW<[THX2T99Write_5Cyc_LS01_F01], + (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], + (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 2 element, one lane, B/H +// ASIMD load, 2 element, one lane, S +// ASIMD load, 2 element, one lane, D +def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD2i(8|16|32|64)$")>; +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], + (instregex "^LD2i(8|16|32|64)_POST$")>; + +// ASIMD load, 2 element, all lanes, D-form, B/H/S +// ASIMD load, 2 element, all lanes, D-form, D +// ASIMD load, 2 element, all lanes, Q-form +def : InstRW<[THX2T99Write_5Cyc_LS01_F01], + (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], + (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 3 element, multiple, D-form, B/H/S +// ASIMD load, 3 element, multiple, Q-form, B/H/S +// ASIMD load, 3 element, multiple, Q-form, D +def : InstRW<[THX2T99Write_8Cyc_LS01_F01], + (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr], + (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 3 element, one lone, B/H +// ASIMD load, 3 element, one lane, S +// ASIMD load, 3 element, one lane, D +def : InstRW<[THX2T99Write_7Cyc_LS01_F01], (instregex "^LD3i(8|16|32|64)$")>; +def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr], + (instregex "^LD3i(8|16|32|64)_POST$")>; + +// ASIMD load, 3 element, all lanes, D-form, B/H/S +// ASIMD load, 3 element, all lanes, D-form, D +// ASIMD load, 3 element, all lanes, Q-form, B/H/S +// ASIMD load, 3 element, all lanes, Q-form, D +def : InstRW<[THX2T99Write_7Cyc_LS01_F01], + (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr], + (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 4 element, multiple, D-form, B/H/S +// ASIMD load, 4 element, multiple, Q-form, B/H/S +// ASIMD load, 4 element, multiple, Q-form, D +def : InstRW<[THX2T99Write_8Cyc_LS01_F01], + (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr], + (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 4 element, one lane, B/H +// ASIMD load, 4 element, one lane, S +// ASIMD load, 4 element, one lane, D +def : InstRW<[THX2T99Write_6Cyc_LS01_F01], (instregex "^LD4i(8|16|32|64)$")>; +def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr], + (instregex "^LD4i(8|16|32|64)_POST$")>; + +// ASIMD load, 4 element, all lanes, D-form, B/H/S +// ASIMD load, 4 element, all lanes, D-form, D +// ASIMD load, 4 element, all lanes, Q-form, B/H/S +// ASIMD load, 4 element, all lanes, Q-form, D +def : InstRW<[THX2T99Write_6Cyc_LS01_F01], + (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr], + 
(instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +//-- +// 3.16 ASIMD Store Instructions +//-- + +// ASIMD store, 1 element, multiple, 1 reg, D-form +// ASIMD store, 1 element, multiple, 1 reg, Q-form +def : InstRW<[THX2T99Write_1Cyc_LS01], + (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], + (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 1 element, multiple, 2 reg, D-form +// ASIMD store, 1 element, multiple, 2 reg, Q-form +def : InstRW<[THX2T99Write_1Cyc_LS01], + (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], + (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 1 element, multiple, 3 reg, D-form +// ASIMD store, 1 element, multiple, 3 reg, Q-form +def : InstRW<[THX2T99Write_1Cyc_LS01], + (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], + (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 1 element, multiple, 4 reg, D-form +// ASIMD store, 1 element, multiple, 4 reg, Q-form +def : InstRW<[THX2T99Write_1Cyc_LS01], + (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], + (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 1 element, one lane, B/H/S +// ASIMD store, 1 element, one lane, D +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], + (instregex "^ST1i(8|16|32|64)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], + (instregex "^ST1i(8|16|32|64)_POST$")>; + +// ASIMD store, 2 element, multiple, D-form, B/H/S +// ASIMD store, 2 element, multiple, Q-form, B/H/S +// ASIMD store, 2 element, multiple, Q-form, D +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], + (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], + (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 2 element, one lane, B/H/S +// ASIMD store, 2 element, one lane, D +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], + (instregex "^ST2i(8|16|32|64)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], + (instregex "^ST2i(8|16|32|64)_POST$")>; + +// ASIMD store, 3 element, multiple, D-form, B/H/S +// ASIMD store, 3 element, multiple, Q-form, B/H/S +// ASIMD store, 3 element, multiple, Q-form, D +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], + (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], + (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 3 element, one lane, B/H +// ASIMD store, 3 element, one lane, S +// ASIMD store, 3 element, one lane, D +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST3i(8|16|32|64)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], + (instregex "^ST3i(8|16|32|64)_POST$")>; + +// ASIMD store, 4 element, multiple, D-form, B/H/S +// ASIMD store, 4 element, multiple, Q-form, B/H/S +// ASIMD store, 4 element, multiple, Q-form, D +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], + (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], + (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 4 element, one lane, B/H +// ASIMD store, 4 element, one lane, S +// ASIMD store, 4 element, one lane, D +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST4i(8|16|32|64)$")>; +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], + (instregex "^ST4i(8|16|32|64)_POST$")>; + 
+// V8.1a Atomics (LSE) +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs CASB, CASH, CASW, CASX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs CASAB, CASAH, CASAW, CASAX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs CASLB, CASLH, CASLW, CASLX)>; + +def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic], + (instrs CASALB, CASALH, CASALW, CASALX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs LDLARB, LDLARH, LDLARW, LDLARX)>; + +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs LDADDB, LDADDH, LDADDW, LDADDX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs LDADDAB, LDADDAH, LDADDAW, LDADDAX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs LDADDLB, LDADDLH, LDADDLW, LDADDLX)>; + +def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic], + (instrs LDADDALB, LDADDALH, LDADDALW, LDADDALX)>; + +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs LDCLRB, LDCLRH, LDCLRW, LDCLRX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs LDCLRAB, LDCLRAH, LDCLRAW, LDCLRAX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs LDCLRLB, LDCLRLH, LDCLRLW, LDCLRLX)>; + +def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic], + (instrs LDCLRALB, LDCLRALH, LDCLRALW, LDCLRALX)>; + +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs LDEORB, LDEORH, LDEORW, LDEORX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs LDEORAB, LDEORAH, LDEORAW, LDEORAX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs LDEORLB, LDEORLH, LDEORLW, LDEORLX)>; + +def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic], + (instrs LDEORALB, LDEORALH, LDEORALW, LDEORALX)>; + +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs LDSETB, LDSETH, LDSETW, LDSETX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs LDSETAB, LDSETAH, LDSETAW, LDSETAX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs LDSETLB, LDSETLH, LDSETLW, LDSETLX)>; + +def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic], + (instrs LDSETALB, LDSETALH, LDSETALW, LDSETALX)>; + +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs LDSMAXB, LDSMAXH, LDSMAXW, LDSMAXX, + LDSMAXAB, LDSMAXAH, LDSMAXAW, LDSMAXAX, + LDSMAXLB, LDSMAXLH, LDSMAXLW, LDSMAXLX, + LDSMAXALB, LDSMAXALH, LDSMAXALW, LDSMAXALX)>; + +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs LDSMINB, LDSMINH, LDSMINW, LDSMINX, + LDSMINAB, LDSMINAH, LDSMINAW, LDSMINAX, + LDSMINLB, LDSMINLH, LDSMINLW, LDSMINLX, + LDSMINALB, LDSMINALH, LDSMINALW, LDSMINALX)>; + +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs LDUMAXB, LDUMAXH, LDUMAXW, LDUMAXX, + LDUMAXAB, LDUMAXAH, LDUMAXAW, LDUMAXAX, + LDUMAXLB, LDUMAXLH, LDUMAXLW, LDUMAXLX, + LDUMAXALB, LDUMAXALH, LDUMAXALW, LDUMAXALX)>; + +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs LDUMINB, LDUMINH, LDUMINW, LDUMINX, + LDUMINAB, LDUMINAH, LDUMINAW, LDUMINAX, + LDUMINLB, LDUMINLH, LDUMINLW, LDUMINLX, + LDUMINALB, LDUMINALH, LDUMINALW, LDUMINALX)>; + +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs SWPB, SWPH, SWPW, SWPX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs SWPAB, SWPAH, SWPAW, SWPAX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs SWPLB, SWPLH, SWPLW, SWPLX)>; + +def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic], + (instrs SWPALB, SWPALH, SWPALW, SWPALX)>; + +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs STLLRB, 
STLLRH, STLLRW, STLLRX)>; + +} // SchedModel = ThunderX2T99Model + diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64Schedule.td b/capstone/suite/synctools/tablegen/AArch64/AArch64Schedule.td new file mode 100644 index 000000000..ce81f48ac --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64Schedule.td @@ -0,0 +1,106 @@ +//==-- AArch64Schedule.td - AArch64 Scheduling Definitions -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// Define TII for use in SchedVariant Predicates. +// const MachineInstr *MI and const TargetSchedModel *SchedModel +// are defined by default. +def : PredicateProlog<[{ + const AArch64InstrInfo *TII = + static_cast<const AArch64InstrInfo*>(SchedModel->getInstrInfo()); + (void)TII; +}]>; + +// AArch64 Scheduler Definitions + +def WriteImm : SchedWrite; // MOVN, MOVZ +// TODO: Provide variants for MOV32/64imm Pseudos that dynamically +// select the correct sequence of WriteImms. + +def WriteI : SchedWrite; // ALU +def WriteISReg : SchedWrite; // ALU of Shifted-Reg +def WriteIEReg : SchedWrite; // ALU of Extended-Reg +def ReadI : SchedRead; // ALU +def ReadISReg : SchedRead; // ALU of Shifted-Reg +def ReadIEReg : SchedRead; // ALU of Extended-Reg +def WriteExtr : SchedWrite; // EXTR shifts a reg pair +def ReadExtrHi : SchedRead; // Read the high reg of the EXTR pair +def WriteIS : SchedWrite; // Shift/Scale +def WriteID32 : SchedWrite; // 32-bit Divide +def WriteID64 : SchedWrite; // 64-bit Divide +def ReadID : SchedRead; // 32/64-bit Divide +def WriteIM32 : SchedWrite; // 32-bit Multiply +def WriteIM64 : SchedWrite; // 64-bit Multiply +def ReadIM : SchedRead; // 32/64-bit Multiply +def ReadIMA : SchedRead; // 32/64-bit Multiply Accumulate +def WriteBr : SchedWrite; // Branch +def WriteBrReg : SchedWrite; // Indirect Branch + +def WriteLD : SchedWrite; // Load from base addr plus immediate offset +def WriteST : SchedWrite; // Store to base addr plus immediate offset +def WriteSTP : SchedWrite; // Store a register pair. +def WriteAdr : SchedWrite; // Address pre/post increment. + +def WriteLDIdx : SchedWrite; // Load from a register index (maybe scaled). +def WriteSTIdx : SchedWrite; // Store to a register index (maybe scaled). +def ReadAdrBase : SchedRead; // Read the base resister of a reg-offset LD/ST. + +// Predicate for determining when a shiftable register is shifted. +def RegShiftedPred : SchedPredicate<[{TII->hasShiftedReg(*MI)}]>; + +// Predicate for determining when a extendedable register is extended. +def RegExtendedPred : SchedPredicate<[{TII->hasExtendedReg(*MI)}]>; + +// ScaledIdxPred is true if a WriteLDIdx operand will be +// scaled. Subtargets can use this to dynamically select resources and +// latency for WriteLDIdx and ReadAdrBase. +def ScaledIdxPred : SchedPredicate<[{TII->isScaledAddr(*MI)}]>; + +// Serialized two-level address load. +// EXAMPLE: LOADGot +def WriteLDAdr : WriteSequence<[WriteAdr, WriteLD]>; + +// Serialized two-level address lookup. +// EXAMPLE: MOVaddr... +def WriteAdrAdr : WriteSequence<[WriteAdr, WriteAdr]>; + +// The second register of a load-pair. +// LDP,LDPSW,LDNP,LDXP,LDAXP +def WriteLDHi : SchedWrite; + +// Store-exclusive is a store followed by a dependent load. 
+def WriteSTX : WriteSequence<[WriteST, WriteLD]>; + +def WriteSys : SchedWrite; // Long, variable latency system ops. +def WriteBarrier : SchedWrite; // Memory barrier. +def WriteHint : SchedWrite; // Hint instruction. + +def WriteF : SchedWrite; // General floating-point ops. +def WriteFCmp : SchedWrite; // Floating-point compare. +def WriteFCvt : SchedWrite; // Float conversion. +def WriteFCopy : SchedWrite; // Float-int register copy. +def WriteFImm : SchedWrite; // Floating-point immediate. +def WriteFMul : SchedWrite; // Floating-point multiply. +def WriteFDiv : SchedWrite; // Floating-point division. + +def WriteV : SchedWrite; // Vector ops. +def WriteVLD : SchedWrite; // Vector loads. +def WriteVST : SchedWrite; // Vector stores. + +def WriteAtomic : SchedWrite; // Atomic memory operations (CAS, Swap, LDOP) + +// Read the unwritten lanes of the VLD's destination registers. +def ReadVLD : SchedRead; + +// Sequential vector load and shuffle. +def WriteVLDShuffle : WriteSequence<[WriteVLD, WriteV]>; +def WriteVLDPairShuffle : WriteSequence<[WriteVLD, WriteV, WriteV]>; + +// Store a shuffled vector. +def WriteVSTShuffle : WriteSequence<[WriteV, WriteVST]>; +def WriteVSTPairShuffle : WriteSequence<[WriteV, WriteV, WriteVST]>; diff --git a/capstone/suite/synctools/tablegen/AArch64/AArch64SystemOperands.td b/capstone/suite/synctools/tablegen/AArch64/AArch64SystemOperands.td new file mode 100644 index 000000000..dbc4deaf3 --- /dev/null +++ b/capstone/suite/synctools/tablegen/AArch64/AArch64SystemOperands.td @@ -0,0 +1,1332 @@ +//===- AArch64SystemOperands.td ----------------------------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the symbolic operands permitted for various kinds of +// AArch64 system instruction. +// +//===----------------------------------------------------------------------===// + +include "llvm/TableGen/SearchableTable.td" + +//===----------------------------------------------------------------------===// +// AT (address translate) instruction options. 
+//===----------------------------------------------------------------------===// + +class AT<string name, bits<3> op1, bits<4> crn, bits<4> crm, + bits<3> op2> : SearchableTable { + let SearchableFields = ["Name", "Encoding"]; + let EnumValueField = "Encoding"; + + string Name = name; + bits<14> Encoding; + let Encoding{13-11} = op1; + let Encoding{10-7} = crn; + let Encoding{6-3} = crm; + let Encoding{2-0} = op2; + code Requires = [{ {} }]; +} + +def : AT<"S1E1R", 0b000, 0b0111, 0b1000, 0b000>; +def : AT<"S1E2R", 0b100, 0b0111, 0b1000, 0b000>; +def : AT<"S1E3R", 0b110, 0b0111, 0b1000, 0b000>; +def : AT<"S1E1W", 0b000, 0b0111, 0b1000, 0b001>; +def : AT<"S1E2W", 0b100, 0b0111, 0b1000, 0b001>; +def : AT<"S1E3W", 0b110, 0b0111, 0b1000, 0b001>; +def : AT<"S1E0R", 0b000, 0b0111, 0b1000, 0b010>; +def : AT<"S1E0W", 0b000, 0b0111, 0b1000, 0b011>; +def : AT<"S12E1R", 0b100, 0b0111, 0b1000, 0b100>; +def : AT<"S12E1W", 0b100, 0b0111, 0b1000, 0b101>; +def : AT<"S12E0R", 0b100, 0b0111, 0b1000, 0b110>; +def : AT<"S12E0W", 0b100, 0b0111, 0b1000, 0b111>; + +let Requires = [{ {AArch64::HasV8_2aOps} }] in { +def : AT<"S1E1RP", 0b000, 0b0111, 0b1001, 0b000>; +def : AT<"S1E1WP", 0b000, 0b0111, 0b1001, 0b001>; +} + +//===----------------------------------------------------------------------===// +// DMB/DSB (data barrier) instruction options. +//===----------------------------------------------------------------------===// + +class DB<string name, bits<4> encoding> : SearchableTable { + let SearchableFields = ["Name", "Encoding"]; + let EnumValueField = "Encoding"; + + string Name = name; + bits<4> Encoding = encoding; +} + +def : DB<"oshld", 0x1>; +def : DB<"oshst", 0x2>; +def : DB<"osh", 0x3>; +def : DB<"nshld", 0x5>; +def : DB<"nshst", 0x6>; +def : DB<"nsh", 0x7>; +def : DB<"ishld", 0x9>; +def : DB<"ishst", 0xa>; +def : DB<"ish", 0xb>; +def : DB<"ld", 0xd>; +def : DB<"st", 0xe>; +def : DB<"sy", 0xf>; + +//===----------------------------------------------------------------------===// +// DC (data cache maintenance) instruction options. +//===----------------------------------------------------------------------===// + +class DC<string name, bits<3> op1, bits<4> crn, bits<4> crm, + bits<3> op2> : SearchableTable { + let SearchableFields = ["Name", "Encoding"]; + let EnumValueField = "Encoding"; + + string Name = name; + bits<14> Encoding; + let Encoding{13-11} = op1; + let Encoding{10-7} = crn; + let Encoding{6-3} = crm; + let Encoding{2-0} = op2; + code Requires = [{ {} }]; +} + +def : DC<"ZVA", 0b011, 0b0111, 0b0100, 0b001>; +def : DC<"IVAC", 0b000, 0b0111, 0b0110, 0b001>; +def : DC<"ISW", 0b000, 0b0111, 0b0110, 0b010>; +def : DC<"CVAC", 0b011, 0b0111, 0b1010, 0b001>; +def : DC<"CSW", 0b000, 0b0111, 0b1010, 0b010>; +def : DC<"CVAU", 0b011, 0b0111, 0b1011, 0b001>; +def : DC<"CIVAC", 0b011, 0b0111, 0b1110, 0b001>; +def : DC<"CISW", 0b000, 0b0111, 0b1110, 0b010>; + +let Requires = [{ {AArch64::HasV8_2aOps} }] in +def : DC<"CVAP", 0b011, 0b0111, 0b1100, 0b001>; + +//===----------------------------------------------------------------------===// +// IC (instruction cache maintenance) instruction options. 
+//===----------------------------------------------------------------------===// + +class IC<string name, bits<3> op1, bits<4> crn, bits<4> crm, bits<3> op2, + bit needsreg> : SearchableTable { + let SearchableFields = ["Name", "Encoding"]; + let EnumValueField = "Encoding"; + + string Name = name; + bits<14> Encoding; + let Encoding{13-11} = op1; + let Encoding{10-7} = crn; + let Encoding{6-3} = crm; + let Encoding{2-0} = op2; + bit NeedsReg = needsreg; +} + +def : IC<"IALLUIS", 0b000, 0b0111, 0b0001, 0b000, 0>; +def : IC<"IALLU", 0b000, 0b0111, 0b0101, 0b000, 0>; +def : IC<"IVAU", 0b011, 0b0111, 0b0101, 0b001, 1>; + +//===----------------------------------------------------------------------===// +// ISB (instruction-fetch barrier) instruction options. +//===----------------------------------------------------------------------===// + +class ISB<string name, bits<4> encoding> : SearchableTable{ + let SearchableFields = ["Name", "Encoding"]; + let EnumValueField = "Encoding"; + + string Name = name; + bits<4> Encoding; + let Encoding = encoding; +} + +def : ISB<"sy", 0xf>; + +//===----------------------------------------------------------------------===// +// TSB (Trace synchronization barrier) instruction options. +//===----------------------------------------------------------------------===// + +class TSB<string name, bits<4> encoding> : SearchableTable{ + let SearchableFields = ["Name", "Encoding"]; + let EnumValueField = "Encoding"; + + string Name = name; + bits<4> Encoding; + let Encoding = encoding; + + code Requires = [{ {AArch64::HasV8_4aOps} }]; +} + +def : TSB<"csync", 0>; + +//===----------------------------------------------------------------------===// +// PRFM (prefetch) instruction options. +//===----------------------------------------------------------------------===// + +class PRFM<string name, bits<5> encoding> : SearchableTable { + let SearchableFields = ["Name", "Encoding"]; + let EnumValueField = "Encoding"; + + string Name = name; + bits<5> Encoding; + let Encoding = encoding; +} + +def : PRFM<"pldl1keep", 0x00>; +def : PRFM<"pldl1strm", 0x01>; +def : PRFM<"pldl2keep", 0x02>; +def : PRFM<"pldl2strm", 0x03>; +def : PRFM<"pldl3keep", 0x04>; +def : PRFM<"pldl3strm", 0x05>; +def : PRFM<"plil1keep", 0x08>; +def : PRFM<"plil1strm", 0x09>; +def : PRFM<"plil2keep", 0x0a>; +def : PRFM<"plil2strm", 0x0b>; +def : PRFM<"plil3keep", 0x0c>; +def : PRFM<"plil3strm", 0x0d>; +def : PRFM<"pstl1keep", 0x10>; +def : PRFM<"pstl1strm", 0x11>; +def : PRFM<"pstl2keep", 0x12>; +def : PRFM<"pstl2strm", 0x13>; +def : PRFM<"pstl3keep", 0x14>; +def : PRFM<"pstl3strm", 0x15>; + +//===----------------------------------------------------------------------===// +// SVE Prefetch instruction options. 
+//===----------------------------------------------------------------------===// + +class SVEPRFM<string name, bits<4> encoding> : SearchableTable { + let SearchableFields = ["Name", "Encoding"]; + let EnumValueField = "Encoding"; + + string Name = name; + bits<4> Encoding; + let Encoding = encoding; + code Requires = [{ {} }]; +} + +let Requires = [{ {AArch64::FeatureSVE} }] in { +def : SVEPRFM<"pldl1keep", 0x00>; +def : SVEPRFM<"pldl1strm", 0x01>; +def : SVEPRFM<"pldl2keep", 0x02>; +def : SVEPRFM<"pldl2strm", 0x03>; +def : SVEPRFM<"pldl3keep", 0x04>; +def : SVEPRFM<"pldl3strm", 0x05>; +def : SVEPRFM<"pstl1keep", 0x08>; +def : SVEPRFM<"pstl1strm", 0x09>; +def : SVEPRFM<"pstl2keep", 0x0a>; +def : SVEPRFM<"pstl2strm", 0x0b>; +def : SVEPRFM<"pstl3keep", 0x0c>; +def : SVEPRFM<"pstl3strm", 0x0d>; +} + +//===----------------------------------------------------------------------===// +// SVE Predicate patterns +//===----------------------------------------------------------------------===// + +class SVEPREDPAT<string name, bits<5> encoding> : SearchableTable { + let SearchableFields = ["Name", "Encoding"]; + let EnumValueField = "Encoding"; + + string Name = name; + bits<5> Encoding; + let Encoding = encoding; +} + +def : SVEPREDPAT<"pow2", 0x00>; +def : SVEPREDPAT<"vl1", 0x01>; +def : SVEPREDPAT<"vl2", 0x02>; +def : SVEPREDPAT<"vl3", 0x03>; +def : SVEPREDPAT<"vl4", 0x04>; +def : SVEPREDPAT<"vl5", 0x05>; +def : SVEPREDPAT<"vl6", 0x06>; +def : SVEPREDPAT<"vl7", 0x07>; +def : SVEPREDPAT<"vl8", 0x08>; +def : SVEPREDPAT<"vl16", 0x09>; +def : SVEPREDPAT<"vl32", 0x0a>; +def : SVEPREDPAT<"vl64", 0x0b>; +def : SVEPREDPAT<"vl128", 0x0c>; +def : SVEPREDPAT<"vl256", 0x0d>; +def : SVEPREDPAT<"mul4", 0x1d>; +def : SVEPREDPAT<"mul3", 0x1e>; +def : SVEPREDPAT<"all", 0x1f>; + +//===----------------------------------------------------------------------===// +// Exact FP Immediates. +// +// These definitions are used to create a lookup table with FP Immediates that +// is used for a few instructions that only accept a limited set of exact FP +// immediates values. +//===----------------------------------------------------------------------===// +class ExactFPImm<string name, string repr, bits<4> enum > : SearchableTable { + let SearchableFields = ["Enum", "Repr"]; + let EnumValueField = "Enum"; + + string Name = name; + bits<4> Enum = enum; + string Repr = repr; +} + +def : ExactFPImm<"zero", "0.0", 0x0>; +def : ExactFPImm<"half", "0.5", 0x1>; +def : ExactFPImm<"one", "1.0", 0x2>; +def : ExactFPImm<"two", "2.0", 0x3>; + +//===----------------------------------------------------------------------===// +// PState instruction options. 
+//===----------------------------------------------------------------------===// + +class PState<string name, bits<5> encoding> : SearchableTable { + let SearchableFields = ["Name", "Encoding"]; + let EnumValueField = "Encoding"; + + string Name = name; + bits<5> Encoding; + let Encoding = encoding; + code Requires = [{ {} }]; +} + +def : PState<"SPSel", 0b00101>; +def : PState<"DAIFSet", 0b11110>; +def : PState<"DAIFClr", 0b11111>; +// v8.1a "Privileged Access Never" extension-specific PStates +let Requires = [{ {AArch64::HasV8_1aOps} }] in +def : PState<"PAN", 0b00100>; +// v8.2a "User Access Override" extension-specific PStates +let Requires = [{ {AArch64::HasV8_2aOps} }] in +def : PState<"UAO", 0b00011>; +// v8.4a timining insensitivity of data processing instructions +let Requires = [{ {AArch64::HasV8_4aOps} }] in +def : PState<"DIT", 0b11010>; + +//===----------------------------------------------------------------------===// +// PSB instruction options. +//===----------------------------------------------------------------------===// + +class PSB<string name, bits<5> encoding> : SearchableTable { + let SearchableFields = ["Name", "Encoding"]; + let EnumValueField = "Encoding"; + + string Name = name; + bits<5> Encoding; + let Encoding = encoding; +} + +def : PSB<"csync", 0x11>; + +//===----------------------------------------------------------------------===// +// TLBI (translation lookaside buffer invalidate) instruction options. +//===----------------------------------------------------------------------===// + +class TLBI<string name, bits<3> op1, bits<4> crn, bits<4> crm, + bits<3> op2, bit needsreg = 1> : SearchableTable { + let SearchableFields = ["Name", "Encoding"]; + let EnumValueField = "Encoding"; + + string Name = name; + bits<14> Encoding; + let Encoding{13-11} = op1; + let Encoding{10-7} = crn; + let Encoding{6-3} = crm; + let Encoding{2-0} = op2; + bit NeedsReg = needsreg; + code Requires = [{ {} }]; +} + +def : TLBI<"IPAS2E1IS", 0b100, 0b1000, 0b0000, 0b001>; +def : TLBI<"IPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b101>; +def : TLBI<"VMALLE1IS", 0b000, 0b1000, 0b0011, 0b000, 0>; +def : TLBI<"ALLE2IS", 0b100, 0b1000, 0b0011, 0b000, 0>; +def : TLBI<"ALLE3IS", 0b110, 0b1000, 0b0011, 0b000, 0>; +def : TLBI<"VAE1IS", 0b000, 0b1000, 0b0011, 0b001>; +def : TLBI<"VAE2IS", 0b100, 0b1000, 0b0011, 0b001>; +def : TLBI<"VAE3IS", 0b110, 0b1000, 0b0011, 0b001>; +def : TLBI<"ASIDE1IS", 0b000, 0b1000, 0b0011, 0b010>; +def : TLBI<"VAAE1IS", 0b000, 0b1000, 0b0011, 0b011>; +def : TLBI<"ALLE1IS", 0b100, 0b1000, 0b0011, 0b100, 0>; +def : TLBI<"VALE1IS", 0b000, 0b1000, 0b0011, 0b101>; +def : TLBI<"VALE2IS", 0b100, 0b1000, 0b0011, 0b101>; +def : TLBI<"VALE3IS", 0b110, 0b1000, 0b0011, 0b101>; +def : TLBI<"VMALLS12E1IS", 0b100, 0b1000, 0b0011, 0b110, 0>; +def : TLBI<"VAALE1IS", 0b000, 0b1000, 0b0011, 0b111>; +def : TLBI<"IPAS2E1", 0b100, 0b1000, 0b0100, 0b001>; +def : TLBI<"IPAS2LE1", 0b100, 0b1000, 0b0100, 0b101>; +def : TLBI<"VMALLE1", 0b000, 0b1000, 0b0111, 0b000, 0>; +def : TLBI<"ALLE2", 0b100, 0b1000, 0b0111, 0b000, 0>; +def : TLBI<"ALLE3", 0b110, 0b1000, 0b0111, 0b000, 0>; +def : TLBI<"VAE1", 0b000, 0b1000, 0b0111, 0b001>; +def : TLBI<"VAE2", 0b100, 0b1000, 0b0111, 0b001>; +def : TLBI<"VAE3", 0b110, 0b1000, 0b0111, 0b001>; +def : TLBI<"ASIDE1", 0b000, 0b1000, 0b0111, 0b010>; +def : TLBI<"VAAE1", 0b000, 0b1000, 0b0111, 0b011>; +def : TLBI<"ALLE1", 0b100, 0b1000, 0b0111, 0b100, 0>; +def : TLBI<"VALE1", 0b000, 0b1000, 0b0111, 0b101>; +def : TLBI<"VALE2", 0b100, 0b1000, 0b0111, 0b101>; +def : 
TLBI<"VALE3", 0b110, 0b1000, 0b0111, 0b101>; +def : TLBI<"VMALLS12E1", 0b100, 0b1000, 0b0111, 0b110, 0>; +def : TLBI<"VAALE1", 0b000, 0b1000, 0b0111, 0b111>; + +// Armv8.4-A Outer Sharable TLB Maintenance instructions: +let Requires = [{ {AArch64::HasV8_4aOps} }] in { +// op1 CRn CRm op2 +def : TLBI<"VMALLE1OS", 0b000, 0b1000, 0b0001, 0b000, 0>; +def : TLBI<"VAE1OS", 0b000, 0b1000, 0b0001, 0b001>; +def : TLBI<"ASIDE1OS", 0b000, 0b1000, 0b0001, 0b010>; +def : TLBI<"VAAE1OS", 0b000, 0b1000, 0b0001, 0b011>; +def : TLBI<"VALE1OS", 0b000, 0b1000, 0b0001, 0b101>; +def : TLBI<"VAALE1OS", 0b000, 0b1000, 0b0001, 0b111>; +def : TLBI<"IPAS2E1OS", 0b100, 0b1000, 0b0100, 0b000>; +def : TLBI<"IPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b100>; +def : TLBI<"VAE2OS", 0b100, 0b1000, 0b0001, 0b001>; +def : TLBI<"VALE2OS", 0b100, 0b1000, 0b0001, 0b101>; +def : TLBI<"VMALLS12E1OS", 0b100, 0b1000, 0b0001, 0b110, 0>; +def : TLBI<"VAE3OS", 0b110, 0b1000, 0b0001, 0b001>; +def : TLBI<"VALE3OS", 0b110, 0b1000, 0b0001, 0b101>; +def : TLBI<"ALLE2OS", 0b100, 0b1000, 0b0001, 0b000, 0>; +def : TLBI<"ALLE1OS", 0b100, 0b1000, 0b0001, 0b100, 0>; +def : TLBI<"ALLE3OS", 0b110, 0b1000, 0b0001, 0b000, 0>; + +// Armv8.4-A TLB Range Maintenance instructions: +// op1 CRn CRm op2 +def : TLBI<"RVAE1", 0b000, 0b1000, 0b0110, 0b001>; +def : TLBI<"RVAAE1", 0b000, 0b1000, 0b0110, 0b011>; +def : TLBI<"RVALE1", 0b000, 0b1000, 0b0110, 0b101>; +def : TLBI<"RVAALE1", 0b000, 0b1000, 0b0110, 0b111>; +def : TLBI<"RVAE1IS", 0b000, 0b1000, 0b0010, 0b001>; +def : TLBI<"RVAAE1IS", 0b000, 0b1000, 0b0010, 0b011>; +def : TLBI<"RVALE1IS", 0b000, 0b1000, 0b0010, 0b101>; +def : TLBI<"RVAALE1IS", 0b000, 0b1000, 0b0010, 0b111>; +def : TLBI<"RVAE1OS", 0b000, 0b1000, 0b0101, 0b001>; +def : TLBI<"RVAAE1OS", 0b000, 0b1000, 0b0101, 0b011>; +def : TLBI<"RVALE1OS", 0b000, 0b1000, 0b0101, 0b101>; +def : TLBI<"RVAALE1OS", 0b000, 0b1000, 0b0101, 0b111>; +def : TLBI<"RIPAS2E1IS", 0b100, 0b1000, 0b0000, 0b010>; +def : TLBI<"RIPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b110>; +def : TLBI<"RIPAS2E1", 0b100, 0b1000, 0b0100, 0b010>; +def : TLBI<"RIPAS2LE1", 0b100, 0b1000, 0b0100, 0b110>; +def : TLBI<"RIPAS2E1OS", 0b100, 0b1000, 0b0100, 0b011>; +def : TLBI<"RIPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b111>; +def : TLBI<"RVAE2", 0b100, 0b1000, 0b0110, 0b001>; +def : TLBI<"RVALE2", 0b100, 0b1000, 0b0110, 0b101>; +def : TLBI<"RVAE2IS", 0b100, 0b1000, 0b0010, 0b001>; +def : TLBI<"RVALE2IS", 0b100, 0b1000, 0b0010, 0b101>; +def : TLBI<"RVAE2OS", 0b100, 0b1000, 0b0101, 0b001>; +def : TLBI<"RVALE2OS", 0b100, 0b1000, 0b0101, 0b101>; +def : TLBI<"RVAE3", 0b110, 0b1000, 0b0110, 0b001>; +def : TLBI<"RVALE3", 0b110, 0b1000, 0b0110, 0b101>; +def : TLBI<"RVAE3IS", 0b110, 0b1000, 0b0010, 0b001>; +def : TLBI<"RVALE3IS", 0b110, 0b1000, 0b0010, 0b101>; +def : TLBI<"RVAE3OS", 0b110, 0b1000, 0b0101, 0b001>; +def : TLBI<"RVALE3OS", 0b110, 0b1000, 0b0101, 0b101>; +} + +//===----------------------------------------------------------------------===// +// MRS/MSR (system register read/write) instruction options. 
+//===----------------------------------------------------------------------===// + +class SysReg<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm, + bits<3> op2> : SearchableTable { + let SearchableFields = ["Name", "Encoding"]; + let EnumValueField = "Encoding"; + + string Name = name; + bits<16> Encoding; + let Encoding{15-14} = op0; + let Encoding{13-11} = op1; + let Encoding{10-7} = crn; + let Encoding{6-3} = crm; + let Encoding{2-0} = op2; + bit Readable = ?; + bit Writeable = ?; + code Requires = [{ {} }]; +} + +class RWSysReg<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm, + bits<3> op2> + : SysReg<name, op0, op1, crn, crm, op2> { + let Readable = 1; + let Writeable = 1; +} + +class ROSysReg<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm, + bits<3> op2> + : SysReg<name, op0, op1, crn, crm, op2> { + let Readable = 1; + let Writeable = 0; +} + +class WOSysReg<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm, + bits<3> op2> + : SysReg<name, op0, op1, crn, crm, op2> { + let Readable = 0; + let Writeable = 1; +} + +//===---------------------- +// Read-only regs +//===---------------------- + +// Op0 Op1 CRn CRm Op2 +def : ROSysReg<"MDCCSR_EL0", 0b10, 0b011, 0b0000, 0b0001, 0b000>; +def : ROSysReg<"DBGDTRRX_EL0", 0b10, 0b011, 0b0000, 0b0101, 0b000>; +def : ROSysReg<"MDRAR_EL1", 0b10, 0b000, 0b0001, 0b0000, 0b000>; +def : ROSysReg<"OSLSR_EL1", 0b10, 0b000, 0b0001, 0b0001, 0b100>; +def : ROSysReg<"DBGAUTHSTATUS_EL1", 0b10, 0b000, 0b0111, 0b1110, 0b110>; +def : ROSysReg<"PMCEID0_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b110>; +def : ROSysReg<"PMCEID1_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b111>; +def : ROSysReg<"MIDR_EL1", 0b11, 0b000, 0b0000, 0b0000, 0b000>; +def : ROSysReg<"CCSIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b000>; +def : ROSysReg<"CCSIDR2_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b010> { + let Requires = [{ {AArch64::HasV8_3aOps} }]; +} +def : ROSysReg<"CLIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b001>; +def : ROSysReg<"CTR_EL0", 0b11, 0b011, 0b0000, 0b0000, 0b001>; +def : ROSysReg<"MPIDR_EL1", 0b11, 0b000, 0b0000, 0b0000, 0b101>; +def : ROSysReg<"REVIDR_EL1", 0b11, 0b000, 0b0000, 0b0000, 0b110>; +def : ROSysReg<"AIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b111>; +def : ROSysReg<"DCZID_EL0", 0b11, 0b011, 0b0000, 0b0000, 0b111>; +def : ROSysReg<"ID_PFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b000>; +def : ROSysReg<"ID_PFR1_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b001>; +def : ROSysReg<"ID_DFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b010>; +def : ROSysReg<"ID_AFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b011>; +def : ROSysReg<"ID_MMFR0_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b100>; +def : ROSysReg<"ID_MMFR1_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b101>; +def : ROSysReg<"ID_MMFR2_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b110>; +def : ROSysReg<"ID_MMFR3_EL1", 0b11, 0b000, 0b0000, 0b0001, 0b111>; +def : ROSysReg<"ID_ISAR0_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b000>; +def : ROSysReg<"ID_ISAR1_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b001>; +def : ROSysReg<"ID_ISAR2_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b010>; +def : ROSysReg<"ID_ISAR3_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b011>; +def : ROSysReg<"ID_ISAR4_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b100>; +def : ROSysReg<"ID_ISAR5_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b101>; +def : ROSysReg<"ID_ISAR6_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b111> { + let Requires = [{ {AArch64::HasV8_2aOps} }]; +} +def : ROSysReg<"ID_AA64PFR0_EL1", 0b11, 0b000, 0b0000, 0b0100, 0b000>; +def : ROSysReg<"ID_AA64PFR1_EL1", 0b11, 0b000, 0b0000, 0b0100, 0b001>; +def 
: ROSysReg<"ID_AA64DFR0_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b000>; +def : ROSysReg<"ID_AA64DFR1_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b001>; +def : ROSysReg<"ID_AA64AFR0_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b100>; +def : ROSysReg<"ID_AA64AFR1_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b101>; +def : ROSysReg<"ID_AA64ISAR0_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b000>; +def : ROSysReg<"ID_AA64ISAR1_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b001>; +def : ROSysReg<"ID_AA64MMFR0_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b000>; +def : ROSysReg<"ID_AA64MMFR1_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b001>; +def : ROSysReg<"ID_AA64MMFR2_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b010> { + let Requires = [{ {AArch64::HasV8_2aOps} }]; +} +def : ROSysReg<"MVFR0_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b000>; +def : ROSysReg<"MVFR1_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b001>; +def : ROSysReg<"MVFR2_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b010>; +def : ROSysReg<"RVBAR_EL1", 0b11, 0b000, 0b1100, 0b0000, 0b001>; +def : ROSysReg<"RVBAR_EL2", 0b11, 0b100, 0b1100, 0b0000, 0b001>; +def : ROSysReg<"RVBAR_EL3", 0b11, 0b110, 0b1100, 0b0000, 0b001>; +def : ROSysReg<"ISR_EL1", 0b11, 0b000, 0b1100, 0b0001, 0b000>; +def : ROSysReg<"CNTPCT_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b001>; +def : ROSysReg<"CNTVCT_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b010>; +def : ROSysReg<"ID_MMFR4_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b110>; + +// Trace registers +// Op0 Op1 CRn CRm Op2 +def : ROSysReg<"TRCSTATR", 0b10, 0b001, 0b0000, 0b0011, 0b000>; +def : ROSysReg<"TRCIDR8", 0b10, 0b001, 0b0000, 0b0000, 0b110>; +def : ROSysReg<"TRCIDR9", 0b10, 0b001, 0b0000, 0b0001, 0b110>; +def : ROSysReg<"TRCIDR10", 0b10, 0b001, 0b0000, 0b0010, 0b110>; +def : ROSysReg<"TRCIDR11", 0b10, 0b001, 0b0000, 0b0011, 0b110>; +def : ROSysReg<"TRCIDR12", 0b10, 0b001, 0b0000, 0b0100, 0b110>; +def : ROSysReg<"TRCIDR13", 0b10, 0b001, 0b0000, 0b0101, 0b110>; +def : ROSysReg<"TRCIDR0", 0b10, 0b001, 0b0000, 0b1000, 0b111>; +def : ROSysReg<"TRCIDR1", 0b10, 0b001, 0b0000, 0b1001, 0b111>; +def : ROSysReg<"TRCIDR2", 0b10, 0b001, 0b0000, 0b1010, 0b111>; +def : ROSysReg<"TRCIDR3", 0b10, 0b001, 0b0000, 0b1011, 0b111>; +def : ROSysReg<"TRCIDR4", 0b10, 0b001, 0b0000, 0b1100, 0b111>; +def : ROSysReg<"TRCIDR5", 0b10, 0b001, 0b0000, 0b1101, 0b111>; +def : ROSysReg<"TRCIDR6", 0b10, 0b001, 0b0000, 0b1110, 0b111>; +def : ROSysReg<"TRCIDR7", 0b10, 0b001, 0b0000, 0b1111, 0b111>; +def : ROSysReg<"TRCOSLSR", 0b10, 0b001, 0b0001, 0b0001, 0b100>; +def : ROSysReg<"TRCPDSR", 0b10, 0b001, 0b0001, 0b0101, 0b100>; +def : ROSysReg<"TRCDEVAFF0", 0b10, 0b001, 0b0111, 0b1010, 0b110>; +def : ROSysReg<"TRCDEVAFF1", 0b10, 0b001, 0b0111, 0b1011, 0b110>; +def : ROSysReg<"TRCLSR", 0b10, 0b001, 0b0111, 0b1101, 0b110>; +def : ROSysReg<"TRCAUTHSTATUS", 0b10, 0b001, 0b0111, 0b1110, 0b110>; +def : ROSysReg<"TRCDEVARCH", 0b10, 0b001, 0b0111, 0b1111, 0b110>; +def : ROSysReg<"TRCDEVID", 0b10, 0b001, 0b0111, 0b0010, 0b111>; +def : ROSysReg<"TRCDEVTYPE", 0b10, 0b001, 0b0111, 0b0011, 0b111>; +def : ROSysReg<"TRCPIDR4", 0b10, 0b001, 0b0111, 0b0100, 0b111>; +def : ROSysReg<"TRCPIDR5", 0b10, 0b001, 0b0111, 0b0101, 0b111>; +def : ROSysReg<"TRCPIDR6", 0b10, 0b001, 0b0111, 0b0110, 0b111>; +def : ROSysReg<"TRCPIDR7", 0b10, 0b001, 0b0111, 0b0111, 0b111>; +def : ROSysReg<"TRCPIDR0", 0b10, 0b001, 0b0111, 0b1000, 0b111>; +def : ROSysReg<"TRCPIDR1", 0b10, 0b001, 0b0111, 0b1001, 0b111>; +def : ROSysReg<"TRCPIDR2", 0b10, 0b001, 0b0111, 0b1010, 0b111>; +def : ROSysReg<"TRCPIDR3", 0b10, 0b001, 0b0111, 0b1011, 0b111>; +def : ROSysReg<"TRCCIDR0", 0b10, 0b001, 0b0111, 0b1100, 0b111>; 
+def : ROSysReg<"TRCCIDR1", 0b10, 0b001, 0b0111, 0b1101, 0b111>; +def : ROSysReg<"TRCCIDR2", 0b10, 0b001, 0b0111, 0b1110, 0b111>; +def : ROSysReg<"TRCCIDR3", 0b10, 0b001, 0b0111, 0b1111, 0b111>; + +// GICv3 registers +// Op0 Op1 CRn CRm Op2 +def : ROSysReg<"ICC_IAR1_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b000>; +def : ROSysReg<"ICC_IAR0_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b000>; +def : ROSysReg<"ICC_HPPIR1_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b010>; +def : ROSysReg<"ICC_HPPIR0_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b010>; +def : ROSysReg<"ICC_RPR_EL1", 0b11, 0b000, 0b1100, 0b1011, 0b011>; +def : ROSysReg<"ICH_VTR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b001>; +def : ROSysReg<"ICH_EISR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b011>; +def : ROSysReg<"ICH_ELRSR_EL2", 0b11, 0b100, 0b1100, 0b1011, 0b101>; + +// SVE control registers +// Op0 Op1 CRn CRm Op2 +let Requires = [{ {AArch64::FeatureSVE} }] in { +def : ROSysReg<"ID_AA64ZFR0_EL1", 0b11, 0b000, 0b0000, 0b0100, 0b100>; +} + +// v8.1a "Limited Ordering Regions" extension-specific system register +// Op0 Op1 CRn CRm Op2 +let Requires = [{ {AArch64::HasV8_1aOps} }] in +def : ROSysReg<"LORID_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b111>; + +// v8.2a "RAS extension" registers +// Op0 Op1 CRn CRm Op2 +let Requires = [{ {AArch64::FeatureRAS} }] in { +def : ROSysReg<"ERRIDR_EL1", 0b11, 0b000, 0b0101, 0b0011, 0b000>; +def : ROSysReg<"ERXFR_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b000>; +} + +//===---------------------- +// Write-only regs +//===---------------------- + +// Op0 Op1 CRn CRm Op2 +def : WOSysReg<"DBGDTRTX_EL0", 0b10, 0b011, 0b0000, 0b0101, 0b000>; +def : WOSysReg<"OSLAR_EL1", 0b10, 0b000, 0b0001, 0b0000, 0b100>; +def : WOSysReg<"PMSWINC_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b100>; + +// Trace Registers +// Op0 Op1 CRn CRm Op2 +def : WOSysReg<"TRCOSLAR", 0b10, 0b001, 0b0001, 0b0000, 0b100>; +def : WOSysReg<"TRCLAR", 0b10, 0b001, 0b0111, 0b1100, 0b110>; + +// GICv3 registers +// Op0 Op1 CRn CRm Op2 +def : WOSysReg<"ICC_EOIR1_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b001>; +def : WOSysReg<"ICC_EOIR0_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b001>; +def : WOSysReg<"ICC_DIR_EL1", 0b11, 0b000, 0b1100, 0b1011, 0b001>; +def : WOSysReg<"ICC_SGI1R_EL1", 0b11, 0b000, 0b1100, 0b1011, 0b101>; +def : WOSysReg<"ICC_ASGI1R_EL1", 0b11, 0b000, 0b1100, 0b1011, 0b110>; +def : WOSysReg<"ICC_SGI0R_EL1", 0b11, 0b000, 0b1100, 0b1011, 0b111>; + +//===---------------------- +// Read-write regs +//===---------------------- + +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"OSDTRRX_EL1", 0b10, 0b000, 0b0000, 0b0000, 0b010>; +def : RWSysReg<"OSDTRTX_EL1", 0b10, 0b000, 0b0000, 0b0011, 0b010>; +def : RWSysReg<"TEECR32_EL1", 0b10, 0b010, 0b0000, 0b0000, 0b000>; +def : RWSysReg<"MDCCINT_EL1", 0b10, 0b000, 0b0000, 0b0010, 0b000>; +def : RWSysReg<"MDSCR_EL1", 0b10, 0b000, 0b0000, 0b0010, 0b010>; +def : RWSysReg<"DBGDTR_EL0", 0b10, 0b011, 0b0000, 0b0100, 0b000>; +def : RWSysReg<"OSECCR_EL1", 0b10, 0b000, 0b0000, 0b0110, 0b010>; +def : RWSysReg<"DBGVCR32_EL2", 0b10, 0b100, 0b0000, 0b0111, 0b000>; +def : RWSysReg<"DBGBVR0_EL1", 0b10, 0b000, 0b0000, 0b0000, 0b100>; +def : RWSysReg<"DBGBVR1_EL1", 0b10, 0b000, 0b0000, 0b0001, 0b100>; +def : RWSysReg<"DBGBVR2_EL1", 0b10, 0b000, 0b0000, 0b0010, 0b100>; +def : RWSysReg<"DBGBVR3_EL1", 0b10, 0b000, 0b0000, 0b0011, 0b100>; +def : RWSysReg<"DBGBVR4_EL1", 0b10, 0b000, 0b0000, 0b0100, 0b100>; +def : RWSysReg<"DBGBVR5_EL1", 0b10, 0b000, 0b0000, 0b0101, 0b100>; +def : RWSysReg<"DBGBVR6_EL1", 0b10, 0b000, 0b0000, 0b0110, 0b100>; +def : RWSysReg<"DBGBVR7_EL1", 0b10, 0b000, 0b0000, 
0b0111, 0b100>; +def : RWSysReg<"DBGBVR8_EL1", 0b10, 0b000, 0b0000, 0b1000, 0b100>; +def : RWSysReg<"DBGBVR9_EL1", 0b10, 0b000, 0b0000, 0b1001, 0b100>; +def : RWSysReg<"DBGBVR10_EL1", 0b10, 0b000, 0b0000, 0b1010, 0b100>; +def : RWSysReg<"DBGBVR11_EL1", 0b10, 0b000, 0b0000, 0b1011, 0b100>; +def : RWSysReg<"DBGBVR12_EL1", 0b10, 0b000, 0b0000, 0b1100, 0b100>; +def : RWSysReg<"DBGBVR13_EL1", 0b10, 0b000, 0b0000, 0b1101, 0b100>; +def : RWSysReg<"DBGBVR14_EL1", 0b10, 0b000, 0b0000, 0b1110, 0b100>; +def : RWSysReg<"DBGBVR15_EL1", 0b10, 0b000, 0b0000, 0b1111, 0b100>; +def : RWSysReg<"DBGBCR0_EL1", 0b10, 0b000, 0b0000, 0b0000, 0b101>; +def : RWSysReg<"DBGBCR1_EL1", 0b10, 0b000, 0b0000, 0b0001, 0b101>; +def : RWSysReg<"DBGBCR2_EL1", 0b10, 0b000, 0b0000, 0b0010, 0b101>; +def : RWSysReg<"DBGBCR3_EL1", 0b10, 0b000, 0b0000, 0b0011, 0b101>; +def : RWSysReg<"DBGBCR4_EL1", 0b10, 0b000, 0b0000, 0b0100, 0b101>; +def : RWSysReg<"DBGBCR5_EL1", 0b10, 0b000, 0b0000, 0b0101, 0b101>; +def : RWSysReg<"DBGBCR6_EL1", 0b10, 0b000, 0b0000, 0b0110, 0b101>; +def : RWSysReg<"DBGBCR7_EL1", 0b10, 0b000, 0b0000, 0b0111, 0b101>; +def : RWSysReg<"DBGBCR8_EL1", 0b10, 0b000, 0b0000, 0b1000, 0b101>; +def : RWSysReg<"DBGBCR9_EL1", 0b10, 0b000, 0b0000, 0b1001, 0b101>; +def : RWSysReg<"DBGBCR10_EL1", 0b10, 0b000, 0b0000, 0b1010, 0b101>; +def : RWSysReg<"DBGBCR11_EL1", 0b10, 0b000, 0b0000, 0b1011, 0b101>; +def : RWSysReg<"DBGBCR12_EL1", 0b10, 0b000, 0b0000, 0b1100, 0b101>; +def : RWSysReg<"DBGBCR13_EL1", 0b10, 0b000, 0b0000, 0b1101, 0b101>; +def : RWSysReg<"DBGBCR14_EL1", 0b10, 0b000, 0b0000, 0b1110, 0b101>; +def : RWSysReg<"DBGBCR15_EL1", 0b10, 0b000, 0b0000, 0b1111, 0b101>; +def : RWSysReg<"DBGWVR0_EL1", 0b10, 0b000, 0b0000, 0b0000, 0b110>; +def : RWSysReg<"DBGWVR1_EL1", 0b10, 0b000, 0b0000, 0b0001, 0b110>; +def : RWSysReg<"DBGWVR2_EL1", 0b10, 0b000, 0b0000, 0b0010, 0b110>; +def : RWSysReg<"DBGWVR3_EL1", 0b10, 0b000, 0b0000, 0b0011, 0b110>; +def : RWSysReg<"DBGWVR4_EL1", 0b10, 0b000, 0b0000, 0b0100, 0b110>; +def : RWSysReg<"DBGWVR5_EL1", 0b10, 0b000, 0b0000, 0b0101, 0b110>; +def : RWSysReg<"DBGWVR6_EL1", 0b10, 0b000, 0b0000, 0b0110, 0b110>; +def : RWSysReg<"DBGWVR7_EL1", 0b10, 0b000, 0b0000, 0b0111, 0b110>; +def : RWSysReg<"DBGWVR8_EL1", 0b10, 0b000, 0b0000, 0b1000, 0b110>; +def : RWSysReg<"DBGWVR9_EL1", 0b10, 0b000, 0b0000, 0b1001, 0b110>; +def : RWSysReg<"DBGWVR10_EL1", 0b10, 0b000, 0b0000, 0b1010, 0b110>; +def : RWSysReg<"DBGWVR11_EL1", 0b10, 0b000, 0b0000, 0b1011, 0b110>; +def : RWSysReg<"DBGWVR12_EL1", 0b10, 0b000, 0b0000, 0b1100, 0b110>; +def : RWSysReg<"DBGWVR13_EL1", 0b10, 0b000, 0b0000, 0b1101, 0b110>; +def : RWSysReg<"DBGWVR14_EL1", 0b10, 0b000, 0b0000, 0b1110, 0b110>; +def : RWSysReg<"DBGWVR15_EL1", 0b10, 0b000, 0b0000, 0b1111, 0b110>; +def : RWSysReg<"DBGWCR0_EL1", 0b10, 0b000, 0b0000, 0b0000, 0b111>; +def : RWSysReg<"DBGWCR1_EL1", 0b10, 0b000, 0b0000, 0b0001, 0b111>; +def : RWSysReg<"DBGWCR2_EL1", 0b10, 0b000, 0b0000, 0b0010, 0b111>; +def : RWSysReg<"DBGWCR3_EL1", 0b10, 0b000, 0b0000, 0b0011, 0b111>; +def : RWSysReg<"DBGWCR4_EL1", 0b10, 0b000, 0b0000, 0b0100, 0b111>; +def : RWSysReg<"DBGWCR5_EL1", 0b10, 0b000, 0b0000, 0b0101, 0b111>; +def : RWSysReg<"DBGWCR6_EL1", 0b10, 0b000, 0b0000, 0b0110, 0b111>; +def : RWSysReg<"DBGWCR7_EL1", 0b10, 0b000, 0b0000, 0b0111, 0b111>; +def : RWSysReg<"DBGWCR8_EL1", 0b10, 0b000, 0b0000, 0b1000, 0b111>; +def : RWSysReg<"DBGWCR9_EL1", 0b10, 0b000, 0b0000, 0b1001, 0b111>; +def : RWSysReg<"DBGWCR10_EL1", 0b10, 0b000, 0b0000, 0b1010, 0b111>; +def : RWSysReg<"DBGWCR11_EL1", 0b10, 0b000, 
0b0000, 0b1011, 0b111>; +def : RWSysReg<"DBGWCR12_EL1", 0b10, 0b000, 0b0000, 0b1100, 0b111>; +def : RWSysReg<"DBGWCR13_EL1", 0b10, 0b000, 0b0000, 0b1101, 0b111>; +def : RWSysReg<"DBGWCR14_EL1", 0b10, 0b000, 0b0000, 0b1110, 0b111>; +def : RWSysReg<"DBGWCR15_EL1", 0b10, 0b000, 0b0000, 0b1111, 0b111>; +def : RWSysReg<"TEEHBR32_EL1", 0b10, 0b010, 0b0001, 0b0000, 0b000>; +def : RWSysReg<"OSDLR_EL1", 0b10, 0b000, 0b0001, 0b0011, 0b100>; +def : RWSysReg<"DBGPRCR_EL1", 0b10, 0b000, 0b0001, 0b0100, 0b100>; +def : RWSysReg<"DBGCLAIMSET_EL1", 0b10, 0b000, 0b0111, 0b1000, 0b110>; +def : RWSysReg<"DBGCLAIMCLR_EL1", 0b10, 0b000, 0b0111, 0b1001, 0b110>; +def : RWSysReg<"CSSELR_EL1", 0b11, 0b010, 0b0000, 0b0000, 0b000>; +def : RWSysReg<"VPIDR_EL2", 0b11, 0b100, 0b0000, 0b0000, 0b000>; +def : RWSysReg<"VMPIDR_EL2", 0b11, 0b100, 0b0000, 0b0000, 0b101>; +def : RWSysReg<"CPACR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b010>; +def : RWSysReg<"SCTLR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b000>; +def : RWSysReg<"SCTLR_EL2", 0b11, 0b100, 0b0001, 0b0000, 0b000>; +def : RWSysReg<"SCTLR_EL3", 0b11, 0b110, 0b0001, 0b0000, 0b000>; +def : RWSysReg<"ACTLR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b001>; +def : RWSysReg<"ACTLR_EL2", 0b11, 0b100, 0b0001, 0b0000, 0b001>; +def : RWSysReg<"ACTLR_EL3", 0b11, 0b110, 0b0001, 0b0000, 0b001>; +def : RWSysReg<"HCR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b000>; +def : RWSysReg<"SCR_EL3", 0b11, 0b110, 0b0001, 0b0001, 0b000>; +def : RWSysReg<"MDCR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b001>; +def : RWSysReg<"SDER32_EL3", 0b11, 0b110, 0b0001, 0b0001, 0b001>; +def : RWSysReg<"CPTR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b010>; +def : RWSysReg<"CPTR_EL3", 0b11, 0b110, 0b0001, 0b0001, 0b010>; +def : RWSysReg<"HSTR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b011>; +def : RWSysReg<"HACR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b111>; +def : RWSysReg<"MDCR_EL3", 0b11, 0b110, 0b0001, 0b0011, 0b001>; +def : RWSysReg<"TTBR0_EL1", 0b11, 0b000, 0b0010, 0b0000, 0b000>; +def : RWSysReg<"TTBR0_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b000>; +def : RWSysReg<"TTBR0_EL3", 0b11, 0b110, 0b0010, 0b0000, 0b000>; +def : RWSysReg<"TTBR1_EL1", 0b11, 0b000, 0b0010, 0b0000, 0b001>; +def : RWSysReg<"TCR_EL1", 0b11, 0b000, 0b0010, 0b0000, 0b010>; +def : RWSysReg<"TCR_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b010>; +def : RWSysReg<"TCR_EL3", 0b11, 0b110, 0b0010, 0b0000, 0b010>; +def : RWSysReg<"VTTBR_EL2", 0b11, 0b100, 0b0010, 0b0001, 0b000>; +def : RWSysReg<"VTCR_EL2", 0b11, 0b100, 0b0010, 0b0001, 0b010>; +def : RWSysReg<"DACR32_EL2", 0b11, 0b100, 0b0011, 0b0000, 0b000>; +def : RWSysReg<"SPSR_EL1", 0b11, 0b000, 0b0100, 0b0000, 0b000>; +def : RWSysReg<"SPSR_EL2", 0b11, 0b100, 0b0100, 0b0000, 0b000>; +def : RWSysReg<"SPSR_EL3", 0b11, 0b110, 0b0100, 0b0000, 0b000>; +def : RWSysReg<"ELR_EL1", 0b11, 0b000, 0b0100, 0b0000, 0b001>; +def : RWSysReg<"ELR_EL2", 0b11, 0b100, 0b0100, 0b0000, 0b001>; +def : RWSysReg<"ELR_EL3", 0b11, 0b110, 0b0100, 0b0000, 0b001>; +def : RWSysReg<"SP_EL0", 0b11, 0b000, 0b0100, 0b0001, 0b000>; +def : RWSysReg<"SP_EL1", 0b11, 0b100, 0b0100, 0b0001, 0b000>; +def : RWSysReg<"SP_EL2", 0b11, 0b110, 0b0100, 0b0001, 0b000>; +def : RWSysReg<"SPSel", 0b11, 0b000, 0b0100, 0b0010, 0b000>; +def : RWSysReg<"NZCV", 0b11, 0b011, 0b0100, 0b0010, 0b000>; +def : RWSysReg<"DAIF", 0b11, 0b011, 0b0100, 0b0010, 0b001>; +def : RWSysReg<"CurrentEL", 0b11, 0b000, 0b0100, 0b0010, 0b010>; +def : RWSysReg<"SPSR_irq", 0b11, 0b100, 0b0100, 0b0011, 0b000>; +def : RWSysReg<"SPSR_abt", 0b11, 0b100, 0b0100, 0b0011, 0b001>; +def : RWSysReg<"SPSR_und", 0b11, 0b100, 0b0100, 
0b0011, 0b010>; +def : RWSysReg<"SPSR_fiq", 0b11, 0b100, 0b0100, 0b0011, 0b011>; +def : RWSysReg<"FPCR", 0b11, 0b011, 0b0100, 0b0100, 0b000>; +def : RWSysReg<"FPSR", 0b11, 0b011, 0b0100, 0b0100, 0b001>; +def : RWSysReg<"DSPSR_EL0", 0b11, 0b011, 0b0100, 0b0101, 0b000>; +def : RWSysReg<"DLR_EL0", 0b11, 0b011, 0b0100, 0b0101, 0b001>; +def : RWSysReg<"IFSR32_EL2", 0b11, 0b100, 0b0101, 0b0000, 0b001>; +def : RWSysReg<"AFSR0_EL1", 0b11, 0b000, 0b0101, 0b0001, 0b000>; +def : RWSysReg<"AFSR0_EL2", 0b11, 0b100, 0b0101, 0b0001, 0b000>; +def : RWSysReg<"AFSR0_EL3", 0b11, 0b110, 0b0101, 0b0001, 0b000>; +def : RWSysReg<"AFSR1_EL1", 0b11, 0b000, 0b0101, 0b0001, 0b001>; +def : RWSysReg<"AFSR1_EL2", 0b11, 0b100, 0b0101, 0b0001, 0b001>; +def : RWSysReg<"AFSR1_EL3", 0b11, 0b110, 0b0101, 0b0001, 0b001>; +def : RWSysReg<"ESR_EL1", 0b11, 0b000, 0b0101, 0b0010, 0b000>; +def : RWSysReg<"ESR_EL2", 0b11, 0b100, 0b0101, 0b0010, 0b000>; +def : RWSysReg<"ESR_EL3", 0b11, 0b110, 0b0101, 0b0010, 0b000>; +def : RWSysReg<"FPEXC32_EL2", 0b11, 0b100, 0b0101, 0b0011, 0b000>; +def : RWSysReg<"FAR_EL1", 0b11, 0b000, 0b0110, 0b0000, 0b000>; +def : RWSysReg<"FAR_EL2", 0b11, 0b100, 0b0110, 0b0000, 0b000>; +def : RWSysReg<"FAR_EL3", 0b11, 0b110, 0b0110, 0b0000, 0b000>; +def : RWSysReg<"HPFAR_EL2", 0b11, 0b100, 0b0110, 0b0000, 0b100>; +def : RWSysReg<"PAR_EL1", 0b11, 0b000, 0b0111, 0b0100, 0b000>; +def : RWSysReg<"PMCR_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b000>; +def : RWSysReg<"PMCNTENSET_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b001>; +def : RWSysReg<"PMCNTENCLR_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b010>; +def : RWSysReg<"PMOVSCLR_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b011>; +def : RWSysReg<"PMSELR_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b101>; +def : RWSysReg<"PMCCNTR_EL0", 0b11, 0b011, 0b1001, 0b1101, 0b000>; +def : RWSysReg<"PMXEVTYPER_EL0", 0b11, 0b011, 0b1001, 0b1101, 0b001>; +def : RWSysReg<"PMXEVCNTR_EL0", 0b11, 0b011, 0b1001, 0b1101, 0b010>; +def : RWSysReg<"PMUSERENR_EL0", 0b11, 0b011, 0b1001, 0b1110, 0b000>; +def : RWSysReg<"PMINTENSET_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b001>; +def : RWSysReg<"PMINTENCLR_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b010>; +def : RWSysReg<"PMOVSSET_EL0", 0b11, 0b011, 0b1001, 0b1110, 0b011>; +def : RWSysReg<"MAIR_EL1", 0b11, 0b000, 0b1010, 0b0010, 0b000>; +def : RWSysReg<"MAIR_EL2", 0b11, 0b100, 0b1010, 0b0010, 0b000>; +def : RWSysReg<"MAIR_EL3", 0b11, 0b110, 0b1010, 0b0010, 0b000>; +def : RWSysReg<"AMAIR_EL1", 0b11, 0b000, 0b1010, 0b0011, 0b000>; +def : RWSysReg<"AMAIR_EL2", 0b11, 0b100, 0b1010, 0b0011, 0b000>; +def : RWSysReg<"AMAIR_EL3", 0b11, 0b110, 0b1010, 0b0011, 0b000>; +def : RWSysReg<"VBAR_EL1", 0b11, 0b000, 0b1100, 0b0000, 0b000>; +def : RWSysReg<"VBAR_EL2", 0b11, 0b100, 0b1100, 0b0000, 0b000>; +def : RWSysReg<"VBAR_EL3", 0b11, 0b110, 0b1100, 0b0000, 0b000>; +def : RWSysReg<"RMR_EL1", 0b11, 0b000, 0b1100, 0b0000, 0b010>; +def : RWSysReg<"RMR_EL2", 0b11, 0b100, 0b1100, 0b0000, 0b010>; +def : RWSysReg<"RMR_EL3", 0b11, 0b110, 0b1100, 0b0000, 0b010>; +def : RWSysReg<"CONTEXTIDR_EL1", 0b11, 0b000, 0b1101, 0b0000, 0b001>; +def : RWSysReg<"TPIDR_EL0", 0b11, 0b011, 0b1101, 0b0000, 0b010>; +def : RWSysReg<"TPIDR_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b010>; +def : RWSysReg<"TPIDR_EL3", 0b11, 0b110, 0b1101, 0b0000, 0b010>; +def : RWSysReg<"TPIDRRO_EL0", 0b11, 0b011, 0b1101, 0b0000, 0b011>; +def : RWSysReg<"TPIDR_EL1", 0b11, 0b000, 0b1101, 0b0000, 0b100>; +def : RWSysReg<"CNTFRQ_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b000>; +def : RWSysReg<"CNTVOFF_EL2", 0b11, 0b100, 0b1110, 0b0000, 0b011>; +def : 
RWSysReg<"CNTKCTL_EL1", 0b11, 0b000, 0b1110, 0b0001, 0b000>; +def : RWSysReg<"CNTHCTL_EL2", 0b11, 0b100, 0b1110, 0b0001, 0b000>; +def : RWSysReg<"CNTP_TVAL_EL0", 0b11, 0b011, 0b1110, 0b0010, 0b000>; +def : RWSysReg<"CNTHP_TVAL_EL2", 0b11, 0b100, 0b1110, 0b0010, 0b000>; +def : RWSysReg<"CNTPS_TVAL_EL1", 0b11, 0b111, 0b1110, 0b0010, 0b000>; +def : RWSysReg<"CNTP_CTL_EL0", 0b11, 0b011, 0b1110, 0b0010, 0b001>; +def : RWSysReg<"CNTHP_CTL_EL2", 0b11, 0b100, 0b1110, 0b0010, 0b001>; +def : RWSysReg<"CNTPS_CTL_EL1", 0b11, 0b111, 0b1110, 0b0010, 0b001>; +def : RWSysReg<"CNTP_CVAL_EL0", 0b11, 0b011, 0b1110, 0b0010, 0b010>; +def : RWSysReg<"CNTHP_CVAL_EL2", 0b11, 0b100, 0b1110, 0b0010, 0b010>; +def : RWSysReg<"CNTPS_CVAL_EL1", 0b11, 0b111, 0b1110, 0b0010, 0b010>; +def : RWSysReg<"CNTV_TVAL_EL0", 0b11, 0b011, 0b1110, 0b0011, 0b000>; +def : RWSysReg<"CNTV_CTL_EL0", 0b11, 0b011, 0b1110, 0b0011, 0b001>; +def : RWSysReg<"CNTV_CVAL_EL0", 0b11, 0b011, 0b1110, 0b0011, 0b010>; +def : RWSysReg<"PMEVCNTR0_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b000>; +def : RWSysReg<"PMEVCNTR1_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b001>; +def : RWSysReg<"PMEVCNTR2_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b010>; +def : RWSysReg<"PMEVCNTR3_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b011>; +def : RWSysReg<"PMEVCNTR4_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b100>; +def : RWSysReg<"PMEVCNTR5_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b101>; +def : RWSysReg<"PMEVCNTR6_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b110>; +def : RWSysReg<"PMEVCNTR7_EL0", 0b11, 0b011, 0b1110, 0b1000, 0b111>; +def : RWSysReg<"PMEVCNTR8_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b000>; +def : RWSysReg<"PMEVCNTR9_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b001>; +def : RWSysReg<"PMEVCNTR10_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b010>; +def : RWSysReg<"PMEVCNTR11_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b011>; +def : RWSysReg<"PMEVCNTR12_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b100>; +def : RWSysReg<"PMEVCNTR13_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b101>; +def : RWSysReg<"PMEVCNTR14_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b110>; +def : RWSysReg<"PMEVCNTR15_EL0", 0b11, 0b011, 0b1110, 0b1001, 0b111>; +def : RWSysReg<"PMEVCNTR16_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b000>; +def : RWSysReg<"PMEVCNTR17_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b001>; +def : RWSysReg<"PMEVCNTR18_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b010>; +def : RWSysReg<"PMEVCNTR19_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b011>; +def : RWSysReg<"PMEVCNTR20_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b100>; +def : RWSysReg<"PMEVCNTR21_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b101>; +def : RWSysReg<"PMEVCNTR22_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b110>; +def : RWSysReg<"PMEVCNTR23_EL0", 0b11, 0b011, 0b1110, 0b1010, 0b111>; +def : RWSysReg<"PMEVCNTR24_EL0", 0b11, 0b011, 0b1110, 0b1011, 0b000>; +def : RWSysReg<"PMEVCNTR25_EL0", 0b11, 0b011, 0b1110, 0b1011, 0b001>; +def : RWSysReg<"PMEVCNTR26_EL0", 0b11, 0b011, 0b1110, 0b1011, 0b010>; +def : RWSysReg<"PMEVCNTR27_EL0", 0b11, 0b011, 0b1110, 0b1011, 0b011>; +def : RWSysReg<"PMEVCNTR28_EL0", 0b11, 0b011, 0b1110, 0b1011, 0b100>; +def : RWSysReg<"PMEVCNTR29_EL0", 0b11, 0b011, 0b1110, 0b1011, 0b101>; +def : RWSysReg<"PMEVCNTR30_EL0", 0b11, 0b011, 0b1110, 0b1011, 0b110>; +def : RWSysReg<"PMCCFILTR_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b111>; +def : RWSysReg<"PMEVTYPER0_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b000>; +def : RWSysReg<"PMEVTYPER1_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b001>; +def : RWSysReg<"PMEVTYPER2_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b010>; +def : RWSysReg<"PMEVTYPER3_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b011>; +def : RWSysReg<"PMEVTYPER4_EL0", 
0b11, 0b011, 0b1110, 0b1100, 0b100>; +def : RWSysReg<"PMEVTYPER5_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b101>; +def : RWSysReg<"PMEVTYPER6_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b110>; +def : RWSysReg<"PMEVTYPER7_EL0", 0b11, 0b011, 0b1110, 0b1100, 0b111>; +def : RWSysReg<"PMEVTYPER8_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b000>; +def : RWSysReg<"PMEVTYPER9_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b001>; +def : RWSysReg<"PMEVTYPER10_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b010>; +def : RWSysReg<"PMEVTYPER11_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b011>; +def : RWSysReg<"PMEVTYPER12_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b100>; +def : RWSysReg<"PMEVTYPER13_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b101>; +def : RWSysReg<"PMEVTYPER14_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b110>; +def : RWSysReg<"PMEVTYPER15_EL0", 0b11, 0b011, 0b1110, 0b1101, 0b111>; +def : RWSysReg<"PMEVTYPER16_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b000>; +def : RWSysReg<"PMEVTYPER17_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b001>; +def : RWSysReg<"PMEVTYPER18_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b010>; +def : RWSysReg<"PMEVTYPER19_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b011>; +def : RWSysReg<"PMEVTYPER20_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b100>; +def : RWSysReg<"PMEVTYPER21_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b101>; +def : RWSysReg<"PMEVTYPER22_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b110>; +def : RWSysReg<"PMEVTYPER23_EL0", 0b11, 0b011, 0b1110, 0b1110, 0b111>; +def : RWSysReg<"PMEVTYPER24_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b000>; +def : RWSysReg<"PMEVTYPER25_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b001>; +def : RWSysReg<"PMEVTYPER26_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b010>; +def : RWSysReg<"PMEVTYPER27_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b011>; +def : RWSysReg<"PMEVTYPER28_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b100>; +def : RWSysReg<"PMEVTYPER29_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b101>; +def : RWSysReg<"PMEVTYPER30_EL0", 0b11, 0b011, 0b1110, 0b1111, 0b110>; + +// Trace registers +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"TRCPRGCTLR", 0b10, 0b001, 0b0000, 0b0001, 0b000>; +def : RWSysReg<"TRCPROCSELR", 0b10, 0b001, 0b0000, 0b0010, 0b000>; +def : RWSysReg<"TRCCONFIGR", 0b10, 0b001, 0b0000, 0b0100, 0b000>; +def : RWSysReg<"TRCAUXCTLR", 0b10, 0b001, 0b0000, 0b0110, 0b000>; +def : RWSysReg<"TRCEVENTCTL0R", 0b10, 0b001, 0b0000, 0b1000, 0b000>; +def : RWSysReg<"TRCEVENTCTL1R", 0b10, 0b001, 0b0000, 0b1001, 0b000>; +def : RWSysReg<"TRCSTALLCTLR", 0b10, 0b001, 0b0000, 0b1011, 0b000>; +def : RWSysReg<"TRCTSCTLR", 0b10, 0b001, 0b0000, 0b1100, 0b000>; +def : RWSysReg<"TRCSYNCPR", 0b10, 0b001, 0b0000, 0b1101, 0b000>; +def : RWSysReg<"TRCCCCTLR", 0b10, 0b001, 0b0000, 0b1110, 0b000>; +def : RWSysReg<"TRCBBCTLR", 0b10, 0b001, 0b0000, 0b1111, 0b000>; +def : RWSysReg<"TRCTRACEIDR", 0b10, 0b001, 0b0000, 0b0000, 0b001>; +def : RWSysReg<"TRCQCTLR", 0b10, 0b001, 0b0000, 0b0001, 0b001>; +def : RWSysReg<"TRCVICTLR", 0b10, 0b001, 0b0000, 0b0000, 0b010>; +def : RWSysReg<"TRCVIIECTLR", 0b10, 0b001, 0b0000, 0b0001, 0b010>; +def : RWSysReg<"TRCVISSCTLR", 0b10, 0b001, 0b0000, 0b0010, 0b010>; +def : RWSysReg<"TRCVIPCSSCTLR", 0b10, 0b001, 0b0000, 0b0011, 0b010>; +def : RWSysReg<"TRCVDCTLR", 0b10, 0b001, 0b0000, 0b1000, 0b010>; +def : RWSysReg<"TRCVDSACCTLR", 0b10, 0b001, 0b0000, 0b1001, 0b010>; +def : RWSysReg<"TRCVDARCCTLR", 0b10, 0b001, 0b0000, 0b1010, 0b010>; +def : RWSysReg<"TRCSEQEVR0", 0b10, 0b001, 0b0000, 0b0000, 0b100>; +def : RWSysReg<"TRCSEQEVR1", 0b10, 0b001, 0b0000, 0b0001, 0b100>; +def : RWSysReg<"TRCSEQEVR2", 0b10, 0b001, 0b0000, 0b0010, 0b100>; +def : RWSysReg<"TRCSEQRSTEVR", 0b10, 0b001, 0b0000, 
0b0110, 0b100>; +def : RWSysReg<"TRCSEQSTR", 0b10, 0b001, 0b0000, 0b0111, 0b100>; +def : RWSysReg<"TRCEXTINSELR", 0b10, 0b001, 0b0000, 0b1000, 0b100>; +def : RWSysReg<"TRCCNTRLDVR0", 0b10, 0b001, 0b0000, 0b0000, 0b101>; +def : RWSysReg<"TRCCNTRLDVR1", 0b10, 0b001, 0b0000, 0b0001, 0b101>; +def : RWSysReg<"TRCCNTRLDVR2", 0b10, 0b001, 0b0000, 0b0010, 0b101>; +def : RWSysReg<"TRCCNTRLDVR3", 0b10, 0b001, 0b0000, 0b0011, 0b101>; +def : RWSysReg<"TRCCNTCTLR0", 0b10, 0b001, 0b0000, 0b0100, 0b101>; +def : RWSysReg<"TRCCNTCTLR1", 0b10, 0b001, 0b0000, 0b0101, 0b101>; +def : RWSysReg<"TRCCNTCTLR2", 0b10, 0b001, 0b0000, 0b0110, 0b101>; +def : RWSysReg<"TRCCNTCTLR3", 0b10, 0b001, 0b0000, 0b0111, 0b101>; +def : RWSysReg<"TRCCNTVR0", 0b10, 0b001, 0b0000, 0b1000, 0b101>; +def : RWSysReg<"TRCCNTVR1", 0b10, 0b001, 0b0000, 0b1001, 0b101>; +def : RWSysReg<"TRCCNTVR2", 0b10, 0b001, 0b0000, 0b1010, 0b101>; +def : RWSysReg<"TRCCNTVR3", 0b10, 0b001, 0b0000, 0b1011, 0b101>; +def : RWSysReg<"TRCIMSPEC0", 0b10, 0b001, 0b0000, 0b0000, 0b111>; +def : RWSysReg<"TRCIMSPEC1", 0b10, 0b001, 0b0000, 0b0001, 0b111>; +def : RWSysReg<"TRCIMSPEC2", 0b10, 0b001, 0b0000, 0b0010, 0b111>; +def : RWSysReg<"TRCIMSPEC3", 0b10, 0b001, 0b0000, 0b0011, 0b111>; +def : RWSysReg<"TRCIMSPEC4", 0b10, 0b001, 0b0000, 0b0100, 0b111>; +def : RWSysReg<"TRCIMSPEC5", 0b10, 0b001, 0b0000, 0b0101, 0b111>; +def : RWSysReg<"TRCIMSPEC6", 0b10, 0b001, 0b0000, 0b0110, 0b111>; +def : RWSysReg<"TRCIMSPEC7", 0b10, 0b001, 0b0000, 0b0111, 0b111>; +def : RWSysReg<"TRCRSCTLR2", 0b10, 0b001, 0b0001, 0b0010, 0b000>; +def : RWSysReg<"TRCRSCTLR3", 0b10, 0b001, 0b0001, 0b0011, 0b000>; +def : RWSysReg<"TRCRSCTLR4", 0b10, 0b001, 0b0001, 0b0100, 0b000>; +def : RWSysReg<"TRCRSCTLR5", 0b10, 0b001, 0b0001, 0b0101, 0b000>; +def : RWSysReg<"TRCRSCTLR6", 0b10, 0b001, 0b0001, 0b0110, 0b000>; +def : RWSysReg<"TRCRSCTLR7", 0b10, 0b001, 0b0001, 0b0111, 0b000>; +def : RWSysReg<"TRCRSCTLR8", 0b10, 0b001, 0b0001, 0b1000, 0b000>; +def : RWSysReg<"TRCRSCTLR9", 0b10, 0b001, 0b0001, 0b1001, 0b000>; +def : RWSysReg<"TRCRSCTLR10", 0b10, 0b001, 0b0001, 0b1010, 0b000>; +def : RWSysReg<"TRCRSCTLR11", 0b10, 0b001, 0b0001, 0b1011, 0b000>; +def : RWSysReg<"TRCRSCTLR12", 0b10, 0b001, 0b0001, 0b1100, 0b000>; +def : RWSysReg<"TRCRSCTLR13", 0b10, 0b001, 0b0001, 0b1101, 0b000>; +def : RWSysReg<"TRCRSCTLR14", 0b10, 0b001, 0b0001, 0b1110, 0b000>; +def : RWSysReg<"TRCRSCTLR15", 0b10, 0b001, 0b0001, 0b1111, 0b000>; +def : RWSysReg<"TRCRSCTLR16", 0b10, 0b001, 0b0001, 0b0000, 0b001>; +def : RWSysReg<"TRCRSCTLR17", 0b10, 0b001, 0b0001, 0b0001, 0b001>; +def : RWSysReg<"TRCRSCTLR18", 0b10, 0b001, 0b0001, 0b0010, 0b001>; +def : RWSysReg<"TRCRSCTLR19", 0b10, 0b001, 0b0001, 0b0011, 0b001>; +def : RWSysReg<"TRCRSCTLR20", 0b10, 0b001, 0b0001, 0b0100, 0b001>; +def : RWSysReg<"TRCRSCTLR21", 0b10, 0b001, 0b0001, 0b0101, 0b001>; +def : RWSysReg<"TRCRSCTLR22", 0b10, 0b001, 0b0001, 0b0110, 0b001>; +def : RWSysReg<"TRCRSCTLR23", 0b10, 0b001, 0b0001, 0b0111, 0b001>; +def : RWSysReg<"TRCRSCTLR24", 0b10, 0b001, 0b0001, 0b1000, 0b001>; +def : RWSysReg<"TRCRSCTLR25", 0b10, 0b001, 0b0001, 0b1001, 0b001>; +def : RWSysReg<"TRCRSCTLR26", 0b10, 0b001, 0b0001, 0b1010, 0b001>; +def : RWSysReg<"TRCRSCTLR27", 0b10, 0b001, 0b0001, 0b1011, 0b001>; +def : RWSysReg<"TRCRSCTLR28", 0b10, 0b001, 0b0001, 0b1100, 0b001>; +def : RWSysReg<"TRCRSCTLR29", 0b10, 0b001, 0b0001, 0b1101, 0b001>; +def : RWSysReg<"TRCRSCTLR30", 0b10, 0b001, 0b0001, 0b1110, 0b001>; +def : RWSysReg<"TRCRSCTLR31", 0b10, 0b001, 0b0001, 0b1111, 0b001>; +def : 
RWSysReg<"TRCSSCCR0", 0b10, 0b001, 0b0001, 0b0000, 0b010>; +def : RWSysReg<"TRCSSCCR1", 0b10, 0b001, 0b0001, 0b0001, 0b010>; +def : RWSysReg<"TRCSSCCR2", 0b10, 0b001, 0b0001, 0b0010, 0b010>; +def : RWSysReg<"TRCSSCCR3", 0b10, 0b001, 0b0001, 0b0011, 0b010>; +def : RWSysReg<"TRCSSCCR4", 0b10, 0b001, 0b0001, 0b0100, 0b010>; +def : RWSysReg<"TRCSSCCR5", 0b10, 0b001, 0b0001, 0b0101, 0b010>; +def : RWSysReg<"TRCSSCCR6", 0b10, 0b001, 0b0001, 0b0110, 0b010>; +def : RWSysReg<"TRCSSCCR7", 0b10, 0b001, 0b0001, 0b0111, 0b010>; +def : RWSysReg<"TRCSSCSR0", 0b10, 0b001, 0b0001, 0b1000, 0b010>; +def : RWSysReg<"TRCSSCSR1", 0b10, 0b001, 0b0001, 0b1001, 0b010>; +def : RWSysReg<"TRCSSCSR2", 0b10, 0b001, 0b0001, 0b1010, 0b010>; +def : RWSysReg<"TRCSSCSR3", 0b10, 0b001, 0b0001, 0b1011, 0b010>; +def : RWSysReg<"TRCSSCSR4", 0b10, 0b001, 0b0001, 0b1100, 0b010>; +def : RWSysReg<"TRCSSCSR5", 0b10, 0b001, 0b0001, 0b1101, 0b010>; +def : RWSysReg<"TRCSSCSR6", 0b10, 0b001, 0b0001, 0b1110, 0b010>; +def : RWSysReg<"TRCSSCSR7", 0b10, 0b001, 0b0001, 0b1111, 0b010>; +def : RWSysReg<"TRCSSPCICR0", 0b10, 0b001, 0b0001, 0b0000, 0b011>; +def : RWSysReg<"TRCSSPCICR1", 0b10, 0b001, 0b0001, 0b0001, 0b011>; +def : RWSysReg<"TRCSSPCICR2", 0b10, 0b001, 0b0001, 0b0010, 0b011>; +def : RWSysReg<"TRCSSPCICR3", 0b10, 0b001, 0b0001, 0b0011, 0b011>; +def : RWSysReg<"TRCSSPCICR4", 0b10, 0b001, 0b0001, 0b0100, 0b011>; +def : RWSysReg<"TRCSSPCICR5", 0b10, 0b001, 0b0001, 0b0101, 0b011>; +def : RWSysReg<"TRCSSPCICR6", 0b10, 0b001, 0b0001, 0b0110, 0b011>; +def : RWSysReg<"TRCSSPCICR7", 0b10, 0b001, 0b0001, 0b0111, 0b011>; +def : RWSysReg<"TRCPDCR", 0b10, 0b001, 0b0001, 0b0100, 0b100>; +def : RWSysReg<"TRCACVR0", 0b10, 0b001, 0b0010, 0b0000, 0b000>; +def : RWSysReg<"TRCACVR1", 0b10, 0b001, 0b0010, 0b0010, 0b000>; +def : RWSysReg<"TRCACVR2", 0b10, 0b001, 0b0010, 0b0100, 0b000>; +def : RWSysReg<"TRCACVR3", 0b10, 0b001, 0b0010, 0b0110, 0b000>; +def : RWSysReg<"TRCACVR4", 0b10, 0b001, 0b0010, 0b1000, 0b000>; +def : RWSysReg<"TRCACVR5", 0b10, 0b001, 0b0010, 0b1010, 0b000>; +def : RWSysReg<"TRCACVR6", 0b10, 0b001, 0b0010, 0b1100, 0b000>; +def : RWSysReg<"TRCACVR7", 0b10, 0b001, 0b0010, 0b1110, 0b000>; +def : RWSysReg<"TRCACVR8", 0b10, 0b001, 0b0010, 0b0000, 0b001>; +def : RWSysReg<"TRCACVR9", 0b10, 0b001, 0b0010, 0b0010, 0b001>; +def : RWSysReg<"TRCACVR10", 0b10, 0b001, 0b0010, 0b0100, 0b001>; +def : RWSysReg<"TRCACVR11", 0b10, 0b001, 0b0010, 0b0110, 0b001>; +def : RWSysReg<"TRCACVR12", 0b10, 0b001, 0b0010, 0b1000, 0b001>; +def : RWSysReg<"TRCACVR13", 0b10, 0b001, 0b0010, 0b1010, 0b001>; +def : RWSysReg<"TRCACVR14", 0b10, 0b001, 0b0010, 0b1100, 0b001>; +def : RWSysReg<"TRCACVR15", 0b10, 0b001, 0b0010, 0b1110, 0b001>; +def : RWSysReg<"TRCACATR0", 0b10, 0b001, 0b0010, 0b0000, 0b010>; +def : RWSysReg<"TRCACATR1", 0b10, 0b001, 0b0010, 0b0010, 0b010>; +def : RWSysReg<"TRCACATR2", 0b10, 0b001, 0b0010, 0b0100, 0b010>; +def : RWSysReg<"TRCACATR3", 0b10, 0b001, 0b0010, 0b0110, 0b010>; +def : RWSysReg<"TRCACATR4", 0b10, 0b001, 0b0010, 0b1000, 0b010>; +def : RWSysReg<"TRCACATR5", 0b10, 0b001, 0b0010, 0b1010, 0b010>; +def : RWSysReg<"TRCACATR6", 0b10, 0b001, 0b0010, 0b1100, 0b010>; +def : RWSysReg<"TRCACATR7", 0b10, 0b001, 0b0010, 0b1110, 0b010>; +def : RWSysReg<"TRCACATR8", 0b10, 0b001, 0b0010, 0b0000, 0b011>; +def : RWSysReg<"TRCACATR9", 0b10, 0b001, 0b0010, 0b0010, 0b011>; +def : RWSysReg<"TRCACATR10", 0b10, 0b001, 0b0010, 0b0100, 0b011>; +def : RWSysReg<"TRCACATR11", 0b10, 0b001, 0b0010, 0b0110, 0b011>; +def : RWSysReg<"TRCACATR12", 0b10, 0b001, 0b0010, 
0b1000, 0b011>; +def : RWSysReg<"TRCACATR13", 0b10, 0b001, 0b0010, 0b1010, 0b011>; +def : RWSysReg<"TRCACATR14", 0b10, 0b001, 0b0010, 0b1100, 0b011>; +def : RWSysReg<"TRCACATR15", 0b10, 0b001, 0b0010, 0b1110, 0b011>; +def : RWSysReg<"TRCDVCVR0", 0b10, 0b001, 0b0010, 0b0000, 0b100>; +def : RWSysReg<"TRCDVCVR1", 0b10, 0b001, 0b0010, 0b0100, 0b100>; +def : RWSysReg<"TRCDVCVR2", 0b10, 0b001, 0b0010, 0b1000, 0b100>; +def : RWSysReg<"TRCDVCVR3", 0b10, 0b001, 0b0010, 0b1100, 0b100>; +def : RWSysReg<"TRCDVCVR4", 0b10, 0b001, 0b0010, 0b0000, 0b101>; +def : RWSysReg<"TRCDVCVR5", 0b10, 0b001, 0b0010, 0b0100, 0b101>; +def : RWSysReg<"TRCDVCVR6", 0b10, 0b001, 0b0010, 0b1000, 0b101>; +def : RWSysReg<"TRCDVCVR7", 0b10, 0b001, 0b0010, 0b1100, 0b101>; +def : RWSysReg<"TRCDVCMR0", 0b10, 0b001, 0b0010, 0b0000, 0b110>; +def : RWSysReg<"TRCDVCMR1", 0b10, 0b001, 0b0010, 0b0100, 0b110>; +def : RWSysReg<"TRCDVCMR2", 0b10, 0b001, 0b0010, 0b1000, 0b110>; +def : RWSysReg<"TRCDVCMR3", 0b10, 0b001, 0b0010, 0b1100, 0b110>; +def : RWSysReg<"TRCDVCMR4", 0b10, 0b001, 0b0010, 0b0000, 0b111>; +def : RWSysReg<"TRCDVCMR5", 0b10, 0b001, 0b0010, 0b0100, 0b111>; +def : RWSysReg<"TRCDVCMR6", 0b10, 0b001, 0b0010, 0b1000, 0b111>; +def : RWSysReg<"TRCDVCMR7", 0b10, 0b001, 0b0010, 0b1100, 0b111>; +def : RWSysReg<"TRCCIDCVR0", 0b10, 0b001, 0b0011, 0b0000, 0b000>; +def : RWSysReg<"TRCCIDCVR1", 0b10, 0b001, 0b0011, 0b0010, 0b000>; +def : RWSysReg<"TRCCIDCVR2", 0b10, 0b001, 0b0011, 0b0100, 0b000>; +def : RWSysReg<"TRCCIDCVR3", 0b10, 0b001, 0b0011, 0b0110, 0b000>; +def : RWSysReg<"TRCCIDCVR4", 0b10, 0b001, 0b0011, 0b1000, 0b000>; +def : RWSysReg<"TRCCIDCVR5", 0b10, 0b001, 0b0011, 0b1010, 0b000>; +def : RWSysReg<"TRCCIDCVR6", 0b10, 0b001, 0b0011, 0b1100, 0b000>; +def : RWSysReg<"TRCCIDCVR7", 0b10, 0b001, 0b0011, 0b1110, 0b000>; +def : RWSysReg<"TRCVMIDCVR0", 0b10, 0b001, 0b0011, 0b0000, 0b001>; +def : RWSysReg<"TRCVMIDCVR1", 0b10, 0b001, 0b0011, 0b0010, 0b001>; +def : RWSysReg<"TRCVMIDCVR2", 0b10, 0b001, 0b0011, 0b0100, 0b001>; +def : RWSysReg<"TRCVMIDCVR3", 0b10, 0b001, 0b0011, 0b0110, 0b001>; +def : RWSysReg<"TRCVMIDCVR4", 0b10, 0b001, 0b0011, 0b1000, 0b001>; +def : RWSysReg<"TRCVMIDCVR5", 0b10, 0b001, 0b0011, 0b1010, 0b001>; +def : RWSysReg<"TRCVMIDCVR6", 0b10, 0b001, 0b0011, 0b1100, 0b001>; +def : RWSysReg<"TRCVMIDCVR7", 0b10, 0b001, 0b0011, 0b1110, 0b001>; +def : RWSysReg<"TRCCIDCCTLR0", 0b10, 0b001, 0b0011, 0b0000, 0b010>; +def : RWSysReg<"TRCCIDCCTLR1", 0b10, 0b001, 0b0011, 0b0001, 0b010>; +def : RWSysReg<"TRCVMIDCCTLR0", 0b10, 0b001, 0b0011, 0b0010, 0b010>; +def : RWSysReg<"TRCVMIDCCTLR1", 0b10, 0b001, 0b0011, 0b0011, 0b010>; +def : RWSysReg<"TRCITCTRL", 0b10, 0b001, 0b0111, 0b0000, 0b100>; +def : RWSysReg<"TRCCLAIMSET", 0b10, 0b001, 0b0111, 0b1000, 0b110>; +def : RWSysReg<"TRCCLAIMCLR", 0b10, 0b001, 0b0111, 0b1001, 0b110>; + +// GICv3 registers +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"ICC_BPR1_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b011>; +def : RWSysReg<"ICC_BPR0_EL1", 0b11, 0b000, 0b1100, 0b1000, 0b011>; +def : RWSysReg<"ICC_PMR_EL1", 0b11, 0b000, 0b0100, 0b0110, 0b000>; +def : RWSysReg<"ICC_CTLR_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b100>; +def : RWSysReg<"ICC_CTLR_EL3", 0b11, 0b110, 0b1100, 0b1100, 0b100>; +def : RWSysReg<"ICC_SRE_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b101>; +def : RWSysReg<"ICC_SRE_EL2", 0b11, 0b100, 0b1100, 0b1001, 0b101>; +def : RWSysReg<"ICC_SRE_EL3", 0b11, 0b110, 0b1100, 0b1100, 0b101>; +def : RWSysReg<"ICC_IGRPEN0_EL1", 0b11, 0b000, 0b1100, 0b1100, 0b110>; +def : RWSysReg<"ICC_IGRPEN1_EL1", 0b11, 0b000, 0b1100, 
0b1100, 0b111>;
+def : RWSysReg<"ICC_IGRPEN1_EL3", 0b11, 0b110, 0b1100, 0b1100, 0b111>;
+def : RWSysReg<"ICC_SEIEN_EL1",   0b11, 0b000, 0b1100, 0b1101, 0b000>;
+def : RWSysReg<"ICC_AP0R0_EL1",   0b11, 0b000, 0b1100, 0b1000, 0b100>;
+def : RWSysReg<"ICC_AP0R1_EL1",   0b11, 0b000, 0b1100, 0b1000, 0b101>;
+def : RWSysReg<"ICC_AP0R2_EL1",   0b11, 0b000, 0b1100, 0b1000, 0b110>;
+def : RWSysReg<"ICC_AP0R3_EL1",   0b11, 0b000, 0b1100, 0b1000, 0b111>;
+def : RWSysReg<"ICC_AP1R0_EL1",   0b11, 0b000, 0b1100, 0b1001, 0b000>;
+def : RWSysReg<"ICC_AP1R1_EL1",   0b11, 0b000, 0b1100, 0b1001, 0b001>;
+def : RWSysReg<"ICC_AP1R2_EL1",   0b11, 0b000, 0b1100, 0b1001, 0b010>;
+def : RWSysReg<"ICC_AP1R3_EL1",   0b11, 0b000, 0b1100, 0b1001, 0b011>;
+def : RWSysReg<"ICH_AP0R0_EL2",   0b11, 0b100, 0b1100, 0b1000, 0b000>;
+def : RWSysReg<"ICH_AP0R1_EL2",   0b11, 0b100, 0b1100, 0b1000, 0b001>;
+def : RWSysReg<"ICH_AP0R2_EL2",   0b11, 0b100, 0b1100, 0b1000, 0b010>;
+def : RWSysReg<"ICH_AP0R3_EL2",   0b11, 0b100, 0b1100, 0b1000, 0b011>;
+def : RWSysReg<"ICH_AP1R0_EL2",   0b11, 0b100, 0b1100, 0b1001, 0b000>;
+def : RWSysReg<"ICH_AP1R1_EL2",   0b11, 0b100, 0b1100, 0b1001, 0b001>;
+def : RWSysReg<"ICH_AP1R2_EL2",   0b11, 0b100, 0b1100, 0b1001, 0b010>;
+def : RWSysReg<"ICH_AP1R3_EL2",   0b11, 0b100, 0b1100, 0b1001, 0b011>;
+def : RWSysReg<"ICH_HCR_EL2",     0b11, 0b100, 0b1100, 0b1011, 0b000>;
+def : RWSysReg<"ICH_MISR_EL2",    0b11, 0b100, 0b1100, 0b1011, 0b010>;
+def : RWSysReg<"ICH_VMCR_EL2",    0b11, 0b100, 0b1100, 0b1011, 0b111>;
+def : RWSysReg<"ICH_VSEIR_EL2",   0b11, 0b100, 0b1100, 0b1001, 0b100>;
+def : RWSysReg<"ICH_LR0_EL2",     0b11, 0b100, 0b1100, 0b1100, 0b000>;
+def : RWSysReg<"ICH_LR1_EL2",     0b11, 0b100, 0b1100, 0b1100, 0b001>;
+def : RWSysReg<"ICH_LR2_EL2",     0b11, 0b100, 0b1100, 0b1100, 0b010>;
+def : RWSysReg<"ICH_LR3_EL2",     0b11, 0b100, 0b1100, 0b1100, 0b011>;
+def : RWSysReg<"ICH_LR4_EL2",     0b11, 0b100, 0b1100, 0b1100, 0b100>;
+def : RWSysReg<"ICH_LR5_EL2",     0b11, 0b100, 0b1100, 0b1100, 0b101>;
+def : RWSysReg<"ICH_LR6_EL2",     0b11, 0b100, 0b1100, 0b1100, 0b110>;
+def : RWSysReg<"ICH_LR7_EL2",     0b11, 0b100, 0b1100, 0b1100, 0b111>;
+def : RWSysReg<"ICH_LR8_EL2",     0b11, 0b100, 0b1100, 0b1101, 0b000>;
+def : RWSysReg<"ICH_LR9_EL2",     0b11, 0b100, 0b1100, 0b1101, 0b001>;
+def : RWSysReg<"ICH_LR10_EL2",    0b11, 0b100, 0b1100, 0b1101, 0b010>;
+def : RWSysReg<"ICH_LR11_EL2",    0b11, 0b100, 0b1100, 0b1101, 0b011>;
+def : RWSysReg<"ICH_LR12_EL2",    0b11, 0b100, 0b1100, 0b1101, 0b100>;
+def : RWSysReg<"ICH_LR13_EL2",    0b11, 0b100, 0b1100, 0b1101, 0b101>;
+def : RWSysReg<"ICH_LR14_EL2",    0b11, 0b100, 0b1100, 0b1101, 0b110>;
+def : RWSysReg<"ICH_LR15_EL2",    0b11, 0b100, 0b1100, 0b1101, 0b111>;
+
+// v8.1a "Privileged Access Never" extension-specific system registers
+let Requires = [{ {AArch64::HasV8_1aOps} }] in
+def : RWSysReg<"PAN", 0b11, 0b000, 0b0100, 0b0010, 0b011>;
+
+// v8.1a "Limited Ordering Regions" extension-specific system registers
+//                 Op0 Op1  CRn   CRm   Op2
+let Requires = [{ {AArch64::HasV8_1aOps} }] in {
+def : RWSysReg<"LORSA_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b000>;
+def : RWSysReg<"LOREA_EL1", 0b11, 0b000, 0b1010, 0b0100, 0b001>;
+def : RWSysReg<"LORN_EL1",  0b11, 0b000, 0b1010, 0b0100, 0b010>;
+def : RWSysReg<"LORC_EL1",  0b11, 0b000, 0b1010, 0b0100, 0b011>;
+}
+
+// v8.1a "Virtualization Host extensions" system registers
+//                 Op0 Op1  CRn   CRm   Op2
+let Requires = [{ {AArch64::HasV8_1aOps} }] in {
+def : RWSysReg<"TTBR1_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b001>;
+def : RWSysReg<"CONTEXTIDR_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b001>;
+def : RWSysReg<"CNTHV_TVAL_EL2",
0b11, 0b100, 0b1110, 0b0011, 0b000>; +def : RWSysReg<"CNTHV_CVAL_EL2", 0b11, 0b100, 0b1110, 0b0011, 0b010>; +def : RWSysReg<"CNTHV_CTL_EL2", 0b11, 0b100, 0b1110, 0b0011, 0b001>; +def : RWSysReg<"SCTLR_EL12", 0b11, 0b101, 0b0001, 0b0000, 0b000>; +def : RWSysReg<"CPACR_EL12", 0b11, 0b101, 0b0001, 0b0000, 0b010>; +def : RWSysReg<"TTBR0_EL12", 0b11, 0b101, 0b0010, 0b0000, 0b000>; +def : RWSysReg<"TTBR1_EL12", 0b11, 0b101, 0b0010, 0b0000, 0b001>; +def : RWSysReg<"TCR_EL12", 0b11, 0b101, 0b0010, 0b0000, 0b010>; +def : RWSysReg<"AFSR0_EL12", 0b11, 0b101, 0b0101, 0b0001, 0b000>; +def : RWSysReg<"AFSR1_EL12", 0b11, 0b101, 0b0101, 0b0001, 0b001>; +def : RWSysReg<"ESR_EL12", 0b11, 0b101, 0b0101, 0b0010, 0b000>; +def : RWSysReg<"FAR_EL12", 0b11, 0b101, 0b0110, 0b0000, 0b000>; +def : RWSysReg<"MAIR_EL12", 0b11, 0b101, 0b1010, 0b0010, 0b000>; +def : RWSysReg<"AMAIR_EL12", 0b11, 0b101, 0b1010, 0b0011, 0b000>; +def : RWSysReg<"VBAR_EL12", 0b11, 0b101, 0b1100, 0b0000, 0b000>; +def : RWSysReg<"CONTEXTIDR_EL12", 0b11, 0b101, 0b1101, 0b0000, 0b001>; +def : RWSysReg<"CNTKCTL_EL12", 0b11, 0b101, 0b1110, 0b0001, 0b000>; +def : RWSysReg<"CNTP_TVAL_EL02", 0b11, 0b101, 0b1110, 0b0010, 0b000>; +def : RWSysReg<"CNTP_CTL_EL02", 0b11, 0b101, 0b1110, 0b0010, 0b001>; +def : RWSysReg<"CNTP_CVAL_EL02", 0b11, 0b101, 0b1110, 0b0010, 0b010>; +def : RWSysReg<"CNTV_TVAL_EL02", 0b11, 0b101, 0b1110, 0b0011, 0b000>; +def : RWSysReg<"CNTV_CTL_EL02", 0b11, 0b101, 0b1110, 0b0011, 0b001>; +def : RWSysReg<"CNTV_CVAL_EL02", 0b11, 0b101, 0b1110, 0b0011, 0b010>; +def : RWSysReg<"SPSR_EL12", 0b11, 0b101, 0b0100, 0b0000, 0b000>; +def : RWSysReg<"ELR_EL12", 0b11, 0b101, 0b0100, 0b0000, 0b001>; +} +// v8.2a registers +// Op0 Op1 CRn CRm Op2 +let Requires = [{ {AArch64::HasV8_2aOps} }] in +def : RWSysReg<"UAO", 0b11, 0b000, 0b0100, 0b0010, 0b100>; + +// v8.2a "Statistical Profiling extension" registers +// Op0 Op1 CRn CRm Op2 +let Requires = [{ {AArch64::FeatureSPE} }] in { +def : RWSysReg<"PMBLIMITR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b000>; +def : RWSysReg<"PMBPTR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b001>; +def : RWSysReg<"PMBSR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b011>; +def : RWSysReg<"PMBIDR_EL1", 0b11, 0b000, 0b1001, 0b1010, 0b111>; +def : RWSysReg<"PMSCR_EL2", 0b11, 0b100, 0b1001, 0b1001, 0b000>; +def : RWSysReg<"PMSCR_EL12", 0b11, 0b101, 0b1001, 0b1001, 0b000>; +def : RWSysReg<"PMSCR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b000>; +def : RWSysReg<"PMSICR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b010>; +def : RWSysReg<"PMSIRR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b011>; +def : RWSysReg<"PMSFCR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b100>; +def : RWSysReg<"PMSEVFR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b101>; +def : RWSysReg<"PMSLATFR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b110>; +def : RWSysReg<"PMSIDR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b111>; +} + +// v8.2a "RAS extension" registers +// Op0 Op1 CRn CRm Op2 +let Requires = [{ {AArch64::FeatureRAS} }] in { +def : RWSysReg<"ERRSELR_EL1", 0b11, 0b000, 0b0101, 0b0011, 0b001>; +def : RWSysReg<"ERXCTLR_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b001>; +def : RWSysReg<"ERXSTATUS_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b010>; +def : RWSysReg<"ERXADDR_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b011>; +def : RWSysReg<"ERXMISC0_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b000>; +def : RWSysReg<"ERXMISC1_EL1", 0b11, 0b000, 0b0101, 0b0101, 0b001>; +def : RWSysReg<"DISR_EL1", 0b11, 0b000, 0b1100, 0b0001, 0b001>; +def : RWSysReg<"VDISR_EL2", 0b11, 0b100, 0b1100, 0b0001, 0b001>; +def : RWSysReg<"VSESR_EL2", 0b11, 0b100, 0b0101, 0b0010, 0b011>; +} 
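+
+// Note on the Op0/Op1/CRn/CRm/Op2 columns used throughout this file: the five
+// fields are concatenated as Op0:Op1:CRn:CRm:Op2 (2+3+4+4+3 bits) to form the
+// 16-bit system-register number carried in bits [20:5] of the MRS/MSR
+// encodings, i.e. roughly:
+//   Encoding = (Op0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2
+// For example, NZCV above (0b11, 0b011, 0b0100, 0b0010, 0b000) packs to
+// 0xDA10. This is an illustrative sketch only; the actual packing is assumed
+// to be performed by the SysReg base classes defined earlier in this file.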
+
+// v8.3a "Pointer authentication extension" registers
+//                 Op0 Op1  CRn   CRm   Op2
+let Requires = [{ {AArch64::HasV8_3aOps} }] in {
+def : RWSysReg<"APIAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b000>;
+def : RWSysReg<"APIAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b001>;
+def : RWSysReg<"APIBKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b010>;
+def : RWSysReg<"APIBKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b011>;
+def : RWSysReg<"APDAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0010, 0b000>;
+def : RWSysReg<"APDAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0010, 0b001>;
+def : RWSysReg<"APDBKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0010, 0b010>;
+def : RWSysReg<"APDBKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0010, 0b011>;
+def : RWSysReg<"APGAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0011, 0b000>;
+def : RWSysReg<"APGAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0011, 0b001>;
+}
+
+let Requires = [{ {AArch64::HasV8_4aOps} }] in {
+
+// v8.4a "Virtualization secure second stage translation" registers
+//                 Op0 Op1  CRn   CRm   Op2
+def : RWSysReg<"VSTCR_EL2" , 0b11, 0b100, 0b0010, 0b0110, 0b010>;
+def : RWSysReg<"VSTTBR_EL2", 0b11, 0b100, 0b0010, 0b0110, 0b000>;
+
+// v8.4a "Virtualization timer" registers
+//                 Op0 Op1  CRn   CRm   Op2
+def : RWSysReg<"CNTHVS_TVAL_EL2", 0b11, 0b100, 0b1110, 0b0100, 0b000>;
+def : RWSysReg<"CNTHVS_CVAL_EL2", 0b11, 0b100, 0b1110, 0b0100, 0b010>;
+def : RWSysReg<"CNTHVS_CTL_EL2",  0b11, 0b100, 0b1110, 0b0100, 0b001>;
+def : RWSysReg<"CNTHPS_TVAL_EL2", 0b11, 0b100, 0b1110, 0b0101, 0b000>;
+def : RWSysReg<"CNTHPS_CVAL_EL2", 0b11, 0b100, 0b1110, 0b0101, 0b010>;
+def : RWSysReg<"CNTHPS_CTL_EL2",  0b11, 0b100, 0b1110, 0b0101, 0b001>;
+
+// v8.4a "Virtualization debug state" registers
+//                 Op0 Op1  CRn   CRm   Op2
+def : RWSysReg<"SDER32_EL2", 0b11, 0b100, 0b0001, 0b0011, 0b001>;
+
+// v8.4a RAS registers
+//                 Op0 Op1  CRn   CRm   Op2
+def : RWSysReg<"ERXPFGCTL_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b101>;
+def : RWSysReg<"ERXPFGCDN_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b110>;
+def : RWSysReg<"ERXTS_EL1",     0b11, 0b000, 0b0101, 0b0101, 0b111>;
+def : RWSysReg<"ERXMISC2_EL1",  0b11, 0b000, 0b0101, 0b0101, 0b010>;
+def : RWSysReg<"ERXMISC3_EL1",  0b11, 0b000, 0b0101, 0b0101, 0b011>;
+def : ROSysReg<"ERXPFGF_EL1",   0b11, 0b000, 0b0101, 0b0100, 0b100>;
+
+// v8.4a MPAM registers
+//                 Op0 Op1  CRn   CRm   Op2
+def : RWSysReg<"MPAM0_EL1",    0b11, 0b000, 0b1010, 0b0101, 0b001>;
+def : RWSysReg<"MPAM1_EL1",    0b11, 0b000, 0b1010, 0b0101, 0b000>;
+def : RWSysReg<"MPAM2_EL2",    0b11, 0b100, 0b1010, 0b0101, 0b000>;
+def : RWSysReg<"MPAM3_EL3",    0b11, 0b110, 0b1010, 0b0101, 0b000>;
+def : RWSysReg<"MPAM1_EL12",   0b11, 0b101, 0b1010, 0b0101, 0b000>;
+def : RWSysReg<"MPAMHCR_EL2",  0b11, 0b100, 0b1010, 0b0100, 0b000>;
+def : RWSysReg<"MPAMVPMV_EL2", 0b11, 0b100, 0b1010, 0b0100, 0b001>;
+def : RWSysReg<"MPAMVPM0_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b000>;
+def : RWSysReg<"MPAMVPM1_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b001>;
+def : RWSysReg<"MPAMVPM2_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b010>;
+def : RWSysReg<"MPAMVPM3_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b011>;
+def : RWSysReg<"MPAMVPM4_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b100>;
+def : RWSysReg<"MPAMVPM5_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b101>;
+def : RWSysReg<"MPAMVPM6_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b110>;
+def : RWSysReg<"MPAMVPM7_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b111>;
+def : ROSysReg<"MPAMIDR_EL1",  0b11, 0b000, 0b1010, 0b0100, 0b100>;
+
+// v8.4a Activity monitor registers
+//                 Op0 Op1  CRn   CRm   Op2
+def : RWSysReg<"AMCR_EL0",   0b11, 0b011, 0b1101, 0b0010, 0b000>;
+def : ROSysReg<"AMCFGR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b001>;
+def : ROSysReg<"AMCGCR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b010>; +def : RWSysReg<"AMUSERENR_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b011>; +def : RWSysReg<"AMCNTENCLR0_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b100>; +def : RWSysReg<"AMCNTENSET0_EL0", 0b11, 0b011, 0b1101, 0b0010, 0b101>; +def : RWSysReg<"AMEVCNTR00_EL0", 0b11, 0b011, 0b1101, 0b0100, 0b000>; +def : RWSysReg<"AMEVCNTR01_EL0", 0b11, 0b011, 0b1101, 0b0100, 0b001>; +def : RWSysReg<"AMEVCNTR02_EL0", 0b11, 0b011, 0b1101, 0b0100, 0b010>; +def : RWSysReg<"AMEVCNTR03_EL0", 0b11, 0b011, 0b1101, 0b0100, 0b011>; +def : ROSysReg<"AMEVTYPER00_EL0", 0b11, 0b011, 0b1101, 0b0110, 0b000>; +def : ROSysReg<"AMEVTYPER01_EL0", 0b11, 0b011, 0b1101, 0b0110, 0b001>; +def : ROSysReg<"AMEVTYPER02_EL0", 0b11, 0b011, 0b1101, 0b0110, 0b010>; +def : ROSysReg<"AMEVTYPER03_EL0", 0b11, 0b011, 0b1101, 0b0110, 0b011>; +def : RWSysReg<"AMCNTENCLR1_EL0", 0b11, 0b011, 0b1101, 0b0011, 0b000>; +def : RWSysReg<"AMCNTENSET1_EL0", 0b11, 0b011, 0b1101, 0b0011, 0b001>; +def : RWSysReg<"AMEVCNTR10_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b000>; +def : RWSysReg<"AMEVCNTR11_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b001>; +def : RWSysReg<"AMEVCNTR12_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b010>; +def : RWSysReg<"AMEVCNTR13_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b011>; +def : RWSysReg<"AMEVCNTR14_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b100>; +def : RWSysReg<"AMEVCNTR15_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b101>; +def : RWSysReg<"AMEVCNTR16_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b110>; +def : RWSysReg<"AMEVCNTR17_EL0", 0b11, 0b011, 0b1101, 0b1100, 0b111>; +def : RWSysReg<"AMEVCNTR18_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b000>; +def : RWSysReg<"AMEVCNTR19_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b001>; +def : RWSysReg<"AMEVCNTR110_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b010>; +def : RWSysReg<"AMEVCNTR111_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b011>; +def : RWSysReg<"AMEVCNTR112_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b100>; +def : RWSysReg<"AMEVCNTR113_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b101>; +def : RWSysReg<"AMEVCNTR114_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b110>; +def : RWSysReg<"AMEVCNTR115_EL0", 0b11, 0b011, 0b1101, 0b1101, 0b111>; +def : RWSysReg<"AMEVTYPER10_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b000>; +def : RWSysReg<"AMEVTYPER11_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b001>; +def : RWSysReg<"AMEVTYPER12_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b010>; +def : RWSysReg<"AMEVTYPER13_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b011>; +def : RWSysReg<"AMEVTYPER14_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b100>; +def : RWSysReg<"AMEVTYPER15_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b101>; +def : RWSysReg<"AMEVTYPER16_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b110>; +def : RWSysReg<"AMEVTYPER17_EL0", 0b11, 0b011, 0b1101, 0b1110, 0b111>; +def : RWSysReg<"AMEVTYPER18_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b000>; +def : RWSysReg<"AMEVTYPER19_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b001>; +def : RWSysReg<"AMEVTYPER110_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b010>; +def : RWSysReg<"AMEVTYPER111_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b011>; +def : RWSysReg<"AMEVTYPER112_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b100>; +def : RWSysReg<"AMEVTYPER113_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b101>; +def : RWSysReg<"AMEVTYPER114_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b110>; +def : RWSysReg<"AMEVTYPER115_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b111>; + +// v8.4a Trace Extension registers +// +// Please note that the 8.4 spec also defines these registers: +// TRCIDR1, ID_DFR0_EL1, ID_AA64DFR0_EL1, MDSCR_EL1, MDCR_EL2, and MDCR_EL3, +// but they are already defined above. 
+//
+//                 Op0 Op1  CRn   CRm   Op2
+def : RWSysReg<"TRFCR_EL1",  0b11, 0b000, 0b0001, 0b0010, 0b001>;
+def : RWSysReg<"TRFCR_EL2",  0b11, 0b100, 0b0001, 0b0010, 0b001>;
+def : RWSysReg<"TRFCR_EL12", 0b11, 0b101, 0b0001, 0b0010, 0b001>;
+
+// v8.4a Timing insensitivity of data processing instructions
+//                 Op0 Op1  CRn   CRm   Op2
+def : RWSysReg<"DIT", 0b11, 0b011, 0b0100, 0b0010, 0b101>;
+
+// v8.4a Enhanced Support for Nested Virtualization
+//                 Op0 Op1  CRn   CRm   Op2
+def : RWSysReg<"VNCR_EL2", 0b11, 0b100, 0b0010, 0b0010, 0b000>;
+
+} // HasV8_4aOps
+
+// SVE control registers
+//                 Op0 Op1  CRn   CRm   Op2
+let Requires = [{ {AArch64::FeatureSVE} }] in {
+def : RWSysReg<"ZCR_EL1",  0b11, 0b000, 0b0001, 0b0010, 0b000>;
+def : RWSysReg<"ZCR_EL2",  0b11, 0b100, 0b0001, 0b0010, 0b000>;
+def : RWSysReg<"ZCR_EL3",  0b11, 0b110, 0b0001, 0b0010, 0b000>;
+def : RWSysReg<"ZCR_EL12", 0b11, 0b101, 0b0001, 0b0010, 0b000>;
+}
+
+// Cyclone specific system registers
+//                 Op0 Op1  CRn   CRm   Op2
+let Requires = [{ {AArch64::ProcCyclone} }] in
+def : RWSysReg<"CPM_IOACC_CTL_EL3", 0b11, 0b111, 0b1111, 0b0010, 0b000>;
diff --git a/capstone/suite/synctools/tablegen/AArch64/SVEInstrFormats.td b/capstone/suite/synctools/tablegen/AArch64/SVEInstrFormats.td
new file mode 100644
index 000000000..7a8dd8bc5
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/AArch64/SVEInstrFormats.td
@@ -0,0 +1,4456 @@
+//=-- SVEInstrFormats.td - AArch64 SVE Instruction classes -*- tablegen -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// AArch64 Scalable Vector Extension (SVE) Instruction Class Definitions.
+// +//===----------------------------------------------------------------------===// + +def SVEPatternOperand : AsmOperandClass { + let Name = "SVEPattern"; + let ParserMethod = "tryParseSVEPattern"; + let PredicateMethod = "isSVEPattern"; + let RenderMethod = "addImmOperands"; + let DiagnosticType = "InvalidSVEPattern"; +} + +def sve_pred_enum : Operand<i32>, ImmLeaf<i32, [{ + return (((uint32_t)Imm) < 32); + }]> { + + let PrintMethod = "printSVEPattern"; + let ParserMatchClass = SVEPatternOperand; +} + +def SVEPrefetchOperand : AsmOperandClass { + let Name = "SVEPrefetch"; + let ParserMethod = "tryParsePrefetch<true>"; + let PredicateMethod = "isPrefetch"; + let RenderMethod = "addPrefetchOperands"; +} + +def sve_prfop : Operand<i32>, ImmLeaf<i32, [{ + return (((uint32_t)Imm) <= 15); + }]> { + let PrintMethod = "printPrefetchOp<true>"; + let ParserMatchClass = SVEPrefetchOperand; +} + +class SVELogicalImmOperand<int Width> : AsmOperandClass { + let Name = "SVELogicalImm" # Width; + let DiagnosticType = "LogicalSecondSource"; + let PredicateMethod = "isLogicalImm<int" # Width # "_t>"; + let RenderMethod = "addLogicalImmOperands<int" # Width # "_t>"; +} + +def sve_logical_imm8 : Operand<i64> { + let ParserMatchClass = SVELogicalImmOperand<8>; + let PrintMethod = "printLogicalImm<int8_t>"; + + let MCOperandPredicate = [{ + if (!MCOp.isImm()) + return false; + int64_t Val = AArch64_AM::decodeLogicalImmediate(MCOp.getImm(), 64); + return AArch64_AM::isSVEMaskOfIdenticalElements<int8_t>(Val); + }]; +} + +def sve_logical_imm16 : Operand<i64> { + let ParserMatchClass = SVELogicalImmOperand<16>; + let PrintMethod = "printLogicalImm<int16_t>"; + + let MCOperandPredicate = [{ + if (!MCOp.isImm()) + return false; + int64_t Val = AArch64_AM::decodeLogicalImmediate(MCOp.getImm(), 64); + return AArch64_AM::isSVEMaskOfIdenticalElements<int16_t>(Val); + }]; +} + +def sve_logical_imm32 : Operand<i64> { + let ParserMatchClass = SVELogicalImmOperand<32>; + let PrintMethod = "printLogicalImm<int32_t>"; + + let MCOperandPredicate = [{ + if (!MCOp.isImm()) + return false; + int64_t Val = AArch64_AM::decodeLogicalImmediate(MCOp.getImm(), 64); + return AArch64_AM::isSVEMaskOfIdenticalElements<int32_t>(Val); + }]; +} + +class SVEPreferredLogicalImmOperand<int Width> : AsmOperandClass { + let Name = "SVEPreferredLogicalImm" # Width; + let PredicateMethod = "isSVEPreferredLogicalImm<int" # Width # "_t>"; + let RenderMethod = "addLogicalImmOperands<int" # Width # "_t>"; +} + +def sve_preferred_logical_imm16 : Operand<i64> { + let ParserMatchClass = SVEPreferredLogicalImmOperand<16>; + let PrintMethod = "printSVELogicalImm<int16_t>"; + + let MCOperandPredicate = [{ + if (!MCOp.isImm()) + return false; + int64_t Val = AArch64_AM::decodeLogicalImmediate(MCOp.getImm(), 64); + return AArch64_AM::isSVEMaskOfIdenticalElements<int16_t>(Val) && + AArch64_AM::isSVEMoveMaskPreferredLogicalImmediate(Val); + }]; +} + +def sve_preferred_logical_imm32 : Operand<i64> { + let ParserMatchClass = SVEPreferredLogicalImmOperand<32>; + let PrintMethod = "printSVELogicalImm<int32_t>"; + + let MCOperandPredicate = [{ + if (!MCOp.isImm()) + return false; + int64_t Val = AArch64_AM::decodeLogicalImmediate(MCOp.getImm(), 64); + return AArch64_AM::isSVEMaskOfIdenticalElements<int32_t>(Val) && + AArch64_AM::isSVEMoveMaskPreferredLogicalImmediate(Val); + }]; +} + +def sve_preferred_logical_imm64 : Operand<i64> { + let ParserMatchClass = SVEPreferredLogicalImmOperand<64>; + let PrintMethod = "printSVELogicalImm<int64_t>"; + + let 
MCOperandPredicate = [{ + if (!MCOp.isImm()) + return false; + int64_t Val = AArch64_AM::decodeLogicalImmediate(MCOp.getImm(), 64); + return AArch64_AM::isSVEMaskOfIdenticalElements<int64_t>(Val) && + AArch64_AM::isSVEMoveMaskPreferredLogicalImmediate(Val); + }]; +} + +class SVELogicalImmNotOperand<int Width> : AsmOperandClass { + let Name = "SVELogicalImm" # Width # "Not"; + let DiagnosticType = "LogicalSecondSource"; + let PredicateMethod = "isLogicalImm<int" # Width # "_t>"; + let RenderMethod = "addLogicalImmNotOperands<int" # Width # "_t>"; +} + +def sve_logical_imm8_not : Operand<i64> { + let ParserMatchClass = SVELogicalImmNotOperand<8>; +} + +def sve_logical_imm16_not : Operand<i64> { + let ParserMatchClass = SVELogicalImmNotOperand<16>; +} + +def sve_logical_imm32_not : Operand<i64> { + let ParserMatchClass = SVELogicalImmNotOperand<32>; +} + +class SVEShiftedImmOperand<int ElementWidth, string Infix, string Predicate> + : AsmOperandClass { + let Name = "SVE" # Infix # "Imm" # ElementWidth; + let DiagnosticType = "Invalid" # Name; + let RenderMethod = "addImmWithOptionalShiftOperands<8>"; + let ParserMethod = "tryParseImmWithOptionalShift"; + let PredicateMethod = Predicate; +} + +def SVECpyImmOperand8 : SVEShiftedImmOperand<8, "Cpy", "isSVECpyImm<int8_t>">; +def SVECpyImmOperand16 : SVEShiftedImmOperand<16, "Cpy", "isSVECpyImm<int16_t>">; +def SVECpyImmOperand32 : SVEShiftedImmOperand<32, "Cpy", "isSVECpyImm<int32_t>">; +def SVECpyImmOperand64 : SVEShiftedImmOperand<64, "Cpy", "isSVECpyImm<int64_t>">; + +def SVEAddSubImmOperand8 : SVEShiftedImmOperand<8, "AddSub", "isSVEAddSubImm<int8_t>">; +def SVEAddSubImmOperand16 : SVEShiftedImmOperand<16, "AddSub", "isSVEAddSubImm<int16_t>">; +def SVEAddSubImmOperand32 : SVEShiftedImmOperand<32, "AddSub", "isSVEAddSubImm<int32_t>">; +def SVEAddSubImmOperand64 : SVEShiftedImmOperand<64, "AddSub", "isSVEAddSubImm<int64_t>">; + +class imm8_opt_lsl<int ElementWidth, string printType, + AsmOperandClass OpndClass, code Predicate> + : Operand<i32>, ImmLeaf<i32, Predicate> { + let EncoderMethod = "getImm8OptLsl"; + let DecoderMethod = "DecodeImm8OptLsl<" # ElementWidth # ">"; + let PrintMethod = "printImm8OptLsl<" # printType # ">"; + let ParserMatchClass = OpndClass; + let MIOperandInfo = (ops i32imm, i32imm); +} + +def cpy_imm8_opt_lsl_i8 : imm8_opt_lsl<8, "int8_t", SVECpyImmOperand8, [{ + return AArch64_AM::isSVECpyImm<int8_t>(Imm); +}]>; +def cpy_imm8_opt_lsl_i16 : imm8_opt_lsl<16, "int16_t", SVECpyImmOperand16, [{ + return AArch64_AM::isSVECpyImm<int16_t>(Imm); +}]>; +def cpy_imm8_opt_lsl_i32 : imm8_opt_lsl<32, "int32_t", SVECpyImmOperand32, [{ + return AArch64_AM::isSVECpyImm<int32_t>(Imm); +}]>; +def cpy_imm8_opt_lsl_i64 : imm8_opt_lsl<64, "int64_t", SVECpyImmOperand64, [{ + return AArch64_AM::isSVECpyImm<int64_t>(Imm); +}]>; + +def addsub_imm8_opt_lsl_i8 : imm8_opt_lsl<8, "uint8_t", SVEAddSubImmOperand8, [{ + return AArch64_AM::isSVEAddSubImm<int8_t>(Imm); +}]>; +def addsub_imm8_opt_lsl_i16 : imm8_opt_lsl<16, "uint16_t", SVEAddSubImmOperand16, [{ + return AArch64_AM::isSVEAddSubImm<int16_t>(Imm); +}]>; +def addsub_imm8_opt_lsl_i32 : imm8_opt_lsl<32, "uint32_t", SVEAddSubImmOperand32, [{ + return AArch64_AM::isSVEAddSubImm<int32_t>(Imm); +}]>; +def addsub_imm8_opt_lsl_i64 : imm8_opt_lsl<64, "uint64_t", SVEAddSubImmOperand64, [{ + return AArch64_AM::isSVEAddSubImm<int64_t>(Imm); +}]>; + +class SVEExactFPImm<string Suffix, string ValA, string ValB> : AsmOperandClass { + let Name = "SVEExactFPImmOperand" # Suffix; + let DiagnosticType = 
"Invalid" # Name; + let ParserMethod = "tryParseFPImm<false>"; + let PredicateMethod = "isExactFPImm<" # ValA # ", " # ValB # ">"; + let RenderMethod = "addExactFPImmOperands<" # ValA # ", " # ValB # ">"; +} + +class SVEExactFPImmOperand<string Suffix, string ValA, string ValB> : Operand<i32> { + let PrintMethod = "printExactFPImm<" # ValA # ", " # ValB # ">"; + let ParserMatchClass = SVEExactFPImm<Suffix, ValA, ValB>; +} + +def sve_fpimm_half_one + : SVEExactFPImmOperand<"HalfOne", "AArch64ExactFPImm::half", + "AArch64ExactFPImm::one">; +def sve_fpimm_half_two + : SVEExactFPImmOperand<"HalfTwo", "AArch64ExactFPImm::half", + "AArch64ExactFPImm::two">; +def sve_fpimm_zero_one + : SVEExactFPImmOperand<"ZeroOne", "AArch64ExactFPImm::zero", + "AArch64ExactFPImm::one">; + +def sve_incdec_imm : Operand<i32>, ImmLeaf<i32, [{ + return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17); +}]> { + let ParserMatchClass = Imm1_16Operand; + let EncoderMethod = "getSVEIncDecImm"; + let DecoderMethod = "DecodeSVEIncDecImm"; +} + +//===----------------------------------------------------------------------===// +// SVE PTrue - These are used extensively throughout the pattern matching so +// it's important we define them first. +//===----------------------------------------------------------------------===// + +class sve_int_ptrue<bits<2> sz8_64, bits<3> opc, string asm, PPRRegOp pprty> +: I<(outs pprty:$Pd), (ins sve_pred_enum:$pattern), + asm, "\t$Pd, $pattern", + "", + []>, Sched<[]> { + bits<4> Pd; + bits<5> pattern; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = sz8_64; + let Inst{21-19} = 0b011; + let Inst{18-17} = opc{2-1}; + let Inst{16} = opc{0}; + let Inst{15-10} = 0b111000; + let Inst{9-5} = pattern; + let Inst{4} = 0b0; + let Inst{3-0} = Pd; + + let Defs = !if(!eq (opc{0}, 1), [NZCV], []); +} + +multiclass sve_int_ptrue<bits<3> opc, string asm> { + def _B : sve_int_ptrue<0b00, opc, asm, PPR8>; + def _H : sve_int_ptrue<0b01, opc, asm, PPR16>; + def _S : sve_int_ptrue<0b10, opc, asm, PPR32>; + def _D : sve_int_ptrue<0b11, opc, asm, PPR64>; + + def : InstAlias<asm # "\t$Pd", + (!cast<Instruction>(NAME # _B) PPR8:$Pd, 0b11111), 1>; + def : InstAlias<asm # "\t$Pd", + (!cast<Instruction>(NAME # _H) PPR16:$Pd, 0b11111), 1>; + def : InstAlias<asm # "\t$Pd", + (!cast<Instruction>(NAME # _S) PPR32:$Pd, 0b11111), 1>; + def : InstAlias<asm # "\t$Pd", + (!cast<Instruction>(NAME # _D) PPR64:$Pd, 0b11111), 1>; +} + +let Predicates = [HasSVE] in { + defm PTRUE : sve_int_ptrue<0b000, "ptrue">; + defm PTRUES : sve_int_ptrue<0b001, "ptrues">; +} + + +//===----------------------------------------------------------------------===// +// SVE Predicate Misc Group +//===----------------------------------------------------------------------===// + +class sve_int_pfalse<bits<6> opc, string asm> +: I<(outs PPR8:$Pd), (ins), + asm, "\t$Pd", + "", + []>, Sched<[]> { + bits<4> Pd; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = opc{5-4}; + let Inst{21-19} = 0b011; + let Inst{18-16} = opc{3-1}; + let Inst{15-10} = 0b111001; + let Inst{9} = opc{0}; + let Inst{8-4} = 0b00000; + let Inst{3-0} = Pd; +} + +class sve_int_ptest<bits<6> opc, string asm> +: I<(outs), (ins PPRAny:$Pg, PPR8:$Pn), + asm, "\t$Pg, $Pn", + "", + []>, Sched<[]> { + bits<4> Pg; + bits<4> Pn; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = opc{5-4}; + let Inst{21-19} = 0b010; + let Inst{18-16} = opc{3-1}; + let Inst{15-14} = 0b11; + let Inst{13-10} = Pg; + let Inst{9} = opc{0}; + let Inst{8-5} = Pn; + let Inst{4-0} = 0b00000; + + let Defs = [NZCV]; +} 
+ +class sve_int_pfirst_next<bits<2> sz8_64, bits<5> opc, string asm, + PPRRegOp pprty> +: I<(outs pprty:$Pdn), (ins PPRAny:$Pg, pprty:$_Pdn), + asm, "\t$Pdn, $Pg, $_Pdn", + "", + []>, Sched<[]> { + bits<4> Pdn; + bits<4> Pg; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = sz8_64; + let Inst{21-19} = 0b011; + let Inst{18-16} = opc{4-2}; + let Inst{15-11} = 0b11000; + let Inst{10-9} = opc{1-0}; + let Inst{8-5} = Pg; + let Inst{4} = 0; + let Inst{3-0} = Pdn; + + let Constraints = "$Pdn = $_Pdn"; + let Defs = [NZCV]; +} + +multiclass sve_int_pfirst<bits<5> opc, string asm> { + def : sve_int_pfirst_next<0b01, opc, asm, PPR8>; +} + +multiclass sve_int_pnext<bits<5> opc, string asm> { + def _B : sve_int_pfirst_next<0b00, opc, asm, PPR8>; + def _H : sve_int_pfirst_next<0b01, opc, asm, PPR16>; + def _S : sve_int_pfirst_next<0b10, opc, asm, PPR32>; + def _D : sve_int_pfirst_next<0b11, opc, asm, PPR64>; +} + +//===----------------------------------------------------------------------===// +// SVE Predicate Count Group +//===----------------------------------------------------------------------===// + +class sve_int_count_r<bits<2> sz8_64, bits<5> opc, string asm, + RegisterOperand dty, PPRRegOp pprty, RegisterOperand sty> +: I<(outs dty:$Rdn), (ins pprty:$Pg, sty:$_Rdn), + asm, "\t$Rdn, $Pg", + "", + []>, Sched<[]> { + bits<5> Rdn; + bits<4> Pg; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = sz8_64; + let Inst{21-19} = 0b101; + let Inst{18-16} = opc{4-2}; + let Inst{15-11} = 0b10001; + let Inst{10-9} = opc{1-0}; + let Inst{8-5} = Pg; + let Inst{4-0} = Rdn; + + // Signed 32bit forms require their GPR operand printed. + let AsmString = !if(!eq(opc{4,2-0}, 0b0000), + !strconcat(asm, "\t$Rdn, $Pg, $_Rdn"), + !strconcat(asm, "\t$Rdn, $Pg")); + let Constraints = "$Rdn = $_Rdn"; +} + +multiclass sve_int_count_r_s32<bits<5> opc, string asm> { + def _B : sve_int_count_r<0b00, opc, asm, GPR64z, PPR8, GPR64as32>; + def _H : sve_int_count_r<0b01, opc, asm, GPR64z, PPR16, GPR64as32>; + def _S : sve_int_count_r<0b10, opc, asm, GPR64z, PPR32, GPR64as32>; + def _D : sve_int_count_r<0b11, opc, asm, GPR64z, PPR64, GPR64as32>; +} + +multiclass sve_int_count_r_u32<bits<5> opc, string asm> { + def _B : sve_int_count_r<0b00, opc, asm, GPR32z, PPR8, GPR32z>; + def _H : sve_int_count_r<0b01, opc, asm, GPR32z, PPR16, GPR32z>; + def _S : sve_int_count_r<0b10, opc, asm, GPR32z, PPR32, GPR32z>; + def _D : sve_int_count_r<0b11, opc, asm, GPR32z, PPR64, GPR32z>; +} + +multiclass sve_int_count_r_x64<bits<5> opc, string asm> { + def _B : sve_int_count_r<0b00, opc, asm, GPR64z, PPR8, GPR64z>; + def _H : sve_int_count_r<0b01, opc, asm, GPR64z, PPR16, GPR64z>; + def _S : sve_int_count_r<0b10, opc, asm, GPR64z, PPR32, GPR64z>; + def _D : sve_int_count_r<0b11, opc, asm, GPR64z, PPR64, GPR64z>; +} + +class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm, + ZPRRegOp zprty> +: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, PPRAny:$Pg), + asm, "\t$Zdn, $Pg", + "", + []>, Sched<[]> { + bits<4> Pg; + bits<5> Zdn; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = sz8_64; + let Inst{21-19} = 0b101; + let Inst{18-16} = opc{4-2}; + let Inst{15-11} = 0b10000; + let Inst{10-9} = opc{1-0}; + let Inst{8-5} = Pg; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve_int_count_v<bits<5> opc, string asm> { + def _H : sve_int_count_v<0b01, opc, asm, ZPR16>; + def _S : sve_int_count_v<0b10, opc, asm, ZPR32>; + def _D : 
sve_int_count_v<0b11, opc, asm, ZPR64>; +} + +class sve_int_pcount_pred<bits<2> sz8_64, bits<4> opc, string asm, + PPRRegOp pprty> +: I<(outs GPR64:$Rd), (ins PPRAny:$Pg, pprty:$Pn), + asm, "\t$Rd, $Pg, $Pn", + "", + []>, Sched<[]> { + bits<4> Pg; + bits<4> Pn; + bits<5> Rd; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = sz8_64; + let Inst{21-19} = 0b100; + let Inst{18-16} = opc{3-1}; + let Inst{15-14} = 0b10; + let Inst{13-10} = Pg; + let Inst{9} = opc{0}; + let Inst{8-5} = Pn; + let Inst{4-0} = Rd; +} + +multiclass sve_int_pcount_pred<bits<4> opc, string asm> { + def _B : sve_int_pcount_pred<0b00, opc, asm, PPR8>; + def _H : sve_int_pcount_pred<0b01, opc, asm, PPR16>; + def _S : sve_int_pcount_pred<0b10, opc, asm, PPR32>; + def _D : sve_int_pcount_pred<0b11, opc, asm, PPR64>; +} + +//===----------------------------------------------------------------------===// +// SVE Element Count Group +//===----------------------------------------------------------------------===// + +class sve_int_count<bits<3> opc, string asm> +: I<(outs GPR64:$Rd), (ins sve_pred_enum:$pattern, sve_incdec_imm:$imm4), + asm, "\t$Rd, $pattern, mul $imm4", + "", + []>, Sched<[]> { + bits<5> Rd; + bits<4> imm4; + bits<5> pattern; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = opc{2-1}; + let Inst{21-20} = 0b10; + let Inst{19-16} = imm4; + let Inst{15-11} = 0b11100; + let Inst{10} = opc{0}; + let Inst{9-5} = pattern; + let Inst{4-0} = Rd; +} + +multiclass sve_int_count<bits<3> opc, string asm> { + def NAME : sve_int_count<opc, asm>; + + def : InstAlias<asm # "\t$Rd, $pattern", + (!cast<Instruction>(NAME) GPR64:$Rd, sve_pred_enum:$pattern, 1), 1>; + def : InstAlias<asm # "\t$Rd", + (!cast<Instruction>(NAME) GPR64:$Rd, 0b11111, 1), 2>; +} + +class sve_int_countvlv<bits<5> opc, string asm, ZPRRegOp zprty> +: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, sve_pred_enum:$pattern, sve_incdec_imm:$imm4), + asm, "\t$Zdn, $pattern, mul $imm4", + "", + []>, Sched<[]> { + bits<5> Zdn; + bits<5> pattern; + bits<4> imm4; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = opc{4-3}; + let Inst{21} = 0b1; + let Inst{20} = opc{2}; + let Inst{19-16} = imm4; + let Inst{15-12} = 0b1100; + let Inst{11-10} = opc{1-0}; + let Inst{9-5} = pattern; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve_int_countvlv<bits<5> opc, string asm, ZPRRegOp zprty> { + def NAME : sve_int_countvlv<opc, asm, zprty>; + + def : InstAlias<asm # "\t$Zdn, $pattern", + (!cast<Instruction>(NAME) zprty:$Zdn, sve_pred_enum:$pattern, 1), 1>; + def : InstAlias<asm # "\t$Zdn", + (!cast<Instruction>(NAME) zprty:$Zdn, 0b11111, 1), 2>; +} + +class sve_int_pred_pattern_a<bits<3> opc, string asm> +: I<(outs GPR64:$Rdn), (ins GPR64:$_Rdn, sve_pred_enum:$pattern, sve_incdec_imm:$imm4), + asm, "\t$Rdn, $pattern, mul $imm4", + "", + []>, Sched<[]> { + bits<5> Rdn; + bits<5> pattern; + bits<4> imm4; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = opc{2-1}; + let Inst{21-20} = 0b11; + let Inst{19-16} = imm4; + let Inst{15-11} = 0b11100; + let Inst{10} = opc{0}; + let Inst{9-5} = pattern; + let Inst{4-0} = Rdn; + + let Constraints = "$Rdn = $_Rdn"; +} + +multiclass sve_int_pred_pattern_a<bits<3> opc, string asm> { + def NAME : sve_int_pred_pattern_a<opc, asm>; + + def : InstAlias<asm # "\t$Rdn, $pattern", + (!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, 1), 1>; + def : InstAlias<asm # "\t$Rdn", + (!cast<Instruction>(NAME) GPR64:$Rdn, 0b11111, 1), 2>; +} + 
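// A brief usage sketch for the element-count aliases defined above
// (sve_int_count and sve_int_pred_pattern_a): the first alias drops the
// "mul" multiplier and the second also drops the pattern. Assuming 0b11111
// is the "all" pattern and these multiclasses are instantiated elsewhere for
// mnemonics such as cntb/incb, the following spellings are equivalent:
//
//   cntb x0                  // pattern and multiplier both defaulted
//   cntb x0, all             // multiplier defaulted to mul #1
//   cntb x0, all, mul #1     // full form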
+class sve_int_pred_pattern_b<bits<5> opc, string asm, RegisterOperand dt, + RegisterOperand st> +: I<(outs dt:$Rdn), (ins st:$_Rdn, sve_pred_enum:$pattern, sve_incdec_imm:$imm4), + asm, "\t$Rdn, $pattern, mul $imm4", + "", + []>, Sched<[]> { + bits<5> Rdn; + bits<5> pattern; + bits<4> imm4; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = opc{4-3}; + let Inst{21} = 0b1; + let Inst{20} = opc{2}; + let Inst{19-16} = imm4; + let Inst{15-12} = 0b1111; + let Inst{11-10} = opc{1-0}; + let Inst{9-5} = pattern; + let Inst{4-0} = Rdn; + + // Signed 32bit forms require their GPR operand printed. + let AsmString = !if(!eq(opc{2,0}, 0b00), + !strconcat(asm, "\t$Rdn, $_Rdn, $pattern, mul $imm4"), + !strconcat(asm, "\t$Rdn, $pattern, mul $imm4")); + + let Constraints = "$Rdn = $_Rdn"; +} + +multiclass sve_int_pred_pattern_b_s32<bits<5> opc, string asm> { + def NAME : sve_int_pred_pattern_b<opc, asm, GPR64z, GPR64as32>; + + def : InstAlias<asm # "\t$Rd, $Rn, $pattern", + (!cast<Instruction>(NAME) GPR64z:$Rd, GPR64as32:$Rn, sve_pred_enum:$pattern, 1), 1>; + def : InstAlias<asm # "\t$Rd, $Rn", + (!cast<Instruction>(NAME) GPR64z:$Rd, GPR64as32:$Rn, 0b11111, 1), 2>; +} + +multiclass sve_int_pred_pattern_b_u32<bits<5> opc, string asm> { + def NAME : sve_int_pred_pattern_b<opc, asm, GPR32z, GPR32z>; + + def : InstAlias<asm # "\t$Rdn, $pattern", + (!cast<Instruction>(NAME) GPR32z:$Rdn, sve_pred_enum:$pattern, 1), 1>; + def : InstAlias<asm # "\t$Rdn", + (!cast<Instruction>(NAME) GPR32z:$Rdn, 0b11111, 1), 2>; +} + +multiclass sve_int_pred_pattern_b_x64<bits<5> opc, string asm> { + def NAME : sve_int_pred_pattern_b<opc, asm, GPR64z, GPR64z>; + + def : InstAlias<asm # "\t$Rdn, $pattern", + (!cast<Instruction>(NAME) GPR64z:$Rdn, sve_pred_enum:$pattern, 1), 1>; + def : InstAlias<asm # "\t$Rdn", + (!cast<Instruction>(NAME) GPR64z:$Rdn, 0b11111, 1), 2>; +} + + +//===----------------------------------------------------------------------===// +// SVE Permute - Cross Lane Group +//===----------------------------------------------------------------------===// + +class sve_int_perm_dup_r<bits<2> sz8_64, string asm, ZPRRegOp zprty, + RegisterClass srcRegType> +: I<(outs zprty:$Zd), (ins srcRegType:$Rn), + asm, "\t$Zd, $Rn", + "", + []>, Sched<[]> { + bits<5> Rn; + bits<5> Zd; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-10} = 0b100000001110; + let Inst{9-5} = Rn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_perm_dup_r<string asm> { + def _B : sve_int_perm_dup_r<0b00, asm, ZPR8, GPR32sp>; + def _H : sve_int_perm_dup_r<0b01, asm, ZPR16, GPR32sp>; + def _S : sve_int_perm_dup_r<0b10, asm, ZPR32, GPR32sp>; + def _D : sve_int_perm_dup_r<0b11, asm, ZPR64, GPR64sp>; + + def : InstAlias<"mov $Zd, $Rn", + (!cast<Instruction>(NAME # _B) ZPR8:$Zd, GPR32sp:$Rn), 1>; + def : InstAlias<"mov $Zd, $Rn", + (!cast<Instruction>(NAME # _H) ZPR16:$Zd, GPR32sp:$Rn), 1>; + def : InstAlias<"mov $Zd, $Rn", + (!cast<Instruction>(NAME # _S) ZPR32:$Zd, GPR32sp:$Rn), 1>; + def : InstAlias<"mov $Zd, $Rn", + (!cast<Instruction>(NAME # _D) ZPR64:$Zd, GPR64sp:$Rn), 1>; +} + +class sve_int_perm_dup_i<bits<5> tsz, Operand immtype, string asm, + ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins zprty:$Zn, immtype:$idx), + asm, "\t$Zd, $Zn$idx", + "", + []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + bits<7> idx; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = {?,?}; // imm3h + let Inst{21} = 0b1; + let Inst{20-16} = tsz; + let Inst{15-10} = 0b001000; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass 
sve_int_perm_dup_i<string asm> { + def _B : sve_int_perm_dup_i<{?,?,?,?,1}, sve_elm_idx_extdup_b, asm, ZPR8> { + let Inst{23-22} = idx{5-4}; + let Inst{20-17} = idx{3-0}; + } + def _H : sve_int_perm_dup_i<{?,?,?,1,0}, sve_elm_idx_extdup_h, asm, ZPR16> { + let Inst{23-22} = idx{4-3}; + let Inst{20-18} = idx{2-0}; + } + def _S : sve_int_perm_dup_i<{?,?,1,0,0}, sve_elm_idx_extdup_s, asm, ZPR32> { + let Inst{23-22} = idx{3-2}; + let Inst{20-19} = idx{1-0}; + } + def _D : sve_int_perm_dup_i<{?,1,0,0,0}, sve_elm_idx_extdup_d, asm, ZPR64> { + let Inst{23-22} = idx{2-1}; + let Inst{20} = idx{0}; + } + def _Q : sve_int_perm_dup_i<{1,0,0,0,0}, sve_elm_idx_extdup_q, asm, ZPR128> { + let Inst{23-22} = idx{1-0}; + } + + def : InstAlias<"mov $Zd, $Zn$idx", + (!cast<Instruction>(NAME # _B) ZPR8:$Zd, ZPR8:$Zn, sve_elm_idx_extdup_b:$idx), 1>; + def : InstAlias<"mov $Zd, $Zn$idx", + (!cast<Instruction>(NAME # _H) ZPR16:$Zd, ZPR16:$Zn, sve_elm_idx_extdup_h:$idx), 1>; + def : InstAlias<"mov $Zd, $Zn$idx", + (!cast<Instruction>(NAME # _S) ZPR32:$Zd, ZPR32:$Zn, sve_elm_idx_extdup_s:$idx), 1>; + def : InstAlias<"mov $Zd, $Zn$idx", + (!cast<Instruction>(NAME # _D) ZPR64:$Zd, ZPR64:$Zn, sve_elm_idx_extdup_d:$idx), 1>; + def : InstAlias<"mov $Zd, $Zn$idx", + (!cast<Instruction>(NAME # _Q) ZPR128:$Zd, ZPR128:$Zn, sve_elm_idx_extdup_q:$idx), 1>; + def : InstAlias<"mov $Zd, $Bn", + (!cast<Instruction>(NAME # _B) ZPR8:$Zd, FPR8asZPR:$Bn, 0), 2>; + def : InstAlias<"mov $Zd, $Hn", + (!cast<Instruction>(NAME # _H) ZPR16:$Zd, FPR16asZPR:$Hn, 0), 2>; + def : InstAlias<"mov $Zd, $Sn", + (!cast<Instruction>(NAME # _S) ZPR32:$Zd, FPR32asZPR:$Sn, 0), 2>; + def : InstAlias<"mov $Zd, $Dn", + (!cast<Instruction>(NAME # _D) ZPR64:$Zd, FPR64asZPR:$Dn, 0), 2>; + def : InstAlias<"mov $Zd, $Qn", + (!cast<Instruction>(NAME # _Q) ZPR128:$Zd, FPR128asZPR:$Qn, 0), 2>; +} + +class sve_int_perm_tbl<bits<2> sz8_64, string asm, ZPRRegOp zprty, + RegisterOperand VecList> +: I<(outs zprty:$Zd), (ins VecList:$Zn, zprty:$Zm), + asm, "\t$Zd, $Zn, $Zm", + "", + []>, Sched<[]> { + bits<5> Zd; + bits<5> Zm; + bits<5> Zn; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{15-10} = 0b001100; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_perm_tbl<string asm> { + def _B : sve_int_perm_tbl<0b00, asm, ZPR8, Z_b>; + def _H : sve_int_perm_tbl<0b01, asm, ZPR16, Z_h>; + def _S : sve_int_perm_tbl<0b10, asm, ZPR32, Z_s>; + def _D : sve_int_perm_tbl<0b11, asm, ZPR64, Z_d>; + + def : InstAlias<asm # "\t$Zd, $Zn, $Zm", + (!cast<Instruction>(NAME # _B) ZPR8:$Zd, ZPR8:$Zn, ZPR8:$Zm), 0>; + def : InstAlias<asm # "\t$Zd, $Zn, $Zm", + (!cast<Instruction>(NAME # _H) ZPR16:$Zd, ZPR16:$Zn, ZPR16:$Zm), 0>; + def : InstAlias<asm # "\t$Zd, $Zn, $Zm", + (!cast<Instruction>(NAME # _S) ZPR32:$Zd, ZPR32:$Zn, ZPR32:$Zm), 0>; + def : InstAlias<asm # "\t$Zd, $Zn, $Zm", + (!cast<Instruction>(NAME # _D) ZPR64:$Zd, ZPR64:$Zn, ZPR64:$Zm), 0>; +} + +class sve_int_perm_reverse_z<bits<2> sz8_64, string asm, ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins zprty:$Zn), + asm, "\t$Zd, $Zn", + "", + []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-10} = 0b111000001110; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_perm_reverse_z<string asm> { + def _B : sve_int_perm_reverse_z<0b00, asm, ZPR8>; + def _H : sve_int_perm_reverse_z<0b01, asm, ZPR16>; + def _S : sve_int_perm_reverse_z<0b10, asm, ZPR32>; + def _D : 
sve_int_perm_reverse_z<0b11, asm, ZPR64>; +} + +class sve_int_perm_reverse_p<bits<2> sz8_64, string asm, PPRRegOp pprty> +: I<(outs pprty:$Pd), (ins pprty:$Pn), + asm, "\t$Pd, $Pn", + "", + []>, Sched<[]> { + bits<4> Pd; + bits<4> Pn; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-9} = 0b1101000100000; + let Inst{8-5} = Pn; + let Inst{4} = 0b0; + let Inst{3-0} = Pd; +} + +multiclass sve_int_perm_reverse_p<string asm> { + def _B : sve_int_perm_reverse_p<0b00, asm, PPR8>; + def _H : sve_int_perm_reverse_p<0b01, asm, PPR16>; + def _S : sve_int_perm_reverse_p<0b10, asm, PPR32>; + def _D : sve_int_perm_reverse_p<0b11, asm, PPR64>; +} + +class sve_int_perm_unpk<bits<2> sz16_64, bits<2> opc, string asm, + ZPRRegOp zprty1, ZPRRegOp zprty2> +: I<(outs zprty1:$Zd), (ins zprty2:$Zn), + asm, "\t$Zd, $Zn", + "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz16_64; + let Inst{21-18} = 0b1100; + let Inst{17-16} = opc; + let Inst{15-10} = 0b001110; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_perm_unpk<bits<2> opc, string asm> { + def _H : sve_int_perm_unpk<0b01, opc, asm, ZPR16, ZPR8>; + def _S : sve_int_perm_unpk<0b10, opc, asm, ZPR32, ZPR16>; + def _D : sve_int_perm_unpk<0b11, opc, asm, ZPR64, ZPR32>; +} + +class sve_int_perm_insrs<bits<2> sz8_64, string asm, ZPRRegOp zprty, + RegisterClass srcRegType> +: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, srcRegType:$Rm), + asm, "\t$Zdn, $Rm", + "", + []>, Sched<[]> { + bits<5> Rm; + bits<5> Zdn; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-10} = 0b100100001110; + let Inst{9-5} = Rm; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve_int_perm_insrs<string asm> { + def _B : sve_int_perm_insrs<0b00, asm, ZPR8, GPR32>; + def _H : sve_int_perm_insrs<0b01, asm, ZPR16, GPR32>; + def _S : sve_int_perm_insrs<0b10, asm, ZPR32, GPR32>; + def _D : sve_int_perm_insrs<0b11, asm, ZPR64, GPR64>; +} + +class sve_int_perm_insrv<bits<2> sz8_64, string asm, ZPRRegOp zprty, + RegisterClass srcRegType> +: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, srcRegType:$Vm), + asm, "\t$Zdn, $Vm", + "", + []>, Sched<[]> { + bits<5> Vm; + bits<5> Zdn; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-10} = 0b110100001110; + let Inst{9-5} = Vm; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve_int_perm_insrv<string asm> { + def _B : sve_int_perm_insrv<0b00, asm, ZPR8, FPR8>; + def _H : sve_int_perm_insrv<0b01, asm, ZPR16, FPR16>; + def _S : sve_int_perm_insrv<0b10, asm, ZPR32, FPR32>; + def _D : sve_int_perm_insrv<0b11, asm, ZPR64, FPR64>; +} + +//===----------------------------------------------------------------------===// +// SVE Permute - Extract Group +//===----------------------------------------------------------------------===// + +class sve_int_perm_extract_i<string asm> +: I<(outs ZPR8:$Zdn), (ins ZPR8:$_Zdn, ZPR8:$Zm, imm0_255:$imm8), + asm, "\t$Zdn, $_Zdn, $Zm, $imm8", + "", []>, Sched<[]> { + bits<5> Zdn; + bits<5> Zm; + bits<8> imm8; + let Inst{31-21} = 0b00000101001; + let Inst{20-16} = imm8{7-3}; + let Inst{15-13} = 0b000; + let Inst{12-10} = imm8{2-0}; + let Inst{9-5} = Zm; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + 
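// A worked example of the split byte offset in sve_int_perm_extract_i above:
// the 8-bit imm8 is scattered across two fields, with imm8{7-3} landing in
// Inst{20-16} and imm8{2-0} in Inst{12-10}. For imm8 = 22 = 0b00010110 this
// gives Inst{20-16} = 0b00010 and Inst{12-10} = 0b110.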
+//===----------------------------------------------------------------------===// +// SVE Vector Select Group +//===----------------------------------------------------------------------===// + +class sve_int_sel_vvv<bits<2> sz8_64, string asm, ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins PPRAny:$Pg, zprty:$Zn, zprty:$Zm), + asm, "\t$Zd, $Pg, $Zn, $Zm", + "", + []>, Sched<[]> { + bits<4> Pg; + bits<5> Zd; + bits<5> Zm; + bits<5> Zn; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{15-14} = 0b11; + let Inst{13-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_sel_vvv<string asm> { + def _B : sve_int_sel_vvv<0b00, asm, ZPR8>; + def _H : sve_int_sel_vvv<0b01, asm, ZPR16>; + def _S : sve_int_sel_vvv<0b10, asm, ZPR32>; + def _D : sve_int_sel_vvv<0b11, asm, ZPR64>; + + def : InstAlias<"mov $Zd, $Pg/m, $Zn", + (!cast<Instruction>(NAME # _B) ZPR8:$Zd, PPRAny:$Pg, ZPR8:$Zn, ZPR8:$Zd), 1>; + def : InstAlias<"mov $Zd, $Pg/m, $Zn", + (!cast<Instruction>(NAME # _H) ZPR16:$Zd, PPRAny:$Pg, ZPR16:$Zn, ZPR16:$Zd), 1>; + def : InstAlias<"mov $Zd, $Pg/m, $Zn", + (!cast<Instruction>(NAME # _S) ZPR32:$Zd, PPRAny:$Pg, ZPR32:$Zn, ZPR32:$Zd), 1>; + def : InstAlias<"mov $Zd, $Pg/m, $Zn", + (!cast<Instruction>(NAME # _D) ZPR64:$Zd, PPRAny:$Pg, ZPR64:$Zn, ZPR64:$Zd), 1>; +} + + +//===----------------------------------------------------------------------===// +// SVE Predicate Logical Operations Group +//===----------------------------------------------------------------------===// + +class sve_int_pred_log<bits<4> opc, string asm> +: I<(outs PPR8:$Pd), (ins PPRAny:$Pg, PPR8:$Pn, PPR8:$Pm), + asm, "\t$Pd, $Pg/z, $Pn, $Pm", + "", + []>, Sched<[]> { + bits<4> Pd; + bits<4> Pg; + bits<4> Pm; + bits<4> Pn; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = opc{3-2}; + let Inst{21-20} = 0b00; + let Inst{19-16} = Pm; + let Inst{15-14} = 0b01; + let Inst{13-10} = Pg; + let Inst{9} = opc{1}; + let Inst{8-5} = Pn; + let Inst{4} = opc{0}; + let Inst{3-0} = Pd; + + // SEL has no predication qualifier. 
+ let AsmString = !if(!eq(opc, 0b0011), + !strconcat(asm, "\t$Pd, $Pg, $Pn, $Pm"), + !strconcat(asm, "\t$Pd, $Pg/z, $Pn, $Pm")); + + let Defs = !if(!eq (opc{2}, 1), [NZCV], []); +} + + +//===----------------------------------------------------------------------===// +// SVE Logical Mask Immediate Group +//===----------------------------------------------------------------------===// + +class sve_int_log_imm<bits<2> opc, string asm> +: I<(outs ZPR64:$Zdn), (ins ZPR64:$_Zdn, logical_imm64:$imms13), + asm, "\t$Zdn, $_Zdn, $imms13", + "", []>, Sched<[]> { + bits<5> Zdn; + bits<13> imms13; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = opc; + let Inst{21-18} = 0b0000; + let Inst{17-5} = imms13; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DecoderMethod = "DecodeSVELogicalImmInstruction"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve_int_log_imm<bits<2> opc, string asm, string alias> { + def NAME : sve_int_log_imm<opc, asm>; + + def : InstAlias<asm # "\t$Zdn, $Zdn, $imm", + (!cast<Instruction>(NAME) ZPR8:$Zdn, sve_logical_imm8:$imm), 4>; + def : InstAlias<asm # "\t$Zdn, $Zdn, $imm", + (!cast<Instruction>(NAME) ZPR16:$Zdn, sve_logical_imm16:$imm), 3>; + def : InstAlias<asm # "\t$Zdn, $Zdn, $imm", + (!cast<Instruction>(NAME) ZPR32:$Zdn, sve_logical_imm32:$imm), 2>; + + def : InstAlias<alias # "\t$Zdn, $Zdn, $imm", + (!cast<Instruction>(NAME) ZPR8:$Zdn, sve_logical_imm8_not:$imm), 0>; + def : InstAlias<alias # "\t$Zdn, $Zdn, $imm", + (!cast<Instruction>(NAME) ZPR16:$Zdn, sve_logical_imm16_not:$imm), 0>; + def : InstAlias<alias # "\t$Zdn, $Zdn, $imm", + (!cast<Instruction>(NAME) ZPR32:$Zdn, sve_logical_imm32_not:$imm), 0>; + def : InstAlias<alias # "\t$Zdn, $Zdn, $imm", + (!cast<Instruction>(NAME) ZPR64:$Zdn, logical_imm64_not:$imm), 0>; +} + +class sve_int_dup_mask_imm<string asm> +: I<(outs ZPR64:$Zd), (ins logical_imm64:$imms), + asm, "\t$Zd, $imms", + "", + []>, Sched<[]> { + bits<5> Zd; + bits<13> imms; + let Inst{31-18} = 0b00000101110000; + let Inst{17-5} = imms; + let Inst{4-0} = Zd; + + let isReMaterializable = 1; + let DecoderMethod = "DecodeSVELogicalImmInstruction"; +} + +multiclass sve_int_dup_mask_imm<string asm> { + def NAME : sve_int_dup_mask_imm<asm>; + + def : InstAlias<"dupm $Zd, $imm", + (!cast<Instruction>(NAME) ZPR8:$Zd, sve_logical_imm8:$imm), 4>; + def : InstAlias<"dupm $Zd, $imm", + (!cast<Instruction>(NAME) ZPR16:$Zd, sve_logical_imm16:$imm), 3>; + def : InstAlias<"dupm $Zd, $imm", + (!cast<Instruction>(NAME) ZPR32:$Zd, sve_logical_imm32:$imm), 2>; + + // All Zd.b forms have a CPY/DUP equivalent, hence no byte alias here. + def : InstAlias<"mov $Zd, $imm", + (!cast<Instruction>(NAME) ZPR16:$Zd, sve_preferred_logical_imm16:$imm), 7>; + def : InstAlias<"mov $Zd, $imm", + (!cast<Instruction>(NAME) ZPR32:$Zd, sve_preferred_logical_imm32:$imm), 6>; + def : InstAlias<"mov $Zd, $imm", + (!cast<Instruction>(NAME) ZPR64:$Zd, sve_preferred_logical_imm64:$imm), 5>; +} + +//===----------------------------------------------------------------------===// +// SVE Integer Arithmetic - Unpredicated Group. 
+//===----------------------------------------------------------------------===// + +class sve_int_bin_cons_arit_0<bits<2> sz8_64, bits<3> opc, string asm, + ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm), + asm, "\t$Zd, $Zn, $Zm", + "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Zm; + bits<5> Zn; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{15-13} = 0b000; + let Inst{12-10} = opc; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_bin_cons_arit_0<bits<3> opc, string asm> { + def _B : sve_int_bin_cons_arit_0<0b00, opc, asm, ZPR8>; + def _H : sve_int_bin_cons_arit_0<0b01, opc, asm, ZPR16>; + def _S : sve_int_bin_cons_arit_0<0b10, opc, asm, ZPR32>; + def _D : sve_int_bin_cons_arit_0<0b11, opc, asm, ZPR64>; +} + +//===----------------------------------------------------------------------===// +// SVE Floating Point Arithmetic - Predicated Group +//===----------------------------------------------------------------------===// + +class sve_fp_2op_i_p_zds<bits<2> sz, bits<3> opc, string asm, + ZPRRegOp zprty, + Operand imm_ty> +: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, imm_ty:$i1), + asm, "\t$Zdn, $Pg/m, $_Zdn, $i1", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zdn; + bit i1; + let Inst{31-24} = 0b01100101; + let Inst{23-22} = sz; + let Inst{21-19} = 0b011; + let Inst{18-16} = opc; + let Inst{15-13} = 0b100; + let Inst{12-10} = Pg; + let Inst{9-6} = 0b0000; + let Inst{5} = i1; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_fp_2op_i_p_zds<bits<3> opc, string asm, Operand imm_ty> { + def _H : sve_fp_2op_i_p_zds<0b01, opc, asm, ZPR16, imm_ty>; + def _S : sve_fp_2op_i_p_zds<0b10, opc, asm, ZPR32, imm_ty>; + def _D : sve_fp_2op_i_p_zds<0b11, opc, asm, ZPR64, imm_ty>; +} + +class sve_fp_2op_p_zds<bits<2> sz, bits<4> opc, string asm, + ZPRRegOp zprty> +: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm), + asm, "\t$Zdn, $Pg/m, $_Zdn, $Zm", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zdn; + bits<5> Zm; + let Inst{31-24} = 0b01100101; + let Inst{23-22} = sz; + let Inst{21-20} = 0b00; + let Inst{19-16} = opc; + let Inst{15-13} = 0b100; + let Inst{12-10} = Pg; + let Inst{9-5} = Zm; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_fp_2op_p_zds<bits<4> opc, string asm> { + def _H : sve_fp_2op_p_zds<0b01, opc, asm, ZPR16>; + def _S : sve_fp_2op_p_zds<0b10, opc, asm, ZPR32>; + def _D : sve_fp_2op_p_zds<0b11, opc, asm, ZPR64>; +} + +class sve_fp_ftmad<bits<2> sz, string asm, ZPRRegOp zprty> +: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, imm0_7:$imm3), + asm, "\t$Zdn, $_Zdn, $Zm, $imm3", + "", + []>, Sched<[]> { + bits<5> Zdn; + bits<5> Zm; + bits<3> imm3; + let Inst{31-24} = 0b01100101; + let Inst{23-22} = sz; + let Inst{21-19} = 0b010; + let Inst{18-16} = imm3; + let Inst{15-10} = 0b100000; + let Inst{9-5} = Zm; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve_fp_ftmad<string asm> { + def _H : sve_fp_ftmad<0b01, asm, ZPR16>; + def _S : sve_fp_ftmad<0b10, asm, ZPR32>; + def _D : sve_fp_ftmad<0b11, asm, ZPR64>; +} + + +//===----------------------------------------------------------------------===// +// SVE Floating Point Arithmetic - 
Unpredicated Group +//===----------------------------------------------------------------------===// + +class sve_fp_3op_u_zd<bits<2> sz, bits<3> opc, string asm, + ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm), + asm, "\t$Zd, $Zn, $Zm", + "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Zm; + bits<5> Zn; + let Inst{31-24} = 0b01100101; + let Inst{23-22} = sz; + let Inst{21} = 0b0; + let Inst{20-16} = Zm; + let Inst{15-13} = 0b000; + let Inst{12-10} = opc; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve_fp_3op_u_zd<bits<3> opc, string asm> { + def _H : sve_fp_3op_u_zd<0b01, opc, asm, ZPR16>; + def _S : sve_fp_3op_u_zd<0b10, opc, asm, ZPR32>; + def _D : sve_fp_3op_u_zd<0b11, opc, asm, ZPR64>; +} + +//===----------------------------------------------------------------------===// +// SVE Floating Point Fused Multiply-Add Group +//===----------------------------------------------------------------------===// + +class sve_fp_3op_p_zds_a<bits<2> sz, bits<2> opc, string asm, ZPRRegOp zprty> +: I<(outs zprty:$Zda), (ins PPR3bAny:$Pg, zprty:$_Zda, zprty:$Zn, zprty:$Zm), + asm, "\t$Zda, $Pg/m, $Zn, $Zm", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zda; + bits<5> Zm; + bits<5> Zn; + let Inst{31-24} = 0b01100101; + let Inst{23-22} = sz; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{15} = 0b0; + let Inst{14-13} = opc; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Zda; + + let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_fp_3op_p_zds_a<bits<2> opc, string asm> { + def _H : sve_fp_3op_p_zds_a<0b01, opc, asm, ZPR16>; + def _S : sve_fp_3op_p_zds_a<0b10, opc, asm, ZPR32>; + def _D : sve_fp_3op_p_zds_a<0b11, opc, asm, ZPR64>; +} + +class sve_fp_3op_p_zds_b<bits<2> sz, bits<2> opc, string asm, + ZPRRegOp zprty> +: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm, zprty:$Za), + asm, "\t$Zdn, $Pg/m, $Zm, $Za", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Za; + bits<5> Zdn; + bits<5> Zm; + let Inst{31-24} = 0b01100101; + let Inst{23-22} = sz; + let Inst{21} = 0b1; + let Inst{20-16} = Za; + let Inst{15} = 0b1; + let Inst{14-13} = opc; + let Inst{12-10} = Pg; + let Inst{9-5} = Zm; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_fp_3op_p_zds_b<bits<2> opc, string asm> { + def _H : sve_fp_3op_p_zds_b<0b01, opc, asm, ZPR16>; + def _S : sve_fp_3op_p_zds_b<0b10, opc, asm, ZPR32>; + def _D : sve_fp_3op_p_zds_b<0b11, opc, asm, ZPR64>; +} + +//===----------------------------------------------------------------------===// +// SVE Floating Point Multiply-Add - Indexed Group +//===----------------------------------------------------------------------===// + +class sve_fp_fma_by_indexed_elem<bits<2> sz, bit opc, string asm, + ZPRRegOp zprty1, + ZPRRegOp zprty2, Operand itype> +: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty1:$Zn, zprty2:$Zm, itype:$iop), + asm, "\t$Zda, $Zn, $Zm$iop", "", []>, Sched<[]> { + bits<5> Zda; + bits<5> Zn; + let Inst{31-24} = 0b01100100; + let Inst{23-22} = sz; + let Inst{21} = 0b1; + let Inst{15-11} = 0; + let Inst{10} = opc; + let Inst{9-5} = Zn; + let Inst{4-0} = Zda; + + let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve_fp_fma_by_indexed_elem<bit opc, string asm> { + def _H : sve_fp_fma_by_indexed_elem<{0, ?}, opc, asm, ZPR16, 
ZPR3b16, VectorIndexH> { + bits<3> Zm; + bits<3> iop; + let Inst{22} = iop{2}; + let Inst{20-19} = iop{1-0}; + let Inst{18-16} = Zm; + } + def _S : sve_fp_fma_by_indexed_elem<0b10, opc, asm, ZPR32, ZPR3b32, VectorIndexS> { + bits<3> Zm; + bits<2> iop; + let Inst{20-19} = iop; + let Inst{18-16} = Zm; + } + def _D : sve_fp_fma_by_indexed_elem<0b11, opc, asm, ZPR64, ZPR4b64, VectorIndexD> { + bits<4> Zm; + bit iop; + let Inst{20} = iop; + let Inst{19-16} = Zm; + } +} + + +//===----------------------------------------------------------------------===// +// SVE Floating Point Multiply - Indexed Group +//===----------------------------------------------------------------------===// + +class sve_fp_fmul_by_indexed_elem<bits<2> sz, string asm, ZPRRegOp zprty, + ZPRRegOp zprty2, Operand itype> +: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty2:$Zm, itype:$iop), + asm, "\t$Zd, $Zn, $Zm$iop", "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + let Inst{31-24} = 0b01100100; + let Inst{23-22} = sz; + let Inst{21} = 0b1; + let Inst{15-10} = 0b001000; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve_fp_fmul_by_indexed_elem<string asm> { + def _H : sve_fp_fmul_by_indexed_elem<{0, ?}, asm, ZPR16, ZPR3b16, VectorIndexH> { + bits<3> Zm; + bits<3> iop; + let Inst{22} = iop{2}; + let Inst{20-19} = iop{1-0}; + let Inst{18-16} = Zm; + } + def _S : sve_fp_fmul_by_indexed_elem<0b10, asm, ZPR32, ZPR3b32, VectorIndexS> { + bits<3> Zm; + bits<2> iop; + let Inst{20-19} = iop; + let Inst{18-16} = Zm; + } + def _D : sve_fp_fmul_by_indexed_elem<0b11, asm, ZPR64, ZPR4b64, VectorIndexD> { + bits<4> Zm; + bit iop; + let Inst{20} = iop; + let Inst{19-16} = Zm; + } +} + +//===----------------------------------------------------------------------===// +// SVE Floating Point Complex Multiply-Add Group +//===----------------------------------------------------------------------===// + +class sve_fp_fcmla<bits<2> sz, string asm, ZPRRegOp zprty> +: I<(outs zprty:$Zda), (ins PPR3bAny:$Pg, zprty:$_Zda, zprty:$Zn, zprty:$Zm, + complexrotateop:$imm), + asm, "\t$Zda, $Pg/m, $Zn, $Zm, $imm", + "", []>, Sched<[]> { + bits<5> Zda; + bits<3> Pg; + bits<5> Zn; + bits<5> Zm; + bits<2> imm; + let Inst{31-24} = 0b01100100; + let Inst{23-22} = sz; + let Inst{21} = 0; + let Inst{20-16} = Zm; + let Inst{15} = 0; + let Inst{14-13} = imm; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Zda; + + let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_fp_fcmla<string asm> { + def _H : sve_fp_fcmla<0b01, asm, ZPR16>; + def _S : sve_fp_fcmla<0b10, asm, ZPR32>; + def _D : sve_fp_fcmla<0b11, asm, ZPR64>; +} + +//===----------------------------------------------------------------------===// +// SVE Floating Point Complex Multiply-Add - Indexed Group +//===----------------------------------------------------------------------===// + +class sve_fp_fcmla_by_indexed_elem<bits<2> sz, string asm, + ZPRRegOp zprty, + ZPRRegOp zprty2, Operand itype> +: I<(outs zprty:$Zda), (ins zprty:$_Zda, zprty:$Zn, zprty2:$Zm, itype:$iop, + complexrotateop:$imm), + asm, "\t$Zda, $Zn, $Zm$iop, $imm", + "", []>, Sched<[]> { + bits<5> Zda; + bits<5> Zn; + bits<2> imm; + let Inst{31-24} = 0b01100100; + let Inst{23-22} = sz; + let Inst{21} = 0b1; + let Inst{15-12} = 0b0001; + let Inst{11-10} = imm; + let Inst{9-5} = Zn; + let Inst{4-0} = Zda; + + let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass 
sve_fp_fcmla_by_indexed_elem<string asm> { + def _H : sve_fp_fcmla_by_indexed_elem<0b10, asm, ZPR16, ZPR3b16, VectorIndexS> { + bits<3> Zm; + bits<2> iop; + let Inst{20-19} = iop; + let Inst{18-16} = Zm; + } + def _S : sve_fp_fcmla_by_indexed_elem<0b11, asm, ZPR32, ZPR4b32, VectorIndexD> { + bits<4> Zm; + bits<1> iop; + let Inst{20} = iop; + let Inst{19-16} = Zm; + } +} + +//===----------------------------------------------------------------------===// +// SVE Floating Point Complex Addition Group +//===----------------------------------------------------------------------===// + +class sve_fp_fcadd<bits<2> sz, string asm, ZPRRegOp zprty> +: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm, + complexrotateopodd:$imm), + asm, "\t$Zdn, $Pg/m, $_Zdn, $Zm, $imm", + "", + []>, Sched<[]> { + bits<5> Zdn; + bits<5> Zm; + bits<3> Pg; + bit imm; + let Inst{31-24} = 0b01100100; + let Inst{23-22} = sz; + let Inst{21-17} = 0; + let Inst{16} = imm; + let Inst{15-13} = 0b100; + let Inst{12-10} = Pg; + let Inst{9-5} = Zm; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_fp_fcadd<string asm> { + def _H : sve_fp_fcadd<0b01, asm, ZPR16>; + def _S : sve_fp_fcadd<0b10, asm, ZPR32>; + def _D : sve_fp_fcadd<0b11, asm, ZPR64>; +} + +//===----------------------------------------------------------------------===// +// SVE Stack Allocation Group +//===----------------------------------------------------------------------===// + +class sve_int_arith_vl<bit opc, string asm> +: I<(outs GPR64sp:$Rd), (ins GPR64sp:$Rn, simm6_32b:$imm6), + asm, "\t$Rd, $Rn, $imm6", + "", + []>, Sched<[]> { + bits<5> Rd; + bits<5> Rn; + bits<6> imm6; + let Inst{31-23} = 0b000001000; + let Inst{22} = opc; + let Inst{21} = 0b1; + let Inst{20-16} = Rn; + let Inst{15-11} = 0b01010; + let Inst{10-5} = imm6; + let Inst{4-0} = Rd; +} + +class sve_int_read_vl_a<bit op, bits<5> opc2, string asm> +: I<(outs GPR64:$Rd), (ins simm6_32b:$imm6), + asm, "\t$Rd, $imm6", + "", + []>, Sched<[]> { + bits<5> Rd; + bits<6> imm6; + let Inst{31-23} = 0b000001001; + let Inst{22} = op; + let Inst{21} = 0b1; + let Inst{20-16} = opc2{4-0}; + let Inst{15-11} = 0b01010; + let Inst{10-5} = imm6; + let Inst{4-0} = Rd; +} + +//===----------------------------------------------------------------------===// +// SVE Permute - In Lane Group +//===----------------------------------------------------------------------===// + +class sve_int_perm_bin_perm_zz<bits<3> opc, bits<2> sz8_64, string asm, + ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm), + asm, "\t$Zd, $Zn, $Zm", + "", + []>, Sched<[]> { + bits<5> Zd; + bits<5> Zm; + bits<5> Zn; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{15-13} = 0b011; + let Inst{12-10} = opc; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_perm_bin_perm_zz<bits<3> opc, string asm> { + def _B : sve_int_perm_bin_perm_zz<opc, 0b00, asm, ZPR8>; + def _H : sve_int_perm_bin_perm_zz<opc, 0b01, asm, ZPR16>; + def _S : sve_int_perm_bin_perm_zz<opc, 0b10, asm, ZPR32>; + def _D : sve_int_perm_bin_perm_zz<opc, 0b11, asm, ZPR64>; +} + +//===----------------------------------------------------------------------===// +// SVE Floating Point Unary Operations Group +//===----------------------------------------------------------------------===// + +class sve_fp_2op_p_zd<bits<7> opc, string asm, RegisterOperand i_zprtype, + 
RegisterOperand o_zprtype, ElementSizeEnum size> +: I<(outs o_zprtype:$Zd), (ins i_zprtype:$_Zd, PPR3bAny:$Pg, i_zprtype:$Zn), + asm, "\t$Zd, $Pg/m, $Zn", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zd; + bits<5> Zn; + let Inst{31-24} = 0b01100101; + let Inst{23-22} = opc{6-5}; + let Inst{21} = 0b0; + let Inst{20-16} = opc{4-0}; + let Inst{15-13} = 0b101; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = Destructive; + let ElementSize = size; +} + +multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm> { + def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>; + def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>; + def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>; +} + +//===----------------------------------------------------------------------===// +// SVE Floating Point Unary Operations - Unpredicated Group +//===----------------------------------------------------------------------===// + +class sve_fp_2op_u_zd<bits<2> sz, bits<3> opc, string asm, + ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins zprty:$Zn), + asm, "\t$Zd, $Zn", + "", + []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + let Inst{31-24} = 0b01100101; + let Inst{23-22} = sz; + let Inst{21-19} = 0b001; + let Inst{18-16} = opc; + let Inst{15-10} = 0b001100; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve_fp_2op_u_zd<bits<3> opc, string asm> { + def _H : sve_fp_2op_u_zd<0b01, opc, asm, ZPR16>; + def _S : sve_fp_2op_u_zd<0b10, opc, asm, ZPR32>; + def _D : sve_fp_2op_u_zd<0b11, opc, asm, ZPR64>; +} + +//===----------------------------------------------------------------------===// +// SVE Integer Arithmetic - Binary Predicated Group +//===----------------------------------------------------------------------===// + +class sve_int_bin_pred_arit_log<bits<2> sz8_64, bits<2> fmt, bits<3> opc, + string asm, ZPRRegOp zprty> +: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm), + asm, "\t$Zdn, $Pg/m, $_Zdn, $Zm", "", []>, Sched<[]> { + bits<3> Pg; + bits<5> Zdn; + bits<5> Zm; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b0; + let Inst{20-19} = fmt; + let Inst{18-16} = opc; + let Inst{15-13} = 0b000; + let Inst{12-10} = Pg; + let Inst{9-5} = Zm; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_int_bin_pred_log<bits<3> opc, string asm> { + def _B : sve_int_bin_pred_arit_log<0b00, 0b11, opc, asm, ZPR8>; + def _H : sve_int_bin_pred_arit_log<0b01, 0b11, opc, asm, ZPR16>; + def _S : sve_int_bin_pred_arit_log<0b10, 0b11, opc, asm, ZPR32>; + def _D : sve_int_bin_pred_arit_log<0b11, 0b11, opc, asm, ZPR64>; +} + +multiclass sve_int_bin_pred_arit_0<bits<3> opc, string asm> { + def _B : sve_int_bin_pred_arit_log<0b00, 0b00, opc, asm, ZPR8>; + def _H : sve_int_bin_pred_arit_log<0b01, 0b00, opc, asm, ZPR16>; + def _S : sve_int_bin_pred_arit_log<0b10, 0b00, opc, asm, ZPR32>; + def _D : sve_int_bin_pred_arit_log<0b11, 0b00, opc, asm, ZPR64>; +} + +multiclass sve_int_bin_pred_arit_1<bits<3> opc, string asm> { + def _B : sve_int_bin_pred_arit_log<0b00, 0b01, opc, asm, ZPR8>; + def _H : sve_int_bin_pred_arit_log<0b01, 0b01, opc, asm, ZPR16>; + def _S : sve_int_bin_pred_arit_log<0b10, 0b01, opc, asm, ZPR32>; + def _D : sve_int_bin_pred_arit_log<0b11, 0b01, opc, asm, ZPR64>; +} + +multiclass sve_int_bin_pred_arit_2<bits<3> opc, string 
asm> { + def _B : sve_int_bin_pred_arit_log<0b00, 0b10, opc, asm, ZPR8>; + def _H : sve_int_bin_pred_arit_log<0b01, 0b10, opc, asm, ZPR16>; + def _S : sve_int_bin_pred_arit_log<0b10, 0b10, opc, asm, ZPR32>; + def _D : sve_int_bin_pred_arit_log<0b11, 0b10, opc, asm, ZPR64>; +} + +// Special case for divides which are not defined for 8b/16b elements. +multiclass sve_int_bin_pred_arit_2_div<bits<3> opc, string asm> { + def _S : sve_int_bin_pred_arit_log<0b10, 0b10, opc, asm, ZPR32>; + def _D : sve_int_bin_pred_arit_log<0b11, 0b10, opc, asm, ZPR64>; +} + +//===----------------------------------------------------------------------===// +// SVE Integer Multiply-Add Group +//===----------------------------------------------------------------------===// + +class sve_int_mladdsub_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm, + ZPRRegOp zprty> +: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm, zprty:$Za), + asm, "\t$Zdn, $Pg/m, $Zm, $Za", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zdn; + bits<5> Za; + bits<5> Zm; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b0; + let Inst{20-16} = Zm; + let Inst{15-14} = 0b11; + let Inst{13} = opc; + let Inst{12-10} = Pg; + let Inst{9-5} = Za; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_int_mladdsub_vvv_pred<bits<1> opc, string asm> { + def _B : sve_int_mladdsub_vvv_pred<0b00, opc, asm, ZPR8>; + def _H : sve_int_mladdsub_vvv_pred<0b01, opc, asm, ZPR16>; + def _S : sve_int_mladdsub_vvv_pred<0b10, opc, asm, ZPR32>; + def _D : sve_int_mladdsub_vvv_pred<0b11, opc, asm, ZPR64>; +} + +class sve_int_mlas_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm, + ZPRRegOp zprty> +: I<(outs zprty:$Zda), (ins PPR3bAny:$Pg, zprty:$_Zda, zprty:$Zn, zprty:$Zm), + asm, "\t$Zda, $Pg/m, $Zn, $Zm", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zda; + bits<5> Zm; + bits<5> Zn; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b0; + let Inst{20-16} = Zm; + let Inst{15-14} = 0b01; + let Inst{13} = opc; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Zda; + + let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm> { + def _B : sve_int_mlas_vvv_pred<0b00, opc, asm, ZPR8>; + def _H : sve_int_mlas_vvv_pred<0b01, opc, asm, ZPR16>; + def _S : sve_int_mlas_vvv_pred<0b10, opc, asm, ZPR32>; + def _D : sve_int_mlas_vvv_pred<0b11, opc, asm, ZPR64>; +} + +//===----------------------------------------------------------------------===// +// SVE Integer Dot Product Group +//===----------------------------------------------------------------------===// + +class sve_intx_dot<bit sz, bit U, string asm, ZPRRegOp zprty1, + ZPRRegOp zprty2> +: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty2:$Zm), asm, + "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> { + bits<5> Zda; + bits<5> Zn; + bits<5> Zm; + let Inst{31-23} = 0b010001001; + let Inst{22} = sz; + let Inst{21} = 0; + let Inst{20-16} = Zm; + let Inst{15-11} = 0; + let Inst{10} = U; + let Inst{9-5} = Zn; + let Inst{4-0} = Zda; + + let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = Destructive; + let ElementSize = zprty1.ElementSize; +} + +multiclass sve_intx_dot<bit opc, string asm> { + def _S : sve_intx_dot<0b0, opc, asm, ZPR32, ZPR8>; + def _D : sve_intx_dot<0b1, opc, asm, ZPR64, ZPR16>; +} + 
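// Note on operand widths in sve_intx_dot above: each destination element
// accumulates a dot product over a group of four narrower source elements,
// which is why the _S form pairs a ZPR32 accumulator with ZPR8 sources and
// the _D form pairs ZPR64 with ZPR16. The concrete mnemonics (presumably
// sdot/udot, selected by the U bit) come from instantiations outside this
// excerpt.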
+//===----------------------------------------------------------------------===// +// SVE Integer Dot Product Group - Indexed Group +//===----------------------------------------------------------------------===// + +class sve_intx_dot_by_indexed_elem<bit sz, bit U, string asm, + ZPRRegOp zprty1, ZPRRegOp zprty2, + ZPRRegOp zprty3, Operand itype> +: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty3:$Zm, itype:$iop), + asm, "\t$Zda, $Zn, $Zm$iop", + "", []>, Sched<[]> { + bits<5> Zda; + bits<5> Zn; + let Inst{31-23} = 0b010001001; + let Inst{22} = sz; + let Inst{21} = 0b1; + let Inst{15-11} = 0; + let Inst{10} = U; + let Inst{9-5} = Zn; + let Inst{4-0} = Zda; + + let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm> { + def _S : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS> { + bits<2> iop; + bits<3> Zm; + let Inst{20-19} = iop; + let Inst{18-16} = Zm; + } + def _D : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD> { + bits<1> iop; + bits<4> Zm; + let Inst{20} = iop; + let Inst{19-16} = Zm; + } +} + +//===----------------------------------------------------------------------===// +// SVE Integer Arithmetic - Unary Predicated Group +//===----------------------------------------------------------------------===// + +class sve_int_un_pred_arit<bits<2> sz8_64, bits<4> opc, + string asm, ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins zprty:$_Zd, PPR3bAny:$Pg, zprty:$Zn), + asm, "\t$Zd, $Pg/m, $Zn", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zd; + bits<5> Zn; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = sz8_64; + let Inst{21-20} = 0b01; + let Inst{19} = opc{0}; + let Inst{18-16} = opc{3-1}; + let Inst{15-13} = 0b101; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_int_un_pred_arit_0<bits<3> opc, string asm> { + def _B : sve_int_un_pred_arit<0b00, { opc, 0b0 }, asm, ZPR8>; + def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>; + def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>; + def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>; +} + +multiclass sve_int_un_pred_arit_0_h<bits<3> opc, string asm> { + def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>; + def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>; + def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>; +} + +multiclass sve_int_un_pred_arit_0_w<bits<3> opc, string asm> { + def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>; + def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>; +} + +multiclass sve_int_un_pred_arit_0_d<bits<3> opc, string asm> { + def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>; +} + +multiclass sve_int_un_pred_arit_1<bits<3> opc, string asm> { + def _B : sve_int_un_pred_arit<0b00, { opc, 0b1 }, asm, ZPR8>; + def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>; + def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>; + def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>; +} + +multiclass sve_int_un_pred_arit_1_fp<bits<3> opc, string asm> { + def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>; + def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>; + def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>; +} + 
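// A worked example of the bit concatenation used by the
// sve_int_un_pred_arit_* multiclasses above: passing { opc, 0b1 } appends a
// low bit to the 3-bit opc, so opc = 0b110 becomes 0b1101 inside the class,
// which then places opc{0} = 1 in Inst{19} and opc{3-1} = 0b110 in
// Inst{18-16}.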
+//===----------------------------------------------------------------------===// +// SVE Integer Wide Immediate - Unpredicated Group +//===----------------------------------------------------------------------===// +class sve_int_dup_imm<bits<2> sz8_64, string asm, + ZPRRegOp zprty, Operand immtype> +: I<(outs zprty:$Zd), (ins immtype:$imm), + asm, "\t$Zd, $imm", + "", + []>, Sched<[]> { + bits<5> Zd; + bits<9> imm; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = sz8_64; + let Inst{21-14} = 0b11100011; + let Inst{13} = imm{8}; // sh + let Inst{12-5} = imm{7-0}; // imm8 + let Inst{4-0} = Zd; + + let isReMaterializable = 1; +} + +multiclass sve_int_dup_imm<string asm> { + def _B : sve_int_dup_imm<0b00, asm, ZPR8, cpy_imm8_opt_lsl_i8>; + def _H : sve_int_dup_imm<0b01, asm, ZPR16, cpy_imm8_opt_lsl_i16>; + def _S : sve_int_dup_imm<0b10, asm, ZPR32, cpy_imm8_opt_lsl_i32>; + def _D : sve_int_dup_imm<0b11, asm, ZPR64, cpy_imm8_opt_lsl_i64>; + + def : InstAlias<"mov $Zd, $imm", + (!cast<Instruction>(NAME # _B) ZPR8:$Zd, cpy_imm8_opt_lsl_i8:$imm), 1>; + def : InstAlias<"mov $Zd, $imm", + (!cast<Instruction>(NAME # _H) ZPR16:$Zd, cpy_imm8_opt_lsl_i16:$imm), 1>; + def : InstAlias<"mov $Zd, $imm", + (!cast<Instruction>(NAME # _S) ZPR32:$Zd, cpy_imm8_opt_lsl_i32:$imm), 1>; + def : InstAlias<"mov $Zd, $imm", + (!cast<Instruction>(NAME # _D) ZPR64:$Zd, cpy_imm8_opt_lsl_i64:$imm), 1>; + + def : InstAlias<"fmov $Zd, #0.0", + (!cast<Instruction>(NAME # _H) ZPR16:$Zd, 0, 0), 1>; + def : InstAlias<"fmov $Zd, #0.0", + (!cast<Instruction>(NAME # _S) ZPR32:$Zd, 0, 0), 1>; + def : InstAlias<"fmov $Zd, #0.0", + (!cast<Instruction>(NAME # _D) ZPR64:$Zd, 0, 0), 1>; +} + +class sve_int_dup_fpimm<bits<2> sz8_64, Operand fpimmtype, + string asm, ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins fpimmtype:$imm8), + asm, "\t$Zd, $imm8", + "", + []>, Sched<[]> { + bits<5> Zd; + bits<8> imm8; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = sz8_64; + let Inst{21-14} = 0b11100111; + let Inst{13} = 0b0; + let Inst{12-5} = imm8; + let Inst{4-0} = Zd; + + let isReMaterializable = 1; +} + +multiclass sve_int_dup_fpimm<string asm> { + def _H : sve_int_dup_fpimm<0b01, fpimm16, asm, ZPR16>; + def _S : sve_int_dup_fpimm<0b10, fpimm32, asm, ZPR32>; + def _D : sve_int_dup_fpimm<0b11, fpimm64, asm, ZPR64>; + + def : InstAlias<"fmov $Zd, $imm8", + (!cast<Instruction>(NAME # _H) ZPR16:$Zd, fpimm16:$imm8), 1>; + def : InstAlias<"fmov $Zd, $imm8", + (!cast<Instruction>(NAME # _S) ZPR32:$Zd, fpimm32:$imm8), 1>; + def : InstAlias<"fmov $Zd, $imm8", + (!cast<Instruction>(NAME # _D) ZPR64:$Zd, fpimm64:$imm8), 1>; +} + +class sve_int_arith_imm0<bits<2> sz8_64, bits<3> opc, string asm, + ZPRRegOp zprty, Operand immtype> +: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, immtype:$imm), + asm, "\t$Zdn, $_Zdn, $imm", + "", + []>, Sched<[]> { + bits<5> Zdn; + bits<9> imm; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = sz8_64; + let Inst{21-19} = 0b100; + let Inst{18-16} = opc; + let Inst{15-14} = 0b11; + let Inst{13} = imm{8}; // sh + let Inst{12-5} = imm{7-0}; // imm8 + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve_int_arith_imm0<bits<3> opc, string asm> { + def _B : sve_int_arith_imm0<0b00, opc, asm, ZPR8, addsub_imm8_opt_lsl_i8>; + def _H : sve_int_arith_imm0<0b01, opc, asm, ZPR16, addsub_imm8_opt_lsl_i16>; + def _S : sve_int_arith_imm0<0b10, opc, asm, ZPR32, addsub_imm8_opt_lsl_i32>; + def _D : sve_int_arith_imm0<0b11, opc, asm, ZPR64, 
addsub_imm8_opt_lsl_i64>; +} + +class sve_int_arith_imm<bits<2> sz8_64, bits<6> opc, string asm, + ZPRRegOp zprty, Operand immtype> +: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, immtype:$imm), + asm, "\t$Zdn, $_Zdn, $imm", + "", + []>, Sched<[]> { + bits<5> Zdn; + bits<8> imm; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = sz8_64; + let Inst{21-16} = opc; + let Inst{15-13} = 0b110; + let Inst{12-5} = imm; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve_int_arith_imm1<bits<2> opc, string asm, Operand immtype> { + def _B : sve_int_arith_imm<0b00, { 0b1010, opc }, asm, ZPR8, immtype>; + def _H : sve_int_arith_imm<0b01, { 0b1010, opc }, asm, ZPR16, immtype>; + def _S : sve_int_arith_imm<0b10, { 0b1010, opc }, asm, ZPR32, immtype>; + def _D : sve_int_arith_imm<0b11, { 0b1010, opc }, asm, ZPR64, immtype>; +} + +multiclass sve_int_arith_imm2<string asm> { + def _B : sve_int_arith_imm<0b00, 0b110000, asm, ZPR8, simm8>; + def _H : sve_int_arith_imm<0b01, 0b110000, asm, ZPR16, simm8>; + def _S : sve_int_arith_imm<0b10, 0b110000, asm, ZPR32, simm8>; + def _D : sve_int_arith_imm<0b11, 0b110000, asm, ZPR64, simm8>; +} + +//===----------------------------------------------------------------------===// +// SVE Bitwise Logical - Unpredicated Group +//===----------------------------------------------------------------------===// + +class sve_int_bin_cons_log<bits<2> opc, string asm> +: I<(outs ZPR64:$Zd), (ins ZPR64:$Zn, ZPR64:$Zm), + asm, "\t$Zd, $Zn, $Zm", + "", + []>, Sched<[]> { + bits<5> Zd; + bits<5> Zm; + bits<5> Zn; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = opc{1-0}; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{15-10} = 0b001100; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + + +//===----------------------------------------------------------------------===// +// SVE Integer Wide Immediate - Predicated Group +//===----------------------------------------------------------------------===// + +class sve_int_dup_fpimm_pred<bits<2> sz, Operand fpimmtype, + string asm, ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins zprty:$_Zd, PPRAny:$Pg, fpimmtype:$imm8), + asm, "\t$Zd, $Pg/m, $imm8", + "", + []>, Sched<[]> { + bits<4> Pg; + bits<5> Zd; + bits<8> imm8; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz; + let Inst{21-20} = 0b01; + let Inst{19-16} = Pg; + let Inst{15-13} = 0b110; + let Inst{12-5} = imm8; + let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_int_dup_fpimm_pred<string asm> { + def _H : sve_int_dup_fpimm_pred<0b01, fpimm16, asm, ZPR16>; + def _S : sve_int_dup_fpimm_pred<0b10, fpimm32, asm, ZPR32>; + def _D : sve_int_dup_fpimm_pred<0b11, fpimm64, asm, ZPR64>; + + def : InstAlias<"fmov $Zd, $Pg/m, $imm8", + (!cast<Instruction>(NAME # _H) ZPR16:$Zd, PPRAny:$Pg, fpimm16:$imm8), 1>; + def : InstAlias<"fmov $Zd, $Pg/m, $imm8", + (!cast<Instruction>(NAME # _S) ZPR32:$Zd, PPRAny:$Pg, fpimm32:$imm8), 1>; + def : InstAlias<"fmov $Zd, $Pg/m, $imm8", + (!cast<Instruction>(NAME # _D) ZPR64:$Zd, PPRAny:$Pg, fpimm64:$imm8), 1>; +} + +class sve_int_dup_imm_pred<bits<2> sz8_64, bit m, string asm, + ZPRRegOp zprty, string pred_qual, dag iops> +: I<(outs zprty:$Zd), iops, + asm, "\t$Zd, $Pg"#pred_qual#", $imm", + "", []>, Sched<[]> { + bits<5> Zd; + bits<4> Pg; + bits<9> imm; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-20} = 0b01; + let 
Inst{19-16} = Pg; + let Inst{15} = 0b0; + let Inst{14} = m; + let Inst{13} = imm{8}; // sh + let Inst{12-5} = imm{7-0}; // imm8 + let Inst{4-0} = Zd; + + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_int_dup_imm_pred_merge<string asm> { + let Constraints = "$Zd = $_Zd" in { + def _B : sve_int_dup_imm_pred<0b00, 1, asm, ZPR8, "/m", (ins ZPR8:$_Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i8:$imm)>; + def _H : sve_int_dup_imm_pred<0b01, 1, asm, ZPR16, "/m", (ins ZPR16:$_Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i16:$imm)>; + def _S : sve_int_dup_imm_pred<0b10, 1, asm, ZPR32, "/m", (ins ZPR32:$_Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i32:$imm)>; + def _D : sve_int_dup_imm_pred<0b11, 1, asm, ZPR64, "/m", (ins ZPR64:$_Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i64:$imm)>; + } + + def : InstAlias<"mov $Zd, $Pg/m, $imm", + (!cast<Instruction>(NAME # _B) ZPR8:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i8:$imm), 1>; + def : InstAlias<"mov $Zd, $Pg/m, $imm", + (!cast<Instruction>(NAME # _H) ZPR16:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i16:$imm), 1>; + def : InstAlias<"mov $Zd, $Pg/m, $imm", + (!cast<Instruction>(NAME # _S) ZPR32:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i32:$imm), 1>; + def : InstAlias<"mov $Zd, $Pg/m, $imm", + (!cast<Instruction>(NAME # _D) ZPR64:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i64:$imm), 1>; + + def : InstAlias<"fmov $Zd, $Pg/m, #0.0", + (!cast<Instruction>(NAME # _H) ZPR16:$Zd, PPRAny:$Pg, 0, 0), 0>; + def : InstAlias<"fmov $Zd, $Pg/m, #0.0", + (!cast<Instruction>(NAME # _S) ZPR32:$Zd, PPRAny:$Pg, 0, 0), 0>; + def : InstAlias<"fmov $Zd, $Pg/m, #0.0", + (!cast<Instruction>(NAME # _D) ZPR64:$Zd, PPRAny:$Pg, 0, 0), 0>; +} + +multiclass sve_int_dup_imm_pred_zero<string asm> { + def _B : sve_int_dup_imm_pred<0b00, 0, asm, ZPR8, "/z", (ins PPRAny:$Pg, cpy_imm8_opt_lsl_i8:$imm)>; + def _H : sve_int_dup_imm_pred<0b01, 0, asm, ZPR16, "/z", (ins PPRAny:$Pg, cpy_imm8_opt_lsl_i16:$imm)>; + def _S : sve_int_dup_imm_pred<0b10, 0, asm, ZPR32, "/z", (ins PPRAny:$Pg, cpy_imm8_opt_lsl_i32:$imm)>; + def _D : sve_int_dup_imm_pred<0b11, 0, asm, ZPR64, "/z", (ins PPRAny:$Pg, cpy_imm8_opt_lsl_i64:$imm)>; + + def : InstAlias<"mov $Zd, $Pg/z, $imm", + (!cast<Instruction>(NAME # _B) ZPR8:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i8:$imm), 1>; + def : InstAlias<"mov $Zd, $Pg/z, $imm", + (!cast<Instruction>(NAME # _H) ZPR16:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i16:$imm), 1>; + def : InstAlias<"mov $Zd, $Pg/z, $imm", + (!cast<Instruction>(NAME # _S) ZPR32:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i32:$imm), 1>; + def : InstAlias<"mov $Zd, $Pg/z, $imm", + (!cast<Instruction>(NAME # _D) ZPR64:$Zd, PPRAny:$Pg, cpy_imm8_opt_lsl_i64:$imm), 1>; +} + +//===----------------------------------------------------------------------===// +// SVE Integer Compare - Vectors Group +//===----------------------------------------------------------------------===// + +class sve_int_cmp<bit cmp_1, bits<2> sz8_64, bits<3> opc, string asm, + PPRRegOp pprty, ZPRRegOp zprty1, ZPRRegOp zprty2> +: I<(outs pprty:$Pd), (ins PPR3bAny:$Pg, zprty1:$Zn, zprty2:$Zm), + asm, "\t$Pd, $Pg/z, $Zn, $Zm", + "", + []>, Sched<[]> { + bits<4> Pd; + bits<3> Pg; + bits<5> Zm; + bits<5> Zn; + let Inst{31-24} = 0b00100100; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b0; + let Inst{20-16} = Zm; + let Inst{15} = opc{2}; + let Inst{14} = cmp_1; + let Inst{13} = opc{1}; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4} = opc{0}; + let Inst{3-0} = Pd; + + let Defs = [NZCV]; +} + +multiclass sve_int_cmp_0<bits<3> opc, string asm> { + def _B : sve_int_cmp<0b0, 0b00, opc, 
asm, PPR8, ZPR8, ZPR8>; + def _H : sve_int_cmp<0b0, 0b01, opc, asm, PPR16, ZPR16, ZPR16>; + def _S : sve_int_cmp<0b0, 0b10, opc, asm, PPR32, ZPR32, ZPR32>; + def _D : sve_int_cmp<0b0, 0b11, opc, asm, PPR64, ZPR64, ZPR64>; +} + +multiclass sve_int_cmp_0_wide<bits<3> opc, string asm> { + def _B : sve_int_cmp<0b0, 0b00, opc, asm, PPR8, ZPR8, ZPR64>; + def _H : sve_int_cmp<0b0, 0b01, opc, asm, PPR16, ZPR16, ZPR64>; + def _S : sve_int_cmp<0b0, 0b10, opc, asm, PPR32, ZPR32, ZPR64>; +} + +multiclass sve_int_cmp_1_wide<bits<3> opc, string asm> { + def _B : sve_int_cmp<0b1, 0b00, opc, asm, PPR8, ZPR8, ZPR64>; + def _H : sve_int_cmp<0b1, 0b01, opc, asm, PPR16, ZPR16, ZPR64>; + def _S : sve_int_cmp<0b1, 0b10, opc, asm, PPR32, ZPR32, ZPR64>; +} + + +//===----------------------------------------------------------------------===// +// SVE Integer Compare - Signed Immediate Group +//===----------------------------------------------------------------------===// + +class sve_int_scmp_vi<bits<2> sz8_64, bits<3> opc, string asm, PPRRegOp pprty, + ZPRRegOp zprty, + Operand immtype> +: I<(outs pprty:$Pd), (ins PPR3bAny:$Pg, zprty:$Zn, immtype:$imm5), + asm, "\t$Pd, $Pg/z, $Zn, $imm5", + "", + []>, Sched<[]> { + bits<4> Pd; + bits<3> Pg; + bits<5> Zn; + bits<5> imm5; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b0; + let Inst{20-16} = imm5; + let Inst{15} = opc{2}; + let Inst{14} = 0b0; + let Inst{13} = opc{1}; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4} = opc{0}; + let Inst{3-0} = Pd; + + let Defs = [NZCV]; +} + +multiclass sve_int_scmp_vi<bits<3> opc, string asm> { + def _B : sve_int_scmp_vi<0b00, opc, asm, PPR8, ZPR8, simm5_32b>; + def _H : sve_int_scmp_vi<0b01, opc, asm, PPR16, ZPR16, simm5_32b>; + def _S : sve_int_scmp_vi<0b10, opc, asm, PPR32, ZPR32, simm5_32b>; + def _D : sve_int_scmp_vi<0b11, opc, asm, PPR64, ZPR64, simm5_64b>; +} + + +//===----------------------------------------------------------------------===// +// SVE Integer Compare - Unsigned Immediate Group +//===----------------------------------------------------------------------===// + +class sve_int_ucmp_vi<bits<2> sz8_64, bits<2> opc, string asm, PPRRegOp pprty, + ZPRRegOp zprty, Operand immtype> +: I<(outs pprty:$Pd), (ins PPR3bAny:$Pg, zprty:$Zn, immtype:$imm7), + asm, "\t$Pd, $Pg/z, $Zn, $imm7", + "", + []>, Sched<[]> { + bits<4> Pd; + bits<3> Pg; + bits<5> Zn; + bits<7> imm7; + let Inst{31-24} = 0b00100100; + let Inst{23-22} = sz8_64; + let Inst{21} = 1; + let Inst{20-14} = imm7; + let Inst{13} = opc{1}; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4} = opc{0}; + let Inst{3-0} = Pd; + + let Defs = [NZCV]; +} + +multiclass sve_int_ucmp_vi<bits<2> opc, string asm> { + def _B : sve_int_ucmp_vi<0b00, opc, asm, PPR8, ZPR8, imm0_127>; + def _H : sve_int_ucmp_vi<0b01, opc, asm, PPR16, ZPR16, imm0_127>; + def _S : sve_int_ucmp_vi<0b10, opc, asm, PPR32, ZPR32, imm0_127>; + def _D : sve_int_ucmp_vi<0b11, opc, asm, PPR64, ZPR64, imm0_127>; +} + + +//===----------------------------------------------------------------------===// +// SVE Integer Compare - Scalars Group +//===----------------------------------------------------------------------===// + +class sve_int_cterm<bit sz, bit opc, string asm, RegisterClass rt> +: I<(outs), (ins rt:$Rn, rt:$Rm), + asm, "\t$Rn, $Rm", + "", + []>, Sched<[]> { + bits<5> Rm; + bits<5> Rn; + let Inst{31-23} = 0b001001011; + let Inst{22} = sz; + let Inst{21} = 0b1; + let Inst{20-16} = Rm; + let Inst{15-10} = 0b001000; + let Inst{9-5} = Rn; + let Inst{4} = 
opc; + let Inst{3-0} = 0b0000; + + let Defs = [NZCV]; +} + +class sve_int_while_rr<bits<2> sz8_64, bits<4> opc, string asm, + RegisterClass gprty, PPRRegOp pprty> +: I<(outs pprty:$Pd), (ins gprty:$Rn, gprty:$Rm), + asm, "\t$Pd, $Rn, $Rm", + "", []>, Sched<[]> { + bits<4> Pd; + bits<5> Rm; + bits<5> Rn; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b1; + let Inst{20-16} = Rm; + let Inst{15-13} = 0b000; + let Inst{12-10} = opc{3-1}; + let Inst{9-5} = Rn; + let Inst{4} = opc{0}; + let Inst{3-0} = Pd; + + let Defs = [NZCV]; +} + +multiclass sve_int_while4_rr<bits<3> opc, string asm> { + def _B : sve_int_while_rr<0b00, { 0, opc }, asm, GPR32, PPR8>; + def _H : sve_int_while_rr<0b01, { 0, opc }, asm, GPR32, PPR16>; + def _S : sve_int_while_rr<0b10, { 0, opc }, asm, GPR32, PPR32>; + def _D : sve_int_while_rr<0b11, { 0, opc }, asm, GPR32, PPR64>; +} + +multiclass sve_int_while8_rr<bits<3> opc, string asm> { + def _B : sve_int_while_rr<0b00, { 1, opc }, asm, GPR64, PPR8>; + def _H : sve_int_while_rr<0b01, { 1, opc }, asm, GPR64, PPR16>; + def _S : sve_int_while_rr<0b10, { 1, opc }, asm, GPR64, PPR32>; + def _D : sve_int_while_rr<0b11, { 1, opc }, asm, GPR64, PPR64>; +} + + +//===----------------------------------------------------------------------===// +// SVE Floating Point Fast Reduction Group +//===----------------------------------------------------------------------===// + +class sve_fp_fast_red<bits<2> sz, bits<3> opc, string asm, + ZPRRegOp zprty, RegisterClass dstRegClass> +: I<(outs dstRegClass:$Vd), (ins PPR3bAny:$Pg, zprty:$Zn), + asm, "\t$Vd, $Pg, $Zn", + "", + []>, Sched<[]> { + bits<5> Zn; + bits<5> Vd; + bits<3> Pg; + let Inst{31-24} = 0b01100101; + let Inst{23-22} = sz; + let Inst{21-19} = 0b000; + let Inst{18-16} = opc; + let Inst{15-13} = 0b001; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Vd; +} + +multiclass sve_fp_fast_red<bits<3> opc, string asm> { + def _H : sve_fp_fast_red<0b01, opc, asm, ZPR16, FPR16>; + def _S : sve_fp_fast_red<0b10, opc, asm, ZPR32, FPR32>; + def _D : sve_fp_fast_red<0b11, opc, asm, ZPR64, FPR64>; +} + + +//===----------------------------------------------------------------------===// +// SVE Floating Point Accumulating Reduction Group +//===----------------------------------------------------------------------===// + +class sve_fp_2op_p_vd<bits<2> sz, bits<3> opc, string asm, + ZPRRegOp zprty, RegisterClass dstRegClass> +: I<(outs dstRegClass:$Vdn), (ins PPR3bAny:$Pg, dstRegClass:$_Vdn, zprty:$Zm), + asm, "\t$Vdn, $Pg, $_Vdn, $Zm", + "", + []>, + Sched<[]> { + bits<3> Pg; + bits<5> Vdn; + bits<5> Zm; + let Inst{31-24} = 0b01100101; + let Inst{23-22} = sz; + let Inst{21-19} = 0b011; + let Inst{18-16} = opc; + let Inst{15-13} = 0b001; + let Inst{12-10} = Pg; + let Inst{9-5} = Zm; + let Inst{4-0} = Vdn; + + let Constraints = "$Vdn = $_Vdn"; +} + +multiclass sve_fp_2op_p_vd<bits<3> opc, string asm> { + def _H : sve_fp_2op_p_vd<0b01, opc, asm, ZPR16, FPR16>; + def _S : sve_fp_2op_p_vd<0b10, opc, asm, ZPR32, FPR32>; + def _D : sve_fp_2op_p_vd<0b11, opc, asm, ZPR64, FPR64>; +} + +//===----------------------------------------------------------------------===// +// SVE Floating Point Compare - Vectors Group +//===----------------------------------------------------------------------===// + +class sve_fp_3op_p_pd<bits<2> sz, bits<3> opc, string asm, PPRRegOp pprty, + ZPRRegOp zprty> +: I<(outs pprty:$Pd), (ins PPR3bAny:$Pg, zprty:$Zn, zprty:$Zm), + asm, "\t$Pd, $Pg/z, $Zn, $Zm", + "", + []>, Sched<[]> { + 
bits<4> Pd; + bits<3> Pg; + bits<5> Zm; + bits<5> Zn; + let Inst{31-24} = 0b01100101; + let Inst{23-22} = sz; + let Inst{21} = 0b0; + let Inst{20-16} = Zm; + let Inst{15} = opc{2}; + let Inst{14} = 0b1; + let Inst{13} = opc{1}; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4} = opc{0}; + let Inst{3-0} = Pd; +} + +multiclass sve_fp_3op_p_pd<bits<3> opc, string asm> { + def _H : sve_fp_3op_p_pd<0b01, opc, asm, PPR16, ZPR16>; + def _S : sve_fp_3op_p_pd<0b10, opc, asm, PPR32, ZPR32>; + def _D : sve_fp_3op_p_pd<0b11, opc, asm, PPR64, ZPR64>; +} + + +//===----------------------------------------------------------------------===// +// SVE Floating Point Compare - with Zero Group +//===----------------------------------------------------------------------===// + +class sve_fp_2op_p_pd<bits<2> sz, bits<3> opc, string asm, PPRRegOp pprty, + ZPRRegOp zprty> +: I<(outs pprty:$Pd), (ins PPR3bAny:$Pg, zprty:$Zn), + asm, "\t$Pd, $Pg/z, $Zn, #0.0", + "", + []>, Sched<[]> { + bits<4> Pd; + bits<3> Pg; + bits<5> Zn; + let Inst{31-24} = 0b01100101; + let Inst{23-22} = sz; + let Inst{21-18} = 0b0100; + let Inst{17-16} = opc{2-1}; + let Inst{15-13} = 0b001; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4} = opc{0}; + let Inst{3-0} = Pd; +} + +multiclass sve_fp_2op_p_pd<bits<3> opc, string asm> { + def _H : sve_fp_2op_p_pd<0b01, opc, asm, PPR16, ZPR16>; + def _S : sve_fp_2op_p_pd<0b10, opc, asm, PPR32, ZPR32>; + def _D : sve_fp_2op_p_pd<0b11, opc, asm, PPR64, ZPR64>; +} + + +//===----------------------------------------------------------------------===// +//SVE Index Generation Group +//===----------------------------------------------------------------------===// + +class sve_int_index_ii<bits<2> sz8_64, string asm, ZPRRegOp zprty, + Operand imm_ty> +: I<(outs zprty:$Zd), (ins imm_ty:$imm5, imm_ty:$imm5b), + asm, "\t$Zd, $imm5, $imm5b", + "", []>, Sched<[]> { + bits<5> Zd; + bits<5> imm5; + bits<5> imm5b; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b1; + let Inst{20-16} = imm5b; + let Inst{15-10} = 0b010000; + let Inst{9-5} = imm5; + let Inst{4-0} = Zd; +} + +multiclass sve_int_index_ii<string asm> { + def _B : sve_int_index_ii<0b00, asm, ZPR8, simm5_32b>; + def _H : sve_int_index_ii<0b01, asm, ZPR16, simm5_32b>; + def _S : sve_int_index_ii<0b10, asm, ZPR32, simm5_32b>; + def _D : sve_int_index_ii<0b11, asm, ZPR64, simm5_64b>; +} + +class sve_int_index_ir<bits<2> sz8_64, string asm, ZPRRegOp zprty, + RegisterClass srcRegType, Operand imm_ty> +: I<(outs zprty:$Zd), (ins imm_ty:$imm5, srcRegType:$Rm), + asm, "\t$Zd, $imm5, $Rm", + "", []>, Sched<[]> { + bits<5> Rm; + bits<5> Zd; + bits<5> imm5; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b1; + let Inst{20-16} = Rm; + let Inst{15-10} = 0b010010; + let Inst{9-5} = imm5; + let Inst{4-0} = Zd; +} + +multiclass sve_int_index_ir<string asm> { + def _B : sve_int_index_ir<0b00, asm, ZPR8, GPR32, simm5_32b>; + def _H : sve_int_index_ir<0b01, asm, ZPR16, GPR32, simm5_32b>; + def _S : sve_int_index_ir<0b10, asm, ZPR32, GPR32, simm5_32b>; + def _D : sve_int_index_ir<0b11, asm, ZPR64, GPR64, simm5_64b>; +} + +class sve_int_index_ri<bits<2> sz8_64, string asm, ZPRRegOp zprty, + RegisterClass srcRegType, Operand imm_ty> +: I<(outs zprty:$Zd), (ins srcRegType:$Rn, imm_ty:$imm5), + asm, "\t$Zd, $Rn, $imm5", + "", []>, Sched<[]> { + bits<5> Rn; + bits<5> Zd; + bits<5> imm5; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b1; + let Inst{20-16} = imm5; + let 
Inst{15-10} = 0b010001; + let Inst{9-5} = Rn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_index_ri<string asm> { + def _B : sve_int_index_ri<0b00, asm, ZPR8, GPR32, simm5_32b>; + def _H : sve_int_index_ri<0b01, asm, ZPR16, GPR32, simm5_32b>; + def _S : sve_int_index_ri<0b10, asm, ZPR32, GPR32, simm5_32b>; + def _D : sve_int_index_ri<0b11, asm, ZPR64, GPR64, simm5_64b>; +} + +class sve_int_index_rr<bits<2> sz8_64, string asm, ZPRRegOp zprty, + RegisterClass srcRegType> +: I<(outs zprty:$Zd), (ins srcRegType:$Rn, srcRegType:$Rm), + asm, "\t$Zd, $Rn, $Rm", + "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Rm; + bits<5> Rn; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b1; + let Inst{20-16} = Rm; + let Inst{15-10} = 0b010011; + let Inst{9-5} = Rn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_index_rr<string asm> { + def _B : sve_int_index_rr<0b00, asm, ZPR8, GPR32>; + def _H : sve_int_index_rr<0b01, asm, ZPR16, GPR32>; + def _S : sve_int_index_rr<0b10, asm, ZPR32, GPR32>; + def _D : sve_int_index_rr<0b11, asm, ZPR64, GPR64>; +} +// +//===----------------------------------------------------------------------===// +// SVE Bitwise Shift - Predicated Group +//===----------------------------------------------------------------------===// +class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<3> opc, string asm, + ZPRRegOp zprty, Operand immtype, + ElementSizeEnum size> +: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, immtype:$imm), + asm, "\t$Zdn, $Pg/m, $_Zdn, $imm", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zdn; + bits<6> imm; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = tsz8_64{3-2}; + let Inst{21-19} = 0b000; + let Inst{18-16} = opc; + let Inst{15-13} = 0b100; + let Inst{12-10} = Pg; + let Inst{9-8} = tsz8_64{1-0}; + let Inst{7-5} = imm{2-0}; // imm3 + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = size; +} + +multiclass sve_int_bin_pred_shift_imm_left<bits<3> opc, string asm> { + def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8, + ElementSizeB>; + def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16, + ElementSizeH> { + let Inst{8} = imm{3}; + } + def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32, + ElementSizeS> { + let Inst{9-8} = imm{4-3}; + } + def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64, + ElementSizeD> { + let Inst{22} = imm{5}; + let Inst{9-8} = imm{4-3}; + } +} + +multiclass sve_int_bin_pred_shift_imm_right<bits<3> opc, string asm> { + def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8, + ElementSizeB>; + def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16, + ElementSizeH> { + let Inst{8} = imm{3}; + } + def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32, + ElementSizeS> { + let Inst{9-8} = imm{4-3}; + } + def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64, + ElementSizeD> { + let Inst{22} = imm{5}; + let Inst{9-8} = imm{4-3}; + } +} + +class sve_int_bin_pred_shift<bits<2> sz8_64, bit wide, bits<3> opc, + string asm, ZPRRegOp zprty, ZPRRegOp zprty2> +: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty2:$Zm), + asm, "\t$Zdn, $Pg/m, $_Zdn, $Zm", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zdn; + bits<5> Zm; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = sz8_64; + let Inst{21-20} = 0b01; + let Inst{19} = wide; + let Inst{18-16} = opc; + let 
Inst{15-13} = 0b100; + let Inst{12-10} = Pg; + let Inst{9-5} = Zm; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_int_bin_pred_shift<bits<3> opc, string asm> { + def _B : sve_int_bin_pred_shift<0b00, 0b0, opc, asm, ZPR8, ZPR8>; + def _H : sve_int_bin_pred_shift<0b01, 0b0, opc, asm, ZPR16, ZPR16>; + def _S : sve_int_bin_pred_shift<0b10, 0b0, opc, asm, ZPR32, ZPR32>; + def _D : sve_int_bin_pred_shift<0b11, 0b0, opc, asm, ZPR64, ZPR64>; +} + +multiclass sve_int_bin_pred_shift_wide<bits<3> opc, string asm> { + def _B : sve_int_bin_pred_shift<0b00, 0b1, opc, asm, ZPR8, ZPR64>; + def _H : sve_int_bin_pred_shift<0b01, 0b1, opc, asm, ZPR16, ZPR64>; + def _S : sve_int_bin_pred_shift<0b10, 0b1, opc, asm, ZPR32, ZPR64>; +} + +//===----------------------------------------------------------------------===// +// SVE Shift - Unpredicated Group +//===----------------------------------------------------------------------===// + +class sve_int_bin_cons_shift_wide<bits<2> sz8_64, bits<2> opc, string asm, + ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins zprty:$Zn, ZPR64:$Zm), + asm, "\t$Zd, $Zn, $Zm", + "", + []>, Sched<[]> { + bits<5> Zd; + bits<5> Zm; + bits<5> Zn; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{15-12} = 0b1000; + let Inst{11-10} = opc; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_bin_cons_shift_wide<bits<2> opc, string asm> { + def _B : sve_int_bin_cons_shift_wide<0b00, opc, asm, ZPR8>; + def _H : sve_int_bin_cons_shift_wide<0b01, opc, asm, ZPR16>; + def _S : sve_int_bin_cons_shift_wide<0b10, opc, asm, ZPR32>; +} + +class sve_int_bin_cons_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm, + ZPRRegOp zprty, Operand immtype> +: I<(outs zprty:$Zd), (ins zprty:$Zn, immtype:$imm), + asm, "\t$Zd, $Zn, $imm", + "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + bits<6> imm; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = tsz8_64{3-2}; + let Inst{21} = 0b1; + let Inst{20-19} = tsz8_64{1-0}; + let Inst{18-16} = imm{2-0}; // imm3 + let Inst{15-12} = 0b1001; + let Inst{11-10} = opc; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_bin_cons_shift_imm_left<bits<2> opc, string asm> { + def _B : sve_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>; + def _H : sve_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> { + let Inst{19} = imm{3}; + } + def _S : sve_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> { + let Inst{20-19} = imm{4-3}; + } + def _D : sve_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> { + let Inst{22} = imm{5}; + let Inst{20-19} = imm{4-3}; + } +} + +multiclass sve_int_bin_cons_shift_imm_right<bits<2> opc, string asm> { + def _B : sve_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; + def _H : sve_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { + let Inst{19} = imm{3}; + } + def _S : sve_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> { + let Inst{20-19} = imm{4-3}; + } + def _D : sve_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> { + let Inst{22} = imm{5}; + let Inst{20-19} = imm{4-3}; + } +} +//===----------------------------------------------------------------------===// +// SVE Memory - Store Group +//===----------------------------------------------------------------------===// + +class sve_mem_cst_si<bits<2> msz, bits<2> esz, string 
asm, + RegisterOperand VecList> +: I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), + asm, "\t$Zt, $Pg, [$Rn, $imm4, mul vl]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rn; + bits<5> Zt; + bits<4> imm4; + let Inst{31-25} = 0b1110010; + let Inst{24-23} = msz; + let Inst{22-21} = esz; + let Inst{20} = 0; + let Inst{19-16} = imm4; + let Inst{15-13} = 0b111; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayStore = 1; +} + +multiclass sve_mem_cst_si<bits<2> msz, bits<2> esz, string asm, + RegisterOperand listty, ZPRRegOp zprty> +{ + def NAME : sve_mem_cst_si<msz, esz, asm, listty>; + + def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $imm4, mul vl]", + (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), 0>; + def : InstAlias<asm # "\t$Zt, $Pg, [$Rn]", + (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>; + def : InstAlias<asm # "\t$Zt, $Pg, [$Rn]", + (!cast<Instruction>(NAME) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; +} + +class sve_mem_est_si<bits<2> sz, bits<2> nregs, RegisterOperand VecList, + string asm, Operand immtype> +: I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, immtype:$imm4), + asm, "\t$Zt, $Pg, [$Rn, $imm4, mul vl]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rn; + bits<5> Zt; + bits<4> imm4; + let Inst{31-25} = 0b1110010; + let Inst{24-23} = sz; + let Inst{22-21} = nregs; + let Inst{20} = 1; + let Inst{19-16} = imm4; + let Inst{15-13} = 0b111; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayStore = 1; +} + +multiclass sve_mem_est_si<bits<2> sz, bits<2> nregs, RegisterOperand VecList, + string asm, Operand immtype> { + def NAME : sve_mem_est_si<sz, nregs, VecList, asm, immtype>; + + def : InstAlias<asm # "\t$Zt, $Pg, [$Rn]", + (!cast<Instruction>(NAME) VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; +} + +class sve_mem_est_ss<bits<2> sz, bits<2> nregs, RegisterOperand VecList, + string asm, RegisterOperand gprty> +: I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), + asm, "\t$Zt, $Pg, [$Rn, $Rm]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rm; + bits<5> Rn; + bits<5> Zt; + let Inst{31-25} = 0b1110010; + let Inst{24-23} = sz; + let Inst{22-21} = nregs; + let Inst{20-16} = Rm; + let Inst{15-13} = 0b011; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayStore = 1; +} + +class sve_mem_cst_ss_base<bits<4> dtype, string asm, + RegisterOperand listty, RegisterOperand gprty> +: I<(outs), (ins listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), + asm, "\t$Zt, $Pg, [$Rn, $Rm]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rm; + bits<5> Rn; + bits<5> Zt; + let Inst{31-25} = 0b1110010; + let Inst{24-21} = dtype; + let Inst{20-16} = Rm; + let Inst{15-13} = 0b010; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayStore = 1; +} + +multiclass sve_mem_cst_ss<bits<4> dtype, string asm, + RegisterOperand listty, ZPRRegOp zprty, + RegisterOperand gprty> { + def NAME : sve_mem_cst_ss_base<dtype, asm, listty, gprty>; + + def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Rm]", + (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>; +} + +class sve_mem_cstnt_si<bits<2> msz, string asm, RegisterOperand VecList> +: I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), + asm, "\t$Zt, $Pg, [$Rn, $imm4, mul vl]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rn; + bits<5> Zt; + bits<4> imm4; + let Inst{31-25} = 0b1110010; + let Inst{24-23} = 
msz; + let Inst{22-20} = 0b001; + let Inst{19-16} = imm4; + let Inst{15-13} = 0b111; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayStore = 1; +} + +multiclass sve_mem_cstnt_si<bits<2> msz, string asm, RegisterOperand listty, + ZPRRegOp zprty> { + def NAME : sve_mem_cstnt_si<msz, asm, listty>; + + def : InstAlias<asm # "\t$Zt, $Pg, [$Rn]", + (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>; + def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $imm4, mul vl]", + (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), 0>; + def : InstAlias<asm # "\t$Zt, $Pg, [$Rn]", + (!cast<Instruction>(NAME) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; +} + +class sve_mem_cstnt_ss_base<bits<2> msz, string asm, RegisterOperand listty, + RegisterOperand gprty> +: I<(outs), (ins listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), + asm, "\t$Zt, $Pg, [$Rn, $Rm]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rm; + bits<5> Rn; + bits<5> Zt; + let Inst{31-25} = 0b1110010; + let Inst{24-23} = msz; + let Inst{22-21} = 0b00; + let Inst{20-16} = Rm; + let Inst{15-13} = 0b011; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayStore = 1; +} + +multiclass sve_mem_cstnt_ss<bits<2> msz, string asm, RegisterOperand listty, + ZPRRegOp zprty, RegisterOperand gprty> { + def NAME : sve_mem_cstnt_ss_base<msz, asm, listty, gprty>; + + def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Rm]", + (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>; +} + +class sve_mem_sst_sv<bits<3> opc, bit xs, bit scaled, string asm, + RegisterOperand VecList, RegisterOperand zprext> +: I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), + asm, "\t$Zt, $Pg, [$Rn, $Zm]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rn; + bits<5> Zm; + bits<5> Zt; + let Inst{31-25} = 0b1110010; + let Inst{24-22} = opc; + let Inst{21} = scaled; + let Inst{20-16} = Zm; + let Inst{15} = 0b1; + let Inst{14} = xs; + let Inst{13} = 0; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayStore = 1; +} + +multiclass sve_mem_sst_sv_32_scaled<bits<3> opc, string asm, + RegisterOperand listty, + ZPRRegOp zprty, + RegisterOperand sxtw_opnd, + RegisterOperand uxtw_opnd > { + def _UXTW_SCALED : sve_mem_sst_sv<opc, 0, 1, asm, listty, uxtw_opnd>; + def _SXTW_SCALED : sve_mem_sst_sv<opc, 1, 1, asm, listty, sxtw_opnd>; + + def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]", + (!cast<Instruction>(NAME # _UXTW_SCALED) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>; + def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]", + (!cast<Instruction>(NAME # _SXTW_SCALED) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>; +} + +multiclass sve_mem_sst_sv_32_unscaled<bits<3> opc, string asm, + RegisterOperand listty, + ZPRRegOp zprty, + RegisterOperand sxtw_opnd, + RegisterOperand uxtw_opnd> { + def _UXTW : sve_mem_sst_sv<opc, 0, 0, asm, listty, uxtw_opnd>; + def _SXTW : sve_mem_sst_sv<opc, 1, 0, asm, listty, sxtw_opnd>; + + def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]", + (!cast<Instruction>(NAME # _UXTW) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>; + def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]", + (!cast<Instruction>(NAME # _SXTW) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>; +} + +class sve_mem_sst_sv2<bits<2> msz, bit scaled, string asm, + RegisterOperand zprext> +: I<(outs), (ins Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), + asm, "\t$Zt, $Pg, [$Rn, $Zm]", + "", + []>, Sched<[]> { + bits<3> 
Pg; + bits<5> Rn; + bits<5> Zm; + bits<5> Zt; + let Inst{31-25} = 0b1110010; + let Inst{24-23} = msz; + let Inst{22} = 0b0; + let Inst{21} = scaled; + let Inst{20-16} = Zm; + let Inst{15-13} = 0b101; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayStore = 1; +} + +multiclass sve_mem_sst_sv_64_scaled<bits<2> msz, string asm, + RegisterOperand zprext> { + def "" : sve_mem_sst_sv2<msz, 1, asm, zprext>; + + def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]", + (!cast<Instruction>(NAME) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), 0>; + +} + +multiclass sve_mem_sst_sv_64_unscaled<bits<2> msz, string asm> { + def "" : sve_mem_sst_sv2<msz, 0, asm, ZPR64ExtLSL8>; + + def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]", + (!cast<Instruction>(NAME) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), 0>; +} + +class sve_mem_sst_vi<bits<3> opc, string asm, ZPRRegOp zprty, + RegisterOperand VecList, Operand imm_ty> +: I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, zprty:$Zn, imm_ty:$imm5), + asm, "\t$Zt, $Pg, [$Zn, $imm5]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> imm5; + bits<5> Zn; + bits<5> Zt; + let Inst{31-25} = 0b1110010; + let Inst{24-23} = opc{2-1}; + let Inst{22} = 0b1; + let Inst{21} = opc{0}; + let Inst{20-16} = imm5; + let Inst{15-13} = 0b101; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Zt; + + let mayStore = 1; +} + +multiclass sve_mem_sst_vi_ptrs<bits<3> opc, string asm, RegisterOperand listty, + ZPRRegOp zprty, Operand imm_ty> { + def _IMM : sve_mem_sst_vi<opc, asm, zprty, listty, imm_ty>; + + def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]", + (!cast<Instruction>(NAME # _IMM) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, 0), 0>; + def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $imm5]", + (!cast<Instruction>(NAME # _IMM) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, imm_ty:$imm5), 0>; + def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]", + (!cast<Instruction>(NAME # _IMM) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, 0), 1>; +} + +class sve_mem_z_spill<string asm> +: I<(outs), (ins ZPRAny:$Zt, GPR64sp:$Rn, simm9:$imm9), + asm, "\t$Zt, [$Rn, $imm9, mul vl]", + "", + []>, Sched<[]> { + bits<5> Rn; + bits<5> Zt; + bits<9> imm9; + let Inst{31-22} = 0b1110010110; + let Inst{21-16} = imm9{8-3}; + let Inst{15-13} = 0b010; + let Inst{12-10} = imm9{2-0}; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayStore = 1; +} + +multiclass sve_mem_z_spill<string asm> { + def NAME : sve_mem_z_spill<asm>; + + def : InstAlias<asm # "\t$Zt, [$Rn]", + (!cast<Instruction>(NAME) ZPRAny:$Zt, GPR64sp:$Rn, 0), 1>; +} + +class sve_mem_p_spill<string asm> +: I<(outs), (ins PPRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), + asm, "\t$Pt, [$Rn, $imm9, mul vl]", + "", + []>, Sched<[]> { + bits<4> Pt; + bits<5> Rn; + bits<9> imm9; + let Inst{31-22} = 0b1110010110; + let Inst{21-16} = imm9{8-3}; + let Inst{15-13} = 0b000; + let Inst{12-10} = imm9{2-0}; + let Inst{9-5} = Rn; + let Inst{4} = 0b0; + let Inst{3-0} = Pt; + + let mayStore = 1; +} + +multiclass sve_mem_p_spill<string asm> { + def NAME : sve_mem_p_spill<asm>; + + def : InstAlias<asm # "\t$Pt, [$Rn]", + (!cast<Instruction>(NAME) PPRAny:$Pt, GPR64sp:$Rn, 0), 1>; +} + +//===----------------------------------------------------------------------===// +// SVE Permute - Predicates Group +//===----------------------------------------------------------------------===// + +class sve_int_perm_bin_perm_pp<bits<3> opc, bits<2> sz8_64, string asm, + PPRRegOp pprty> +: I<(outs pprty:$Pd), (ins pprty:$Pn, pprty:$Pm), + asm, "\t$Pd, $Pn, $Pm", + "", + []>, Sched<[]> { + bits<4> 
Pd; + bits<4> Pm; + bits<4> Pn; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-20} = 0b10; + let Inst{19-16} = Pm; + let Inst{15-13} = 0b010; + let Inst{12-10} = opc; + let Inst{9} = 0b0; + let Inst{8-5} = Pn; + let Inst{4} = 0b0; + let Inst{3-0} = Pd; +} + +multiclass sve_int_perm_bin_perm_pp<bits<3> opc, string asm> { + def _B : sve_int_perm_bin_perm_pp<opc, 0b00, asm, PPR8>; + def _H : sve_int_perm_bin_perm_pp<opc, 0b01, asm, PPR16>; + def _S : sve_int_perm_bin_perm_pp<opc, 0b10, asm, PPR32>; + def _D : sve_int_perm_bin_perm_pp<opc, 0b11, asm, PPR64>; +} + +class sve_int_perm_punpk<bit opc, string asm> +: I<(outs PPR16:$Pd), (ins PPR8:$Pn), + asm, "\t$Pd, $Pn", + "", + []>, Sched<[]> { + bits<4> Pd; + bits<4> Pn; + let Inst{31-17} = 0b000001010011000; + let Inst{16} = opc; + let Inst{15-9} = 0b0100000; + let Inst{8-5} = Pn; + let Inst{4} = 0b0; + let Inst{3-0} = Pd; +} + +class sve_int_rdffr_pred<bit s, string asm> +: I<(outs PPR8:$Pd), (ins PPRAny:$Pg), + asm, "\t$Pd, $Pg/z", + "", + []>, Sched<[]> { + bits<4> Pd; + bits<4> Pg; + let Inst{31-23} = 0b001001010; + let Inst{22} = s; + let Inst{21-9} = 0b0110001111000; + let Inst{8-5} = Pg; + let Inst{4} = 0; + let Inst{3-0} = Pd; + + let Defs = !if(!eq (s, 1), [NZCV], []); + let Uses = [FFR]; +} + +class sve_int_rdffr_unpred<string asm> : I< + (outs PPR8:$Pd), (ins), + asm, "\t$Pd", + "", + []>, Sched<[]> { + bits<4> Pd; + let Inst{31-4} = 0b0010010100011001111100000000; + let Inst{3-0} = Pd; + + let Uses = [FFR]; +} + +class sve_int_wrffr<string asm> +: I<(outs), (ins PPR8:$Pn), + asm, "\t$Pn", + "", + []>, Sched<[]> { + bits<4> Pn; + let Inst{31-9} = 0b00100101001010001001000; + let Inst{8-5} = Pn; + let Inst{4-0} = 0b00000; + + let hasSideEffects = 1; + let Defs = [FFR]; +} + +class sve_int_setffr<string asm> +: I<(outs), (ins), + asm, "", + "", + []>, Sched<[]> { + let Inst{31-0} = 0b00100101001011001001000000000000; + + let hasSideEffects = 1; + let Defs = [FFR]; +} + +//===----------------------------------------------------------------------===// +// SVE Permute Vector - Predicated Group +//===----------------------------------------------------------------------===// + +class sve_int_perm_clast_rz<bits<2> sz8_64, bit ab, string asm, + ZPRRegOp zprty, RegisterClass rt> +: I<(outs rt:$Rdn), (ins PPR3bAny:$Pg, rt:$_Rdn, zprty:$Zm), + asm, "\t$Rdn, $Pg, $_Rdn, $Zm", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rdn; + bits<5> Zm; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-17} = 0b11000; + let Inst{16} = ab; + let Inst{15-13} = 0b101; + let Inst{12-10} = Pg; + let Inst{9-5} = Zm; + let Inst{4-0} = Rdn; + + let Constraints = "$Rdn = $_Rdn"; +} + +multiclass sve_int_perm_clast_rz<bit ab, string asm> { + def _B : sve_int_perm_clast_rz<0b00, ab, asm, ZPR8, GPR32>; + def _H : sve_int_perm_clast_rz<0b01, ab, asm, ZPR16, GPR32>; + def _S : sve_int_perm_clast_rz<0b10, ab, asm, ZPR32, GPR32>; + def _D : sve_int_perm_clast_rz<0b11, ab, asm, ZPR64, GPR64>; +} + +class sve_int_perm_clast_vz<bits<2> sz8_64, bit ab, string asm, + ZPRRegOp zprty, RegisterClass rt> +: I<(outs rt:$Vdn), (ins PPR3bAny:$Pg, rt:$_Vdn, zprty:$Zm), + asm, "\t$Vdn, $Pg, $_Vdn, $Zm", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Vdn; + bits<5> Zm; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-17} = 0b10101; + let Inst{16} = ab; + let Inst{15-13} = 0b100; + let Inst{12-10} = Pg; + let Inst{9-5} = Zm; + let Inst{4-0} = Vdn; + + let Constraints = "$Vdn = $_Vdn"; +} + +multiclass 
sve_int_perm_clast_vz<bit ab, string asm> { + def _B : sve_int_perm_clast_vz<0b00, ab, asm, ZPR8, FPR8>; + def _H : sve_int_perm_clast_vz<0b01, ab, asm, ZPR16, FPR16>; + def _S : sve_int_perm_clast_vz<0b10, ab, asm, ZPR32, FPR32>; + def _D : sve_int_perm_clast_vz<0b11, ab, asm, ZPR64, FPR64>; +} + +class sve_int_perm_clast_zz<bits<2> sz8_64, bit ab, string asm, + ZPRRegOp zprty> +: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm), + asm, "\t$Zdn, $Pg, $_Zdn, $Zm", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zdn; + bits<5> Zm; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-17} = 0b10100; + let Inst{16} = ab; + let Inst{15-13} = 0b100; + let Inst{12-10} = Pg; + let Inst{9-5} = Zm; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve_int_perm_clast_zz<bit ab, string asm> { + def _B : sve_int_perm_clast_zz<0b00, ab, asm, ZPR8>; + def _H : sve_int_perm_clast_zz<0b01, ab, asm, ZPR16>; + def _S : sve_int_perm_clast_zz<0b10, ab, asm, ZPR32>; + def _D : sve_int_perm_clast_zz<0b11, ab, asm, ZPR64>; +} + +class sve_int_perm_last_r<bits<2> sz8_64, bit ab, string asm, + ZPRRegOp zprty, RegisterClass resultRegType> +: I<(outs resultRegType:$Rd), (ins PPR3bAny:$Pg, zprty:$Zn), + asm, "\t$Rd, $Pg, $Zn", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rd; + bits<5> Zn; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-17} = 0b10000; + let Inst{16} = ab; + let Inst{15-13} = 0b101; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Rd; +} + +multiclass sve_int_perm_last_r<bit ab, string asm> { + def _B : sve_int_perm_last_r<0b00, ab, asm, ZPR8, GPR32>; + def _H : sve_int_perm_last_r<0b01, ab, asm, ZPR16, GPR32>; + def _S : sve_int_perm_last_r<0b10, ab, asm, ZPR32, GPR32>; + def _D : sve_int_perm_last_r<0b11, ab, asm, ZPR64, GPR64>; +} + +class sve_int_perm_last_v<bits<2> sz8_64, bit ab, string asm, + ZPRRegOp zprty, RegisterClass dstRegtype> +: I<(outs dstRegtype:$Vd), (ins PPR3bAny:$Pg, zprty:$Zn), + asm, "\t$Vd, $Pg, $Zn", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Vd; + bits<5> Zn; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-17} = 0b10001; + let Inst{16} = ab; + let Inst{15-13} = 0b100; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Vd; +} + +multiclass sve_int_perm_last_v<bit ab, string asm> { + def _B : sve_int_perm_last_v<0b00, ab, asm, ZPR8, FPR8>; + def _H : sve_int_perm_last_v<0b01, ab, asm, ZPR16, FPR16>; + def _S : sve_int_perm_last_v<0b10, ab, asm, ZPR32, FPR32>; + def _D : sve_int_perm_last_v<0b11, ab, asm, ZPR64, FPR64>; +} + +class sve_int_perm_splice<bits<2> sz8_64, string asm, ZPRRegOp zprty> +: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm), + asm, "\t$Zdn, $Pg, $_Zdn, $Zm", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zdn; + bits<5> Zm; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-13} = 0b101100100; + let Inst{12-10} = Pg; + let Inst{9-5} = Zm; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve_int_perm_splice<string asm> { + def _B : sve_int_perm_splice<0b00, asm, ZPR8>; + def _H : sve_int_perm_splice<0b01, asm, ZPR16>; + def _S : sve_int_perm_splice<0b10, asm, ZPR32>; + def _D : sve_int_perm_splice<0b11, asm, ZPR64>; +} + +class sve_int_perm_rev<bits<2> sz8_64, bits<2> opc, string asm, + 
ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins zprty:$_Zd, PPR3bAny:$Pg, zprty:$Zn), + asm, "\t$Zd, $Pg/m, $Zn", + "", + []>, Sched<[]> { + bits<5> Zd; + bits<3> Pg; + bits<5> Zn; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-18} = 0b1001; + let Inst{17-16} = opc; + let Inst{15-13} = 0b100; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_int_perm_rev_rbit<string asm> { + def _B : sve_int_perm_rev<0b00, 0b11, asm, ZPR8>; + def _H : sve_int_perm_rev<0b01, 0b11, asm, ZPR16>; + def _S : sve_int_perm_rev<0b10, 0b11, asm, ZPR32>; + def _D : sve_int_perm_rev<0b11, 0b11, asm, ZPR64>; +} + +multiclass sve_int_perm_rev_revb<string asm> { + def _H : sve_int_perm_rev<0b01, 0b00, asm, ZPR16>; + def _S : sve_int_perm_rev<0b10, 0b00, asm, ZPR32>; + def _D : sve_int_perm_rev<0b11, 0b00, asm, ZPR64>; +} + +multiclass sve_int_perm_rev_revh<string asm> { + def _S : sve_int_perm_rev<0b10, 0b01, asm, ZPR32>; + def _D : sve_int_perm_rev<0b11, 0b01, asm, ZPR64>; +} + +multiclass sve_int_perm_rev_revw<string asm> { + def _D : sve_int_perm_rev<0b11, 0b10, asm, ZPR64>; +} + +class sve_int_perm_cpy_r<bits<2> sz8_64, string asm, ZPRRegOp zprty, + RegisterClass srcRegType> +: I<(outs zprty:$Zd), (ins zprty:$_Zd, PPR3bAny:$Pg, srcRegType:$Rn), + asm, "\t$Zd, $Pg/m, $Rn", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rn; + bits<5> Zd; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-13} = 0b101000101; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_int_perm_cpy_r<string asm> { + def _B : sve_int_perm_cpy_r<0b00, asm, ZPR8, GPR32sp>; + def _H : sve_int_perm_cpy_r<0b01, asm, ZPR16, GPR32sp>; + def _S : sve_int_perm_cpy_r<0b10, asm, ZPR32, GPR32sp>; + def _D : sve_int_perm_cpy_r<0b11, asm, ZPR64, GPR64sp>; + + def : InstAlias<"mov $Zd, $Pg/m, $Rn", + (!cast<Instruction>(NAME # _B) ZPR8:$Zd, PPR3bAny:$Pg, GPR32sp:$Rn), 1>; + def : InstAlias<"mov $Zd, $Pg/m, $Rn", + (!cast<Instruction>(NAME # _H) ZPR16:$Zd, PPR3bAny:$Pg, GPR32sp:$Rn), 1>; + def : InstAlias<"mov $Zd, $Pg/m, $Rn", + (!cast<Instruction>(NAME # _S) ZPR32:$Zd, PPR3bAny:$Pg, GPR32sp:$Rn), 1>; + def : InstAlias<"mov $Zd, $Pg/m, $Rn", + (!cast<Instruction>(NAME # _D) ZPR64:$Zd, PPR3bAny:$Pg, GPR64sp:$Rn), 1>; +} + +class sve_int_perm_cpy_v<bits<2> sz8_64, string asm, ZPRRegOp zprty, + RegisterClass srcRegtype> +: I<(outs zprty:$Zd), (ins zprty:$_Zd, PPR3bAny:$Pg, srcRegtype:$Vn), + asm, "\t$Zd, $Pg/m, $Vn", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Vn; + bits<5> Zd; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-13} = 0b100000100; + let Inst{12-10} = Pg; + let Inst{9-5} = Vn; + let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = Destructive; + let ElementSize = zprty.ElementSize; +} + +multiclass sve_int_perm_cpy_v<string asm> { + def _B : sve_int_perm_cpy_v<0b00, asm, ZPR8, FPR8>; + def _H : sve_int_perm_cpy_v<0b01, asm, ZPR16, FPR16>; + def _S : sve_int_perm_cpy_v<0b10, asm, ZPR32, FPR32>; + def _D : sve_int_perm_cpy_v<0b11, asm, ZPR64, FPR64>; + + def : InstAlias<"mov $Zd, $Pg/m, $Vn", + (!cast<Instruction>(NAME # _B) ZPR8:$Zd, PPR3bAny:$Pg, FPR8:$Vn), 1>; + def : InstAlias<"mov $Zd, $Pg/m, $Vn", + (!cast<Instruction>(NAME # _H) 
ZPR16:$Zd, PPR3bAny:$Pg, FPR16:$Vn), 1>; + def : InstAlias<"mov $Zd, $Pg/m, $Vn", + (!cast<Instruction>(NAME # _S) ZPR32:$Zd, PPR3bAny:$Pg, FPR32:$Vn), 1>; + def : InstAlias<"mov $Zd, $Pg/m, $Vn", + (!cast<Instruction>(NAME # _D) ZPR64:$Zd, PPR3bAny:$Pg, FPR64:$Vn), 1>; +} + +class sve_int_perm_compact<bit sz, string asm, ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins PPR3bAny:$Pg, zprty:$Zn), + asm, "\t$Zd, $Pg, $Zn", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zd; + bits<5> Zn; + let Inst{31-23} = 0b000001011; + let Inst{22} = sz; + let Inst{21-13} = 0b100001100; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_perm_compact<string asm> { + def _S : sve_int_perm_compact<0b0, asm, ZPR32>; + def _D : sve_int_perm_compact<0b1, asm, ZPR64>; +} + + +//===----------------------------------------------------------------------===// +// SVE Memory - Contiguous Load Group +//===----------------------------------------------------------------------===// + +class sve_mem_cld_si_base<bits<4> dtype, bit nf, string asm, + RegisterOperand VecList> +: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), + asm, "\t$Zt, $Pg/z, [$Rn, $imm4, mul vl]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rn; + bits<5> Zt; + bits<4> imm4; + let Inst{31-25} = 0b1010010; + let Inst{24-21} = dtype; + let Inst{20} = nf; + let Inst{19-16} = imm4; + let Inst{15-13} = 0b101; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayLoad = 1; + let Uses = !if(!eq(nf, 1), [FFR], []); + let Defs = !if(!eq(nf, 1), [FFR], []); +} + +multiclass sve_mem_cld_si_base<bits<4> dtype, bit nf, string asm, + RegisterOperand listty, ZPRRegOp zprty> { + def _REAL : sve_mem_cld_si_base<dtype, nf, asm, listty>; + + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]", + (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>; + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $imm4, mul vl]", + (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), 0>; + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]", + (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; +} + +multiclass sve_mem_cld_si<bits<4> dtype, string asm, RegisterOperand listty, + ZPRRegOp zprty> +: sve_mem_cld_si_base<dtype, 0, asm, listty, zprty>; + +class sve_mem_cldnt_si_base<bits<2> msz, string asm, RegisterOperand VecList> +: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), + asm, "\t$Zt, $Pg/z, [$Rn, $imm4, mul vl]", + "", + []>, Sched<[]> { + bits<5> Zt; + bits<3> Pg; + bits<5> Rn; + bits<4> imm4; + let Inst{31-25} = 0b1010010; + let Inst{24-23} = msz; + let Inst{22-20} = 0b000; + let Inst{19-16} = imm4; + let Inst{15-13} = 0b111; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayLoad = 1; +} + +multiclass sve_mem_cldnt_si<bits<2> msz, string asm, RegisterOperand listty, + ZPRRegOp zprty> { + def NAME : sve_mem_cldnt_si_base<msz, asm, listty>; + + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]", + (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>; + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $imm4, mul vl]", + (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), 0>; + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]", + (!cast<Instruction>(NAME) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; +} + +class sve_mem_cldnt_ss_base<bits<2> msz, string asm, RegisterOperand VecList, + RegisterOperand gprty> +: I<(outs VecList:$Zt), (ins 
PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), + asm, "\t$Zt, $Pg/z, [$Rn, $Rm]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rm; + bits<5> Rn; + bits<5> Zt; + let Inst{31-25} = 0b1010010; + let Inst{24-23} = msz; + let Inst{22-21} = 0b00; + let Inst{20-16} = Rm; + let Inst{15-13} = 0b110; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayLoad = 1; +} + +multiclass sve_mem_cldnt_ss<bits<2> msz, string asm, RegisterOperand listty, + ZPRRegOp zprty, RegisterOperand gprty> { + def NAME : sve_mem_cldnt_ss_base<msz, asm, listty, gprty>; + + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Rm]", + (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>; +} + +class sve_mem_ldqr_si<bits<2> sz, string asm, RegisterOperand VecList> +: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, simm4s16:$imm4), + asm, "\t$Zt, $Pg/z, [$Rn, $imm4]", "", []>, Sched<[]> { + bits<5> Zt; + bits<5> Rn; + bits<3> Pg; + bits<4> imm4; + let Inst{31-25} = 0b1010010; + let Inst{24-23} = sz; + let Inst{22-20} = 0; + let Inst{19-16} = imm4; + let Inst{15-13} = 0b001; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayLoad = 1; +} + +multiclass sve_mem_ldqr_si<bits<2> sz, string asm, RegisterOperand listty, + ZPRRegOp zprty> { + def NAME : sve_mem_ldqr_si<sz, asm, listty>; + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]", + (!cast<Instruction>(NAME) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]", + (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>; + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $imm4]", + (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s16:$imm4), 0>; +} + +class sve_mem_ldqr_ss<bits<2> sz, string asm, RegisterOperand VecList, + RegisterOperand gprty> +: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), + asm, "\t$Zt, $Pg/z, [$Rn, $Rm]", "", []>, Sched<[]> { + bits<5> Zt; + bits<3> Pg; + bits<5> Rn; + bits<5> Rm; + let Inst{31-25} = 0b1010010; + let Inst{24-23} = sz; + let Inst{22-21} = 0; + let Inst{20-16} = Rm; + let Inst{15-13} = 0; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayLoad = 1; +} + +multiclass sve_mem_ldqr_ss<bits<2> sz, string asm, RegisterOperand listty, + ZPRRegOp zprty, RegisterOperand gprty> { + def NAME : sve_mem_ldqr_ss<sz, asm, listty, gprty>; + + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Rm]", + (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>; +} + +class sve_mem_ld_dup<bits<2> dtypeh, bits<2> dtypel, string asm, + RegisterOperand VecList, Operand immtype> +: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, immtype:$imm6), + asm, "\t$Zt, $Pg/z, [$Rn, $imm6]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rn; + bits<5> Zt; + bits<6> imm6; + let Inst{31-25} = 0b1000010; + let Inst{24-23} = dtypeh; + let Inst{22} = 1; + let Inst{21-16} = imm6; + let Inst{15} = 0b1; + let Inst{14-13} = dtypel; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayLoad = 1; +} + +multiclass sve_mem_ld_dup<bits<2> dtypeh, bits<2> dtypel, string asm, + RegisterOperand zlistty, ZPRRegOp zprty, Operand immtype> { + def NAME : sve_mem_ld_dup<dtypeh, dtypel, asm, zlistty, immtype>; + + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]", + (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>; + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $imm6]", + (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, immtype:$imm6), 0>; + 
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]", + (!cast<Instruction>(NAME) zlistty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; +} + +class sve_mem_cld_ss_base<bits<4> dtype, bit ff, dag iops, string asm, + RegisterOperand VecList> +: I<(outs VecList:$Zt), iops, + asm, "\t$Zt, $Pg/z, [$Rn, $Rm]", + "", + []>, Sched<[]> { + bits<5> Zt; + bits<3> Pg; + bits<5> Rm; + bits<5> Rn; + let Inst{31-25} = 0b1010010; + let Inst{24-21} = dtype; + let Inst{20-16} = Rm; + let Inst{15-14} = 0b01; + let Inst{13} = ff; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayLoad = 1; + let Uses = !if(!eq(ff, 1), [FFR], []); + let Defs = !if(!eq(ff, 1), [FFR], []); +} + +multiclass sve_mem_cld_ss<bits<4> dtype, string asm, RegisterOperand listty, + ZPRRegOp zprty, RegisterOperand gprty> { + def "" : sve_mem_cld_ss_base<dtype, 0, (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), + asm, listty>; + + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Rm]", + (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>; +} + +multiclass sve_mem_cldff_ss<bits<4> dtype, string asm, RegisterOperand listty, + ZPRRegOp zprty, RegisterOperand gprty> { + def _REAL : sve_mem_cld_ss_base<dtype, 1, (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), + asm, listty>; + + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Rm]", + (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>; + + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]", + (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 1>; + + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]", + (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 0>; +} + +multiclass sve_mem_cldnf_si<bits<4> dtype, string asm, RegisterOperand listty, + ZPRRegOp zprty> +: sve_mem_cld_si_base<dtype, 1, asm, listty, zprty>; + +class sve_mem_eld_si<bits<2> sz, bits<2> nregs, RegisterOperand VecList, + string asm, Operand immtype> +: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, immtype:$imm4), + asm, "\t$Zt, $Pg/z, [$Rn, $imm4, mul vl]", + "", + []>, Sched<[]> { + bits<5> Zt; + bits<3> Pg; + bits<5> Rn; + bits<4> imm4; + let Inst{31-25} = 0b1010010; + let Inst{24-23} = sz; + let Inst{22-21} = nregs; + let Inst{20} = 0; + let Inst{19-16} = imm4; + let Inst{15-13} = 0b111; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayLoad = 1; +} + +multiclass sve_mem_eld_si<bits<2> sz, bits<2> nregs, RegisterOperand VecList, + string asm, Operand immtype> { + def NAME : sve_mem_eld_si<sz, nregs, VecList, asm, immtype>; + + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]", + (!cast<Instruction>(NAME) VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; +} + +class sve_mem_eld_ss<bits<2> sz, bits<2> nregs, RegisterOperand VecList, + string asm, RegisterOperand gprty> +: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), + asm, "\t$Zt, $Pg/z, [$Rn, $Rm]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rm; + bits<5> Rn; + bits<5> Zt; + let Inst{31-25} = 0b1010010; + let Inst{24-23} = sz; + let Inst{22-21} = nregs; + let Inst{20-16} = Rm; + let Inst{15-13} = 0b110; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayLoad = 1; +} + +//===----------------------------------------------------------------------===// +// SVE Memory - 32-bit Gather and Unsized Contiguous Group +//===----------------------------------------------------------------------===// + +// bit xs is '1' if offsets are signed +// bit scaled is '1' if the offsets are scaled +class 
sve_mem_32b_gld_sv<bits<4> opc, bit xs, bit scaled, string asm, + RegisterOperand zprext> +: I<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), + asm, "\t$Zt, $Pg/z, [$Rn, $Zm]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rn; + bits<5> Zm; + bits<5> Zt; + let Inst{31-25} = 0b1000010; + let Inst{24-23} = opc{3-2}; + let Inst{22} = xs; + let Inst{21} = scaled; + let Inst{20-16} = Zm; + let Inst{15} = 0b0; + let Inst{14-13} = opc{1-0}; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayLoad = 1; + let Defs = !if(!eq(opc{0}, 1), [FFR], []); + let Uses = !if(!eq(opc{0}, 1), [FFR], []); +} + +multiclass sve_mem_32b_gld_sv_32_scaled<bits<4> opc, string asm, + RegisterOperand sxtw_opnd, + RegisterOperand uxtw_opnd> { + def _UXTW_SCALED_REAL : sve_mem_32b_gld_sv<opc, 0, 1, asm, uxtw_opnd>; + def _SXTW_SCALED_REAL : sve_mem_32b_gld_sv<opc, 1, 1, asm, sxtw_opnd>; + + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]", + (!cast<Instruction>(NAME # _UXTW_SCALED_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>; + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]", + (!cast<Instruction>(NAME # _SXTW_SCALED_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>; +} + +multiclass sve_mem_32b_gld_vs_32_unscaled<bits<4> opc, string asm, + RegisterOperand sxtw_opnd, + RegisterOperand uxtw_opnd> { + def _UXTW_REAL : sve_mem_32b_gld_sv<opc, 0, 0, asm, uxtw_opnd>; + def _SXTW_REAL : sve_mem_32b_gld_sv<opc, 1, 0, asm, sxtw_opnd>; + + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]", + (!cast<Instruction>(NAME # _UXTW_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>; + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]", + (!cast<Instruction>(NAME # _SXTW_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>; +} + + +class sve_mem_32b_gld_vi<bits<4> opc, string asm, Operand imm_ty> +: I<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5), + asm, "\t$Zt, $Pg/z, [$Zn, $imm5]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zn; + bits<5> Zt; + bits<5> imm5; + let Inst{31-25} = 0b1000010; + let Inst{24-23} = opc{3-2}; + let Inst{22-21} = 0b01; + let Inst{20-16} = imm5; + let Inst{15} = 0b1; + let Inst{14-13} = opc{1-0}; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Zt; + + let mayLoad = 1; + let Defs = !if(!eq(opc{0}, 1), [FFR], []); + let Uses = !if(!eq(opc{0}, 1), [FFR], []); +} + +multiclass sve_mem_32b_gld_vi_32_ptrs<bits<4> opc, string asm, Operand imm_ty> { + def _IMM_REAL : sve_mem_32b_gld_vi<opc, asm, imm_ty>; + + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]", + (!cast<Instruction>(NAME # _IMM_REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, 0), 0>; + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $imm5]", + (!cast<Instruction>(NAME # _IMM_REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5), 0>; + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]", + (!cast<Instruction>(NAME # _IMM_REAL) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, 0), 1>; +} + +class sve_mem_prfm_si<bits<2> msz, string asm> +: I<(outs), (ins sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, simm6s1:$imm6), + asm, "\t$prfop, $Pg, [$Rn, $imm6, mul vl]", + "", + []>, Sched<[]> { + bits<5> Rn; + bits<3> Pg; + bits<6> imm6; + bits<4> prfop; + let Inst{31-22} = 0b1000010111; + let Inst{21-16} = imm6; + let Inst{15} = 0b0; + let Inst{14-13} = msz; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4} = 0b0; + let Inst{3-0} = prfop; + + let hasSideEffects = 1; +} + +multiclass sve_mem_prfm_si<bits<2> msz, string asm> { + def NAME : sve_mem_prfm_si<msz, asm>; + + def 
: InstAlias<asm # "\t$prfop, $Pg, [$Rn]", + (!cast<Instruction>(NAME) sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; +} + +class sve_mem_prfm_ss<bits<3> opc, string asm, RegisterOperand gprty> +: I<(outs), (ins sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), + asm, "\t$prfop, $Pg, [$Rn, $Rm]", + "", + []>, Sched<[]> { + bits<5> Rm; + bits<5> Rn; + bits<3> Pg; + bits<4> prfop; + let Inst{31-25} = 0b1000010; + let Inst{24-23} = opc{2-1}; + let Inst{22-21} = 0b00; + let Inst{20-16} = Rm; + let Inst{15} = 0b1; + let Inst{14} = opc{0}; + let Inst{13} = 0b0; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4} = 0b0; + let Inst{3-0} = prfop; + + let hasSideEffects = 1; +} + +class sve_mem_32b_prfm_sv<bits<2> msz, bit xs, string asm, + RegisterOperand zprext> +: I<(outs), (ins sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), + asm, "\t$prfop, $Pg, [$Rn, $Zm]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rn; + bits<5> Zm; + bits<4> prfop; + let Inst{31-23} = 0b100001000; + let Inst{22} = xs; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{15} = 0b0; + let Inst{14-13} = msz; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4} = 0b0; + let Inst{3-0} = prfop; + + let hasSideEffects = 1; +} + +multiclass sve_mem_32b_prfm_sv_scaled<bits<2> msz, string asm, + RegisterOperand sxtw_opnd, + RegisterOperand uxtw_opnd> { + def _UXTW_SCALED : sve_mem_32b_prfm_sv<msz, 0, asm, uxtw_opnd>; + def _SXTW_SCALED : sve_mem_32b_prfm_sv<msz, 1, asm, sxtw_opnd>; +} + +class sve_mem_32b_prfm_vi<bits<2> msz, string asm, Operand imm_ty> +: I<(outs), (ins sve_prfop:$prfop, PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5), + asm, "\t$prfop, $Pg, [$Zn, $imm5]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zn; + bits<5> imm5; + bits<4> prfop; + let Inst{31-25} = 0b1000010; + let Inst{24-23} = msz; + let Inst{22-21} = 0b00; + let Inst{20-16} = imm5; + let Inst{15-13} = 0b111; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4} = 0b0; + let Inst{3-0} = prfop; +} + +multiclass sve_mem_32b_prfm_vi<bits<2> msz, string asm, Operand imm_ty> { + def NAME : sve_mem_32b_prfm_vi<msz, asm, imm_ty>; + + def : InstAlias<asm # "\t$prfop, $Pg, [$Zn]", + (!cast<Instruction>(NAME) sve_prfop:$prfop, PPR3bAny:$Pg, ZPR32:$Zn, 0), 1>; +} + +class sve_mem_z_fill<string asm> +: I<(outs ZPRAny:$Zt), (ins GPR64sp:$Rn, simm9:$imm9), + asm, "\t$Zt, [$Rn, $imm9, mul vl]", + "", + []>, Sched<[]> { + bits<5> Rn; + bits<5> Zt; + bits<9> imm9; + let Inst{31-22} = 0b1000010110; + let Inst{21-16} = imm9{8-3}; + let Inst{15-13} = 0b010; + let Inst{12-10} = imm9{2-0}; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayLoad = 1; +} + +multiclass sve_mem_z_fill<string asm> { + def NAME : sve_mem_z_fill<asm>; + + def : InstAlias<asm # "\t$Zt, [$Rn]", + (!cast<Instruction>(NAME) ZPRAny:$Zt, GPR64sp:$Rn, 0), 1>; +} + +class sve_mem_p_fill<string asm> +: I<(outs PPRAny:$Pt), (ins GPR64sp:$Rn, simm9:$imm9), + asm, "\t$Pt, [$Rn, $imm9, mul vl]", + "", + []>, Sched<[]> { + bits<4> Pt; + bits<5> Rn; + bits<9> imm9; + let Inst{31-22} = 0b1000010110; + let Inst{21-16} = imm9{8-3}; + let Inst{15-13} = 0b000; + let Inst{12-10} = imm9{2-0}; + let Inst{9-5} = Rn; + let Inst{4} = 0b0; + let Inst{3-0} = Pt; + + let mayLoad = 1; +} + +multiclass sve_mem_p_fill<string asm> { + def NAME : sve_mem_p_fill<asm>; + + def : InstAlias<asm # "\t$Pt, [$Rn]", + (!cast<Instruction>(NAME) PPRAny:$Pt, GPR64sp:$Rn, 0), 1>; +} + +//===----------------------------------------------------------------------===// +// SVE Memory - 64-bit Gather 
Group +//===----------------------------------------------------------------------===// + +// bit xs is '1' if offsets are signed +// bit scaled is '1' if the offsets are scaled +// bit lsl is '0' if the offsets are extended (uxtw/sxtw), '1' if shifted (lsl) +class sve_mem_64b_gld_sv<bits<4> opc, bit xs, bit scaled, bit lsl, string asm, + RegisterOperand zprext> +: I<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), + asm, "\t$Zt, $Pg/z, [$Rn, $Zm]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rn; + bits<5> Zm; + bits<5> Zt; + let Inst{31-25} = 0b1100010; + let Inst{24-23} = opc{3-2}; + let Inst{22} = xs; + let Inst{21} = scaled; + let Inst{20-16} = Zm; + let Inst{15} = lsl; + let Inst{14-13} = opc{1-0}; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayLoad = 1; + let Defs = !if(!eq(opc{0}, 1), [FFR], []); + let Uses = !if(!eq(opc{0}, 1), [FFR], []); +} + +multiclass sve_mem_64b_gld_sv_32_scaled<bits<4> opc, string asm, + RegisterOperand sxtw_opnd, + RegisterOperand uxtw_opnd> { + def _UXTW_SCALED_REAL : sve_mem_64b_gld_sv<opc, 0, 1, 0, asm, uxtw_opnd>; + def _SXTW_SCALED_REAL : sve_mem_64b_gld_sv<opc, 1, 1, 0, asm, sxtw_opnd>; + + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]", + (!cast<Instruction>(NAME # _UXTW_SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>; + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]", + (!cast<Instruction>(NAME # _SXTW_SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>; +} + +multiclass sve_mem_64b_gld_vs_32_unscaled<bits<4> opc, string asm, + RegisterOperand sxtw_opnd, + RegisterOperand uxtw_opnd> { + def _UXTW_REAL : sve_mem_64b_gld_sv<opc, 0, 0, 0, asm, uxtw_opnd>; + def _SXTW_REAL : sve_mem_64b_gld_sv<opc, 1, 0, 0, asm, sxtw_opnd>; + + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]", + (!cast<Instruction>(NAME # _UXTW_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>; + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]", + (!cast<Instruction>(NAME # _SXTW_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>; +} + +multiclass sve_mem_64b_gld_sv2_64_scaled<bits<4> opc, string asm, + RegisterOperand zprext> { + def _SCALED_REAL : sve_mem_64b_gld_sv<opc, 1, 1, 1, asm, zprext>; + + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]", + (!cast<Instruction>(NAME # _SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), 0>; +} + +multiclass sve_mem_64b_gld_vs2_64_unscaled<bits<4> opc, string asm> { + def _REAL : sve_mem_64b_gld_sv<opc, 1, 0, 1, asm, ZPR64ExtLSL8>; + + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]", + (!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), 0>; +} + +class sve_mem_64b_gld_vi<bits<4> opc, string asm, Operand imm_ty> +: I<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5), + asm, "\t$Zt, $Pg/z, [$Zn, $imm5]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zn; + bits<5> Zt; + bits<5> imm5; + let Inst{31-25} = 0b1100010; + let Inst{24-23} = opc{3-2}; + let Inst{22-21} = 0b01; + let Inst{20-16} = imm5; + let Inst{15} = 0b1; + let Inst{14-13} = opc{1-0}; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Zt; + + let mayLoad = 1; + let Defs = !if(!eq(opc{0}, 1), [FFR], []); + let Uses = !if(!eq(opc{0}, 1), [FFR], []); +} + +multiclass sve_mem_64b_gld_vi_64_ptrs<bits<4> opc, string asm, Operand imm_ty> { + def _IMM_REAL : sve_mem_64b_gld_vi<opc, asm, imm_ty>; + + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]", + (!cast<Instruction>(NAME # _IMM_REAL) ZPR64:$Zt, 
PPR3bAny:$Pg, ZPR64:$Zn, 0), 0>; + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $imm5]", + (!cast<Instruction>(NAME # _IMM_REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5), 0>; + def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]", + (!cast<Instruction>(NAME # _IMM_REAL) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 1>; +} + +// bit lsl is '0' if the offsets are extended (uxtw/sxtw), '1' if shifted (lsl) +class sve_mem_64b_prfm_sv<bits<2> msz, bit xs, bit lsl, string asm, + RegisterOperand zprext> +: I<(outs), (ins sve_prfop:$prfop, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), + asm, "\t$prfop, $Pg, [$Rn, $Zm]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Rn; + bits<5> Zm; + bits<4> prfop; + let Inst{31-23} = 0b110001000; + let Inst{22} = xs; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{15} = lsl; + let Inst{14-13} = msz; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4} = 0b0; + let Inst{3-0} = prfop; + + let hasSideEffects = 1; +} + +multiclass sve_mem_64b_prfm_sv_ext_scaled<bits<2> msz, string asm, + RegisterOperand sxtw_opnd, + RegisterOperand uxtw_opnd> { + def _UXTW_SCALED : sve_mem_64b_prfm_sv<msz, 0, 0, asm, uxtw_opnd>; + def _SXTW_SCALED : sve_mem_64b_prfm_sv<msz, 1, 0, asm, sxtw_opnd>; +} + +multiclass sve_mem_64b_prfm_sv_lsl_scaled<bits<2> msz, string asm, + RegisterOperand zprext> { + def NAME : sve_mem_64b_prfm_sv<msz, 1, 1, asm, zprext>; +} + + +class sve_mem_64b_prfm_vi<bits<2> msz, string asm, Operand imm_ty> +: I<(outs), (ins sve_prfop:$prfop, PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5), + asm, "\t$prfop, $Pg, [$Zn, $imm5]", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zn; + bits<5> imm5; + bits<4> prfop; + let Inst{31-25} = 0b1100010; + let Inst{24-23} = msz; + let Inst{22-21} = 0b00; + let Inst{20-16} = imm5; + let Inst{15-13} = 0b111; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4} = 0b0; + let Inst{3-0} = prfop; + + let hasSideEffects = 1; +} + +multiclass sve_mem_64b_prfm_vi<bits<2> msz, string asm, Operand imm_ty> { + def NAME : sve_mem_64b_prfm_vi<msz, asm, imm_ty>; + + def : InstAlias<asm # "\t$prfop, $Pg, [$Zn]", + (!cast<Instruction>(NAME) sve_prfop:$prfop, PPR3bAny:$Pg, ZPR64:$Zn, 0), 1>; +} + + +//===----------------------------------------------------------------------===// +// SVE Compute Vector Address Group +//===----------------------------------------------------------------------===// + +class sve_int_bin_cons_misc_0_a<bits<2> opc, bits<2> msz, string asm, + ZPRRegOp zprty, RegisterOperand zprext> +: I<(outs zprty:$Zd), (ins zprty:$Zn, zprext:$Zm), + asm, "\t$Zd, [$Zn, $Zm]", + "", + []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + bits<5> Zm; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = opc; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{15-12} = 0b1010; + let Inst{11-10} = msz; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_bin_cons_misc_0_a_uxtw<bits<2> opc, string asm> { + def _0 : sve_int_bin_cons_misc_0_a<opc, 0b00, asm, ZPR64, ZPR64ExtUXTW8>; + def _1 : sve_int_bin_cons_misc_0_a<opc, 0b01, asm, ZPR64, ZPR64ExtUXTW16>; + def _2 : sve_int_bin_cons_misc_0_a<opc, 0b10, asm, ZPR64, ZPR64ExtUXTW32>; + def _3 : sve_int_bin_cons_misc_0_a<opc, 0b11, asm, ZPR64, ZPR64ExtUXTW64>; +} + +multiclass sve_int_bin_cons_misc_0_a_sxtw<bits<2> opc, string asm> { + def _0 : sve_int_bin_cons_misc_0_a<opc, 0b00, asm, ZPR64, ZPR64ExtSXTW8>; + def _1 : sve_int_bin_cons_misc_0_a<opc, 0b01, asm, ZPR64, ZPR64ExtSXTW16>; + def _2 : sve_int_bin_cons_misc_0_a<opc, 0b10, asm, ZPR64, ZPR64ExtSXTW32>; + def _3 : 
+//===----------------------------------------------------------------------===//
+// SVE Compute Vector Address Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_bin_cons_misc_0_a<bits<2> opc, bits<2> msz, string asm,
+                                ZPRRegOp zprty, RegisterOperand zprext>
+: I<(outs zprty:$Zd), (ins zprty:$Zn, zprext:$Zm),
+  asm, "\t$Zd, [$Zn, $Zm]",
+  "",
+  []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zn;
+  bits<5> Zm;
+  let Inst{31-24} = 0b00000100;
+  let Inst{23-22} = opc;
+  let Inst{21} = 0b1;
+  let Inst{20-16} = Zm;
+  let Inst{15-12} = 0b1010;
+  let Inst{11-10} = msz;
+  let Inst{9-5} = Zn;
+  let Inst{4-0} = Zd;
+}
+
+multiclass sve_int_bin_cons_misc_0_a_uxtw<bits<2> opc, string asm> {
+  def _0 : sve_int_bin_cons_misc_0_a<opc, 0b00, asm, ZPR64, ZPR64ExtUXTW8>;
+  def _1 : sve_int_bin_cons_misc_0_a<opc, 0b01, asm, ZPR64, ZPR64ExtUXTW16>;
+  def _2 : sve_int_bin_cons_misc_0_a<opc, 0b10, asm, ZPR64, ZPR64ExtUXTW32>;
+  def _3 : sve_int_bin_cons_misc_0_a<opc, 0b11, asm, ZPR64, ZPR64ExtUXTW64>;
+}
+
+multiclass sve_int_bin_cons_misc_0_a_sxtw<bits<2> opc, string asm> {
+  def _0 : sve_int_bin_cons_misc_0_a<opc, 0b00, asm, ZPR64, ZPR64ExtSXTW8>;
+  def _1 : sve_int_bin_cons_misc_0_a<opc, 0b01, asm, ZPR64, ZPR64ExtSXTW16>;
+  def _2 : sve_int_bin_cons_misc_0_a<opc, 0b10, asm, ZPR64, ZPR64ExtSXTW32>;
+  def _3 : sve_int_bin_cons_misc_0_a<opc, 0b11, asm, ZPR64, ZPR64ExtSXTW64>;
+}
+
+multiclass sve_int_bin_cons_misc_0_a_32_lsl<bits<2> opc, string asm> {
+  def _0 : sve_int_bin_cons_misc_0_a<opc, 0b00, asm, ZPR32, ZPR32ExtLSL8>;
+  def _1 : sve_int_bin_cons_misc_0_a<opc, 0b01, asm, ZPR32, ZPR32ExtLSL16>;
+  def _2 : sve_int_bin_cons_misc_0_a<opc, 0b10, asm, ZPR32, ZPR32ExtLSL32>;
+  def _3 : sve_int_bin_cons_misc_0_a<opc, 0b11, asm, ZPR32, ZPR32ExtLSL64>;
+}
+
+multiclass sve_int_bin_cons_misc_0_a_64_lsl<bits<2> opc, string asm> {
+  def _0 : sve_int_bin_cons_misc_0_a<opc, 0b00, asm, ZPR64, ZPR64ExtLSL8>;
+  def _1 : sve_int_bin_cons_misc_0_a<opc, 0b01, asm, ZPR64, ZPR64ExtLSL16>;
+  def _2 : sve_int_bin_cons_misc_0_a<opc, 0b10, asm, ZPR64, ZPR64ExtLSL32>;
+  def _3 : sve_int_bin_cons_misc_0_a<opc, 0b11, asm, ZPR64, ZPR64ExtLSL64>;
+}
+
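+// Illustrative sketch, not part of the original file: these multiclasses are
+// the natural hooks for the ADR (compute vector address) forms, e.g.
+// (the opcode value is an assumption, not taken from this patch):
+//
+//   defm ADR_LSL_ZZZ_D_EXAMPLE : sve_int_bin_cons_misc_0_a_64_lsl<0b11, "adr">;
+//
+// The _0.._3 suffix (msz) encodes the shift applied to the index vector,
+// i.e. lsl #0, #1, #2 or #3.
+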
+//===----------------------------------------------------------------------===//
+// SVE Integer Misc - Unpredicated Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_bin_cons_misc_0_b<bits<2> sz, string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm),
+  asm, "\t$Zd, $Zn, $Zm",
+  "",
+  []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zm;
+  bits<5> Zn;
+  let Inst{31-24} = 0b00000100;
+  let Inst{23-22} = sz;
+  let Inst{21} = 0b1;
+  let Inst{20-16} = Zm;
+  let Inst{15-10} = 0b101100;
+  let Inst{9-5} = Zn;
+  let Inst{4-0} = Zd;
+}
+
+multiclass sve_int_bin_cons_misc_0_b<string asm> {
+  def _H : sve_int_bin_cons_misc_0_b<0b01, asm, ZPR16>;
+  def _S : sve_int_bin_cons_misc_0_b<0b10, asm, ZPR32>;
+  def _D : sve_int_bin_cons_misc_0_b<0b11, asm, ZPR64>;
+}
+
+class sve_int_bin_cons_misc_0_c<bits<8> opc, string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins zprty:$Zn),
+  asm, "\t$Zd, $Zn",
+  "",
+  []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zn;
+  let Inst{31-24} = 0b00000100;
+  let Inst{23-22} = opc{7-6};
+  let Inst{21} = 0b1;
+  let Inst{20-16} = opc{5-1};
+  let Inst{15-11} = 0b10111;
+  let Inst{10} = opc{0};
+  let Inst{9-5} = Zn;
+  let Inst{4-0} = Zd;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE Integer Reduction Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_reduce<bits<2> sz8_32, bits<2> fmt, bits<3> opc, string asm,
+                     ZPRRegOp zprty, RegisterClass regtype>
+: I<(outs regtype:$Vd), (ins PPR3bAny:$Pg, zprty:$Zn),
+  asm, "\t$Vd, $Pg, $Zn",
+  "",
+  []>, Sched<[]> {
+  bits<3> Pg;
+  bits<5> Vd;
+  bits<5> Zn;
+  let Inst{31-24} = 0b00000100;
+  let Inst{23-22} = sz8_32;
+  let Inst{21} = 0b0;
+  let Inst{20-19} = fmt;
+  let Inst{18-16} = opc;
+  let Inst{15-13} = 0b001;
+  let Inst{12-10} = Pg;
+  let Inst{9-5} = Zn;
+  let Inst{4-0} = Vd;
+}
+
+multiclass sve_int_reduce_0_saddv<bits<3> opc, string asm> {
+  def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64>;
+  def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64>;
+  def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64>;
+}
+
+multiclass sve_int_reduce_0_uaddv<bits<3> opc, string asm> {
+  def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64>;
+  def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64>;
+  def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64>;
+  def _D : sve_int_reduce<0b11, 0b00, opc, asm, ZPR64, FPR64>;
+}
+
+multiclass sve_int_reduce_1<bits<3> opc, string asm> {
+  def _B : sve_int_reduce<0b00, 0b01, opc, asm, ZPR8, FPR8>;
+  def _H : sve_int_reduce<0b01, 0b01, opc, asm, ZPR16, FPR16>;
+  def _S : sve_int_reduce<0b10, 0b01, opc, asm, ZPR32, FPR32>;
+  def _D : sve_int_reduce<0b11, 0b01, opc, asm, ZPR64, FPR64>;
+}
+
+multiclass sve_int_reduce_2<bits<3> opc, string asm> {
+  def _B : sve_int_reduce<0b00, 0b11, opc, asm, ZPR8, FPR8>;
+  def _H : sve_int_reduce<0b01, 0b11, opc, asm, ZPR16, FPR16>;
+  def _S : sve_int_reduce<0b10, 0b11, opc, asm, ZPR32, FPR32>;
+  def _D : sve_int_reduce<0b11, 0b11, opc, asm, ZPR64, FPR64>;
+}
+
+class sve_int_movprfx_pred<bits<2> sz8_32, bits<3> opc, string asm,
+                           ZPRRegOp zprty, string pg_suffix, dag iops>
+: I<(outs zprty:$Zd), iops,
+  asm, "\t$Zd, $Pg"#pg_suffix#", $Zn",
+  "",
+  []>, Sched<[]> {
+  bits<3> Pg;
+  bits<5> Zd;
+  bits<5> Zn;
+  let Inst{31-24} = 0b00000100;
+  let Inst{23-22} = sz8_32;
+  let Inst{21-19} = 0b010;
+  let Inst{18-16} = opc;
+  let Inst{15-13} = 0b001;
+  let Inst{12-10} = Pg;
+  let Inst{9-5} = Zn;
+  let Inst{4-0} = Zd;
+
+  let ElementSize = zprty.ElementSize;
+}
+
+multiclass sve_int_movprfx_pred_merge<bits<3> opc, string asm> {
+let Constraints = "$Zd = $_Zd" in {
+  def _B : sve_int_movprfx_pred<0b00, opc, asm, ZPR8, "/m",
+                                (ins ZPR8:$_Zd, PPR3bAny:$Pg, ZPR8:$Zn)>;
+  def _H : sve_int_movprfx_pred<0b01, opc, asm, ZPR16, "/m",
+                                (ins ZPR16:$_Zd, PPR3bAny:$Pg, ZPR16:$Zn)>;
+  def _S : sve_int_movprfx_pred<0b10, opc, asm, ZPR32, "/m",
+                                (ins ZPR32:$_Zd, PPR3bAny:$Pg, ZPR32:$Zn)>;
+  def _D : sve_int_movprfx_pred<0b11, opc, asm, ZPR64, "/m",
+                                (ins ZPR64:$_Zd, PPR3bAny:$Pg, ZPR64:$Zn)>;
+}
+}
+
+multiclass sve_int_movprfx_pred_zero<bits<3> opc, string asm> {
+  def _B : sve_int_movprfx_pred<0b00, opc, asm, ZPR8, "/z",
+                                (ins PPR3bAny:$Pg, ZPR8:$Zn)>;
+  def _H : sve_int_movprfx_pred<0b01, opc, asm, ZPR16, "/z",
+                                (ins PPR3bAny:$Pg, ZPR16:$Zn)>;
+  def _S : sve_int_movprfx_pred<0b10, opc, asm, ZPR32, "/z",
+                                (ins PPR3bAny:$Pg, ZPR32:$Zn)>;
+  def _D : sve_int_movprfx_pred<0b11, opc, asm, ZPR64, "/z",
+                                (ins PPR3bAny:$Pg, ZPR64:$Zn)>;
+}
+
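+// Illustrative sketch, not part of the original file: the predicated MOVPRFX
+// forms above would typically be instantiated as (opcodes assumed):
+//
+//   defm MOVPRFX_ZPmZ_EXAMPLE : sve_int_movprfx_pred_merge<0b001, "movprfx">;
+//   defm MOVPRFX_ZPzZ_EXAMPLE : sve_int_movprfx_pred_zero<0b000, "movprfx">;
+//
+// The merging variant ties $Zd to an extra $_Zd input so inactive elements
+// of the destination are preserved, while the zeroing variant takes only the
+// governing predicate and the source vector.
+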
+//===----------------------------------------------------------------------===//
+// SVE Propagate Break Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_brkp<bits<2> opc, string asm>
+: I<(outs PPR8:$Pd), (ins PPRAny:$Pg, PPR8:$Pn, PPR8:$Pm),
+  asm, "\t$Pd, $Pg/z, $Pn, $Pm",
+  "",
+  []>, Sched<[]> {
+  bits<4> Pd;
+  bits<4> Pg;
+  bits<4> Pm;
+  bits<4> Pn;
+  let Inst{31-24} = 0b00100101;
+  let Inst{23} = 0b0;
+  let Inst{22} = opc{1};
+  let Inst{21-20} = 0b00;
+  let Inst{19-16} = Pm;
+  let Inst{15-14} = 0b11;
+  let Inst{13-10} = Pg;
+  let Inst{9} = 0b0;
+  let Inst{8-5} = Pn;
+  let Inst{4} = opc{0};
+  let Inst{3-0} = Pd;
+
+  let Defs = !if(!eq (opc{1}, 1), [NZCV], []);
+}
+
+
+//===----------------------------------------------------------------------===//
+// SVE Partition Break Group
+//===----------------------------------------------------------------------===//
+
+class sve_int_brkn<bit S, string asm>
+: I<(outs PPR8:$Pdm), (ins PPRAny:$Pg, PPR8:$Pn, PPR8:$_Pdm),
+  asm, "\t$Pdm, $Pg/z, $Pn, $_Pdm",
+  "",
+  []>, Sched<[]> {
+  bits<4> Pdm;
+  bits<4> Pg;
+  bits<4> Pn;
+  let Inst{31-23} = 0b001001010;
+  let Inst{22} = S;
+  let Inst{21-14} = 0b01100001;
+  let Inst{13-10} = Pg;
+  let Inst{9} = 0b0;
+  let Inst{8-5} = Pn;
+  let Inst{4} = 0b0;
+  let Inst{3-0} = Pdm;
+
+  let Constraints = "$Pdm = $_Pdm";
+  let Defs = !if(!eq (S, 0b1), [NZCV], []);
+}
+
+class sve_int_break<bits<3> opc, string asm, string suffix, dag iops>
+: I<(outs PPR8:$Pd), iops,
+  asm, "\t$Pd, $Pg"#suffix#", $Pn",
+  "",
+  []>, Sched<[]> {
+  bits<4> Pd;
+  bits<4> Pg;
+  bits<4> Pn;
+  let Inst{31-24} = 0b00100101;
+  let Inst{23-22} = opc{2-1};
+  let Inst{21-14} = 0b01000001;
+  let Inst{13-10} = Pg;
+  let Inst{9} = 0b0;
+  let Inst{8-5} = Pn;
+  let Inst{4} = opc{0};
+  let Inst{3-0} = Pd;
+
+  let Constraints = !if(!eq (opc{0}, 1), "$Pd = $_Pd", "");
+  let Defs = !if(!eq (opc{1}, 1), [NZCV], []);
+
+}
+
+multiclass sve_int_break_m<bits<3> opc, string asm> {
+  def NAME : sve_int_break<opc, asm, "/m", (ins PPR8:$_Pd, PPRAny:$Pg, PPR8:$Pn)>;
+}
+
+multiclass sve_int_break_z<bits<3> opc, string asm> {
+  def NAME : sve_int_break<opc, asm, "/z", (ins PPRAny:$Pg, PPR8:$Pn)>;
+}
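+
+// Illustrative sketch, not part of the original file: the partition-break
+// formats above are typically instantiated in merging/zeroing pairs, e.g.
+// (opcodes assumed):
+//
+//   defm BRKA_PPmP_EXAMPLE  : sve_int_break_m<0b001, "brka">;
+//   defm BRKAS_PPzP_EXAMPLE : sve_int_break_z<0b010, "brkas">;
+//
+// In sve_int_break, opc{0} selects the merging form (and therefore the
+// "$Pd = $_Pd" tie), while opc{1} marks the flag-setting variant that
+// defines NZCV.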