From af1a266670d040d2f4083ff309d732d648afba2a Mon Sep 17 00:00:00 2001 From: Angelos Mouzakitis Date: Tue, 10 Oct 2023 14:33:42 +0000 Subject: Add submodule dependency files Change-Id: Iaf8d18082d3991dec7c0ebbea540f092188eb4ec --- .../tablegen/X86/back/X86ScheduleBtVer2.td | 682 +++++++++++++++++++++ 1 file changed, 682 insertions(+) create mode 100644 capstone/suite/synctools/tablegen/X86/back/X86ScheduleBtVer2.td (limited to 'capstone/suite/synctools/tablegen/X86/back/X86ScheduleBtVer2.td') diff --git a/capstone/suite/synctools/tablegen/X86/back/X86ScheduleBtVer2.td b/capstone/suite/synctools/tablegen/X86/back/X86ScheduleBtVer2.td new file mode 100644 index 000000000..719e71cd2 --- /dev/null +++ b/capstone/suite/synctools/tablegen/X86/back/X86ScheduleBtVer2.td @@ -0,0 +1,682 @@ +//=- X86ScheduleBtVer2.td - X86 BtVer2 (Jaguar) Scheduling ---*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for AMD btver2 (Jaguar) to support +// instruction scheduling and other instruction cost heuristics. Based off AMD Software +// Optimization Guide for AMD Family 16h Processors & Instruction Latency appendix. +// +//===----------------------------------------------------------------------===// + +def BtVer2Model : SchedMachineModel { + // All x86 instructions are modeled as a single micro-op, and btver2 can + // decode 2 instructions per cycle. + let IssueWidth = 2; + let MicroOpBufferSize = 64; // Retire Control Unit + let LoadLatency = 5; // FPU latency (worst case; cf. integer 3-cycle latency) + let HighLatency = 25; + let MispredictPenalty = 14; // Minimum branch misdirection penalty + let PostRAScheduler = 1; + + // FIXME: SSE4/AVX is unimplemented. 
This flag is set to allow + // the scheduler to assign a default model to unrecognized opcodes. + let CompleteModel = 0; +} + +let SchedModel = BtVer2Model in { + +// Jaguar can issue up to 6 micro-ops in one cycle +def JALU0 : ProcResource<1>; // Integer Pipe0: integer ALU0 (also handles FP->INT jam) +def JALU1 : ProcResource<1>; // Integer Pipe1: integer ALU1/MUL/DIV +def JLAGU : ProcResource<1>; // Integer Pipe2: LAGU +def JSAGU : ProcResource<1>; // Integer Pipe3: SAGU (also handles 3-operand LEA) +def JFPU0 : ProcResource<1>; // Vector/FPU Pipe0: VALU0/VIMUL/FPA +def JFPU1 : ProcResource<1>; // Vector/FPU Pipe1: VALU1/STC/FPM + +// The Integer PRF for Jaguar is 64 entries, and it holds the architectural and +// speculative version of the 64-bit integer registers. +// Reference: www.realworldtech.com/jaguar/4/ +// +// The processor always keeps the different parts of an integer register +// together. An instruction that writes to a part of a register will therefore +// have a false dependence on any previous write to the same register or any +// part of it. +// Reference: Section 21.10 "AMD Bobcat and Jaguar pipeline: Partial register +// access" - Agner Fog's "microarchitecture.pdf". +def JIntegerPRF : RegisterFile<64, [GR64, CCR]>; + +// The Jaguar FP Retire Queue renames SIMD and FP uOps onto a pool of 72 SSE +// registers. Operations on 256-bit data types are cracked into two COPs. +// Reference: www.realworldtech.com/jaguar/4/ +def JFpuPRF: RegisterFile<72, [VR64, VR128, VR256], [1, 1, 2]>; + +// The retire control unit (RCU) can track up to 64 macro-ops in-flight. It can +// retire up to two macro-ops per cycle. 
+// Reference: "Software Optimization Guide for AMD Family 16h Processors" +def JRCU : RetireControlUnit<64, 2>; + +// Integer Pipe Scheduler +def JALU01 : ProcResGroup<[JALU0, JALU1]> { + let BufferSize=20; +} + +// AGU Pipe Scheduler +def JLSAGU : ProcResGroup<[JLAGU, JSAGU]> { + let BufferSize=12; +} + +// FPU Pipe Scheduler +def JFPU01 : ProcResGroup<[JFPU0, JFPU1]> { + let BufferSize=18; +} + +// Functional units +def JDiv : ProcResource<1>; // integer division +def JMul : ProcResource<1>; // integer multiplication +def JVALU0 : ProcResource<1>; // vector integer +def JVALU1 : ProcResource<1>; // vector integer +def JVIMUL : ProcResource<1>; // vector integer multiplication +def JSTC : ProcResource<1>; // vector store/convert +def JFPM : ProcResource<1>; // FP multiplication +def JFPA : ProcResource<1>; // FP addition + +// Functional unit groups +def JFPX : ProcResGroup<[JFPA, JFPM]>; +def JVALU : ProcResGroup<[JVALU0, JVALU1]>; + +// Integer loads are 3 cycles, so ReadAfterLd registers needn't be available until 3 +// cycles after the memory operand. +def : ReadAdvance; + +// Many SchedWrites are defined in pairs with and without a folded load. +// Instructions with folded loads are usually micro-fused, so they only appear +// as two micro-ops when dispatched by the schedulers. +// This multiclass defines the resource usage for variants with and without +// folded loads. +multiclass JWriteResIntPair ExePorts, + int Lat, list Res = [], int UOps = 1> { + // Register variant is using a single cycle on ExePort. + def : WriteRes { + let Latency = Lat; + let ResourceCycles = Res; + let NumMicroOps = UOps; + } + + // Memory variant also uses a cycle on JLAGU and adds 3 cycles to the + // latency. 
+ def : WriteRes { + let Latency = !add(Lat, 3); + let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res)); + let NumMicroOps = UOps; + } +} + +multiclass JWriteResFpuPair ExePorts, + int Lat, list Res = [], int UOps = 1> { + // Register variant is using a single cycle on ExePort. + def : WriteRes { + let Latency = Lat; + let ResourceCycles = Res; + let NumMicroOps = UOps; + } + + // Memory variant also uses a cycle on JLAGU and adds 5 cycles to the + // latency. + def : WriteRes { + let Latency = !add(Lat, 5); + let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res)); + let NumMicroOps = UOps; + } +} + +multiclass JWriteResYMMPair ExePorts, + int Lat, list Res = [2], int UOps = 2> { + // Register variant is using a single cycle on ExePort. + def : WriteRes { + let Latency = Lat; + let ResourceCycles = Res; + let NumMicroOps = UOps; + } + + // Memory variant also uses 2 cycles on JLAGU and adds 5 cycles to the + // latency. + def : WriteRes { + let Latency = !add(Lat, 5); + let ResourceCycles = !listconcat([2], Res); + let NumMicroOps = UOps; + } +} + +// A folded store needs a cycle on the SAGU for the store data. +def : WriteRes; + +//////////////////////////////////////////////////////////////////////////////// +// Arithmetic. +//////////////////////////////////////////////////////////////////////////////// + +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; // i8/i16/i32 multiplication +defm : JWriteResIntPair; // i64 multiplication +defm : X86WriteRes; + +defm : X86WriteRes; +defm : X86WriteRes; + +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; + +defm : JWriteResIntPair; + +defm : JWriteResIntPair; // Conditional move. +defm : JWriteResIntPair; // Conditional (CF + ZF flag) move. +defm : X86WriteRes; // x87 conditional move. +def : WriteRes; // Setcc. 
+def : WriteRes; +def : WriteRes; +def : WriteRes; + +// This is for simple LEAs with one or two input operands. +def : WriteRes; + +// Bit counts. +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; + +// BMI1 BEXTR, BMI2 BZHI +defm : JWriteResIntPair; +defm : X86WriteResPairUnsupported; + +//////////////////////////////////////////////////////////////////////////////// +// Integer shifts and rotates. +//////////////////////////////////////////////////////////////////////////////// + +defm : JWriteResIntPair; + +// SHLD/SHRD. +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; + +//////////////////////////////////////////////////////////////////////////////// +// Loads, stores, and moves, not folded with other operations. +//////////////////////////////////////////////////////////////////////////////// + +def : WriteRes { let Latency = 5; } +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// Load/store MXCSR. +// FIXME: These are copy and pasted from WriteLoad/Store. +def : WriteRes { let Latency = 5; } +def : WriteRes; + +// Treat misc copies as a move. +def : InstRW<[WriteMove], (instrs COPY)>; + +//////////////////////////////////////////////////////////////////////////////// +// Idioms that clear a register, like xorps %xmm0, %xmm0. +// These can often bypass execution ports completely. +//////////////////////////////////////////////////////////////////////////////// + +def : WriteRes; + +//////////////////////////////////////////////////////////////////////////////// +// Branches don't produce values, so they have no latency, but they still +// consume resources. Indirect branches can fold loads. +//////////////////////////////////////////////////////////////////////////////// + +defm : JWriteResIntPair; + +//////////////////////////////////////////////////////////////////////////////// +// Special case scheduling classes. 
+//////////////////////////////////////////////////////////////////////////////// + +def : WriteRes { let Latency = 100; } +def : WriteRes { let Latency = 100; } +def : WriteRes; + +// Nops don't have dependencies, so there's no actual latency, but we set this +// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle. +def : WriteRes { let Latency = 1; } + +//////////////////////////////////////////////////////////////////////////////// +// Floating point. This covers both scalar and vector operations. +//////////////////////////////////////////////////////////////////////////////// + +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; + +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; + +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; + +defm : X86WriteRes; + +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; +defm : 
X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : X86WriteResPairUnsupported; + +//////////////////////////////////////////////////////////////////////////////// +// Conversions. 
+//////////////////////////////////////////////////////////////////////////////// + +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +// FIXME: f+3 ST, LD+STC latency +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteResUnsupported; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteResUnsupported; + +//////////////////////////////////////////////////////////////////////////////// +// Vector integer operations. 
+//////////////////////////////////////////////////////////////////////////////// + +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; + +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; + +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; + +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : X86WriteResPairUnsupported; +defm : 
X86WriteResPairUnsupported; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; + +//////////////////////////////////////////////////////////////////////////////// +// Vector insert/extract operations. +//////////////////////////////////////////////////////////////////////////////// + +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; + +//////////////////////////////////////////////////////////////////////////////// +// SSE42 String instructions. +//////////////////////////////////////////////////////////////////////////////// + +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; + +//////////////////////////////////////////////////////////////////////////////// +// MOVMSK Instructions. +//////////////////////////////////////////////////////////////////////////////// + +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 3; } +defm : X86WriteResUnsupported; +def : WriteRes { let Latency = 3; } + +//////////////////////////////////////////////////////////////////////////////// +// AES Instructions. +//////////////////////////////////////////////////////////////////////////////// + +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; + +//////////////////////////////////////////////////////////////////////////////// +// Horizontal add/sub instructions. +//////////////////////////////////////////////////////////////////////////////// + +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : X86WriteResPairUnsupported; + +//////////////////////////////////////////////////////////////////////////////// +// Carry-less multiplication instructions. 
+//////////////////////////////////////////////////////////////////////////////// + +defm : JWriteResFpuPair; + +//////////////////////////////////////////////////////////////////////////////// +// SSE4A instructions. +//////////////////////////////////////////////////////////////////////////////// + +def JWriteINSERTQ: SchedWriteRes<[JFPU01, JVALU]> { + let Latency = 2; + let ResourceCycles = [1, 4]; +} +def : InstRW<[JWriteINSERTQ], (instrs INSERTQ, INSERTQI)>; + +//////////////////////////////////////////////////////////////////////////////// +// AVX instructions. +//////////////////////////////////////////////////////////////////////////////// + +def JWriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> { + let Latency = 6; + let ResourceCycles = [1, 2, 4]; + let NumMicroOps = 2; +} +def : InstRW<[JWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm, + VBROADCASTSSYrm)>; + +def JWriteJVZEROALL: SchedWriteRes<[]> { + let Latency = 90; + let NumMicroOps = 73; +} +def : InstRW<[JWriteJVZEROALL], (instrs VZEROALL)>; + +def JWriteJVZEROUPPER: SchedWriteRes<[]> { + let Latency = 46; + let NumMicroOps = 37; +} +def : InstRW<[JWriteJVZEROUPPER], (instrs VZEROUPPER)>; + +/////////////////////////////////////////////////////////////////////////////// +// SchedWriteVariant definitions. +/////////////////////////////////////////////////////////////////////////////// + +def JWriteZeroLatency : SchedWriteRes<[]> { + let Latency = 0; +} + +// Certain instructions that use the same register for both source +// operands do not have a real dependency on the previous contents of the +// register, and thus, do not have to wait before completing. They can be +// optimized out at register renaming stage. +// Reference: Section 10.8 of the "Software Optimization Guide for AMD Family +// 15h Processors". +// Reference: Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs", +// Section 21.8 [Dependency-breaking instructions]. 
+ +def JWriteZeroIdiom : SchedWriteVariant<[ + SchedVar, [JWriteZeroLatency]>, + SchedVar, [WriteALU]> +]>; +def : InstRW<[JWriteZeroIdiom], (instrs SUB32rr, SUB64rr, + XOR32rr, XOR64rr)>; + +def JWriteFZeroIdiom : SchedWriteVariant<[ + SchedVar, [JWriteZeroLatency]>, + SchedVar, [WriteFLogic]> +]>; +def : InstRW<[JWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr, VXORPDrr, + ANDNPSrr, VANDNPSrr, + ANDNPDrr, VANDNPDrr)>; + +def JWriteVZeroIdiomLogic : SchedWriteVariant<[ + SchedVar, [JWriteZeroLatency]>, + SchedVar, [WriteVecLogic]> +]>; +def : InstRW<[JWriteVZeroIdiomLogic], (instrs MMX_PXORirr, MMX_PANDNirr)>; + +def JWriteVZeroIdiomLogicX : SchedWriteVariant<[ + SchedVar, [JWriteZeroLatency]>, + SchedVar, [WriteVecLogicX]> +]>; +def : InstRW<[JWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr, + PANDNrr, VPANDNrr)>; + +def JWriteVZeroIdiomALU : SchedWriteVariant<[ + SchedVar, [JWriteZeroLatency]>, + SchedVar, [WriteVecALU]> +]>; +def : InstRW<[JWriteVZeroIdiomALU], (instrs MMX_PSUBBirr, MMX_PSUBDirr, + MMX_PSUBQirr, MMX_PSUBWirr, + MMX_PCMPGTBirr, MMX_PCMPGTDirr, + MMX_PCMPGTWirr)>; + +def JWriteVZeroIdiomALUX : SchedWriteVariant<[ + SchedVar, [JWriteZeroLatency]>, + SchedVar, [WriteVecALUX]> +]>; +def : InstRW<[JWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr, + PSUBDrr, VPSUBDrr, + PSUBQrr, VPSUBQrr, + PSUBWrr, VPSUBWrr, + PCMPGTBrr, VPCMPGTBrr, + PCMPGTDrr, VPCMPGTDrr, + PCMPGTQrr, VPCMPGTQrr, + PCMPGTWrr, VPCMPGTWrr)>; + +// This write is used for slow LEA instructions. +def JWrite3OpsLEA : SchedWriteRes<[JALU1, JSAGU]> { + let Latency = 2; +} + +// On Jaguar, a slow LEA is either a 3Ops LEA (base, index, offset), or an LEA +// with a `Scale` value different than 1. +def JSlowLEAPredicate : MCSchedPredicate< + CheckAny<[ + // A 3-operand LEA (base, index, offset). + IsThreeOperandsLEAFn, + // An LEA with a "Scale" different than 1. 
+ CheckAll<[ + CheckIsImmOperand<2>, + CheckNot> + ]> + ]> +>; + +def JWriteLEA : SchedWriteVariant<[ + SchedVar, + SchedVar, [WriteLEA]> +]>; + +def : InstRW<[JWriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>; + +def JSlowLEA16r : SchedWriteRes<[JALU01]> { + let Latency = 3; + let ResourceCycles = [4]; +} + +def : InstRW<[JSlowLEA16r], (instrs LEA16r)>; + +} // SchedModel -- cgit 1.2.3-korg