aboutsummaryrefslogtreecommitdiffstats
path: root/capstone/suite/synctools/tablegen/PPC/P9InstrResources.td
diff options
context:
space:
mode:
Diffstat (limited to 'capstone/suite/synctools/tablegen/PPC/P9InstrResources.td')
-rw-r--r--capstone/suite/synctools/tablegen/PPC/P9InstrResources.td1420
1 files changed, 1420 insertions, 0 deletions
diff --git a/capstone/suite/synctools/tablegen/PPC/P9InstrResources.td b/capstone/suite/synctools/tablegen/PPC/P9InstrResources.td
new file mode 100644
index 000000000..c6cbb9037
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/PPC/P9InstrResources.td
@@ -0,0 +1,1420 @@
+//===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the resources required by P9 instructions. This is part of
+// the P9 processor model used for instruction scheduling. This file should
+// contain all of the instructions that may be used on Power 9. This is not
+// just instructions that are new on Power 9 but also instructions that were
+// available on earlier architectures and are still used in Power 9.
+//
+// The makeup of the P9 CPU is modeled as follows:
+// - Each CPU is made up of two superslices.
+// - Each superslice is made up of two slices. Therefore, there are 4 slices
+// for each CPU.
+// - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
+// - Each CPU has:
+// - One CY (Crypto) unit P9_CY_*
+// - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
+// - Two PM (Permute) units. One on each superslice. P9_PM_*
+// - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
+// - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
+// - Four DP (Floating Point) units. One on each slice. P9_DP_*
+// This also includes fixed point multiply add.
+// - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
+// - Four Load/Store Queues. P9_LS_*
+// - Each set of instructions will require a number of these resources.
+//===----------------------------------------------------------------------===//
+
+// Two cycle ALU vector operation that uses an entire superslice.
+// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
+// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
+// Opcodes are selected either by exact name or by an instregex pattern; the
+// trailing `$` stops a pattern from also matching longer opcode names.
+def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
+ DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "VADDU(B|H|W|D)M$"),
+ (instregex "VAND(C)?$"),
+ (instregex "VEXTS(B|H|W)2(D|W)(s)?$"),
+ (instregex "V_SET0(B|H)?$"),
+ (instregex "VS(R|L)(B|H|W|D)$"),
+ (instregex "VSUBU(B|H|W|D)M$"),
+ (instregex "VPOPCNT(B|H)$"),
+ (instregex "VRL(B|H|W|D)$"),
+ (instregex "VSRA(B|H|W|D)$"),
+ (instregex "XV(N)?ABS(D|S)P$"),
+ (instregex "XVCPSGN(D|S)P$"),
+ (instregex "XV(I|X)EXP(D|S)P$"),
+ (instregex "VRL(D|W)(MI|NM)$"),
+ (instregex "VMRG(E|O)W$"),
+ MTVSRDD,
+ VEQV,
+ VNAND,
+ VNEGD,
+ VNEGW,
+ VNOR,
+ VOR,
+ VORC,
+ VSEL,
+ VXOR,
+ XVNEGDP,
+ XVNEGSP,
+ XXLAND,
+ XXLANDC,
+ XXLEQV,
+ XXLNAND,
+ XXLNOR,
+ XXLOR,
+ XXLORf,
+ XXLORC,
+ XXLXOR,
+ XXLXORdpz,
+ XXLXORspz,
+ XXLXORz,
+ XXSEL,
+ XSABSQP,
+ XSCPSGNQP,
+ XSIEXPQP,
+ XSNABSQP,
+ XSNEGQP,
+ XSXEXPQP
+)>;
+
+// Restricted Dispatch ALU operation for 3 cycles. The operation runs on a
+// single slice. However, since it is Restricted it requires all 3 dispatches
+// (DISP) for that superslice.
+def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "TABORT(D|W)C(I)?$"),
+ (instregex "MTFSB(0|1)$"),
+ (instregex "MFFSC(D)?RN(I)?$"),
+ (instregex "CMPRB(8)?$"),
+ (instregex "TD(I)?$"),
+ (instregex "TW(I)?$"),
+ (instregex "FCMPU(S|D)$"),
+ (instregex "XSTSTDC(S|D)P$"),
+ FTDIV,
+ FTSQRT,
+ CMPEQB
+)>;
+
+// Standard Dispatch ALU operation for 3 cycles. Only one slice used.
+// Two dispatches (DISP) are consumed, one fewer than the Restricted form.
+def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "XSMAX(C|J)?DP$"),
+ (instregex "XSMIN(C|J)?DP$"),
+ (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
+ (instregex "CNT(L|T)Z(D|W)(8)?(o)?$"),
+ (instregex "POPCNT(D|W)$"),
+ (instregex "CMPB(8)?$"),
+ XSTDIVDP,
+ XSTSQRTDP,
+ XSXSIGDP,
+ XSCVSPDPN,
+ SETB,
+ BPERMD
+)>;
+
+// Standard Dispatch ALU operation for 2 cycles. Only one slice used.
+// Two dispatches (DISP) are consumed.
+// NOTE(review): some patterns below overlap (e.g. "NEG(8)?$" is already
+// covered by "NEG(8)?(o)?$", and "OR(C)?(8)?(o)?$" overlaps
+// "(X)?OR(I|IS)?(8)?(o)?$"); the duplicates look redundant -- confirm.
+def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "S(L|R)D$"),
+ (instregex "SRAD(I)?$"),
+ (instregex "EXTSWSLI$"),
+ (instregex "MFV(S)?RD$"),
+ (instregex "MTVSRD$"),
+ (instregex "MTVSRW(A|Z)$"),
+ (instregex "CMP(WI|LWI|W|LW)(8)?$"),
+ (instregex "CMP(L)?D(I)?$"),
+ (instregex "SUBF(I)?C(8)?$"),
+ (instregex "ANDI(S)?o(8)?$"),
+ (instregex "ADDC(8)?$"),
+ (instregex "ADDIC(8)?(o)?$"),
+ (instregex "ADD(8|4)(o)?$"),
+ (instregex "ADD(E|ME|ZE)(8)?(o)?$"),
+ (instregex "SUBF(E|ME|ZE)?(8)?(o)?$"),
+ (instregex "NEG(8)?(o)?$"),
+ (instregex "POPCNTB$"),
+ (instregex "ADD(I|IS)?(8)?$"),
+ (instregex "LI(S)?(8)?$"),
+ (instregex "(X)?OR(I|IS)?(8)?(o)?$"),
+ (instregex "NAND(8)?(o)?$"),
+ (instregex "AND(C)?(8)?(o)?$"),
+ (instregex "NOR(8)?(o)?$"),
+ (instregex "OR(C)?(8)?(o)?$"),
+ (instregex "EQV(8)?(o)?$"),
+ (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(o)?$"),
+ (instregex "ADD(4|8)(TLS)?(_)?$"),
+ (instregex "NEG(8)?$"),
+ (instregex "ADDI(S)?toc(HA|L)$"),
+ COPY,
+ MCRF,
+ MCRXRX,
+ XSNABSDP,
+ XSXEXPDP,
+ XSABSDP,
+ XSNEGDP,
+ XSCPSGNDP,
+ MFVSRWZ,
+ SRADI_32,
+ RLDIC,
+ RFEBB,
+ LA,
+ TBEGIN,
+ TRECHKPT,
+ NOP,
+ WAIT
+)>;
+
+// Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
+// single slice. However, since it is Restricted it requires all 3 dispatches
+// (DISP) for that superslice.
+def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "RLDC(L|R)$"),
+ (instregex "RLWIMI(8)?$"),
+ (instregex "RLDIC(L|R)(_32)?(_64)?$"),
+ (instregex "M(F|T)OCRF(8)?$"),
+ (instregex "CR(6)?(UN)?SET$"),
+ (instregex "CR(N)?(OR|AND)(C)?$"),
+ (instregex "S(L|R)W(8)?$"),
+ (instregex "RLW(INM|NM)(8)?$"),
+ (instregex "F(N)?ABS(D|S)$"),
+ (instregex "FNEG(D|S)$"),
+ (instregex "FCPSGN(D|S)$"),
+ (instregex "SRAW(I)?$"),
+ (instregex "ISEL(8)?$"),
+ RLDIMI,
+ XSIEXPDP,
+ FMR,
+ CREQV,
+ CRXOR,
+ TRECLAIM,
+ TSR,
+ TABORT
+)>;
+
+// Three cycle ALU vector operation that uses an entire superslice.
+// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
+// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
+// Same resource shape as the two cycle ALUE/ALUO group, with 3 cycle units.
+def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C,
+ DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "M(T|F)VSCR$"),
+ (instregex "VCMPNEZ(B|H|W)$"),
+ (instregex "VCMPEQU(B|H|W|D)$"),
+ (instregex "VCMPNE(B|H|W)$"),
+ (instregex "VABSDU(B|H|W)$"),
+ (instregex "VADDU(B|H|W)S$"),
+ (instregex "VAVG(S|U)(B|H|W)$"),
+ (instregex "VCMP(EQ|GE|GT)FP(o)?$"),
+ (instregex "VCMPBFP(o)?$"),
+ (instregex "VC(L|T)Z(B|H|W|D)$"),
+ (instregex "VADDS(B|H|W)S$"),
+ (instregex "V(MIN|MAX)FP$"),
+ (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"),
+ VBPERMD,
+ VADDCUW,
+ VPOPCNTW,
+ VPOPCNTD,
+ VPRTYBD,
+ VPRTYBW,
+ VSHASIGMAD,
+ VSHASIGMAW,
+ VSUBSBS,
+ VSUBSHS,
+ VSUBSWS,
+ VSUBUBS,
+ VSUBUHS,
+ VSUBUWS,
+ VSUBCUW,
+ VCMPGTSB,
+ VCMPGTSBo,
+ VCMPGTSD,
+ VCMPGTSDo,
+ VCMPGTSH,
+ VCMPGTSHo,
+ VCMPGTSW,
+ VCMPGTSWo,
+ VCMPGTUB,
+ VCMPGTUBo,
+ VCMPGTUD,
+ VCMPGTUDo,
+ VCMPGTUH,
+ VCMPGTUHo,
+ VCMPGTUW,
+ VCMPGTUWo,
+ VCMPNEBo,
+ VCMPNEHo,
+ VCMPNEWo,
+ VCMPNEZBo,
+ VCMPNEZHo,
+ VCMPNEZWo,
+ VCMPEQUBo,
+ VCMPEQUDo,
+ VCMPEQUHo,
+ VCMPEQUWo,
+ XVCMPEQDP,
+ XVCMPEQDPo,
+ XVCMPEQSP,
+ XVCMPEQSPo,
+ XVCMPGEDP,
+ XVCMPGEDPo,
+ XVCMPGESP,
+ XVCMPGESPo,
+ XVCMPGTDP,
+ XVCMPGTDPo,
+ XVCMPGTSP,
+ XVCMPGTSPo,
+ XVMAXDP,
+ XVMAXSP,
+ XVMINDP,
+ XVMINSP,
+ XVTDIVDP,
+ XVTDIVSP,
+ XVTSQRTDP,
+ XVTSQRTSP,
+ XVTSTDCDP,
+ XVTSTDCSP,
+ XVXSIGDP,
+ XVXSIGSP
+)>;
+
+// 7 cycle DP vector operation that uses an entire superslice.
+// Uses both DP units (the even DPE and odd DPO units), two pipelines
+// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
+// The list also holds the VMUL*, VMSUM* and VSUM* opcodes; per the file header
+// the DP unit also covers fixed point multiply add.
+def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C,
+ DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ VADDFP,
+ VCTSXS,
+ VCTSXS_0,
+ VCTUXS,
+ VCTUXS_0,
+ VEXPTEFP,
+ VLOGEFP,
+ VMADDFP,
+ VMHADDSHS,
+ VNMSUBFP,
+ VREFP,
+ VRFIM,
+ VRFIN,
+ VRFIP,
+ VRFIZ,
+ VRSQRTEFP,
+ VSUBFP,
+ XVADDDP,
+ XVADDSP,
+ XVCVDPSP,
+ XVCVDPSXDS,
+ XVCVDPSXWS,
+ XVCVDPUXDS,
+ XVCVDPUXWS,
+ XVCVHPSP,
+ XVCVSPDP,
+ XVCVSPHP,
+ XVCVSPSXDS,
+ XVCVSPSXWS,
+ XVCVSPUXDS,
+ XVCVSPUXWS,
+ XVCVSXDDP,
+ XVCVSXDSP,
+ XVCVSXWDP,
+ XVCVSXWSP,
+ XVCVUXDDP,
+ XVCVUXDSP,
+ XVCVUXWDP,
+ XVCVUXWSP,
+ XVMADDADP,
+ XVMADDASP,
+ XVMADDMDP,
+ XVMADDMSP,
+ XVMSUBADP,
+ XVMSUBASP,
+ XVMSUBMDP,
+ XVMSUBMSP,
+ XVMULDP,
+ XVMULSP,
+ XVNMADDADP,
+ XVNMADDASP,
+ XVNMADDMDP,
+ XVNMADDMSP,
+ XVNMSUBADP,
+ XVNMSUBASP,
+ XVNMSUBMDP,
+ XVNMSUBMSP,
+ XVRDPI,
+ XVRDPIC,
+ XVRDPIM,
+ XVRDPIP,
+ XVRDPIZ,
+ XVREDP,
+ XVRESP,
+ XVRSPI,
+ XVRSPIC,
+ XVRSPIM,
+ XVRSPIP,
+ XVRSPIZ,
+ XVRSQRTEDP,
+ XVRSQRTESP,
+ XVSUBDP,
+ XVSUBSP,
+ VCFSX,
+ VCFSX_0,
+ VCFUX,
+ VCFUX_0,
+ VMHRADDSHS,
+ VMLADDUHM,
+ VMSUMMBM,
+ VMSUMSHM,
+ VMSUMSHS,
+ VMSUMUBM,
+ VMSUMUHM,
+ VMSUMUHS,
+ VMULESB,
+ VMULESH,
+ VMULESW,
+ VMULEUB,
+ VMULEUH,
+ VMULEUW,
+ VMULOSB,
+ VMULOSH,
+ VMULOSW,
+ VMULOUB,
+ VMULOUH,
+ VMULOUW,
+ VMULUWM,
+ VSUM2SWS,
+ VSUM4SBS,
+ VSUM4SHS,
+ VSUM4UBS,
+ VSUMSWS
+)>;
+
+
+// 5 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
+// dispatch units for the superslice.
+// Applies to the MADD* and MUL* opcodes matched by the patterns below.
+def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "MADD(HD|HDU|LD)$"),
+ (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?$")
+)>;
+
+// 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
+// dispatch units for the superslice.
+// The record forms of these opcodes (names ending in "o") are not matched
+// here because every pattern ends in `$` before that suffix.
+def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ FRSP,
+ (instregex "FRI(N|P|Z|M)(D|S)$"),
+ (instregex "FRE(S)?$"),
+ (instregex "FADD(S)?$"),
+ (instregex "FMSUB(S)?$"),
+ (instregex "FMADD(S)?$"),
+ (instregex "FSUB(S)?$"),
+ (instregex "FCFID(U)?(S)?$"),
+ (instregex "FCTID(U)?(Z)?$"),
+ (instregex "FCTIW(U)?(Z)?$"),
+ (instregex "FRSQRTE(S)?$"),
+ FNMADDS,
+ FNMADD,
+ FNMSUBS,
+ FNMSUB,
+ FSELD,
+ FSELS,
+ FMULS,
+ FMUL,
+ XSMADDADP,
+ XSMADDASP,
+ XSMADDMDP,
+ XSMADDMSP,
+ XSMSUBADP,
+ XSMSUBASP,
+ XSMSUBMDP,
+ XSMSUBMSP,
+ XSMULDP,
+ XSMULSP,
+ XSNMADDADP,
+ XSNMADDASP,
+ XSNMADDMDP,
+ XSNMADDMSP,
+ XSNMSUBADP,
+ XSNMSUBASP,
+ XSNMSUBMDP,
+ XSNMSUBMSP
+)>;
+
+// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
+// These operations can be done in parallel, so the two resources are listed
+// separately instead of as one combined (added-latency) resource.
+// The DP is restricted so we need a full 5 dispatches.
+def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "FSEL(D|S)o$")
+)>;
+
+// 5 Cycle Restricted DP operation and one 2 cycle ALU operation.
+// The combined resource P9_DPOpAndALUOp_7C carries the summed 5 + 2 = 7 cycle
+// latency. Two EXEC pipelines and five dispatches (DISP) are used.
+def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "MUL(H|L)(D|W)(U)?o$")
+)>;
+
+// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
+// These operations must be done sequentially, hence the combined 10 cycle
+// resource (7 + 3).
+// The DP is restricted so we need a full 5 dispatches.
+def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "FRI(N|P|Z|M)(D|S)o$"),
+ (instregex "FRE(S)?o$"),
+ (instregex "FADD(S)?o$"),
+ (instregex "FSUB(S)?o$"),
+ (instregex "F(N)?MSUB(S)?o$"),
+ (instregex "F(N)?MADD(S)?o$"),
+ (instregex "FCFID(U)?(S)?o$"),
+ (instregex "FCTID(U)?(Z)?o$"),
+ (instregex "FCTIW(U)?(Z)?o$"),
+ (instregex "FMUL(S)?o$"),
+ (instregex "FRSQRTE(S)?o$"),
+ FRSPo
+)>;
+
+// 7 cycle DP operation. One DP unit, one EXEC pipeline and two dispatch units.
+// This is the standard (non-restricted) dispatch form of the 7 cycle DP group.
+def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C],
+ (instrs
+ XSADDDP,
+ XSADDSP,
+ XSCVDPHP,
+ XSCVDPSP,
+ XSCVDPSXDS,
+ XSCVDPSXDSs,
+ XSCVDPSXWS,
+ XSCVDPUXDS,
+ XSCVDPUXDSs,
+ XSCVDPUXWS,
+ XSCVDPSXWSs,
+ XSCVDPUXWSs,
+ XSCVHPDP,
+ XSCVSPDP,
+ XSCVSXDDP,
+ XSCVSXDSP,
+ XSCVUXDDP,
+ XSCVUXDSP,
+ XSRDPI,
+ XSRDPIC,
+ XSRDPIM,
+ XSRDPIP,
+ XSRDPIZ,
+ XSREDP,
+ XSRESP,
+ XSRSQRTEDP,
+ XSRSQRTESP,
+ XSSUBDP,
+ XSSUBSP,
+ XSCVDPSPN,
+ XSRSP
+)>;
+
+// Three Cycle PM operation. Only one PM unit per superslice so we use the whole
+// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+// dispatches (DISP).
+def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "LVS(L|R)$"),
+ (instregex "VSPLTIS(W|H|B)$"),
+ (instregex "VSPLT(W|H|B)(s)?$"),
+ (instregex "V_SETALLONES(B|H)?$"),
+ (instregex "VEXTRACTU(B|H|W)$"),
+ (instregex "VINSERT(B|H|W|D)$"),
+ MFVSRLD,
+ MTVSRWS,
+ VBPERMQ,
+ VCLZLSBB,
+ VCTZLSBB,
+ VEXTRACTD,
+ VEXTUBLX,
+ VEXTUBRX,
+ VEXTUHLX,
+ VEXTUHRX,
+ VEXTUWLX,
+ VEXTUWRX,
+ VGBBD,
+ VMRGHB,
+ VMRGHH,
+ VMRGHW,
+ VMRGLB,
+ VMRGLH,
+ VMRGLW,
+ VPERM,
+ VPERMR,
+ VPERMXOR,
+ VPKPX,
+ VPKSDSS,
+ VPKSDUS,
+ VPKSHSS,
+ VPKSHUS,
+ VPKSWSS,
+ VPKSWUS,
+ VPKUDUM,
+ VPKUDUS,
+ VPKUHUM,
+ VPKUHUS,
+ VPKUWUM,
+ VPKUWUS,
+ VPRTYBQ,
+ VSL,
+ VSLDOI,
+ VSLO,
+ VSLV,
+ VSR,
+ VSRO,
+ VSRV,
+ VUPKHPX,
+ VUPKHSB,
+ VUPKHSH,
+ VUPKHSW,
+ VUPKLPX,
+ VUPKLSB,
+ VUPKLSH,
+ VUPKLSW,
+ XXBRD,
+ XXBRH,
+ XXBRQ,
+ XXBRW,
+ XXEXTRACTUW,
+ XXINSERTW,
+ XXMRGHW,
+ XXMRGLW,
+ XXPERM,
+ XXPERMR,
+ XXSLDWI,
+ XXSLDWIs,
+ XXSPLTIB,
+ XXSPLTW,
+ XXSPLTWs,
+ XXPERMDI,
+ XXPERMDIs,
+ VADDCUQ,
+ VADDECUQ,
+ VADDEUQM,
+ VADDUQM,
+ VMUL10CUQ,
+ VMUL10ECUQ,
+ VMUL10EUQ,
+ VMUL10UQ,
+ VSUBCUQ,
+ VSUBECUQ,
+ VSUBEUQM,
+ VSUBUQM,
+ XSCMPEXPQP,
+ XSCMPOQP,
+ XSCMPUQP,
+ XSTSTDCQP,
+ XSXSIGQP,
+ BCDCFNo,
+ BCDCFZo,
+ BCDCPSGNo,
+ BCDCTNo,
+ BCDCTZo,
+ BCDSETSGNo,
+ BCDSo,
+ BCDTRUNCo,
+ BCDUSo,
+ BCDUTRUNCo
+)>;
+
+// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
+// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+// dispatches (DISP).
+def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ BCDSRo,
+ XSADDQP,
+ XSADDQPO,
+ XSCVDPQP,
+ XSCVQPDP,
+ XSCVQPDPO,
+ XSCVQPSDZ,
+ XSCVQPSWZ,
+ XSCVQPUDZ,
+ XSCVQPUWZ,
+ XSCVSDQP,
+ XSCVUDQP,
+ XSRQPI,
+ XSRQPIX,
+ XSRQPXP,
+ XSSUBQP,
+ XSSUBQPO
+)>;
+
+// 23 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
+// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+// dispatches (DISP).
+def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ BCDCTSQo
+)>;
+
+// 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
+// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+// dispatches (DISP).
+def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ XSMADDQP,
+ XSMADDQPO,
+ XSMSUBQP,
+ XSMSUBQPO,
+ XSMULQP,
+ XSMULQPO,
+ XSNMADDQP,
+ XSNMADDQPO,
+ XSNMSUBQP,
+ XSNMSUBQPO
+)>;
+
+// 37 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
+// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+// dispatches (DISP).
+def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ BCDCFSQo
+)>;
+
+// 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
+// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+// dispatches (DISP).
+def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ XSDIVQP,
+ XSDIVQPO
+)>;
+
+// 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
+// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+// dispatches (DISP).
+def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ XSSQRTQP,
+ XSSQRTQPO
+)>;
+
+// 6 Cycle Load uses a single slice.
+// One AGEN pipeline and two dispatches (DISP) are used.
+// NOTE(review): unlike most patterns in this file, "LXVL(L)?" has no trailing
+// `$`, so it is not anchored at the end of the opcode name -- confirm that no
+// longer opcode name is matched unintentionally.
+def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "LXVL(L)?")
+)>;
+
+// 5 Cycle Load uses a single slice.
+// One AGEN pipeline and two dispatches (DISP) are used.
+// NOTE(review): "LVX(L)?" has no trailing `$`, unlike the neighbouring
+// patterns, so it is not anchored at the end of the opcode name -- confirm.
+def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "LVE(B|H|W)X$"),
+ (instregex "LVX(L)?"),
+ (instregex "LXSI(B|H)ZX$"),
+ LXSDX,
+ LXVB16X,
+ LXVD2X,
+ LXVWSX,
+ LXSIWZX,
+ LXV,
+ LXVX,
+ LXSD,
+ DFLOADf64,
+ XFLOADf64,
+ LIWZX
+)>;
+
+// 4 Cycle Load uses a single slice.
+// One AGEN pipeline and two dispatches (DISP) are used.
+// NOTE(review): the list also contains opcodes whose names do not look like
+// loads (DCB*, ICB*, CP_*, *SYNC, DARN); presumably they are modeled on the
+// same load/store resources -- confirm.
+def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "DCB(F|T|ST)(EP)?$"),
+ (instregex "DCBZ(L)?(EP)?$"),
+ (instregex "DCBTST(EP)?$"),
+ (instregex "CP_COPY(8)?$"),
+ (instregex "CP_PASTE(8)?$"),
+ (instregex "ICBI(EP)?$"),
+ (instregex "ICBT(LS)?$"),
+ (instregex "LBARX(L)?$"),
+ (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"),
+ (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"),
+ (instregex "LH(A|B)RX(L)?(8)?$"),
+ (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
+ (instregex "LWARX(L)?$"),
+ (instregex "LWBRX(8)?$"),
+ (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
+ CP_ABORT,
+ DARN,
+ EnforceIEIO,
+ ISYNC,
+ MSGSYNC,
+ TLBSYNC,
+ SYNC,
+ LMW,
+ LSWI
+)>;
+
+// 4 Cycle Restricted load uses a single slice but the dispatch for the whole
+// superslice (three DISP entries).
+def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ LFIWZX,
+ LFDX,
+ LFD
+)>;
+
+// Cracked Load Instructions.
+// Load instructions that can be done in parallel.
+// Two 4 cycle load pieces, two AGEN pipelines and four dispatches (DISP).
+def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ SLBIA,
+ SLBIE,
+ SLBMFEE,
+ SLBMFEV,
+ SLBMTE,
+ TLBIEL
+)>;
+
+// Cracked Load Instruction.
+// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
+// operations can be run in parallel.
+// NOTE(review): the pieces are listed as separate resources (4 cycle load,
+// 2 cycle ALU) rather than one combined resource; elsewhere in this file
+// parallel pieces do not have their latencies added, so the "totaling 6
+// cycles" wording above looks inconsistent -- confirm.
+def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "L(W|H)ZU(X)?(8)?$"),
+ TEND
+)>;
+
+// Cracked Store Instruction
+// Consecutive Store and ALU instructions. The store is restricted and requires
+// three dispatches; with the ALU piece the total is five dispatches (DISP).
+def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "ST(B|H|W|D)CX$")
+)>;
+
+// Cracked Load Instruction.
+// Two consecutive load operations for a total of 8 cycles.
+// Two AGEN pipelines and four dispatches (DISP) are used.
+def : InstRW<[P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ LDMX
+)>;
+
+// Cracked Load instruction.
+// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
+// operations cannot be done at the same time and so their latencies are added.
+// Four dispatches (DISP) are used.
+def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "LHA(X)?(8)?$"),
+ (instregex "CP_PASTE(8)?o$"),
+ (instregex "LWA(X)?(_32)?$"),
+ TCHECK
+)>;
+
+// Cracked Restricted Load instruction.
+// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
+// operations cannot be done at the same time and so their latencies are added.
+// Full 6 dispatches (DISP) are required as this is both cracked and restricted.
+def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ LFIWAX
+)>;
+
+// Cracked Load instruction.
+// Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
+// operations cannot be done at the same time and so their latencies are added.
+// Full 4 dispatches (DISP) are required as this is a cracked instruction.
+def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ LXSIWAX,
+ LIWAX
+)>;
+
+// Cracked Load instruction.
+// Requires consecutive Load (4 cycles) and ALU (3 cycles) pieces totaling 7
+// cycles. The Load and ALU operations cannot be done at the same time and so
+// their latencies are added.
+// Full 6 dispatches (DISP) are required as this is a restricted instruction.
+def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ LFSX,
+ LFS
+)>;
+
+// Cracked Load instruction.
+// Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU
+// operations cannot be done at the same time and so their latencies are added.
+// Full 4 dispatches (DISP) are required as this is a cracked instruction.
+def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ LXSSP,
+ LXSSPX,
+ XFLOADf32,
+ DFLOADf32
+)>;
+
+// Cracked 3-Way Load Instruction
+// Load with two ALU operations that depend on each other
+// One AGEN pipeline, two EXEC pipelines and six dispatches (DISP) are used.
+def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "LHAU(X)?(8)?$"),
+ LWAUX
+)>;
+
+// Cracked Load that requires the PM resource.
+// Since the Load and the PM cannot be done at the same time the latencies are
+// added. Requires 8 cycles.
+// Since the PM requires the full superslice we need both EXECE, EXECO pipelines
+// as well as 3 dispatches for the PM. The Load requires the remaining 2
+// dispatches, for 5 dispatches (DISP) in total.
+def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ LXVH8X,
+ LXVDSX,
+ LXVW4X
+)>;
+
+// Single slice Restricted store operation. The restricted operation requires
+// all three dispatches for the superslice.
+// Both an EXEC and an AGEN pipeline are listed for the store.
+def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "STF(S|D|IWX|SX|DX)$"),
+ (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
+ (instregex "STW(8)?$"),
+ (instregex "(D|X)FSTORE(f32|f64)$"),
+ (instregex "ST(W|H|D)BRX$"),
+ (instregex "ST(B|H|D)(8)?$"),
+ (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"),
+ STIWX,
+ SLBIEG,
+ STMW,
+ STSWI,
+ TLBIE
+)>;
+
+// Vector Store Instruction
+// Requires the whole superslice and therefore requires all three dispatches
+// as well as both the Even and Odd exec pipelines, plus one AGEN pipeline.
+def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C,
+ DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "STVE(B|H|W)X$"),
+ (instregex "STVX(L)?$"),
+ (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$")
+)>;
+
+// 5 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
+// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+// dispatches.
+// The opcodes assigned here are the MTCTR* and MTLR* moves, not divides.
+def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "MTCTR(8)?(loop)?$"),
+ (instregex "MTLR(8)?$")
+)>;
+
+// 12 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
+// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+// dispatches.
+// The opcodes assigned here are MT*/MF* register moves, not divides.
+def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "M(T|F)VRSAVE(v)?$"),
+ (instregex "M(T|F)PMR$"),
+ (instregex "M(T|F)TB(8)?$"),
+ (instregex "MF(SPR|CTR|LR)(8)?$"),
+ (instregex "M(T|F)MSR(D)?$"),
+ (instregex "MTSPR(8)?$")
+)>;
+
+// 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
+// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+// dispatches (DISP).
+def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C,
+ DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ DIVW,
+ DIVWU,
+ MODSW
+)>;
+
+// 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
+// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+// dispatches (DISP).
+def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C,
+ DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ DIVWE,
+ DIVD,
+ DIVWEU,
+ DIVDU,
+ MODSD,
+ MODUD,
+ MODUW
+)>;
+
+// 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
+// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+// dispatches (DISP).
+def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C,
+ DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ DIVDE,
+ DIVDEU
+)>;
+
+// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
+// and one full superslice for the DIV operation since there is only one DIV
+// per superslice. Latency of DIV plus ALU is 18.
+def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "DIVW(U)?(O)?o$")
+)>;
+
+// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
+// and one full superslice for the DIV operation since there is only one DIV
+// per superslice. Latency of DIV plus ALU is 26.
+// Three EXEC pipelines (EXECE, EXECO, EXEC) and five dispatches are used.
+def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ DIVDo,
+ DIVDUo,
+ DIVWEo,
+ DIVWEUo
+)>;
+
+// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
+// and one full superslice for the DIV operation since there is only one DIV
+// per superslice. Latency of DIV plus ALU is 42.
+// Three EXEC pipelines (EXECE, EXECO, EXEC) and five dispatches are used.
+def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ DIVDEo,
+ DIVDEUo
+)>;
+
+// CR access instructions in _BrMCR, IIC_BrMCRX.
+
+// Cracked, restricted, ALU operations.
+// Here the two ALU ops can actually be done in parallel and therefore the
+// latencies are not added together. Otherwise this is like having two
+// instructions running together on two pipelines and 6 dispatches (three per
+// restricted op).
+// ALU ops are 2 cycles each.
+def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ MTCRF,
+ MTCRF8
+)>;
+
+// Cracked ALU operations.
+// Here the two ALU ops can actually be done in parallel and therefore the
+// latencies are not added together. Otherwise this is like having two
+// instructions running together on two pipelines and 4 dispatches (two per
+// op).
+// ALU ops are 2 cycles each.
+def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "ADDC(8)?o$"),
+ (instregex "SUBFC(8)?o$")
+)>;
+
+// Cracked ALU operations.
+// Two ALU ops can be done in parallel.
+// One is three cycle ALU the other is a two cycle ALU.
+// One of the ALU ops is restricted the other is not so we have a total of
+// 5 dispatches.
+def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "F(N)?ABS(D|S)o$"),
+ (instregex "FCPSGN(D|S)o$"),
+ (instregex "FNEG(D|S)o$"),
+ FMRo
+)>;
+
+// Cracked ALU operations.
+// Here the two ALU ops can actually be done in parallel and therefore the
+// latencies are not added together. Otherwise this is like having two
+// instructions running together on two pipelines and 4 dispatches (two per
+// op).
+// ALU ops are 3 cycles each.
+def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ MCRFS
+)>;
+
+// Cracked Restricted ALU operations.
+// Here the two ALU ops can actually be done in parallel and therefore the
+// latencies are not added together. Otherwise this is like having two
+// instructions running together on two pipelines and 6 dispatches (three per
+// restricted op).
+// ALU ops are 3 cycles each.
+def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "MTFSF(b|o)?$"),
+ (instregex "MTFSFI(o)?$")
+)>;
+
+// Cracked instruction made of two ALU ops.
+// The two ops cannot be done in parallel, so a combined 4 cycle resource is
+// used.
+// One of the ALU ops is restricted and takes 3 dispatches; with the other op
+// the total is 5 dispatches (DISP).
+def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "RLD(I)?C(R|L)o$"),
+ (instregex "RLW(IMI|INM|NM)(8)?o$"),
+ (instregex "SLW(8)?o$"),
+ (instregex "SRAW(I)?o$"),
+ (instregex "SRW(8)?o$"),
+ RLDICL_32o,
+ RLDIMIo
+)>;
+
+// Cracked instruction made of two ALU ops.
+// The two ops cannot be done in parallel, so a combined 6 cycle resource is
+// used.
+// Both of the ALU ops are restricted and take 3 dispatches each (6 in total).
+def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "MFFS(L|CE|o)?$")
+)>;
+
+// Cracked ALU instruction composed of three consecutive 2 cycle ALU operations
+// for a total of 6 cycles. All of the ALU operations are also restricted so
+// each takes 3 dispatches for a total of 9.
+def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
+ DISP_1C, DISP_1C],
+ (instrs
+ (instregex "MFCR(8)?$")
+)>;
+
+// Cracked instruction made of two ALU ops.
+// The two ops cannot be done in parallel, so a combined 4 cycle resource is
+// used. Neither op is restricted: 4 dispatches (DISP) in total.
+def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "EXTSWSLIo$"),
+ (instregex "SRAD(I)?o$"),
+ SLDo,
+ SRDo,
+ RLDICo
+)>;
+
+// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches (DISP).
+def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ FDIV
+)>;
+
+// 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
+// The combined resource carries the summed 33 + 3 = 36 cycle latency.
+def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ FDIVo
+)>;
+
+// 36 Cycle DP Instruction.
+// Instruction can be done on a single slice and takes 2 dispatches (DISP).
+def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C, DISP_1C],
+ (instrs
+ XSSQRTDP
+)>;
+
+// 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches (DISP).
+def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ FSQRT
+)>;
+
+// 36 Cycle DP Vector Instruction.
+// Uses both DP units (DPE, DPO), both exec pipelines (EXECE, EXECO) and three
+// dispatches (DISP).
+def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C,
+ DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ XVSQRTDP
+)>;
+
+// 27 Cycle DP Vector Instruction.
+// Uses both DP units (DPE, DPO), both exec pipelines (EXECE, EXECO) and three
+// dispatches (DISP).
+def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
+ DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ XVSQRTSP
+)>;
+
+// 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
+// The combined resource carries the summed 36 + 3 = 39 cycle latency.
+def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ FSQRTo
+)>;
+
+// 26 Cycle DP Instruction. Takes one slice and 2 dispatches (DISP).
+def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C, DISP_1C],
+ (instrs
+ XSSQRTSP
+)>;
+
+// 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches (DISP).
+def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ FSQRTS
+)>;
+
+// 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
+// The combined resource carries the summed 26 + 3 = 29 cycle latency.
+def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ FSQRTSo
+)>;
+
+// 33 Cycle DP Instruction. Takes one slice and 2 dispatches (DISP).
+def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C],
+ (instrs
+ XSDIVDP
+)>;
+
+// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches (DISP).
+def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ FDIVS
+)>;
+
+// 22 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
+// The combined resource carries the summed 22 + 3 = 25 cycle latency.
+def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ FDIVSo
+)>;
+
+// 22 Cycle DP Instruction. Takes one slice and 2 dispatches (DISP).
+def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C],
+ (instrs
+ XSDIVSP
+)>;
+
+// 24 Cycle DP Vector Instruction. Takes one full superslice.
+// Includes both DP units (DPE, DPO), both EXECE, EXECO pipelines and all 3
+// dispatches for the given superslice.
+def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
+ DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ XVDIVSP
+)>;
+
+// 33 Cycle DP Vector Instruction. Takes one full superslice.
+// Includes both DP units (DPE, DPO), both EXECE, EXECO pipelines and all 3
+// dispatches for the given superslice.
+def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
+ DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ XVDIVDP
+)>;
+
+// Instruction cracked into three pieces. One Load and two ALU operations.
+// The Load and one of the ALU ops cannot be run at the same time and so the
+// latencies are added together for 7 cycles. The remaining ALU is 2 cycles.
+// Both the load and the ALU that depends on it are restricted and so they take
+// a total of 6 dispatches. The final 2 dispatches come from the second ALU op.
+// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
+def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
+ IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "LF(SU|SUX)$")
+)>;
+
+// Cracked instruction made up of a Store and an ALU. The ALU does not depend on
+// the store and so it can be run at the same time as the store. The store is
+// also restricted.
+// Resource list: P9_LS_1C and P9_ALU_2C, one IP_AGEN_1C, two IP_EXEC_1C and
+// five DISP_1C.
+def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "STF(S|D)U(X)?$"),
+ (instregex "ST(B|H|W|D)U(X)?(8)?$")
+)>;
+
+// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
+// the load and so it can be run at the same time as the load.
+// Resource list: P9_LS_4C and P9_ALU_2C, one IP_AGEN_1C, one IP_EXEC_1C and
+// four DISP_1C.
+def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "LBZU(X)?(8)?$"),
+ (instregex "LDU(X)?$")
+)>;
+
+
+// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
+// the load and so it can be run at the same time as the load. The load is also
+// restricted. 3 dispatches are from the restricted load while the other two
+// are from the ALU. The AGEN pipeline is from the load and the EXEC pipeline
+// is required for the ALU.
+// The regex below matches the names LFDU and LFDUX.
+def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "LF(DU|DUX)$")
+)>;
+
+// Crypto Instructions
+
+// 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
+// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+// dispatches.
+// The regexes below match the names VPMSUMB/H/W/D and VCIPHER, VCIPHERLAST,
+// VNCIPHER, VNCIPHERLAST; VSBOX is listed by name.
+def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "VPMSUM(B|H|W|D)$"),
+ (instregex "V(N)?CIPHER(LAST)?$"),
+ VSBOX
+)>;
+
+// Branch Instructions
+
+// Two Cycle Branch
+// Resource list: P9_BR_2C and two DISP_1C. No IP_* pipeline entry is listed
+// for these instructions.
+def : InstRW<[P9_BR_2C, DISP_1C, DISP_1C],
+ (instrs
+ (instregex "BCCCTR(L)?(8)?$"),
+ (instregex "BCCL(A|R|RL)?$"),
+ (instregex "BCCTR(L)?(8)?(n)?$"),
+ (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
+ (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
+ (instregex "BL(_TLS)?$"),
+ (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"),
+ (instregex "BLA(8|8_NOP)?$"),
+ (instregex "BLR(8|L)?$"),
+ (instregex "TAILB(A)?(8)?$"),
+ (instregex "TAILBCTR(8)?$"),
+ (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
+ (instregex "BCLR(L)?(n)?$"),
+ (instregex "BCTR(L)?(8)?$"),
+ B,
+ BA,
+ BC,
+ BCC,
+ BCCA,
+ BCL,
+ BCLalways,
+ BCLn,
+ BCTRL8_LDinto_toc,
+ BCn,
+ CTRL_DEP
+)>;
+
+// Five Cycle Branch with a 2 Cycle ALU Op
+// Operations must be done consecutively and not in parallel.
+// The 7C in P9_BROpAndALUOp_7C matches 5 + 2. The resource list also holds
+// one IP_EXEC_1C and four DISP_1C.
+def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ ADDPCIS
+)>;
+
+// Special Extracted Instructions For Atomics
+
+// Atomic Load
+// Resource list: two P9_LS_1C and three P9_LS_4C, two IP_EXEC_1C, five
+// IP_AGEN_1C and twelve DISP_1C.
+// The regex below matches the names LDAT and LWAT.
+def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
+ IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
+ IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
+ DISP_1C],
+ (instrs
+ (instregex "L(D|W)AT$")
+)>;
+
+// Atomic Store
+// Resource list: one P9_LS_1C and two P9_LS_4C, one IP_EXEC_1C, three
+// IP_AGEN_1C and seven DISP_1C.
+// The regex below matches the names STDAT and STWAT.
+def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C,
+ IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
+ DISP_1C],
+ (instrs
+ (instregex "ST(D|W)AT$")
+)>;
+
+// Signal Processing Engine (SPE) Instructions
+// These instructions are not supported on Power 9.
+// The resource list is left empty and the entry sets Unsupported = 1.
+def : InstRW<[],
+ (instrs
+ BRINC,
+ EVABS,
+ EVEQV,
+ EVMRA,
+ EVNAND,
+ EVNEG,
+ (instregex "EVADD(I)?W$"),
+ (instregex "EVADD(SM|SS|UM|US)IAAW$"),
+ (instregex "EVAND(C)?$"),
+ (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"),
+ (instregex "EVCNTL(S|Z)W$"),
+ (instregex "EVDIVW(S|U)$"),
+ (instregex "EVEXTS(B|H)$"),
+ (instregex "EVLD(H|W|D)(X)?$"),
+ (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"),
+ (instregex "EVLWHE(X)?$"),
+ (instregex "EVLWHO(S|U)(X)?$"),
+ (instregex "EVLW(H|W)SPLAT(X)?$"),
+ (instregex "EVMERGE(HI|LO|HILO|LOHI)$"),
+ (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"),
+ (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"),
+ (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"),
+ (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"),
+ (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"),
+ (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"),
+ (instregex "EVMWHS(M|S)(F|FA|I|IA)$"),
+ (instregex "EVMWHUMI(A)?$"),
+ (instregex "EVMWLS(M|S)IA(A|N)W$"),
+ (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"),
+ (instregex "EVMWSM(F|I)(A|AA|AN)?$"),
+ (instregex "EVMWSSF(A|AA|AN)?$"),
+ (instregex "EVMWUMI(A|AA|AN)?$"),
+ (instregex "EV(N|X)?OR(C)?$"),
+ (instregex "EVR(LW|LWI|NDW)$"),
+ (instregex "EVSLW(I)?$"),
+ (instregex "EVSPLAT(F)?I$"),
+ (instregex "EVSRW(I)?(S|U)$"),
+ (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"),
+ (instregex "EVSUBF(S|U)(M|S)IAAW$"),
+ (instregex "EVSUB(I)?FW$")
+)> { let Unsupported = 1; }
+
+// General Instructions without scheduling support.
+// The resource list is left empty and the entry sets Unsupported = 1.
+def : InstRW<[],
+ (instrs
+ (instregex "(H)?RFI(D)?$"),
+ (instregex "DSS(ALL)?$"),
+ (instregex "DST(ST)?(T)?(64)?$"),
+ (instregex "ICBL(C|Q)$"),
+ (instregex "L(W|H|B)EPX$"),
+ (instregex "ST(W|H|B)EPX$"),
+ (instregex "(L|ST)FDEPX$"),
+ (instregex "M(T|F)SR(IN)?$"),
+ (instregex "M(T|F)DCR$"),
+ (instregex "NOP_GT_PWR(6|7)$"),
+ (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"),
+ (instregex "WRTEE(I)?$"),
+ ATTN,
+ CLRBHRB,
+ MFBHRBE,
+ MBAR,
+ MSYNC,
+ SLBSYNC,
+ NAP,
+ STOP,
+ TRAP,
+ RFCI,
+ RFDI,
+ RFMCI,
+ SC,
+ DCBA,
+ DCBI,
+ DCCCI,
+ ICCCI
+)> { let Unsupported = 1; }