Diffstat (limited to 'capstone/suite/synctools/tablegen/PPC')
25 files changed, 22553 insertions, 0 deletions
diff --git a/capstone/suite/synctools/tablegen/PPC/P9InstrResources.td b/capstone/suite/synctools/tablegen/PPC/P9InstrResources.td
new file mode 100644
index 000000000..c6cbb9037
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/PPC/P9InstrResources.td
@@ -0,0 +1,1420 @@
+//===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the resources required by P9 instructions. This is part
+// of the P9 processor model used for instruction scheduling. This file should
+// contain all of the instructions that may be used on Power 9. This is not
+// just instructions that are new on Power 9 but also instructions that were
+// available on earlier architectures and are still used in Power 9.
+//
+// The makeup of the P9 CPU is modeled as follows:
+//   - Each CPU is made up of two superslices.
+//   - Each superslice is made up of two slices. Therefore, there are 4 slices
+//     for each CPU.
+//   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
+//   - Each CPU has:
+//     - One CY (Crypto) unit P9_CY_*
+//     - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
+//     - Two PM (Permute) units. One on each superslice. P9_PM_*
+//     - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
+//     - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
+//     - Four DP (Floating Point) units. One on each slice. P9_DP_*
+//       This also includes fixed point multiply add.
+//     - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
+//     - Four Load/Store Queues. P9_LS_*
+//   - Each set of instructions will require a number of these resources.
+// (An illustrative sketch of this resource-mapping pattern appears at the
+// end of this section.)
+//===----------------------------------------------------------------------===//
+
+// Two cycle ALU vector operation that uses an entire superslice.
+// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
+// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
+def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
+              DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    (instregex "VADDU(B|H|W|D)M$"),
+    (instregex "VAND(C)?$"),
+    (instregex "VEXTS(B|H|W)2(D|W)(s)?$"),
+    (instregex "V_SET0(B|H)?$"),
+    (instregex "VS(R|L)(B|H|W|D)$"),
+    (instregex "VSUBU(B|H|W|D)M$"),
+    (instregex "VPOPCNT(B|H)$"),
+    (instregex "VRL(B|H|W|D)$"),
+    (instregex "VSRA(B|H|W|D)$"),
+    (instregex "XV(N)?ABS(D|S)P$"),
+    (instregex "XVCPSGN(D|S)P$"),
+    (instregex "XV(I|X)EXP(D|S)P$"),
+    (instregex "VRL(D|W)(MI|NM)$"),
+    (instregex "VMRG(E|O)W$"),
+    MTVSRDD,
+    VEQV,
+    VNAND,
+    VNEGD,
+    VNEGW,
+    VNOR,
+    VOR,
+    VORC,
+    VSEL,
+    VXOR,
+    XVNEGDP,
+    XVNEGSP,
+    XXLAND,
+    XXLANDC,
+    XXLEQV,
+    XXLNAND,
+    XXLNOR,
+    XXLOR,
+    XXLORf,
+    XXLORC,
+    XXLXOR,
+    XXLXORdpz,
+    XXLXORspz,
+    XXLXORz,
+    XXSEL,
+    XSABSQP,
+    XSCPSGNQP,
+    XSIEXPQP,
+    XSNABSQP,
+    XSNEGQP,
+    XSXEXPQP
+)>;
+
+// Restricted Dispatch ALU operation for 3 cycles. The operation runs on a
+// single slice. However, since it is Restricted it requires all 3 dispatches
+// (DISP) for that superslice.
+def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    (instregex "TABORT(D|W)C(I)?$"),
+    (instregex "MTFSB(0|1)$"),
+    (instregex "MFFSC(D)?RN(I)?$"),
+    (instregex "CMPRB(8)?$"),
+    (instregex "TD(I)?$"),
+    (instregex "TW(I)?$"),
+    (instregex "FCMPU(S|D)$"),
+    (instregex "XSTSTDC(S|D)P$"),
+    FTDIV,
+    FTSQRT,
+    CMPEQB
+)>;
+
+// Standard Dispatch ALU operation for 3 cycles. Only one slice used.
+def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C],
+    (instrs
+    (instregex "XSMAX(C|J)?DP$"),
+    (instregex "XSMIN(C|J)?DP$"),
+    (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
+    (instregex "CNT(L|T)Z(D|W)(8)?(o)?$"),
+    (instregex "POPCNT(D|W)$"),
+    (instregex "CMPB(8)?$"),
+    XSTDIVDP,
+    XSTSQRTDP,
+    XSXSIGDP,
+    XSCVSPDPN,
+    SETB,
+    BPERMD
+)>;
+
+// Standard Dispatch ALU operation for 2 cycles. Only one slice used.
+def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
+    (instrs
+    (instregex "S(L|R)D$"),
+    (instregex "SRAD(I)?$"),
+    (instregex "EXTSWSLI$"),
+    (instregex "MFV(S)?RD$"),
+    (instregex "MTVSRD$"),
+    (instregex "MTVSRW(A|Z)$"),
+    (instregex "CMP(WI|LWI|W|LW)(8)?$"),
+    (instregex "CMP(L)?D(I)?$"),
+    (instregex "SUBF(I)?C(8)?$"),
+    (instregex "ANDI(S)?o(8)?$"),
+    (instregex "ADDC(8)?$"),
+    (instregex "ADDIC(8)?(o)?$"),
+    (instregex "ADD(8|4)(o)?$"),
+    (instregex "ADD(E|ME|ZE)(8)?(o)?$"),
+    (instregex "SUBF(E|ME|ZE)?(8)?(o)?$"),
+    (instregex "NEG(8)?(o)?$"),
+    (instregex "POPCNTB$"),
+    (instregex "ADD(I|IS)?(8)?$"),
+    (instregex "LI(S)?(8)?$"),
+    (instregex "(X)?OR(I|IS)?(8)?(o)?$"),
+    (instregex "NAND(8)?(o)?$"),
+    (instregex "AND(C)?(8)?(o)?$"),
+    (instregex "NOR(8)?(o)?$"),
+    (instregex "OR(C)?(8)?(o)?$"),
+    (instregex "EQV(8)?(o)?$"),
+    (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(o)?$"),
+    (instregex "ADD(4|8)(TLS)?(_)?$"),
+    (instregex "NEG(8)?$"),
+    (instregex "ADDI(S)?toc(HA|L)$"),
+    COPY,
+    MCRF,
+    MCRXRX,
+    XSNABSDP,
+    XSXEXPDP,
+    XSABSDP,
+    XSNEGDP,
+    XSCPSGNDP,
+    MFVSRWZ,
+    SRADI_32,
+    RLDIC,
+    RFEBB,
+    LA,
+    TBEGIN,
+    TRECHKPT,
+    NOP,
+    WAIT
+)>;
+
+// Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
+// single slice. However, since it is Restricted it requires all 3 dispatches
+// (DISP) for that superslice.
+def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    (instregex "RLDC(L|R)$"),
+    (instregex "RLWIMI(8)?$"),
+    (instregex "RLDIC(L|R)(_32)?(_64)?$"),
+    (instregex "M(F|T)OCRF(8)?$"),
+    (instregex "CR(6)?(UN)?SET$"),
+    (instregex "CR(N)?(OR|AND)(C)?$"),
+    (instregex "S(L|R)W(8)?$"),
+    (instregex "RLW(INM|NM)(8)?$"),
+    (instregex "F(N)?ABS(D|S)$"),
+    (instregex "FNEG(D|S)$"),
+    (instregex "FCPSGN(D|S)$"),
+    (instregex "SRAW(I)?$"),
+    (instregex "ISEL(8)?$"),
+    RLDIMI,
+    XSIEXPDP,
+    FMR,
+    CREQV,
+    CRXOR,
+    TRECLAIM,
+    TSR,
+    TABORT
+)>;
+
+// Three cycle ALU vector operation that uses an entire superslice.
+// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
+// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
+def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, + DISP_1C, DISP_1C, DISP_1C], + (instrs + (instregex "M(T|F)VSCR$"), + (instregex "VCMPNEZ(B|H|W)$"), + (instregex "VCMPEQU(B|H|W|D)$"), + (instregex "VCMPNE(B|H|W)$"), + (instregex "VABSDU(B|H|W)$"), + (instregex "VADDU(B|H|W)S$"), + (instregex "VAVG(S|U)(B|H|W)$"), + (instregex "VCMP(EQ|GE|GT)FP(o)?$"), + (instregex "VCMPBFP(o)?$"), + (instregex "VC(L|T)Z(B|H|W|D)$"), + (instregex "VADDS(B|H|W)S$"), + (instregex "V(MIN|MAX)FP$"), + (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"), + VBPERMD, + VADDCUW, + VPOPCNTW, + VPOPCNTD, + VPRTYBD, + VPRTYBW, + VSHASIGMAD, + VSHASIGMAW, + VSUBSBS, + VSUBSHS, + VSUBSWS, + VSUBUBS, + VSUBUHS, + VSUBUWS, + VSUBCUW, + VCMPGTSB, + VCMPGTSBo, + VCMPGTSD, + VCMPGTSDo, + VCMPGTSH, + VCMPGTSHo, + VCMPGTSW, + VCMPGTSWo, + VCMPGTUB, + VCMPGTUBo, + VCMPGTUD, + VCMPGTUDo, + VCMPGTUH, + VCMPGTUHo, + VCMPGTUW, + VCMPGTUWo, + VCMPNEBo, + VCMPNEHo, + VCMPNEWo, + VCMPNEZBo, + VCMPNEZHo, + VCMPNEZWo, + VCMPEQUBo, + VCMPEQUDo, + VCMPEQUHo, + VCMPEQUWo, + XVCMPEQDP, + XVCMPEQDPo, + XVCMPEQSP, + XVCMPEQSPo, + XVCMPGEDP, + XVCMPGEDPo, + XVCMPGESP, + XVCMPGESPo, + XVCMPGTDP, + XVCMPGTDPo, + XVCMPGTSP, + XVCMPGTSPo, + XVMAXDP, + XVMAXSP, + XVMINDP, + XVMINSP, + XVTDIVDP, + XVTDIVSP, + XVTSQRTDP, + XVTSQRTSP, + XVTSTDCDP, + XVTSTDCSP, + XVXSIGDP, + XVXSIGSP +)>; + +// 7 cycle DP vector operation that uses an entire superslice. +// Uses both DP units (the even DPE and odd DPO units), two pipelines +// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice. +def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, + DISP_1C, DISP_1C, DISP_1C], + (instrs + VADDFP, + VCTSXS, + VCTSXS_0, + VCTUXS, + VCTUXS_0, + VEXPTEFP, + VLOGEFP, + VMADDFP, + VMHADDSHS, + VNMSUBFP, + VREFP, + VRFIM, + VRFIN, + VRFIP, + VRFIZ, + VRSQRTEFP, + VSUBFP, + XVADDDP, + XVADDSP, + XVCVDPSP, + XVCVDPSXDS, + XVCVDPSXWS, + XVCVDPUXDS, + XVCVDPUXWS, + XVCVHPSP, + XVCVSPDP, + XVCVSPHP, + XVCVSPSXDS, + XVCVSPSXWS, + XVCVSPUXDS, + XVCVSPUXWS, + XVCVSXDDP, + XVCVSXDSP, + XVCVSXWDP, + XVCVSXWSP, + XVCVUXDDP, + XVCVUXDSP, + XVCVUXWDP, + XVCVUXWSP, + XVMADDADP, + XVMADDASP, + XVMADDMDP, + XVMADDMSP, + XVMSUBADP, + XVMSUBASP, + XVMSUBMDP, + XVMSUBMSP, + XVMULDP, + XVMULSP, + XVNMADDADP, + XVNMADDASP, + XVNMADDMDP, + XVNMADDMSP, + XVNMSUBADP, + XVNMSUBASP, + XVNMSUBMDP, + XVNMSUBMSP, + XVRDPI, + XVRDPIC, + XVRDPIM, + XVRDPIP, + XVRDPIZ, + XVREDP, + XVRESP, + XVRSPI, + XVRSPIC, + XVRSPIM, + XVRSPIP, + XVRSPIZ, + XVRSQRTEDP, + XVRSQRTESP, + XVSUBDP, + XVSUBSP, + VCFSX, + VCFSX_0, + VCFUX, + VCFUX_0, + VMHRADDSHS, + VMLADDUHM, + VMSUMMBM, + VMSUMSHM, + VMSUMSHS, + VMSUMUBM, + VMSUMUHM, + VMSUMUHS, + VMULESB, + VMULESH, + VMULESW, + VMULEUB, + VMULEUH, + VMULEUW, + VMULOSB, + VMULOSH, + VMULOSW, + VMULOUB, + VMULOUH, + VMULOUW, + VMULUWM, + VSUM2SWS, + VSUM4SBS, + VSUM4SHS, + VSUM4UBS, + VSUMSWS +)>; + + +// 5 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three +// dispatch units for the superslice. +def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + (instregex "MADD(HD|HDU|LD)$"), + (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?$") +)>; + +// 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three +// dispatch units for the superslice. 
+def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + FRSP, + (instregex "FRI(N|P|Z|M)(D|S)$"), + (instregex "FRE(S)?$"), + (instregex "FADD(S)?$"), + (instregex "FMSUB(S)?$"), + (instregex "FMADD(S)?$"), + (instregex "FSUB(S)?$"), + (instregex "FCFID(U)?(S)?$"), + (instregex "FCTID(U)?(Z)?$"), + (instregex "FCTIW(U)?(Z)?$"), + (instregex "FRSQRTE(S)?$"), + FNMADDS, + FNMADD, + FNMSUBS, + FNMSUB, + FSELD, + FSELS, + FMULS, + FMUL, + XSMADDADP, + XSMADDASP, + XSMADDMDP, + XSMADDMSP, + XSMSUBADP, + XSMSUBASP, + XSMSUBMDP, + XSMSUBMSP, + XSMULDP, + XSMULSP, + XSNMADDADP, + XSNMADDASP, + XSNMADDMDP, + XSNMADDMSP, + XSNMSUBADP, + XSNMSUBASP, + XSNMSUBMDP, + XSNMSUBMSP +)>; + +// 7 cycle Restricted DP operation and one 3 cycle ALU operation. +// These operations can be done in parallel. +// The DP is restricted so we need a full 5 dispatches. +def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, + DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + (instregex "FSEL(D|S)o$") +)>; + +// 5 Cycle Restricted DP operation and one 2 cycle ALU operation. +def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C, + DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + (instregex "MUL(H|L)(D|W)(U)?o$") +)>; + +// 7 cycle Restricted DP operation and one 3 cycle ALU operation. +// These operations must be done sequentially. +// The DP is restricted so we need a full 5 dispatches. +def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C, + DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + (instregex "FRI(N|P|Z|M)(D|S)o$"), + (instregex "FRE(S)?o$"), + (instregex "FADD(S)?o$"), + (instregex "FSUB(S)?o$"), + (instregex "F(N)?MSUB(S)?o$"), + (instregex "F(N)?MADD(S)?o$"), + (instregex "FCFID(U)?(S)?o$"), + (instregex "FCTID(U)?(Z)?o$"), + (instregex "FCTIW(U)?(Z)?o$"), + (instregex "FMUL(S)?o$"), + (instregex "FRSQRTE(S)?o$"), + FRSPo +)>; + +// 7 cycle DP operation. One DP unit, one EXEC pipeline and two dispatch units. +def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C], + (instrs + XSADDDP, + XSADDSP, + XSCVDPHP, + XSCVDPSP, + XSCVDPSXDS, + XSCVDPSXDSs, + XSCVDPSXWS, + XSCVDPUXDS, + XSCVDPUXDSs, + XSCVDPUXWS, + XSCVDPSXWSs, + XSCVDPUXWSs, + XSCVHPDP, + XSCVSPDP, + XSCVSXDDP, + XSCVSXDSP, + XSCVUXDDP, + XSCVUXDSP, + XSRDPI, + XSRDPIC, + XSRDPIM, + XSRDPIP, + XSRDPIZ, + XSREDP, + XSRESP, + XSRSQRTEDP, + XSRSQRTESP, + XSSUBDP, + XSSUBSP, + XSCVDPSPN, + XSRSP +)>; + +// Three Cycle PM operation. Only one PM unit per superslice so we use the whole +// superslice. That includes both exec pipelines (EXECO, EXECE) and all three +// dispatches. 
+def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + (instregex "LVS(L|R)$"), + (instregex "VSPLTIS(W|H|B)$"), + (instregex "VSPLT(W|H|B)(s)?$"), + (instregex "V_SETALLONES(B|H)?$"), + (instregex "VEXTRACTU(B|H|W)$"), + (instregex "VINSERT(B|H|W|D)$"), + MFVSRLD, + MTVSRWS, + VBPERMQ, + VCLZLSBB, + VCTZLSBB, + VEXTRACTD, + VEXTUBLX, + VEXTUBRX, + VEXTUHLX, + VEXTUHRX, + VEXTUWLX, + VEXTUWRX, + VGBBD, + VMRGHB, + VMRGHH, + VMRGHW, + VMRGLB, + VMRGLH, + VMRGLW, + VPERM, + VPERMR, + VPERMXOR, + VPKPX, + VPKSDSS, + VPKSDUS, + VPKSHSS, + VPKSHUS, + VPKSWSS, + VPKSWUS, + VPKUDUM, + VPKUDUS, + VPKUHUM, + VPKUHUS, + VPKUWUM, + VPKUWUS, + VPRTYBQ, + VSL, + VSLDOI, + VSLO, + VSLV, + VSR, + VSRO, + VSRV, + VUPKHPX, + VUPKHSB, + VUPKHSH, + VUPKHSW, + VUPKLPX, + VUPKLSB, + VUPKLSH, + VUPKLSW, + XXBRD, + XXBRH, + XXBRQ, + XXBRW, + XXEXTRACTUW, + XXINSERTW, + XXMRGHW, + XXMRGLW, + XXPERM, + XXPERMR, + XXSLDWI, + XXSLDWIs, + XXSPLTIB, + XXSPLTW, + XXSPLTWs, + XXPERMDI, + XXPERMDIs, + VADDCUQ, + VADDECUQ, + VADDEUQM, + VADDUQM, + VMUL10CUQ, + VMUL10ECUQ, + VMUL10EUQ, + VMUL10UQ, + VSUBCUQ, + VSUBECUQ, + VSUBEUQM, + VSUBUQM, + XSCMPEXPQP, + XSCMPOQP, + XSCMPUQP, + XSTSTDCQP, + XSXSIGQP, + BCDCFNo, + BCDCFZo, + BCDCPSGNo, + BCDCTNo, + BCDCTZo, + BCDSETSGNo, + BCDSo, + BCDTRUNCo, + BCDUSo, + BCDUTRUNCo +)>; + +// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole +// superslice. That includes both exec pipelines (EXECO, EXECE) and all three +// dispatches. +def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + BCDSRo, + XSADDQP, + XSADDQPO, + XSCVDPQP, + XSCVQPDP, + XSCVQPDPO, + XSCVQPSDZ, + XSCVQPSWZ, + XSCVQPUDZ, + XSCVQPUWZ, + XSCVSDQP, + XSCVUDQP, + XSRQPI, + XSRQPIX, + XSRQPXP, + XSSUBQP, + XSSUBQPO +)>; + +// 23 Cycle DFU operation. Only one DFU unit per CPU so we use a whole +// superslice. That includes both exec pipelines (EXECO, EXECE) and all three +// dispatches. +def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + BCDCTSQo +)>; + +// 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole +// superslice. That includes both exec pipelines (EXECO, EXECE) and all three +// dispatches. +def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + XSMADDQP, + XSMADDQPO, + XSMSUBQP, + XSMSUBQPO, + XSMULQP, + XSMULQPO, + XSNMADDQP, + XSNMADDQPO, + XSNMSUBQP, + XSNMSUBQPO +)>; + +// 37 Cycle DFU operation. Only one DFU unit per CPU so we use a whole +// superslice. That includes both exec pipelines (EXECO, EXECE) and all three +// dispatches. +def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + BCDCFSQo +)>; + +// 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole +// superslice. That includes both exec pipelines (EXECO, EXECE) and all three +// dispatches. +def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + XSDIVQP, + XSDIVQPO +)>; + +// 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole +// superslice. That includes both exec pipelines (EXECO, EXECE) and all three +// dispatches. +def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + XSSQRTQP, + XSSQRTQPO +)>; + +// 6 Cycle Load uses a single slice. +def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C, DISP_1C], + (instrs + (instregex "LXVL(L)?") +)>; + +// 5 Cycle Load uses a single slice. 
+def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C], + (instrs + (instregex "LVE(B|H|W)X$"), + (instregex "LVX(L)?"), + (instregex "LXSI(B|H)ZX$"), + LXSDX, + LXVB16X, + LXVD2X, + LXVWSX, + LXSIWZX, + LXV, + LXVX, + LXSD, + DFLOADf64, + XFLOADf64, + LIWZX +)>; + +// 4 Cycle Load uses a single slice. +def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C], + (instrs + (instregex "DCB(F|T|ST)(EP)?$"), + (instregex "DCBZ(L)?(EP)?$"), + (instregex "DCBTST(EP)?$"), + (instregex "CP_COPY(8)?$"), + (instregex "CP_PASTE(8)?$"), + (instregex "ICBI(EP)?$"), + (instregex "ICBT(LS)?$"), + (instregex "LBARX(L)?$"), + (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"), + (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"), + (instregex "LH(A|B)RX(L)?(8)?$"), + (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"), + (instregex "LWARX(L)?$"), + (instregex "LWBRX(8)?$"), + (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"), + CP_ABORT, + DARN, + EnforceIEIO, + ISYNC, + MSGSYNC, + TLBSYNC, + SYNC, + LMW, + LSWI +)>; + +// 4 Cycle Restricted load uses a single slice but the dispatch for the whole +// superslice. +def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + LFIWZX, + LFDX, + LFD +)>; + +// Cracked Load Instructions. +// Load instructions that can be done in parallel. +def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C, + DISP_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + SLBIA, + SLBIE, + SLBMFEE, + SLBMFEV, + SLBMTE, + TLBIEL +)>; + +// Cracked Load Instruction. +// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU +// operations can be run in parallel. +def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C, + DISP_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + (instregex "L(W|H)ZU(X)?(8)?$"), + TEND +)>; + +// Cracked Store Instruction +// Consecutive Store and ALU instructions. The store is restricted and requires +// three dispatches. +def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, + DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + (instregex "ST(B|H|W|D)CX$") +)>; + +// Cracked Load Instruction. +// Two consecutive load operations for a total of 8 cycles. +def : InstRW<[P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C, + DISP_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + LDMX +)>; + +// Cracked Load instruction. +// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU +// operations cannot be done at the same time and so their latencies are added. +def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C, + DISP_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + (instregex "LHA(X)?(8)?$"), + (instregex "CP_PASTE(8)?o$"), + (instregex "LWA(X)?(_32)?$"), + TCHECK +)>; + +// Cracked Restricted Load instruction. +// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU +// operations cannot be done at the same time and so their latencies are added. +// Full 6 dispatches are required as this is both cracked and restricted. +def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C, + DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + LFIWAX +)>; + +// Cracked Load instruction. +// Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU +// operations cannot be done at the same time and so their latencies are added. +// Full 4 dispatches are required as this is a cracked instruction. +def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, + DISP_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + LXSIWAX, + LIWAX +)>; + +// Cracked Load instruction. 
+// Requires consecutive Load (4 cycles) and ALU (3 cycles) pieces totaling 7 +// cycles. The Load and ALU operations cannot be done at the same time and so +// their latencies are added. +// Full 6 dispatches are required as this is a restricted instruction. +def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C, + DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + LFSX, + LFS +)>; + +// Cracked Load instruction. +// Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU +// operations cannot be done at the same time and so their latencies are added. +// Full 4 dispatches are required as this is a cracked instruction. +def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, + DISP_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + LXSSP, + LXSSPX, + XFLOADf32, + DFLOADf32 +)>; + +// Cracked 3-Way Load Instruction +// Load with two ALU operations that depend on each other +def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C, + DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + (instregex "LHAU(X)?(8)?$"), + LWAUX +)>; + +// Cracked Load that requires the PM resource. +// Since the Load and the PM cannot be done at the same time the latencies are +// added. Requires 8 cycles. +// Since the PM requires the full superslice we need both EXECE, EXECO pipelines +// as well as 3 dispatches for the PM. The Load requires the remaining 2 +// dispatches. +def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C, + DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + LXVH8X, + LXVDSX, + LXVW4X +)>; + +// Single slice Restricted store operation. The restricted operation requires +// all three dispatches for the superslice. +def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + (instregex "STF(S|D|IWX|SX|DX)$"), + (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"), + (instregex "STW(8)?$"), + (instregex "(D|X)FSTORE(f32|f64)$"), + (instregex "ST(W|H|D)BRX$"), + (instregex "ST(B|H|D)(8)?$"), + (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"), + STIWX, + SLBIEG, + STMW, + STSWI, + TLBIE +)>; + +// Vector Store Instruction +// Requires the whole superslice and therefore requires all three dispatches +// as well as both the Even and Odd exec pipelines. +def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, + DISP_1C, DISP_1C, DISP_1C], + (instrs + (instregex "STVE(B|H|W)X$"), + (instregex "STVX(L)?$"), + (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$") +)>; + +// 5 Cycle DIV operation. Only one DIV unit per superslice so we use the whole +// superslice. That includes both exec pipelines (EXECO, EXECE) and all three +// dispatches. +def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + (instregex "MTCTR(8)?(loop)?$"), + (instregex "MTLR(8)?$") +)>; + +// 12 Cycle DIV operation. Only one DIV unit per superslice so we use the whole +// superslice. That includes both exec pipelines (EXECO, EXECE) and all three +// dispatches. +def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + (instregex "M(T|F)VRSAVE(v)?$"), + (instregex "M(T|F)PMR$"), + (instregex "M(T|F)TB(8)?$"), + (instregex "MF(SPR|CTR|LR)(8)?$"), + (instregex "M(T|F)MSR(D)?$"), + (instregex "MTSPR(8)?$") +)>; + +// 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole +// superslice. That includes both exec pipelines (EXECO, EXECE) and all three +// dispatches. 
+def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C,
+              DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    DIVW,
+    DIVWU,
+    MODSW
+)>;
+
+// 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
+// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+// dispatches.
+def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C,
+              DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    DIVWE,
+    DIVD,
+    DIVWEU,
+    DIVDU,
+    MODSD,
+    MODUD,
+    MODUW
+)>;
+
+// 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
+// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+// dispatches.
+def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C,
+              DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    DIVDE,
+    DIVDEU
+)>;
+
+// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
+// and one full superslice for the DIV operation since there is only one DIV
+// per superslice. Latency of DIV plus ALU is 18.
+def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    (instregex "DIVW(U)?(O)?o$")
+)>;
+
+// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
+// and one full superslice for the DIV operation since there is only one DIV
+// per superslice. Latency of DIV plus ALU is 26.
+def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    DIVDo,
+    DIVDUo,
+    DIVWEo,
+    DIVWEUo
+)>;
+
+// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
+// and one full superslice for the DIV operation since there is only one DIV
+// per superslice. Latency of DIV plus ALU is 42.
+def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    DIVDEo,
+    DIVDEUo
+)>;
+
+// CR access instructions in _BrMCR, IIC_BrMCRX.
+
+// Cracked, restricted, ALU operations.
+// Here the two ALU ops can actually be done in parallel and therefore the
+// latencies are not added together. Otherwise this is like having two
+// instructions running together on two pipelines and 6 dispatches.
+// ALU ops are 2 cycles each.
+def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    MTCRF,
+    MTCRF8
+)>;
+
+// Cracked ALU operations.
+// Here the two ALU ops can actually be done in parallel and therefore the
+// latencies are not added together. Otherwise this is like having two
+// instructions running together on two pipelines and 4 dispatches.
+// ALU ops are 2 cycles each.
+def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    (instregex "ADDC(8)?o$"),
+    (instregex "SUBFC(8)?o$")
+)>;
+
+// Cracked ALU operations.
+// Two ALU ops can be done in parallel.
+// One is a three cycle ALU, the other is a two cycle ALU.
+// One of the ALU ops is restricted, the other is not, so we have a total of
+// 5 dispatches.
+def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    (instregex "F(N)?ABS(D|S)o$"),
+    (instregex "FCPSGN(D|S)o$"),
+    (instregex "FNEG(D|S)o$"),
+    FMRo
+)>;
+
+// Cracked ALU operations.
+// Here the two ALU ops can actually be done in parallel and therefore the
+// latencies are not added together. Otherwise this is like having two
+// instructions running together on two pipelines and 4 dispatches.
+// ALU ops are 3 cycles each.
+def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    MCRFS
+)>;
+
+// Cracked Restricted ALU operations.
+// Here the two ALU ops can actually be done in parallel and therefore the
+// latencies are not added together. Otherwise this is like having two
+// instructions running together on two pipelines and 6 dispatches.
+// ALU ops are 3 cycles each.
+def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    (instregex "MTFSF(b|o)?$"),
+    (instregex "MTFSFI(o)?$")
+)>;
+
+// Cracked instruction made of two ALU ops.
+// The two ops cannot be done in parallel.
+// One of the ALU ops is restricted and takes 3 dispatches.
+def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    (instregex "RLD(I)?C(R|L)o$"),
+    (instregex "RLW(IMI|INM|NM)(8)?o$"),
+    (instregex "SLW(8)?o$"),
+    (instregex "SRAW(I)?o$"),
+    (instregex "SRW(8)?o$"),
+    RLDICL_32o,
+    RLDIMIo
+)>;
+
+// Cracked instruction made of two ALU ops.
+// The two ops cannot be done in parallel.
+// Both of the ALU ops are restricted and take 3 dispatches.
+def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    (instregex "MFFS(L|CE|o)?$")
+)>;
+
+// Cracked ALU instruction composed of three consecutive 2 cycle ALU ops for a
+// total of 6 cycles. All of the ALU operations are also restricted so each
+// takes 3 dispatches for a total of 9.
+def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
+              DISP_1C, DISP_1C],
+    (instrs
+    (instregex "MFCR(8)?$")
+)>;
+
+// Cracked instruction made of two ALU ops.
+// The two ops cannot be done in parallel.
+def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    (instregex "EXTSWSLIo$"),
+    (instregex "SRAD(I)?o$"),
+    SLDo,
+    SRDo,
+    RLDICo
+)>;
+
+// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
+def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    FDIV
+)>;
+
+// 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
+def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    FDIVo
+)>;
+
+// 36 Cycle DP Instruction.
+// Instruction can be done on a single slice.
+def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C, DISP_1C],
+    (instrs
+    XSSQRTDP
+)>;
+
+// 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
+def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    FSQRT
+)>;
+
+// 36 Cycle DP Vector Instruction.
+def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C,
+              DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    XVSQRTDP
+)>;
+
+// 27 Cycle DP Vector Instruction.
+def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
+              DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    XVSQRTSP
+)>;
+
+// 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
+def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    FSQRTo
+)>;
+
+// 26 Cycle DP Instruction.
+def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C, DISP_1C],
+    (instrs
+    XSSQRTSP
+)>;
+
+// 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
+def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    FSQRTS
+)>;
+
+// 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
+def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    FSQRTSo
+)>;
+
+// 33 Cycle DP Instruction. Takes one slice and 2 dispatches.
+def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C],
+    (instrs
+    XSDIVDP
+)>;
+
+// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
+def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    FDIVS
+)>;
+
+// 22 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
+def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    FDIVSo
+)>;
+
+// 22 Cycle DP Instruction. Takes one slice and 2 dispatches.
+def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C],
+    (instrs
+    XSDIVSP
+)>;
+
+// 24 Cycle DP Vector Instruction. Takes one full superslice.
+// Includes both EXECE, EXECO pipelines and all 3 dispatches for the given
+// superslice.
+def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
+              DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    XVDIVSP
+)>;
+
+// 33 Cycle DP Vector Instruction. Takes one full superslice.
+// Includes both EXECE, EXECO pipelines and all 3 dispatches for the given
+// superslice.
+def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
+              DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    XVDIVDP
+)>;
+
+// Instruction cracked into three pieces. One Load and two ALU operations.
+// The Load and one of the ALU ops cannot be run at the same time and so the
+// latencies are added together for 6 cycles. The remaining ALU is 2 cycles.
+// Both the load and the ALU that depends on it are restricted and so they take
+// a total of 6 dispatches. The final 2 dispatches come from the second ALU op.
+// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
+def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
+              IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    (instregex "LF(SU|SUX)$")
+)>;
+
+// Cracked instruction made up of a Store and an ALU. The ALU does not depend on
+// the store and so it can be run at the same time as the store. The store is
+// also restricted.
+def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    (instregex "STF(S|D)U(X)?$"),
+    (instregex "ST(B|H|W|D)U(X)?(8)?$")
+)>;
+
+// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
+// the load and so it can be run at the same time as the load.
+def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    (instregex "LBZU(X)?(8)?$"),
+    (instregex "LDU(X)?$")
+)>;
+
+
+// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
+// the load and so it can be run at the same time as the load. The load is also
+// restricted. 3 dispatches are from the restricted load while the other two
+// are from the ALU. The AGEN pipeline is from the load and the EXEC pipeline
+// is required for the ALU.
+def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    (instregex "LF(DU|DUX)$")
+)>;
+
+// Crypto Instructions
+
+// 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
+// superslice.
That includes both exec pipelines (EXECO, EXECE) and all three +// dispatches. +def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + (instregex "VPMSUM(B|H|W|D)$"), + (instregex "V(N)?CIPHER(LAST)?$"), + VSBOX +)>; + +// Branch Instructions + +// Two Cycle Branch +def : InstRW<[P9_BR_2C, DISP_1C, DISP_1C], + (instrs + (instregex "BCCCTR(L)?(8)?$"), + (instregex "BCCL(A|R|RL)?$"), + (instregex "BCCTR(L)?(8)?(n)?$"), + (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"), + (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"), + (instregex "BL(_TLS)?$"), + (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"), + (instregex "BLA(8|8_NOP)?$"), + (instregex "BLR(8|L)?$"), + (instregex "TAILB(A)?(8)?$"), + (instregex "TAILBCTR(8)?$"), + (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"), + (instregex "BCLR(L)?(n)?$"), + (instregex "BCTR(L)?(8)?$"), + B, + BA, + BC, + BCC, + BCCA, + BCL, + BCLalways, + BCLn, + BCTRL8_LDinto_toc, + BCn, + CTRL_DEP +)>; + +// Five Cycle Branch with a 2 Cycle ALU Op +// Operations must be done consecutively and not in parallel. +def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, + DISP_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + ADDPCIS +)>; + +// Special Extracted Instructions For Atomics + +// Atomic Load +def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C, + IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C, + IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, + DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, + DISP_1C], + (instrs + (instregex "L(D|W)AT$") +)>; + +// Atomic Store +def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, + IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, + DISP_1C], + (instrs + (instregex "ST(D|W)AT$") +)>; + +// Signal Processing Engine (SPE) Instructions +// These instructions are not supported on Power 9 +def : InstRW<[], + (instrs + BRINC, + EVABS, + EVEQV, + EVMRA, + EVNAND, + EVNEG, + (instregex "EVADD(I)?W$"), + (instregex "EVADD(SM|SS|UM|US)IAAW$"), + (instregex "EVAND(C)?$"), + (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"), + (instregex "EVCNTL(S|Z)W$"), + (instregex "EVDIVW(S|U)$"), + (instregex "EVEXTS(B|H)$"), + (instregex "EVLD(H|W|D)(X)?$"), + (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"), + (instregex "EVLWHE(X)?$"), + (instregex "EVLWHO(S|U)(X)?$"), + (instregex "EVLW(H|W)SPLAT(X)?$"), + (instregex "EVMERGE(HI|LO|HILO|LOHI)$"), + (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"), + (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"), + (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"), + (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"), + (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"), + (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"), + (instregex "EVMWHS(M|S)(F|FA|I|IA)$"), + (instregex "EVMWHUMI(A)?$"), + (instregex "EVMWLS(M|S)IA(A|N)W$"), + (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"), + (instregex "EVMWSM(F|I)(A|AA|AN)?$"), + (instregex "EVMWSSF(A|AA|AN)?$"), + (instregex "EVMWUMI(A|AA|AN)?$"), + (instregex "EV(N|X)?OR(C)?$"), + (instregex "EVR(LW|LWI|NDW)$"), + (instregex "EVSLW(I)?$"), + (instregex "EVSPLAT(F)?I$"), + (instregex "EVSRW(I)?(S|U)$"), + (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"), + (instregex "EVSUBF(S|U)(M|S)IAAW$"), + (instregex "EVSUB(I)?FW$") +)> { let Unsupported = 1; } + +// General Instructions without scheduling support. 
+def : InstRW<[], + (instrs + (instregex "(H)?RFI(D)?$"), + (instregex "DSS(ALL)?$"), + (instregex "DST(ST)?(T)?(64)?$"), + (instregex "ICBL(C|Q)$"), + (instregex "L(W|H|B)EPX$"), + (instregex "ST(W|H|B)EPX$"), + (instregex "(L|ST)FDEPX$"), + (instregex "M(T|F)SR(IN)?$"), + (instregex "M(T|F)DCR$"), + (instregex "NOP_GT_PWR(6|7)$"), + (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"), + (instregex "WRTEE(I)?$"), + ATTN, + CLRBHRB, + MFBHRBE, + MBAR, + MSYNC, + SLBSYNC, + NAP, + STOP, + TRAP, + RFCI, + RFDI, + RFMCI, + SC, + DCBA, + DCBI, + DCCCI, + ICCCI +)> { let Unsupported = 1; } diff --git a/capstone/suite/synctools/tablegen/PPC/PPC.td b/capstone/suite/synctools/tablegen/PPC/PPC.td new file mode 100644 index 000000000..80ad4962a --- /dev/null +++ b/capstone/suite/synctools/tablegen/PPC/PPC.td @@ -0,0 +1,480 @@ +//===-- PPC.td - Describe the PowerPC Target Machine -------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is the top level entry point for the PowerPC target. +// +//===----------------------------------------------------------------------===// + +// Get the target-independent interfaces which we are implementing. +// +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// PowerPC Subtarget features. +// + +//===----------------------------------------------------------------------===// +// CPU Directives // +//===----------------------------------------------------------------------===// + +def Directive440 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_440", "">; +def Directive601 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_601", "">; +def Directive602 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_602", "">; +def Directive603 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">; +def Directive604 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">; +def Directive620 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">; +def Directive7400: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_7400", "">; +def Directive750 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_750", "">; +def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">; +def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">; +def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">; +def DirectiveA2 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_A2", "">; +def DirectiveE500 : SubtargetFeature<"", "DarwinDirective", + "PPC::DIR_E500", "">; +def DirectiveE500mc : SubtargetFeature<"", "DarwinDirective", + "PPC::DIR_E500mc", "">; +def DirectiveE5500 : SubtargetFeature<"", "DarwinDirective", + "PPC::DIR_E5500", "">; +def DirectivePwr3: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR3", "">; +def DirectivePwr4: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR4", "">; +def DirectivePwr5: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5", "">; +def DirectivePwr5x + : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5X", "">; +def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">; +def DirectivePwr6x + : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">; +def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">; +def 
DirectivePwr8: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR8", "">; +def DirectivePwr9: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR9", "">; + +def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true", + "Enable 64-bit instructions">; +def FeatureHardFloat : SubtargetFeature<"hard-float", "HasHardFloat", "true", + "Enable floating-point instructions">; +def Feature64BitRegs : SubtargetFeature<"64bitregs","Use64BitRegs", "true", + "Enable 64-bit registers usage for ppc32 [beta]">; +def FeatureCRBits : SubtargetFeature<"crbits", "UseCRBits", "true", + "Use condition-register bits individually">; +def FeatureFPU : SubtargetFeature<"fpu","HasFPU","true", + "Enable classic FPU instructions", + [FeatureHardFloat]>; +def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true", + "Enable Altivec instructions", + [FeatureFPU]>; +def FeatureSPE : SubtargetFeature<"spe","HasSPE", "true", + "Enable SPE instructions", + [FeatureHardFloat]>; +def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true", + "Enable the MFOCRF instruction">; +def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true", + "Enable the fsqrt instruction", + [FeatureFPU]>; +def FeatureFCPSGN : SubtargetFeature<"fcpsgn", "HasFCPSGN", "true", + "Enable the fcpsgn instruction", + [FeatureFPU]>; +def FeatureFRE : SubtargetFeature<"fre", "HasFRE", "true", + "Enable the fre instruction", + [FeatureFPU]>; +def FeatureFRES : SubtargetFeature<"fres", "HasFRES", "true", + "Enable the fres instruction", + [FeatureFPU]>; +def FeatureFRSQRTE : SubtargetFeature<"frsqrte", "HasFRSQRTE", "true", + "Enable the frsqrte instruction", + [FeatureFPU]>; +def FeatureFRSQRTES : SubtargetFeature<"frsqrtes", "HasFRSQRTES", "true", + "Enable the frsqrtes instruction", + [FeatureFPU]>; +def FeatureRecipPrec : SubtargetFeature<"recipprec", "HasRecipPrec", "true", + "Assume higher precision reciprocal estimates">; +def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true", + "Enable the stfiwx instruction", + [FeatureFPU]>; +def FeatureLFIWAX : SubtargetFeature<"lfiwax","HasLFIWAX", "true", + "Enable the lfiwax instruction", + [FeatureFPU]>; +def FeatureFPRND : SubtargetFeature<"fprnd", "HasFPRND", "true", + "Enable the fri[mnpz] instructions", + [FeatureFPU]>; +def FeatureFPCVT : SubtargetFeature<"fpcvt", "HasFPCVT", "true", + "Enable fc[ft]* (unsigned and single-precision) and lfiwzx instructions", + [FeatureFPU]>; +def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true", + "Enable the isel instruction">; +def FeatureBPERMD : SubtargetFeature<"bpermd", "HasBPERMD", "true", + "Enable the bpermd instruction">; +def FeatureExtDiv : SubtargetFeature<"extdiv", "HasExtDiv", "true", + "Enable extended divide instructions">; +def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true", + "Enable the ldbrx instruction">; +def FeatureCMPB : SubtargetFeature<"cmpb", "HasCMPB", "true", + "Enable the cmpb instruction">; +def FeatureICBT : SubtargetFeature<"icbt","HasICBT", "true", + "Enable icbt instruction">; +def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true", + "Enable Book E instructions", + [FeatureICBT]>; +def FeatureMSYNC : SubtargetFeature<"msync", "HasOnlyMSYNC", "true", + "Has only the msync instruction instead of sync", + [FeatureBookE]>; +def FeatureE500 : SubtargetFeature<"e500", "IsE500", "true", + "Enable E500/E500mc instructions">; +def FeatureSecurePlt : SubtargetFeature<"secure-plt","SecurePlt", "true", + "Enable secure plt mode">; +def FeaturePPC4xx : 
SubtargetFeature<"ppc4xx", "IsPPC4xx", "true", + "Enable PPC 4xx instructions">; +def FeaturePPC6xx : SubtargetFeature<"ppc6xx", "IsPPC6xx", "true", + "Enable PPC 6xx instructions">; +def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true", + "Enable QPX instructions", + [FeatureFPU]>; +def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true", + "Enable VSX instructions", + [FeatureAltivec]>; +def FeatureP8Altivec : SubtargetFeature<"power8-altivec", "HasP8Altivec", "true", + "Enable POWER8 Altivec instructions", + [FeatureAltivec]>; +def FeatureP8Crypto : SubtargetFeature<"crypto", "HasP8Crypto", "true", + "Enable POWER8 Crypto instructions", + [FeatureP8Altivec]>; +def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true", + "Enable POWER8 vector instructions", + [FeatureVSX, FeatureP8Altivec]>; +def FeatureDirectMove : + SubtargetFeature<"direct-move", "HasDirectMove", "true", + "Enable Power8 direct move instructions", + [FeatureVSX]>; +def FeaturePartwordAtomic : SubtargetFeature<"partword-atomics", + "HasPartwordAtomics", "true", + "Enable l[bh]arx and st[bh]cx.">; +def FeatureInvariantFunctionDescriptors : + SubtargetFeature<"invariant-function-descriptors", + "HasInvariantFunctionDescriptors", "true", + "Assume function descriptors are invariant">; +def FeatureLongCall : SubtargetFeature<"longcall", "UseLongCalls", "true", + "Always use indirect calls">; +def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true", + "Enable Hardware Transactional Memory instructions">; +def FeatureMFTB : SubtargetFeature<"", "FeatureMFTB", "true", + "Implement mftb using the mfspr instruction">; +def FeatureFusion : SubtargetFeature<"fusion", "HasFusion", "true", + "Target supports add/load integer fusion.">; +def FeatureFloat128 : + SubtargetFeature<"float128", "HasFloat128", "true", + "Enable the __float128 data type for IEEE-754R Binary128.", + [FeatureVSX]>; +def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", + "POPCNTD_Fast", + "Enable the popcnt[dw] instructions">; +// Note that for the a2/a2q processor models we should not use popcnt[dw] by +// default. These processors do support the instructions, but they're +// microcoded, and the software emulation is about twice as fast. +def FeatureSlowPOPCNTD : SubtargetFeature<"slow-popcntd","HasPOPCNTD", + "POPCNTD_Slow", + "Has slow popcnt[dw] instructions">; + +def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true", + "Treat vector data stream cache control instructions as deprecated">; + +def FeatureISA3_0 : SubtargetFeature<"isa-v30-instructions", "IsISA3_0", + "true", + "Enable instructions added in ISA 3.0.">; +def FeatureP9Altivec : SubtargetFeature<"power9-altivec", "HasP9Altivec", "true", + "Enable POWER9 Altivec instructions", + [FeatureISA3_0, FeatureP8Altivec]>; +def FeatureP9Vector : SubtargetFeature<"power9-vector", "HasP9Vector", "true", + "Enable POWER9 vector instructions", + [FeatureISA3_0, FeatureP8Vector, + FeatureP9Altivec]>; + +// Since new processors generally contain a superset of features of those that +// came before them, the idea is to make implementations of new processors +// less error prone and easier to read. +// Namely: +// list<SubtargetFeature> Power8FeatureList = ... +// list<SubtargetFeature> FutureProcessorSpecificFeatureList = +// [ features that Power8 does not support ] +// list<SubtargetFeature> FutureProcessorFeatureList = +// !listconcat(Power8FeatureList, FutureProcessorSpecificFeatureList) + +// Makes it explicit and obvious what is new in FutureProcesor vs. 
Power8 as +// well as providing a single point of definition if the feature set will be +// used elsewhere. +def ProcessorFeatures { + list<SubtargetFeature> Power7FeatureList = + [DirectivePwr7, FeatureAltivec, FeatureVSX, + FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, + FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, + FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, + FeatureFPRND, FeatureFPCVT, FeatureISEL, + FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, + Feature64Bit /*, Feature64BitRegs */, + FeatureBPERMD, FeatureExtDiv, + FeatureMFTB, DeprecatedDST]; + list<SubtargetFeature> Power8SpecificFeatures = + [DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto, + FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic, + FeatureFusion]; + list<SubtargetFeature> Power8FeatureList = + !listconcat(Power7FeatureList, Power8SpecificFeatures); + list<SubtargetFeature> Power9SpecificFeatures = + [DirectivePwr9, FeatureP9Altivec, FeatureP9Vector, FeatureISA3_0]; + list<SubtargetFeature> Power9FeatureList = + !listconcat(Power8FeatureList, Power9SpecificFeatures); +} + +// Note: Future features to add when support is extended to more +// recent ISA levels: +// +// DFP p6, p6x, p7 decimal floating-point instructions +// POPCNTB p5 through p7 popcntb and related instructions + +//===----------------------------------------------------------------------===// +// Classes used for relation maps. +//===----------------------------------------------------------------------===// +// RecFormRel - Filter class used to relate non-record-form instructions with +// their record-form variants. +class RecFormRel; + +// AltVSXFMARel - Filter class used to relate the primary addend-killing VSX +// FMA instruction forms with their corresponding factor-killing forms. +class AltVSXFMARel { + bit IsVSXFMAAlt = 0; +} + +//===----------------------------------------------------------------------===// +// Relation Map Definitions. +//===----------------------------------------------------------------------===// + +def getRecordFormOpcode : InstrMapping { + let FilterClass = "RecFormRel"; + // Instructions with the same BaseName and Interpretation64Bit values + // form a row. + let RowFields = ["BaseName", "Interpretation64Bit"]; + // Instructions with the same RC value form a column. + let ColFields = ["RC"]; + // The key column are the non-record-form instructions. + let KeyCol = ["0"]; + // Value columns RC=1 + let ValueCols = [["1"]]; +} + +def getNonRecordFormOpcode : InstrMapping { + let FilterClass = "RecFormRel"; + // Instructions with the same BaseName and Interpretation64Bit values + // form a row. + let RowFields = ["BaseName", "Interpretation64Bit"]; + // Instructions with the same RC value form a column. + let ColFields = ["RC"]; + // The key column are the record-form instructions. + let KeyCol = ["1"]; + // Value columns are RC=0 + let ValueCols = [["0"]]; +} + +def getAltVSXFMAOpcode : InstrMapping { + let FilterClass = "AltVSXFMARel"; + // Instructions with the same BaseName and Interpretation64Bit values + // form a row. + let RowFields = ["BaseName"]; + // Instructions with the same RC value form a column. + let ColFields = ["IsVSXFMAAlt"]; + // The key column are the (default) addend-killing instructions. 
+ let KeyCol = ["0"]; + // Value columns IsVSXFMAAlt=1 + let ValueCols = [["1"]]; +} + +//===----------------------------------------------------------------------===// +// Register File Description +//===----------------------------------------------------------------------===// + +include "PPCRegisterInfo.td" +include "PPCSchedule.td" + +//===----------------------------------------------------------------------===// +// PowerPC processors supported. +// + +def : Processor<"generic", G3Itineraries, [Directive32, FeatureHardFloat, + FeatureMFTB]>; +def : ProcessorModel<"440", PPC440Model, [Directive440, FeatureISEL, + FeatureFRES, FeatureFRSQRTE, + FeatureICBT, FeatureBookE, + FeatureMSYNC, FeatureMFTB]>; +def : ProcessorModel<"450", PPC440Model, [Directive440, FeatureISEL, + FeatureFRES, FeatureFRSQRTE, + FeatureICBT, FeatureBookE, + FeatureMSYNC, FeatureMFTB]>; +def : Processor<"601", G3Itineraries, [Directive601, FeatureFPU]>; +def : Processor<"602", G3Itineraries, [Directive602, FeatureFPU, + FeatureMFTB]>; +def : Processor<"603", G3Itineraries, [Directive603, + FeatureFRES, FeatureFRSQRTE, + FeatureMFTB]>; +def : Processor<"603e", G3Itineraries, [Directive603, + FeatureFRES, FeatureFRSQRTE, + FeatureMFTB]>; +def : Processor<"603ev", G3Itineraries, [Directive603, + FeatureFRES, FeatureFRSQRTE, + FeatureMFTB]>; +def : Processor<"604", G3Itineraries, [Directive604, + FeatureFRES, FeatureFRSQRTE, + FeatureMFTB]>; +def : Processor<"604e", G3Itineraries, [Directive604, + FeatureFRES, FeatureFRSQRTE, + FeatureMFTB]>; +def : Processor<"620", G3Itineraries, [Directive620, + FeatureFRES, FeatureFRSQRTE, + FeatureMFTB]>; +def : Processor<"750", G4Itineraries, [Directive750, + FeatureFRES, FeatureFRSQRTE, + FeatureMFTB]>; +def : Processor<"g3", G3Itineraries, [Directive750, + FeatureFRES, FeatureFRSQRTE, + FeatureMFTB]>; +def : Processor<"7400", G4Itineraries, [Directive7400, FeatureAltivec, + FeatureFRES, FeatureFRSQRTE, + FeatureMFTB]>; +def : Processor<"g4", G4Itineraries, [Directive7400, FeatureAltivec, + FeatureFRES, FeatureFRSQRTE, + FeatureMFTB]>; +def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec, + FeatureFRES, FeatureFRSQRTE, + FeatureMFTB]>; +def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec, + FeatureFRES, FeatureFRSQRTE, + FeatureMFTB]>; + +def : ProcessorModel<"970", G5Model, + [Directive970, FeatureAltivec, + FeatureMFOCRF, FeatureFSqrt, + FeatureFRES, FeatureFRSQRTE, FeatureSTFIWX, + Feature64Bit /*, Feature64BitRegs */, + FeatureMFTB]>; +def : ProcessorModel<"g5", G5Model, + [Directive970, FeatureAltivec, + FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, + FeatureFRES, FeatureFRSQRTE, + Feature64Bit /*, Feature64BitRegs */, + FeatureMFTB, DeprecatedDST]>; +def : ProcessorModel<"e500", PPCE500Model, + [DirectiveE500, + FeatureICBT, FeatureBookE, + FeatureISEL, FeatureMFTB]>; +def : ProcessorModel<"e500mc", PPCE500mcModel, + [DirectiveE500mc, + FeatureSTFIWX, FeatureICBT, FeatureBookE, + FeatureISEL, FeatureMFTB]>; +def : ProcessorModel<"e5500", PPCE5500Model, + [DirectiveE5500, FeatureMFOCRF, Feature64Bit, + FeatureSTFIWX, FeatureICBT, FeatureBookE, + FeatureISEL, FeatureMFTB]>; +def : ProcessorModel<"a2", PPCA2Model, + [DirectiveA2, FeatureICBT, FeatureBookE, FeatureMFOCRF, + FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, + FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, + FeatureSTFIWX, FeatureLFIWAX, + FeatureFPRND, FeatureFPCVT, FeatureISEL, + FeatureSlowPOPCNTD, FeatureCMPB, FeatureLDBRX, + Feature64Bit /*, 
Feature64BitRegs */, FeatureMFTB]>; +def : ProcessorModel<"a2q", PPCA2Model, + [DirectiveA2, FeatureICBT, FeatureBookE, FeatureMFOCRF, + FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, + FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, + FeatureSTFIWX, FeatureLFIWAX, + FeatureFPRND, FeatureFPCVT, FeatureISEL, + FeatureSlowPOPCNTD, FeatureCMPB, FeatureLDBRX, + Feature64Bit /*, Feature64BitRegs */, FeatureQPX, + FeatureMFTB]>; +def : ProcessorModel<"pwr3", G5Model, + [DirectivePwr3, FeatureAltivec, + FeatureFRES, FeatureFRSQRTE, FeatureMFOCRF, + FeatureSTFIWX, Feature64Bit]>; +def : ProcessorModel<"pwr4", G5Model, + [DirectivePwr4, FeatureAltivec, FeatureMFOCRF, + FeatureFSqrt, FeatureFRES, FeatureFRSQRTE, + FeatureSTFIWX, Feature64Bit, FeatureMFTB]>; +def : ProcessorModel<"pwr5", G5Model, + [DirectivePwr5, FeatureAltivec, FeatureMFOCRF, + FeatureFSqrt, FeatureFRE, FeatureFRES, + FeatureFRSQRTE, FeatureFRSQRTES, + FeatureSTFIWX, Feature64Bit, + FeatureMFTB, DeprecatedDST]>; +def : ProcessorModel<"pwr5x", G5Model, + [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF, + FeatureFSqrt, FeatureFRE, FeatureFRES, + FeatureFRSQRTE, FeatureFRSQRTES, + FeatureSTFIWX, FeatureFPRND, Feature64Bit, + FeatureMFTB, DeprecatedDST]>; +def : ProcessorModel<"pwr6", G5Model, + [DirectivePwr6, FeatureAltivec, + FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, + FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, + FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB, + FeatureFPRND, Feature64Bit /*, Feature64BitRegs */, + FeatureMFTB, DeprecatedDST]>; +def : ProcessorModel<"pwr6x", G5Model, + [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF, + FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, + FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, + FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB, + FeatureFPRND, Feature64Bit, + FeatureMFTB, DeprecatedDST]>; +def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.Power7FeatureList>; +def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>; +def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.Power9FeatureList>; +def : Processor<"ppc", G3Itineraries, [Directive32, FeatureHardFloat, + FeatureMFTB]>; +def : Processor<"ppc32", G3Itineraries, [Directive32, FeatureHardFloat, + FeatureMFTB]>; +def : ProcessorModel<"ppc64", G5Model, + [Directive64, FeatureAltivec, + FeatureMFOCRF, FeatureFSqrt, FeatureFRES, + FeatureFRSQRTE, FeatureSTFIWX, + Feature64Bit /*, Feature64BitRegs */, + FeatureMFTB]>; +def : ProcessorModel<"ppc64le", P8Model, ProcessorFeatures.Power8FeatureList>; + +//===----------------------------------------------------------------------===// +// Calling Conventions +//===----------------------------------------------------------------------===// + +include "PPCCallingConv.td" + +def PPCInstrInfo : InstrInfo { + let isLittleEndianEncoding = 1; + + // FIXME: Unset this when no longer needed! + let decodePositionallyEncodedOperands = 1; + + let noNamedPositionallyEncodedOperands = 1; +} + +def PPCAsmParser : AsmParser { + let ShouldEmitMatchRegisterName = 0; +} + +def PPCAsmParserVariant : AsmParserVariant { + int Variant = 0; + + // We do not use hard coded registers in asm strings. However, some + // InstAlias definitions use immediate literals. Set RegisterPrefix + // so that those are not misinterpreted as registers. + string RegisterPrefix = "%"; + string BreakCharacters = "."; +} + +def PPC : Target { + // Information about the instructions. 
+ let InstructionSet = PPCInstrInfo; + + let AssemblyParsers = [PPCAsmParser]; + let AssemblyParserVariants = [PPCAsmParserVariant]; + let AllowRegisterRenaming = 1; +} diff --git a/capstone/suite/synctools/tablegen/PPC/PPCCallingConv.td b/capstone/suite/synctools/tablegen/PPC/PPCCallingConv.td new file mode 100644 index 000000000..12c581023 --- /dev/null +++ b/capstone/suite/synctools/tablegen/PPC/PPCCallingConv.td @@ -0,0 +1,378 @@ +//===- PPCCallingConv.td - Calling Conventions for PowerPC -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This describes the calling conventions for the PowerPC 32- and 64-bit +// architectures. +// +//===----------------------------------------------------------------------===// + +/// CCIfSubtarget - Match if the current subtarget has a feature F. +class CCIfSubtarget<string F, CCAction A> + : CCIf<!strconcat("static_cast<const PPCSubtarget&>" + "(State.getMachineFunction().getSubtarget()).", + F), + A>; +class CCIfNotSubtarget<string F, CCAction A> + : CCIf<!strconcat("!static_cast<const PPCSubtarget&>" + "(State.getMachineFunction().getSubtarget()).", + F), + A>; +class CCIfOrigArgWasNotPPCF128<CCAction A> + : CCIf<"!static_cast<PPCCCState *>(&State)->WasOriginalArgPPCF128(ValNo)", + A>; +class CCIfOrigArgWasPPCF128<CCAction A> + : CCIf<"static_cast<PPCCCState *>(&State)->WasOriginalArgPPCF128(ValNo)", + A>; + +//===----------------------------------------------------------------------===// +// Return Value Calling Convention +//===----------------------------------------------------------------------===// + +// PPC64 AnyReg return-value convention. No explicit register is specified for +// the return-value. The register allocator is allowed and expected to choose +// any free register. +// +// This calling convention is currently only supported by the stackmap and +// patchpoint intrinsics. All other uses will result in an assert on Debug +// builds. On Release builds we fallback to the PPC C calling convention. +def RetCC_PPC64_AnyReg : CallingConv<[ + CCCustom<"CC_PPC_AnyReg_Error"> +]>; + +// Return-value convention for PowerPC coldcc. +def RetCC_PPC_Cold : CallingConv<[ + // Use the same return registers as RetCC_PPC, but limited to only + // one return value. The remaining return values will be saved to + // the stack. 
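+  //
+  // Illustrative example (a sketch, not part of this patch): for a coldcc
+  // function such as
+  //   coldcc { i64, i64 } @f()
+  // only the first i64 is returned in X3 by the rules below; a second scalar
+  // return value does not get X4 (as it would under RetCC_PPC) and is
+  // returned on the stack instead.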
+ CCIfType<[i32, i1], CCIfSubtarget<"isPPC64()", CCPromoteToType<i64>>>, + CCIfType<[i1], CCIfNotSubtarget<"isPPC64()", CCPromoteToType<i32>>>, + + CCIfType<[i32], CCAssignToReg<[R3]>>, + CCIfType<[i64], CCAssignToReg<[X3]>>, + CCIfType<[i128], CCAssignToReg<[X3]>>, + + CCIfType<[f32], CCAssignToReg<[F1]>>, + CCIfType<[f64], CCAssignToReg<[F1]>>, + CCIfType<[f128], CCIfSubtarget<"hasP9Vector()", CCAssignToReg<[V2]>>>, + + CCIfType<[v4f64, v4f32, v4i1], + CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1]>>>, + + CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64], + CCIfSubtarget<"hasAltivec()", + CCAssignToReg<[V2]>>> +]>; + +// Return-value convention for PowerPC +def RetCC_PPC : CallingConv<[ + CCIfCC<"CallingConv::AnyReg", CCDelegateTo<RetCC_PPC64_AnyReg>>, + + // On PPC64, integer return values are always promoted to i64 + CCIfType<[i32, i1], CCIfSubtarget<"isPPC64()", CCPromoteToType<i64>>>, + CCIfType<[i1], CCIfNotSubtarget<"isPPC64()", CCPromoteToType<i32>>>, + + CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, + CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>, + CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>, + + // Floating point types returned as "direct" go into F1 .. F8; note that + // only the ELFv2 ABI fully utilizes all these registers. + CCIfNotSubtarget<"hasSPE()", + CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>>, + CCIfNotSubtarget<"hasSPE()", + CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>>, + CCIfSubtarget<"hasSPE()", + CCIfType<[f32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>>, + CCIfSubtarget<"hasSPE()", + CCIfType<[f64], CCAssignToReg<[S3, S4, S5, S6, S7, S8, S9, S10]>>>, + + // For P9, f128 are passed in vector registers. + CCIfType<[f128], + CCIfSubtarget<"hasP9Vector()", + CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>, + + // QPX vectors are returned in QF1 and QF2. + CCIfType<[v4f64, v4f32, v4i1], + CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>, + + // Vector types returned as "direct" go into V2 .. V9; note that only the + // ELFv2 ABI fully utilizes all these registers. + CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64], + CCIfSubtarget<"hasAltivec()", + CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>> +]>; + +// No explicit register is specified for the AnyReg calling convention. The +// register allocator may assign the arguments to any free register. +// +// This calling convention is currently only supported by the stackmap and +// patchpoint intrinsics. All other uses will result in an assert on Debug +// builds. On Release builds we fallback to the PPC C calling convention. +def CC_PPC64_AnyReg : CallingConv<[ + CCCustom<"CC_PPC_AnyReg_Error"> +]>; + +// Note that we don't currently have calling conventions for 64-bit +// PowerPC, but handle all the complexities of the ABI in the lowering +// logic. FIXME: See if the logic can be simplified with use of CCs. +// This may require some extensions to current table generation. + +// Simple calling convention for 64-bit ELF PowerPC fast isel. +// Only handle ints and floats. All ints are promoted to i64. +// Vector types and quadword ints are not handled. 
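+// For example (sketch, assuming the rules defined below): a call to
+//   void @f(i8 %a, float %b)
+// promotes %a to i64 and assigns it X3, while %b is assigned F1.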
+def CC_PPC64_ELF_FIS : CallingConv<[ + CCIfCC<"CallingConv::AnyReg", CCDelegateTo<CC_PPC64_AnyReg>>, + + CCIfType<[i1], CCPromoteToType<i64>>, + CCIfType<[i8], CCPromoteToType<i64>>, + CCIfType<[i16], CCPromoteToType<i64>>, + CCIfType<[i32], CCPromoteToType<i64>>, + CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>, + CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>> +]>; + +// Simple return-value convention for 64-bit ELF PowerPC fast isel. +// All small ints are promoted to i64. Vector types, quadword ints, +// and multiple register returns are "supported" to avoid compile +// errors, but none are handled by the fast selector. +def RetCC_PPC64_ELF_FIS : CallingConv<[ + CCIfCC<"CallingConv::AnyReg", CCDelegateTo<RetCC_PPC64_AnyReg>>, + + CCIfType<[i1], CCPromoteToType<i64>>, + CCIfType<[i8], CCPromoteToType<i64>>, + CCIfType<[i16], CCPromoteToType<i64>>, + CCIfType<[i32], CCPromoteToType<i64>>, + CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>, + CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>, + CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, + CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, + CCIfType<[f128], + CCIfSubtarget<"hasP9Vector()", + CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>, + CCIfType<[v4f64, v4f32, v4i1], + CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>, + CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64], + CCIfSubtarget<"hasAltivec()", + CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>> +]>; + +//===----------------------------------------------------------------------===// +// PowerPC System V Release 4 32-bit ABI +//===----------------------------------------------------------------------===// + +def CC_PPC32_SVR4_Common : CallingConv<[ + CCIfType<[i1], CCPromoteToType<i32>>, + + // The ABI requires i64 to be passed in two adjacent registers with the first + // register having an odd register number. + CCIfType<[i32], + CCIfSplit<CCIfSubtarget<"useSoftFloat()", + CCIfOrigArgWasNotPPCF128< + CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>>>, + + CCIfType<[i32], + CCIfSplit<CCIfNotSubtarget<"useSoftFloat()", + CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>>, + CCIfSplit<CCIfSubtarget<"useSoftFloat()", + CCIfOrigArgWasPPCF128<CCCustom< + "CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128">>>>, + + // The 'nest' parameter, if any, is passed in R11. + CCIfNest<CCAssignToReg<[R11]>>, + + // The first 8 integer arguments are passed in integer registers. + CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, + + // Make sure the i64 words from a long double are either both passed in + // registers or both passed on the stack. + CCIfType<[f64], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignFPArgRegs">>>, + + // FP values are passed in F1 - F8. + CCIfType<[f32, f64], + CCIfNotSubtarget<"hasSPE()", + CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>>, + CCIfType<[f64], + CCIfSubtarget<"hasSPE()", + CCAssignToReg<[S3, S4, S5, S6, S7, S8, S9, S10]>>>, + CCIfType<[f32], + CCIfSubtarget<"hasSPE()", + CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>>, + + // Split arguments have an alignment of 8 bytes on the stack. + CCIfType<[i32], CCIfSplit<CCAssignToStack<4, 8>>>, + + CCIfType<[i32], CCAssignToStack<4, 4>>, + + // Floats are stored in double precision format, thus they have the same + // alignment and size as doubles. + // With SPE floats are stored as single precision, so have alignment and + // size of int. 
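+  // For example (sketch): an f32 argument that ends up on the stack takes an
+  // 8-byte slot with 8-byte alignment under the non-SPE rules below, but only
+  // a 4-byte, 4-aligned slot when SPE is in use.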
+  CCIfType<[f32,f64], CCIfNotSubtarget<"hasSPE()", CCAssignToStack<8, 8>>>,
+  CCIfType<[f32], CCIfSubtarget<"hasSPE()", CCAssignToStack<4, 4>>>,
+  CCIfType<[f64], CCIfSubtarget<"hasSPE()", CCAssignToStack<8, 8>>>,
+
+  // QPX vectors that are stored in double precision need 32-byte alignment.
+  CCIfType<[v4f64, v4i1], CCAssignToStack<32, 32>>,
+
+  // Vectors and float128 get 16-byte stack slots that are 16-byte aligned.
+  CCIfType<[v16i8, v8i16, v4i32, v4f32, v2f64, v2i64], CCAssignToStack<16, 16>>,
+  CCIfType<[f128], CCIfSubtarget<"hasP9Vector()", CCAssignToStack<16, 16>>>
+]>;
+
+// This calling convention always puts vector arguments on the stack. It is
+// used to assign vector arguments that belong to the variable portion of the
+// parameter list of a variable argument function.
+def CC_PPC32_SVR4_VarArg : CallingConv<[
+  CCDelegateTo<CC_PPC32_SVR4_Common>
+]>;
+
+// In contrast to CC_PPC32_SVR4_VarArg, this calling convention first tries to
+// put vector arguments in vector registers before putting them on the stack.
+def CC_PPC32_SVR4 : CallingConv<[
+  // QPX vectors mirror the scalar FP convention.
+  CCIfType<[v4f64, v4f32, v4i1], CCIfSubtarget<"hasQPX()",
+           CCAssignToReg<[QF1, QF2, QF3, QF4, QF5, QF6, QF7, QF8]>>>,
+
+  // The first 12 vector arguments are passed in AltiVec registers.
+  CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
+           CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7,
+                                                        V8, V9, V10, V11, V12, V13]>>>,
+
+  // Float128 types are treated as vector arguments.
+  CCIfType<[f128],
+           CCIfSubtarget<"hasP9Vector()", CCAssignToReg<[V2, V3, V4, V5, V6, V7,
+                                                         V8, V9, V10, V11, V12, V13]>>>,
+
+  CCDelegateTo<CC_PPC32_SVR4_Common>
+]>;
+
+// Helper "calling convention" to handle aggregate-by-value arguments.
+// Aggregate-by-value arguments are always placed in the local variable space
+// of the caller. This calling convention is only used to assign those stack
+// offsets in the caller's stack frame.
+//
+// Still, the address of the aggregate copy in the caller's stack frame is
+// passed in a GPR (or in the parameter list area if all GPRs are allocated)
+// from the caller to the callee. The location for the address argument is
+// assigned by the CC_PPC32_SVR4 calling convention.
+//
+// The only purpose of CC_PPC32_SVR4_Custom_Dummy is to skip arguments that are
+// not passed by value.
+
+def CC_PPC32_SVR4_ByVal : CallingConv<[
+  CCIfByVal<CCPassByVal<4, 4>>,
+
+  CCCustom<"CC_PPC32_SVR4_Custom_Dummy">
+]>;
+
+def CSR_Altivec : CalleeSavedRegs<(add V20, V21, V22, V23, V24, V25, V26, V27,
+                                       V28, V29, V30, V31)>;
+
+def CSR_Darwin32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20,
+                                        R21, R22, R23, R24, R25, R26, R27, R28,
+                                        R29, R30, R31, F14, F15, F16, F17, F18,
+                                        F19, F20, F21, F22, F23, F24, F25, F26,
+                                        F27, F28, F29, F30, F31, CR2, CR3, CR4
+                                   )>;
+
+def CSR_Darwin32_Altivec : CalleeSavedRegs<(add CSR_Darwin32, CSR_Altivec)>;
+
+// SPE does not use FPRs, so break out the common register set as base.
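+// As an aside (a sketch of the generated interface, assuming TableGen's usual
+// RegisterInfo output conventions): each CalleeSavedRegs set below is emitted
+// as a CSR_*_SaveList array plus a CSR_*_RegMask, which the target returns
+// from getCalleeSavedRegs() and getCallPreservedMask(); the SPE variants let
+// soft-FPR subtargets save S-registers instead of FPRs.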
+def CSR_SVR432_COMM : CalleeSavedRegs<(add R14, R15, R16, R17, R18, R19, R20,
+                                           R21, R22, R23, R24, R25, R26, R27,
+                                           R28, R29, R30, R31, CR2, CR3, CR4
+                                      )>;
+def CSR_SVR432 : CalleeSavedRegs<(add CSR_SVR432_COMM, F14, F15, F16, F17, F18,
+                                      F19, F20, F21, F22, F23, F24, F25, F26,
+                                      F27, F28, F29, F30, F31
+                                 )>;
+def CSR_SPE : CalleeSavedRegs<(add S14, S15, S16, S17, S18, S19, S20, S21, S22,
+                                   S23, S24, S25, S26, S27, S28, S29, S30, S31
+                              )>;
+
+def CSR_SVR432_Altivec : CalleeSavedRegs<(add CSR_SVR432, CSR_Altivec)>;
+
+def CSR_SVR432_SPE : CalleeSavedRegs<(add CSR_SVR432_COMM, CSR_SPE)>;
+
+def CSR_Darwin64 : CalleeSavedRegs<(add X13, X14, X15, X16, X17, X18, X19, X20,
+                                        X21, X22, X23, X24, X25, X26, X27, X28,
+                                        X29, X30, X31, F14, F15, F16, F17, F18,
+                                        F19, F20, F21, F22, F23, F24, F25, F26,
+                                        F27, F28, F29, F30, F31, CR2, CR3, CR4
+                                   )>;
+
+def CSR_Darwin64_Altivec : CalleeSavedRegs<(add CSR_Darwin64, CSR_Altivec)>;
+
+def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20,
+                                      X21, X22, X23, X24, X25, X26, X27, X28,
+                                      X29, X30, X31, F14, F15, F16, F17, F18,
+                                      F19, F20, F21, F22, F23, F24, F25, F26,
+                                      F27, F28, F29, F30, F31, CR2, CR3, CR4
+                                 )>;
+
+// CSRs that are handled by the prologue and epilogue.
+def CSR_SRV464_TLS_PE : CalleeSavedRegs<(add)>;
+
+def CSR_SVR464_ViaCopy : CalleeSavedRegs<(add CSR_SVR464)>;
+
+def CSR_SVR464_Altivec : CalleeSavedRegs<(add CSR_SVR464, CSR_Altivec)>;
+
+def CSR_SVR464_Altivec_ViaCopy : CalleeSavedRegs<(add CSR_SVR464_Altivec)>;
+
+def CSR_SVR464_R2 : CalleeSavedRegs<(add CSR_SVR464, X2)>;
+
+def CSR_SVR464_R2_ViaCopy : CalleeSavedRegs<(add CSR_SVR464_R2)>;
+
+def CSR_SVR464_R2_Altivec : CalleeSavedRegs<(add CSR_SVR464_Altivec, X2)>;
+
+def CSR_SVR464_R2_Altivec_ViaCopy : CalleeSavedRegs<(add CSR_SVR464_R2_Altivec)>;
+
+def CSR_NoRegs : CalleeSavedRegs<(add)>;
+
+// The coldcc calling convention marks most registers as non-volatile.
+// Do not include r1 since the stack pointer is never considered a CSR.
+// Do not include r2, since it is the TOC register and is added depending
+// on whether or not the function uses the TOC and is a non-leaf.
+// Do not include r0, r11, r13 as they are optional in function linkage
+// and their values may be altered by inter-library calls.
+// Do not include r12 as it is used as a scratch register.
+// Do not include return registers r3, f1, v2.
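+// In the definitions below, (sequence "R%u", 4, 10) expands to R4..R10, so,
+// for example, CSR_SVR32_ColdCC treats R4-R10, R14-R31, F0, F2-F31 and
+// CR0-CR7 as preserved across a coldcc call.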
+def CSR_SVR32_ColdCC : CalleeSavedRegs<(add (sequence "R%u", 4, 10), + (sequence "R%u", 14, 31), + F0, (sequence "F%u", 2, 31), + (sequence "CR%u", 0, 7))>; + +def CSR_SVR32_ColdCC_Altivec : CalleeSavedRegs<(add CSR_SVR32_ColdCC, + (sequence "V%u", 0, 1), + (sequence "V%u", 3, 31))>; + +def CSR_SVR64_ColdCC : CalleeSavedRegs<(add (sequence "X%u", 4, 10), + (sequence "X%u", 14, 31), + F0, (sequence "F%u", 2, 31), + (sequence "CR%u", 0, 7))>; + +def CSR_SVR64_ColdCC_R2: CalleeSavedRegs<(add CSR_SVR64_ColdCC, X2)>; + +def CSR_SVR64_ColdCC_Altivec : CalleeSavedRegs<(add CSR_SVR64_ColdCC, + (sequence "V%u", 0, 1), + (sequence "V%u", 3, 31))>; + +def CSR_SVR64_ColdCC_R2_Altivec : CalleeSavedRegs<(add CSR_SVR64_ColdCC_Altivec, X2)>; + +def CSR_64_AllRegs: CalleeSavedRegs<(add X0, (sequence "X%u", 3, 10), + (sequence "X%u", 14, 31), + (sequence "F%u", 0, 31), + (sequence "CR%u", 0, 7))>; + +def CSR_64_AllRegs_Altivec : CalleeSavedRegs<(add CSR_64_AllRegs, + (sequence "V%u", 0, 31))>; + +def CSR_64_AllRegs_VSX : CalleeSavedRegs<(add CSR_64_AllRegs_Altivec, + (sequence "VSL%u", 0, 31))>; + diff --git a/capstone/suite/synctools/tablegen/PPC/PPCInstr64Bit.td b/capstone/suite/synctools/tablegen/PPC/PPCInstr64Bit.td new file mode 100644 index 000000000..cdd57c6a1 --- /dev/null +++ b/capstone/suite/synctools/tablegen/PPC/PPCInstr64Bit.td @@ -0,0 +1,1453 @@ +//===-- PPCInstr64Bit.td - The PowerPC 64-bit Support ------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the PowerPC 64-bit instructions. These patterns are used +// both when in ppc64 mode and when in "use 64-bit extensions in 32-bit" mode. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// 64-bit operands. +// +def s16imm64 : Operand<i64> { + let PrintMethod = "printS16ImmOperand"; + let EncoderMethod = "getImm16Encoding"; + let ParserMatchClass = PPCS16ImmAsmOperand; + let DecoderMethod = "decodeSImmOperand<16>"; +} +def u16imm64 : Operand<i64> { + let PrintMethod = "printU16ImmOperand"; + let EncoderMethod = "getImm16Encoding"; + let ParserMatchClass = PPCU16ImmAsmOperand; + let DecoderMethod = "decodeUImmOperand<16>"; +} +def s17imm64 : Operand<i64> { + // This operand type is used for addis/lis to allow the assembler parser + // to accept immediates in the range -65536..65535 for compatibility with + // the GNU assembler. The operand is treated as 16-bit otherwise. + let PrintMethod = "printS16ImmOperand"; + let EncoderMethod = "getImm16Encoding"; + let ParserMatchClass = PPCS17ImmAsmOperand; + let DecoderMethod = "decodeSImmOperand<16>"; +} +def tocentry : Operand<iPTR> { + let MIOperandInfo = (ops i64imm:$imm); +} +def tlsreg : Operand<i64> { + let EncoderMethod = "getTLSRegEncoding"; + let ParserMatchClass = PPCTLSRegOperand; +} +def tlsgd : Operand<i64> {} +def tlscall : Operand<i64> { + let PrintMethod = "printTLSCall"; + let MIOperandInfo = (ops calltarget:$func, tlsgd:$sym); + let EncoderMethod = "getTLSCallEncoding"; +} + +//===----------------------------------------------------------------------===// +// 64-bit transformation functions. 
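+// The SDNodeXForm transforms defined just below convert an immediate shift
+// amount into the mask operand of the MD-form rotate-and-clear instructions.
+// Sketch of the kind of pattern that consumes them (illustrative, not quoted
+// from this file):
+//   def : Pat<(shl i64:$in, (i32 imm:$imm)),
+//             (RLDICR $in, imm:$imm, (SHL64 imm:$imm))>;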
+// + +def SHL64 : SDNodeXForm<imm, [{ + // Transformation function: 63 - imm + return getI32Imm(63 - N->getZExtValue(), SDLoc(N)); +}]>; + +def SRL64 : SDNodeXForm<imm, [{ + // Transformation function: 64 - imm + return N->getZExtValue() ? getI32Imm(64 - N->getZExtValue(), SDLoc(N)) + : getI32Imm(0, SDLoc(N)); +}]>; + + +//===----------------------------------------------------------------------===// +// Calls. +// + +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { +let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { + let isReturn = 1, Uses = [LR8, RM] in + def BLR8 : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", IIC_BrB, + [(retflag)]>, Requires<[In64BitMode]>; + let isBranch = 1, isIndirectBranch = 1, Uses = [CTR8] in { + def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB, + []>, + Requires<[In64BitMode]>; + def BCCCTR8 : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond), + "b${cond:cc}ctr${cond:pm} ${cond:reg}", IIC_BrB, + []>, + Requires<[In64BitMode]>; + + def BCCTR8 : XLForm_2_br2<19, 528, 12, 0, (outs), (ins crbitrc:$bi), + "bcctr 12, $bi, 0", IIC_BrB, []>, + Requires<[In64BitMode]>; + def BCCTR8n : XLForm_2_br2<19, 528, 4, 0, (outs), (ins crbitrc:$bi), + "bcctr 4, $bi, 0", IIC_BrB, []>, + Requires<[In64BitMode]>; + } +} + +let Defs = [LR8] in + def MovePCtoLR8 : Pseudo<(outs), (ins), "#MovePCtoLR8", []>, + PPC970_Unit_BRU; + +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { + let Defs = [CTR8], Uses = [CTR8] in { + def BDZ8 : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst), + "bdz $dst">; + def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst), + "bdnz $dst">; + } + + let isReturn = 1, Defs = [CTR8], Uses = [CTR8, LR8, RM] in { + def BDZLR8 : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins), + "bdzlr", IIC_BrB, []>; + def BDNZLR8 : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins), + "bdnzlr", IIC_BrB, []>; + } +} + + + +let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { + // Convenient aliases for call instructions + let Uses = [RM] in { + def BL8 : IForm<18, 0, 1, (outs), (ins calltarget:$func), + "bl $func", IIC_BrB, []>; // See Pat patterns below. 
+ + def BL8_TLS : IForm<18, 0, 1, (outs), (ins tlscall:$func), + "bl $func", IIC_BrB, []>; + + def BLA8 : IForm<18, 1, 1, (outs), (ins abscalltarget:$func), + "bla $func", IIC_BrB, [(PPCcall (i64 imm:$func))]>; + } + let Uses = [RM], isCodeGenOnly = 1 in { + def BL8_NOP : IForm_and_DForm_4_zero<18, 0, 1, 24, + (outs), (ins calltarget:$func), + "bl $func\n\tnop", IIC_BrB, []>; + + def BL8_NOP_TLS : IForm_and_DForm_4_zero<18, 0, 1, 24, + (outs), (ins tlscall:$func), + "bl $func\n\tnop", IIC_BrB, []>; + + def BLA8_NOP : IForm_and_DForm_4_zero<18, 1, 1, 24, + (outs), (ins abscalltarget:$func), + "bla $func\n\tnop", IIC_BrB, + [(PPCcall_nop (i64 imm:$func))]>; + } + let Uses = [CTR8, RM] in { + def BCTRL8 : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins), + "bctrl", IIC_BrB, [(PPCbctrl)]>, + Requires<[In64BitMode]>; + + let isCodeGenOnly = 1 in { + def BCCCTRL8 : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond), + "b${cond:cc}ctrl${cond:pm} ${cond:reg}", IIC_BrB, + []>, + Requires<[In64BitMode]>; + + def BCCTRL8 : XLForm_2_br2<19, 528, 12, 1, (outs), (ins crbitrc:$bi), + "bcctrl 12, $bi, 0", IIC_BrB, []>, + Requires<[In64BitMode]>; + def BCCTRL8n : XLForm_2_br2<19, 528, 4, 1, (outs), (ins crbitrc:$bi), + "bcctrl 4, $bi, 0", IIC_BrB, []>, + Requires<[In64BitMode]>; + } + } +} + +let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1, + Defs = [LR8, X2], Uses = [CTR8, RM], RST = 2 in { + def BCTRL8_LDinto_toc : + XLForm_2_ext_and_DSForm_1<19, 528, 20, 0, 1, 58, 0, (outs), + (ins memrix:$src), + "bctrl\n\tld 2, $src", IIC_BrB, + [(PPCbctrl_load_toc ixaddr:$src)]>, + Requires<[In64BitMode]>; +} + +} // Interpretation64Bit + +// FIXME: Duplicating this for the asm parser should be unnecessary, but the +// previous definition must be marked as CodeGen only to prevent decoding +// conflicts. +let Interpretation64Bit = 1, isAsmParserOnly = 1 in +let isCall = 1, PPC970_Unit = 7, Defs = [LR8], Uses = [RM] in +def BL8_TLS_ : IForm<18, 0, 1, (outs), (ins tlscall:$func), + "bl $func", IIC_BrB, []>; + +// Calls +def : Pat<(PPCcall (i64 tglobaladdr:$dst)), + (BL8 tglobaladdr:$dst)>; +def : Pat<(PPCcall_nop (i64 tglobaladdr:$dst)), + (BL8_NOP tglobaladdr:$dst)>; + +def : Pat<(PPCcall (i64 texternalsym:$dst)), + (BL8 texternalsym:$dst)>; +def : Pat<(PPCcall_nop (i64 texternalsym:$dst)), + (BL8_NOP texternalsym:$dst)>; + +// Atomic operations +// FIXME: some of these might be used with constant operands. This will result +// in constant materialization instructions that may be redundant. We currently +// clean this up in PPCMIPeephole with calls to +// PPCInstrInfo::convertToImmediateForm() but we should probably not emit them +// in the first place. 
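+//
+// Each pseudo below is later expanded by the custom inserter into a
+// load-reserve/store-conditional retry loop, roughly (sketch):
+//   loop:
+//     ldarx   rT, 0, rPtr      ; load doubleword and set reservation
+//     add     rS, rT, rIncr    ; the operation varies per pseudo
+//     stdcx.  rS, 0, rPtr      ; store only if reservation still held
+//     bne-    cr0, loop        ; reservation lost; retry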
+let usesCustomInserter = 1 in { + let Defs = [CR0] in { + def ATOMIC_LOAD_ADD_I64 : Pseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_ADD_I64", + [(set i64:$dst, (atomic_load_add_64 xoaddr:$ptr, i64:$incr))]>; + def ATOMIC_LOAD_SUB_I64 : Pseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_SUB_I64", + [(set i64:$dst, (atomic_load_sub_64 xoaddr:$ptr, i64:$incr))]>; + def ATOMIC_LOAD_OR_I64 : Pseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_OR_I64", + [(set i64:$dst, (atomic_load_or_64 xoaddr:$ptr, i64:$incr))]>; + def ATOMIC_LOAD_XOR_I64 : Pseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_XOR_I64", + [(set i64:$dst, (atomic_load_xor_64 xoaddr:$ptr, i64:$incr))]>; + def ATOMIC_LOAD_AND_I64 : Pseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_AND_i64", + [(set i64:$dst, (atomic_load_and_64 xoaddr:$ptr, i64:$incr))]>; + def ATOMIC_LOAD_NAND_I64 : Pseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_NAND_I64", + [(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>; + def ATOMIC_LOAD_MIN_I64 : Pseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MIN_I64", + [(set i64:$dst, (atomic_load_min_64 xoaddr:$ptr, i64:$incr))]>; + def ATOMIC_LOAD_MAX_I64 : Pseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MAX_I64", + [(set i64:$dst, (atomic_load_max_64 xoaddr:$ptr, i64:$incr))]>; + def ATOMIC_LOAD_UMIN_I64 : Pseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMIN_I64", + [(set i64:$dst, (atomic_load_umin_64 xoaddr:$ptr, i64:$incr))]>; + def ATOMIC_LOAD_UMAX_I64 : Pseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMAX_I64", + [(set i64:$dst, (atomic_load_umax_64 xoaddr:$ptr, i64:$incr))]>; + + def ATOMIC_CMP_SWAP_I64 : Pseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$old, g8rc:$new), "#ATOMIC_CMP_SWAP_I64", + [(set i64:$dst, (atomic_cmp_swap_64 xoaddr:$ptr, i64:$old, i64:$new))]>; + + def ATOMIC_SWAP_I64 : Pseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$new), "#ATOMIC_SWAP_I64", + [(set i64:$dst, (atomic_swap_64 xoaddr:$ptr, i64:$new))]>; + } +} + +// Instructions to support atomic operations +let mayLoad = 1, hasSideEffects = 0 in { +def LDARX : XForm_1_memOp<31, 84, (outs g8rc:$rD), (ins memrr:$ptr), + "ldarx $rD, $ptr", IIC_LdStLDARX, []>; + +// Instruction to support lock versions of atomics +// (EH=1 - see Power ISA 2.07 Book II 4.4.2) +def LDARXL : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr), + "ldarx $rD, $ptr, 1", IIC_LdStLDARX, []>, isDOT; + +let hasExtraDefRegAllocReq = 1 in +def LDAT : X_RD5_RS5_IM5<31, 614, (outs g8rc:$rD), (ins g8rc:$rA, u5imm:$FC), + "ldat $rD, $rA, $FC", IIC_LdStLoad>, isPPC64, + Requires<[IsISA3_0]>; +} + +let Defs = [CR0], mayStore = 1, mayLoad = 0, hasSideEffects = 0 in +def STDCX : XForm_1_memOp<31, 214, (outs), (ins g8rc:$rS, memrr:$dst), + "stdcx. 
$rS, $dst", IIC_LdStSTDCX, []>, isDOT; + +let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in +def STDAT : X_RD5_RS5_IM5<31, 742, (outs), (ins g8rc:$rS, g8rc:$rA, u5imm:$FC), + "stdat $rS, $rA, $FC", IIC_LdStStore>, isPPC64, + Requires<[IsISA3_0]>; + +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in +def TCRETURNdi8 :Pseudo< (outs), + (ins calltarget:$dst, i32imm:$offset), + "#TC_RETURNd8 $dst $offset", + []>; + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in +def TCRETURNai8 :Pseudo<(outs), (ins abscalltarget:$func, i32imm:$offset), + "#TC_RETURNa8 $func $offset", + [(PPCtc_return (i64 imm:$func), imm:$offset)]>; + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in +def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset), + "#TC_RETURNr8 $dst $offset", + []>; + +let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1, + isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR8, RM] in +def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB, + []>, + Requires<[In64BitMode]>; + +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, + isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in +def TAILB8 : IForm<18, 0, 0, (outs), (ins calltarget:$dst), + "b $dst", IIC_BrB, + []>; + +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, + isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in +def TAILBA8 : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst), + "ba $dst", IIC_BrB, + []>; +} // Interpretation64Bit + +def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm), + (TCRETURNdi8 tglobaladdr:$dst, imm:$imm)>; + +def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm), + (TCRETURNdi8 texternalsym:$dst, imm:$imm)>; + +def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm), + (TCRETURNri8 CTRRC8:$dst, imm:$imm)>; + + +// 64-bit CR instructions +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { +let hasSideEffects = 0 in { +// mtocrf's input needs to be prepared by shifting by an amount dependent +// on the cr register selected. Thus, post-ra anti-dep breaking must not +// later change that register assignment. +let hasExtraDefRegAllocReq = 1 in { +def MTOCRF8: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins g8rc:$ST), + "mtocrf $FXM, $ST", IIC_BrMCRX>, + PPC970_DGroup_First, PPC970_Unit_CRU; + +// Similarly to mtocrf, the mask for mtcrf must be prepared in a way that +// is dependent on the cr fields being set. +def MTCRF8 : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, g8rc:$rS), + "mtcrf $FXM, $rS", IIC_BrMCRX>, + PPC970_MicroCode, PPC970_Unit_CRU; +} // hasExtraDefRegAllocReq = 1 + +// mfocrf's input needs to be prepared by shifting by an amount dependent +// on the cr register selected. Thus, post-ra anti-dep breaking must not +// later change that register assignment. +let hasExtraSrcRegAllocReq = 1 in { +def MFOCRF8: XFXForm_5a<31, 19, (outs g8rc:$rT), (ins crbitm:$FXM), + "mfocrf $rT, $FXM", IIC_SprMFCRF>, + PPC970_DGroup_First, PPC970_Unit_CRU; + +// Similarly to mfocrf, the mask for mfcrf must be prepared in a way that +// is dependent on the cr fields being copied. 
+def MFCR8 : XFXForm_3<31, 19, (outs g8rc:$rT), (ins), + "mfcr $rT", IIC_SprMFCR>, + PPC970_MicroCode, PPC970_Unit_CRU; +} // hasExtraSrcRegAllocReq = 1 +} // hasSideEffects = 0 + +let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { + let Defs = [CTR8] in + def EH_SjLj_SetJmp64 : Pseudo<(outs gprc:$dst), (ins memr:$buf), + "#EH_SJLJ_SETJMP64", + [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>, + Requires<[In64BitMode]>; + let isTerminator = 1 in + def EH_SjLj_LongJmp64 : Pseudo<(outs), (ins memr:$buf), + "#EH_SJLJ_LONGJMP64", + [(PPCeh_sjlj_longjmp addr:$buf)]>, + Requires<[In64BitMode]>; +} + +def MFSPR8 : XFXForm_1<31, 339, (outs g8rc:$RT), (ins i32imm:$SPR), + "mfspr $RT, $SPR", IIC_SprMFSPR>; +def MTSPR8 : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, g8rc:$RT), + "mtspr $SPR, $RT", IIC_SprMTSPR>; + + +//===----------------------------------------------------------------------===// +// 64-bit SPR manipulation instrs. + +let Uses = [CTR8] in { +def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs g8rc:$rT), (ins), + "mfctr $rT", IIC_SprMFSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} +let Pattern = [(PPCmtctr i64:$rS)], Defs = [CTR8] in { +def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS), + "mtctr $rS", IIC_SprMTSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} +let hasSideEffects = 1, Defs = [CTR8] in { +let Pattern = [(int_ppc_mtctr i64:$rS)] in +def MTCTR8loop : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS), + "mtctr $rS", IIC_SprMTSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} + +let Pattern = [(set i64:$rT, readcyclecounter)] in +def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs g8rc:$rT), (ins), + "mfspr $rT, 268", IIC_SprMFTB>, + PPC970_DGroup_First, PPC970_Unit_FXU; +// Note that encoding mftb using mfspr is now the preferred form, +// and has been since at least ISA v2.03. The mftb instruction has +// now been phased out. Using mfspr, however, is known not to work on +// the POWER3. + +let Defs = [X1], Uses = [X1] in +def DYNALLOC8 : Pseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri:$fpsi),"#DYNALLOC8", + [(set i64:$result, + (PPCdynalloc i64:$negsize, iaddr:$fpsi))]>; +def DYNAREAOFFSET8 : Pseudo<(outs i64imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET8", + [(set i64:$result, (PPCdynareaoffset iaddr:$fpsi))]>; + +let Defs = [LR8] in { +def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins g8rc:$rS), + "mtlr $rS", IIC_SprMTSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} +let Uses = [LR8] in { +def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs g8rc:$rT), (ins), + "mflr $rT", IIC_SprMFSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} +} // Interpretation64Bit + +//===----------------------------------------------------------------------===// +// Fixed point instructions. +// + +let PPC970_Unit = 1 in { // FXU Operations. +let Interpretation64Bit = 1 in { +let hasSideEffects = 0 in { +let isCodeGenOnly = 1 in { + +let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { +def LI8 : DForm_2_r0<14, (outs g8rc:$rD), (ins s16imm64:$imm), + "li $rD, $imm", IIC_IntSimple, + [(set i64:$rD, imm64SExt16:$imm)]>; +def LIS8 : DForm_2_r0<15, (outs g8rc:$rD), (ins s17imm64:$imm), + "lis $rD, $imm", IIC_IntSimple, + [(set i64:$rD, imm16ShiftedSExt:$imm)]>; +} + +// Logical ops. 
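+// Each defm below instantiates a multiclass that defines both the plain
+// instruction and its record ("dot") form, suffixed "o" (e.g. AND8 and AND8o
+// for "and."), with the record form implicitly defining CR0. Roughly, the
+// expansion looks like this (sketch only):
+//   def AND8  : XForm_6<...>;                   // and  rA, rS, rB
+//   let Defs = [CR0] in
+//   def AND8o : XForm_6<...>, isDOT;            // and. rA, rS, rB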
+let isCommutable = 1 in { +defm NAND8: XForm_6r<31, 476, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "nand", "$rA, $rS, $rB", IIC_IntSimple, + [(set i64:$rA, (not (and i64:$rS, i64:$rB)))]>; +defm AND8 : XForm_6r<31, 28, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "and", "$rA, $rS, $rB", IIC_IntSimple, + [(set i64:$rA, (and i64:$rS, i64:$rB))]>; +} // isCommutable +defm ANDC8: XForm_6r<31, 60, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "andc", "$rA, $rS, $rB", IIC_IntSimple, + [(set i64:$rA, (and i64:$rS, (not i64:$rB)))]>; +let isCommutable = 1 in { +defm OR8 : XForm_6r<31, 444, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "or", "$rA, $rS, $rB", IIC_IntSimple, + [(set i64:$rA, (or i64:$rS, i64:$rB))]>; +defm NOR8 : XForm_6r<31, 124, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "nor", "$rA, $rS, $rB", IIC_IntSimple, + [(set i64:$rA, (not (or i64:$rS, i64:$rB)))]>; +} // isCommutable +defm ORC8 : XForm_6r<31, 412, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "orc", "$rA, $rS, $rB", IIC_IntSimple, + [(set i64:$rA, (or i64:$rS, (not i64:$rB)))]>; +let isCommutable = 1 in { +defm EQV8 : XForm_6r<31, 284, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "eqv", "$rA, $rS, $rB", IIC_IntSimple, + [(set i64:$rA, (not (xor i64:$rS, i64:$rB)))]>; +defm XOR8 : XForm_6r<31, 316, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "xor", "$rA, $rS, $rB", IIC_IntSimple, + [(set i64:$rA, (xor i64:$rS, i64:$rB))]>; +} // let isCommutable = 1 + +// Logical ops with immediate. +let Defs = [CR0] in { +def ANDIo8 : DForm_4<28, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2), + "andi. $dst, $src1, $src2", IIC_IntGeneral, + [(set i64:$dst, (and i64:$src1, immZExt16:$src2))]>, + isDOT; +def ANDISo8 : DForm_4<29, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2), + "andis. $dst, $src1, $src2", IIC_IntGeneral, + [(set i64:$dst, (and i64:$src1, imm16ShiftedZExt:$src2))]>, + isDOT; +} +def ORI8 : DForm_4<24, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2), + "ori $dst, $src1, $src2", IIC_IntSimple, + [(set i64:$dst, (or i64:$src1, immZExt16:$src2))]>; +def ORIS8 : DForm_4<25, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2), + "oris $dst, $src1, $src2", IIC_IntSimple, + [(set i64:$dst, (or i64:$src1, imm16ShiftedZExt:$src2))]>; +def XORI8 : DForm_4<26, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2), + "xori $dst, $src1, $src2", IIC_IntSimple, + [(set i64:$dst, (xor i64:$src1, immZExt16:$src2))]>; +def XORIS8 : DForm_4<27, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2), + "xoris $dst, $src1, $src2", IIC_IntSimple, + [(set i64:$dst, (xor i64:$src1, imm16ShiftedZExt:$src2))]>; + +let isCommutable = 1 in +defm ADD8 : XOForm_1r<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "add", "$rT, $rA, $rB", IIC_IntSimple, + [(set i64:$rT, (add i64:$rA, i64:$rB))]>; +// ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the +// initial-exec thread-local storage model. We need to forbid r0 here - +// while it works for add just fine, the linker can relax this to local-exec +// addi, which won't work for r0. 
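+// Assembly sketch of the initial-exec sequence this enables (assumed typical
+// codegen, not quoted from this file):
+//   addis 3, 2, x@got@tprel@ha
+//   ld    3, x@got@tprel@l(3)
+//   add   3, 3, x@tls          ; ADD8TLS; relaxable to: addi 3, 13, x@tprel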
+def ADD8TLS : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc_nox0:$rA, tlsreg:$rB), + "add $rT, $rA, $rB", IIC_IntSimple, + [(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>; +let mayLoad = 1 in { +def LBZXTLS : XForm_1<31, 87, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB), + "lbzx $rD, $rA, $rB", IIC_LdStLoad, []>; +def LHZXTLS : XForm_1<31, 279, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB), + "lhzx $rD, $rA, $rB", IIC_LdStLoad, []>; +def LWZXTLS : XForm_1<31, 23, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB), + "lwzx $rD, $rA, $rB", IIC_LdStLoad, []>; +def LDXTLS : XForm_1<31, 21, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB), + "ldx $rD, $rA, $rB", IIC_LdStLD, []>, isPPC64; +def LBZXTLS_32 : XForm_1<31, 87, (outs gprc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB), + "lbzx $rD, $rA, $rB", IIC_LdStLoad, []>; +def LHZXTLS_32 : XForm_1<31, 279, (outs gprc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB), + "lhzx $rD, $rA, $rB", IIC_LdStLoad, []>; +def LWZXTLS_32 : XForm_1<31, 23, (outs gprc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB), + "lwzx $rD, $rA, $rB", IIC_LdStLoad, []>; + +} + +let mayStore = 1 in { +def STBXTLS : XForm_8<31, 215, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB), + "stbx $rS, $rA, $rB", IIC_LdStStore, []>, + PPC970_DGroup_Cracked; +def STHXTLS : XForm_8<31, 407, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB), + "sthx $rS, $rA, $rB", IIC_LdStStore, []>, + PPC970_DGroup_Cracked; +def STWXTLS : XForm_8<31, 151, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB), + "stwx $rS, $rA, $rB", IIC_LdStStore, []>, + PPC970_DGroup_Cracked; +def STDXTLS : XForm_8<31, 149, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB), + "stdx $rS, $rA, $rB", IIC_LdStSTD, []>, isPPC64, + PPC970_DGroup_Cracked; +def STBXTLS_32 : XForm_8<31, 215, (outs), (ins gprc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB), + "stbx $rS, $rA, $rB", IIC_LdStStore, []>, + PPC970_DGroup_Cracked; +def STHXTLS_32 : XForm_8<31, 407, (outs), (ins gprc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB), + "sthx $rS, $rA, $rB", IIC_LdStStore, []>, + PPC970_DGroup_Cracked; +def STWXTLS_32 : XForm_8<31, 151, (outs), (ins gprc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB), + "stwx $rS, $rA, $rB", IIC_LdStStore, []>, + PPC970_DGroup_Cracked; + +} + +let isCommutable = 1 in +defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "addc", "$rT, $rA, $rB", IIC_IntGeneral, + [(set i64:$rT, (addc i64:$rA, i64:$rB))]>, + PPC970_DGroup_Cracked; + +let Defs = [CARRY] in +def ADDIC8 : DForm_2<12, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm), + "addic $rD, $rA, $imm", IIC_IntGeneral, + [(set i64:$rD, (addc i64:$rA, imm64SExt16:$imm))]>; +def ADDI8 : DForm_2<14, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s16imm64:$imm), + "addi $rD, $rA, $imm", IIC_IntSimple, + [(set i64:$rD, (add i64:$rA, imm64SExt16:$imm))]>; +def ADDIS8 : DForm_2<15, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s17imm64:$imm), + "addis $rD, $rA, $imm", IIC_IntSimple, + [(set i64:$rD, (add i64:$rA, imm16ShiftedSExt:$imm))]>; + +let Defs = [CARRY] in { +def SUBFIC8: DForm_2< 8, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm), + "subfic $rD, $rA, $imm", IIC_IntGeneral, + [(set i64:$rD, (subc imm64SExt16:$imm, i64:$rA))]>; +} +defm SUBFC8 : XOForm_1rc<31, 8, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "subfc", "$rT, $rA, $rB", IIC_IntGeneral, + [(set i64:$rT, (subc i64:$rB, i64:$rA))]>, + PPC970_DGroup_Cracked; +defm SUBF8 : XOForm_1r<31, 40, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "subf", "$rT, $rA, $rB", IIC_IntGeneral, + [(set i64:$rT, (sub i64:$rB, i64:$rA))]>; 
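+
+// The carrying and extended forms below (adde, addme, addze, subfe, subfme,
+// subfze) consume the CA bit produced by addc/subfc above; this is how
+// multi-word arithmetic is built, e.g. a 128-bit add lowers as (sketch):
+//   addc lo, aLo, bLo   ; sets CA
+//   adde hi, aHi, bHi   ; adds with CA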
+defm NEG8 : XOForm_3r<31, 104, 0, (outs g8rc:$rT), (ins g8rc:$rA), + "neg", "$rT, $rA", IIC_IntSimple, + [(set i64:$rT, (ineg i64:$rA))]>; +let Uses = [CARRY] in { +let isCommutable = 1 in +defm ADDE8 : XOForm_1rc<31, 138, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "adde", "$rT, $rA, $rB", IIC_IntGeneral, + [(set i64:$rT, (adde i64:$rA, i64:$rB))]>; +defm ADDME8 : XOForm_3rc<31, 234, 0, (outs g8rc:$rT), (ins g8rc:$rA), + "addme", "$rT, $rA", IIC_IntGeneral, + [(set i64:$rT, (adde i64:$rA, -1))]>; +defm ADDZE8 : XOForm_3rc<31, 202, 0, (outs g8rc:$rT), (ins g8rc:$rA), + "addze", "$rT, $rA", IIC_IntGeneral, + [(set i64:$rT, (adde i64:$rA, 0))]>; +defm SUBFE8 : XOForm_1rc<31, 136, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "subfe", "$rT, $rA, $rB", IIC_IntGeneral, + [(set i64:$rT, (sube i64:$rB, i64:$rA))]>; +defm SUBFME8 : XOForm_3rc<31, 232, 0, (outs g8rc:$rT), (ins g8rc:$rA), + "subfme", "$rT, $rA", IIC_IntGeneral, + [(set i64:$rT, (sube -1, i64:$rA))]>; +defm SUBFZE8 : XOForm_3rc<31, 200, 0, (outs g8rc:$rT), (ins g8rc:$rA), + "subfze", "$rT, $rA", IIC_IntGeneral, + [(set i64:$rT, (sube 0, i64:$rA))]>; +} +} // isCodeGenOnly + +// FIXME: Duplicating this for the asm parser should be unnecessary, but the +// previous definition must be marked as CodeGen only to prevent decoding +// conflicts. +let isAsmParserOnly = 1 in { +def ADD8TLS_ : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, tlsreg:$rB), + "add $rT, $rA, $rB", IIC_IntSimple, []>; + +let mayLoad = 1 in { +def LBZXTLS_ : XForm_1<31, 87, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB), + "lbzx $rD, $rA, $rB", IIC_LdStLoad, []>; +def LHZXTLS_ : XForm_1<31, 279, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB), + "lhzx $rD, $rA, $rB", IIC_LdStLoad, []>; +def LWZXTLS_ : XForm_1<31, 23, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB), + "lwzx $rD, $rA, $rB", IIC_LdStLoad, []>; +def LDXTLS_ : XForm_1<31, 21, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB), + "ldx $rD, $rA, $rB", IIC_LdStLD, []>, isPPC64; +} + +let mayStore = 1 in { +def STBXTLS_ : XForm_8<31, 215, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB), + "stbx $rS, $rA, $rB", IIC_LdStStore, []>, + PPC970_DGroup_Cracked; +def STHXTLS_ : XForm_8<31, 407, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB), + "sthx $rS, $rA, $rB", IIC_LdStStore, []>, + PPC970_DGroup_Cracked; +def STWXTLS_ : XForm_8<31, 151, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB), + "stwx $rS, $rA, $rB", IIC_LdStStore, []>, + PPC970_DGroup_Cracked; +def STDXTLS_ : XForm_8<31, 149, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB), + "stdx $rS, $rA, $rB", IIC_LdStSTD, []>, isPPC64, + PPC970_DGroup_Cracked; +} +} + +let isCommutable = 1 in { +defm MULHD : XOForm_1r<31, 73, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "mulhd", "$rT, $rA, $rB", IIC_IntMulHW, + [(set i64:$rT, (mulhs i64:$rA, i64:$rB))]>; +defm MULHDU : XOForm_1r<31, 9, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "mulhdu", "$rT, $rA, $rB", IIC_IntMulHWU, + [(set i64:$rT, (mulhu i64:$rA, i64:$rB))]>; +} // isCommutable +} +} // Interpretation64Bit + +let isCompare = 1, hasSideEffects = 0 in { + def CMPD : XForm_16_ext<31, 0, (outs crrc:$crD), (ins g8rc:$rA, g8rc:$rB), + "cmpd $crD, $rA, $rB", IIC_IntCompare>, isPPC64; + def CMPLD : XForm_16_ext<31, 32, (outs crrc:$crD), (ins g8rc:$rA, g8rc:$rB), + "cmpld $crD, $rA, $rB", IIC_IntCompare>, isPPC64; + def CMPDI : DForm_5_ext<11, (outs crrc:$crD), (ins g8rc:$rA, s16imm64:$imm), + "cmpdi $crD, $rA, $imm", IIC_IntCompare>, isPPC64; + def CMPLDI : 
DForm_6_ext<10, (outs crrc:$dst), (ins g8rc:$src1, u16imm64:$src2), + "cmpldi $dst, $src1, $src2", + IIC_IntCompare>, isPPC64; + let Interpretation64Bit = 1, isCodeGenOnly = 1 in + def CMPRB8 : X_BF3_L1_RS5_RS5<31, 192, (outs crbitrc:$BF), + (ins u1imm:$L, g8rc:$rA, g8rc:$rB), + "cmprb $BF, $L, $rA, $rB", IIC_IntCompare, []>, + Requires<[IsISA3_0]>; + def CMPEQB : X_BF3_RS5_RS5<31, 224, (outs crbitrc:$BF), + (ins g8rc:$rA, g8rc:$rB), "cmpeqb $BF, $rA, $rB", + IIC_IntCompare, []>, Requires<[IsISA3_0]>; +} + +let hasSideEffects = 0 in { +defm SLD : XForm_6r<31, 27, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB), + "sld", "$rA, $rS, $rB", IIC_IntRotateD, + [(set i64:$rA, (PPCshl i64:$rS, i32:$rB))]>, isPPC64; +defm SRD : XForm_6r<31, 539, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB), + "srd", "$rA, $rS, $rB", IIC_IntRotateD, + [(set i64:$rA, (PPCsrl i64:$rS, i32:$rB))]>, isPPC64; +defm SRAD : XForm_6rc<31, 794, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB), + "srad", "$rA, $rS, $rB", IIC_IntRotateD, + [(set i64:$rA, (PPCsra i64:$rS, i32:$rB))]>, isPPC64; + +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { +defm CNTLZW8 : XForm_11r<31, 26, (outs g8rc:$rA), (ins g8rc:$rS), + "cntlzw", "$rA, $rS", IIC_IntGeneral, []>; +defm CNTTZW8 : XForm_11r<31, 538, (outs g8rc:$rA), (ins g8rc:$rS), + "cnttzw", "$rA, $rS", IIC_IntGeneral, []>, + Requires<[IsISA3_0]>; + +defm EXTSB8 : XForm_11r<31, 954, (outs g8rc:$rA), (ins g8rc:$rS), + "extsb", "$rA, $rS", IIC_IntSimple, + [(set i64:$rA, (sext_inreg i64:$rS, i8))]>; +defm EXTSH8 : XForm_11r<31, 922, (outs g8rc:$rA), (ins g8rc:$rS), + "extsh", "$rA, $rS", IIC_IntSimple, + [(set i64:$rA, (sext_inreg i64:$rS, i16))]>; + +defm SLW8 : XForm_6r<31, 24, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "slw", "$rA, $rS, $rB", IIC_IntGeneral, []>; +defm SRW8 : XForm_6r<31, 536, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "srw", "$rA, $rS, $rB", IIC_IntGeneral, []>; +} // Interpretation64Bit + +// For fast-isel: +let isCodeGenOnly = 1 in { +def EXTSB8_32_64 : XForm_11<31, 954, (outs g8rc:$rA), (ins gprc:$rS), + "extsb $rA, $rS", IIC_IntSimple, []>, isPPC64; +def EXTSH8_32_64 : XForm_11<31, 922, (outs g8rc:$rA), (ins gprc:$rS), + "extsh $rA, $rS", IIC_IntSimple, []>, isPPC64; +} // isCodeGenOnly for fast-isel + +defm EXTSW : XForm_11r<31, 986, (outs g8rc:$rA), (ins g8rc:$rS), + "extsw", "$rA, $rS", IIC_IntSimple, + [(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64; +let Interpretation64Bit = 1, isCodeGenOnly = 1 in +defm EXTSW_32_64 : XForm_11r<31, 986, (outs g8rc:$rA), (ins gprc:$rS), + "extsw", "$rA, $rS", IIC_IntSimple, + [(set i64:$rA, (sext i32:$rS))]>, isPPC64; +let isCodeGenOnly = 1 in +def EXTSW_32 : XForm_11<31, 986, (outs gprc:$rA), (ins gprc:$rS), + "extsw $rA, $rS", IIC_IntSimple, + []>, isPPC64; + +defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH), + "sradi", "$rA, $rS, $SH", IIC_IntRotateDI, + [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64; + +defm EXTSWSLI : XSForm_1r<31, 445, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH), + "extswsli", "$rA, $rS, $SH", IIC_IntRotateDI, + []>, isPPC64; + +// For fast-isel: +let isCodeGenOnly = 1, Defs = [CARRY] in +def SRADI_32 : XSForm_1<31, 413, (outs gprc:$rA), (ins gprc:$rS, u6imm:$SH), + "sradi $rA, $rS, $SH", IIC_IntRotateDI, []>, isPPC64; + +defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS), + "cntlzd", "$rA, $rS", IIC_IntGeneral, + [(set i64:$rA, (ctlz i64:$rS))]>; +defm CNTTZD : XForm_11r<31, 570, (outs g8rc:$rA), (ins g8rc:$rS), + "cnttzd", "$rA, $rS", IIC_IntGeneral, + [(set 
i64:$rA, (cttz i64:$rS))]>, Requires<[IsISA3_0]>; +def POPCNTD : XForm_11<31, 506, (outs g8rc:$rA), (ins g8rc:$rS), + "popcntd $rA, $rS", IIC_IntGeneral, + [(set i64:$rA, (ctpop i64:$rS))]>; +def BPERMD : XForm_6<31, 252, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "bpermd $rA, $rS, $rB", IIC_IntGeneral, + [(set i64:$rA, (int_ppc_bpermd g8rc:$rS, g8rc:$rB))]>, + isPPC64, Requires<[HasBPERMD]>; + +let isCodeGenOnly = 1, isCommutable = 1 in +def CMPB8 : XForm_6<31, 508, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "cmpb $rA, $rS, $rB", IIC_IntGeneral, + [(set i64:$rA, (PPCcmpb i64:$rS, i64:$rB))]>; + +// popcntw also does a population count on the high 32 bits (storing the +// results in the high 32-bits of the output). We'll ignore that here (which is +// safe because we never separately use the high part of the 64-bit registers). +def POPCNTW : XForm_11<31, 378, (outs gprc:$rA), (ins gprc:$rS), + "popcntw $rA, $rS", IIC_IntGeneral, + [(set i32:$rA, (ctpop i32:$rS))]>; + +def POPCNTB : XForm_11<31, 122, (outs gprc:$rA), (ins gprc:$rS), + "popcntb $rA, $rS", IIC_IntGeneral, []>; + +defm DIVD : XOForm_1rcr<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divd", "$rT, $rA, $rB", IIC_IntDivD, + [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64; +defm DIVDU : XOForm_1rcr<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divdu", "$rT, $rA, $rB", IIC_IntDivD, + [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64; +def DIVDE : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divde $rT, $rA, $rB", IIC_IntDivD, + [(set i64:$rT, (int_ppc_divde g8rc:$rA, g8rc:$rB))]>, + isPPC64, Requires<[HasExtDiv]>; + +let Predicates = [IsISA3_0] in { +def MADDHD : VAForm_1a<48, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC), + "maddhd $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64; +def MADDHDU : VAForm_1a<49, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC), + "maddhdu $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64; +def MADDLD : VAForm_1a<51, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC), + "maddld $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64; +def SETB : XForm_44<31, 128, (outs g8rc:$RT), (ins crrc:$BFA), + "setb $RT, $BFA", IIC_IntGeneral>, isPPC64; +def DARN : XForm_45<31, 755, (outs g8rc:$RT), (ins i32imm:$L), + "darn $RT, $L", IIC_LdStLD>, isPPC64; +def ADDPCIS : DXForm<19, 2, (outs g8rc:$RT), (ins i32imm:$D), + "addpcis $RT, $D", IIC_BrB, []>, isPPC64; +def MODSD : XForm_8<31, 777, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "modsd $rT, $rA, $rB", IIC_IntDivW, + [(set i64:$rT, (srem i64:$rA, i64:$rB))]>; +def MODUD : XForm_8<31, 265, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "modud $rT, $rA, $rB", IIC_IntDivW, + [(set i64:$rT, (urem i64:$rA, i64:$rB))]>; +} + +let Defs = [CR0] in +def DIVDEo : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divde. $rT, $rA, $rB", IIC_IntDivD, + []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First, + isPPC64, Requires<[HasExtDiv]>; +def DIVDEU : XOForm_1<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divdeu $rT, $rA, $rB", IIC_IntDivD, + [(set i64:$rT, (int_ppc_divdeu g8rc:$rA, g8rc:$rB))]>, + isPPC64, Requires<[HasExtDiv]>; +let Defs = [CR0] in +def DIVDEUo : XOForm_1<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divdeu. 
$rT, $rA, $rB", IIC_IntDivD, + []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First, + isPPC64, Requires<[HasExtDiv]>; +let isCommutable = 1 in +defm MULLD : XOForm_1r<31, 233, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "mulld", "$rT, $rA, $rB", IIC_IntMulHD, + [(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64; +let Interpretation64Bit = 1, isCodeGenOnly = 1 in +def MULLI8 : DForm_2<7, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm), + "mulli $rD, $rA, $imm", IIC_IntMulLI, + [(set i64:$rD, (mul i64:$rA, imm64SExt16:$imm))]>; +} + +let hasSideEffects = 0 in { +defm RLDIMI : MDForm_1r<30, 3, (outs g8rc:$rA), + (ins g8rc:$rSi, g8rc:$rS, u6imm:$SH, u6imm:$MBE), + "rldimi", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI, + []>, isPPC64, RegConstraint<"$rSi = $rA">, + NoEncode<"$rSi">; + +// Rotate instructions. +defm RLDCL : MDSForm_1r<30, 8, + (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB, u6imm:$MBE), + "rldcl", "$rA, $rS, $rB, $MBE", IIC_IntRotateD, + []>, isPPC64; +defm RLDCR : MDSForm_1r<30, 9, + (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB, u6imm:$MBE), + "rldcr", "$rA, $rS, $rB, $MBE", IIC_IntRotateD, + []>, isPPC64; +defm RLDICL : MDForm_1r<30, 0, + (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE), + "rldicl", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI, + []>, isPPC64; +// For fast-isel: +let isCodeGenOnly = 1 in +def RLDICL_32_64 : MDForm_1<30, 0, + (outs g8rc:$rA), + (ins gprc:$rS, u6imm:$SH, u6imm:$MBE), + "rldicl $rA, $rS, $SH, $MBE", IIC_IntRotateDI, + []>, isPPC64; +// End fast-isel. +let Interpretation64Bit = 1, isCodeGenOnly = 1 in +defm RLDICL_32 : MDForm_1r<30, 0, + (outs gprc:$rA), + (ins gprc:$rS, u6imm:$SH, u6imm:$MBE), + "rldicl", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI, + []>, isPPC64; +defm RLDICR : MDForm_1r<30, 1, + (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE), + "rldicr", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI, + []>, isPPC64; +let isCodeGenOnly = 1 in +def RLDICR_32 : MDForm_1<30, 1, + (outs gprc:$rA), (ins gprc:$rS, u6imm:$SH, u6imm:$MBE), + "rldicr $rA, $rS, $SH, $MBE", IIC_IntRotateDI, + []>, isPPC64; +defm RLDIC : MDForm_1r<30, 2, + (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE), + "rldic", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI, + []>, isPPC64; + +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { +defm RLWINM8 : MForm_2r<21, (outs g8rc:$rA), + (ins g8rc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), + "rlwinm", "$rA, $rS, $SH, $MB, $ME", IIC_IntGeneral, + []>; + +defm RLWNM8 : MForm_2r<23, (outs g8rc:$rA), + (ins g8rc:$rS, g8rc:$rB, u5imm:$MB, u5imm:$ME), + "rlwnm", "$rA, $rS, $rB, $MB, $ME", IIC_IntGeneral, + []>; + +// RLWIMI can be commuted if the rotate amount is zero. +let Interpretation64Bit = 1, isCodeGenOnly = 1 in +defm RLWIMI8 : MForm_2r<20, (outs g8rc:$rA), + (ins g8rc:$rSi, g8rc:$rS, u5imm:$SH, u5imm:$MB, + u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME", + IIC_IntRotate, []>, PPC970_DGroup_Cracked, + RegConstraint<"$rSi = $rA">, NoEncode<"$rSi">; + +let isSelect = 1 in +def ISEL8 : AForm_4<31, 15, + (outs g8rc:$rT), (ins g8rc_nox0:$rA, g8rc:$rB, crbitrc:$cond), + "isel $rT, $rA, $rB, $cond", IIC_IntISEL, + []>; +} // Interpretation64Bit +} // hasSideEffects = 0 +} // End FXU Operations. + + +//===----------------------------------------------------------------------===// +// Load/Store instructions. +// + + +// Sign extending loads. 
+let PPC970_Unit = 2 in { +let Interpretation64Bit = 1, isCodeGenOnly = 1 in +def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src), + "lha $rD, $src", IIC_LdStLHA, + [(set i64:$rD, (sextloadi16 iaddr:$src))]>, + PPC970_DGroup_Cracked; +def LWA : DSForm_1<58, 2, (outs g8rc:$rD), (ins memrix:$src), + "lwa $rD, $src", IIC_LdStLWA, + [(set i64:$rD, + (aligned4sextloadi32 ixaddr:$src))]>, isPPC64, + PPC970_DGroup_Cracked; +let Interpretation64Bit = 1, isCodeGenOnly = 1 in +def LHAX8: XForm_1_memOp<31, 343, (outs g8rc:$rD), (ins memrr:$src), + "lhax $rD, $src", IIC_LdStLHA, + [(set i64:$rD, (sextloadi16 xaddr:$src))]>, + PPC970_DGroup_Cracked; +def LWAX : XForm_1_memOp<31, 341, (outs g8rc:$rD), (ins memrr:$src), + "lwax $rD, $src", IIC_LdStLHA, + [(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64, + PPC970_DGroup_Cracked; +// For fast-isel: +let isCodeGenOnly = 1, mayLoad = 1 in { +def LWA_32 : DSForm_1<58, 2, (outs gprc:$rD), (ins memrix:$src), + "lwa $rD, $src", IIC_LdStLWA, []>, isPPC64, + PPC970_DGroup_Cracked; +def LWAX_32 : XForm_1_memOp<31, 341, (outs gprc:$rD), (ins memrr:$src), + "lwax $rD, $src", IIC_LdStLHA, []>, isPPC64, + PPC970_DGroup_Cracked; +} // end fast-isel isCodeGenOnly + +// Update forms. +let mayLoad = 1, hasSideEffects = 0 in { +let Interpretation64Bit = 1, isCodeGenOnly = 1 in +def LHAU8 : DForm_1<43, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), + (ins memri:$addr), + "lhau $rD, $addr", IIC_LdStLHAU, + []>, RegConstraint<"$addr.reg = $ea_result">, + NoEncode<"$ea_result">; +// NO LWAU! + +let Interpretation64Bit = 1, isCodeGenOnly = 1 in +def LHAUX8 : XForm_1_memOp<31, 375, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), + (ins memrr:$addr), + "lhaux $rD, $addr", IIC_LdStLHAUX, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, + NoEncode<"$ea_result">; +def LWAUX : XForm_1_memOp<31, 373, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), + (ins memrr:$addr), + "lwaux $rD, $addr", IIC_LdStLHAUX, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, + NoEncode<"$ea_result">, isPPC64; +} +} + +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { +// Zero extending loads. +let PPC970_Unit = 2 in { +def LBZ8 : DForm_1<34, (outs g8rc:$rD), (ins memri:$src), + "lbz $rD, $src", IIC_LdStLoad, + [(set i64:$rD, (zextloadi8 iaddr:$src))]>; +def LHZ8 : DForm_1<40, (outs g8rc:$rD), (ins memri:$src), + "lhz $rD, $src", IIC_LdStLoad, + [(set i64:$rD, (zextloadi16 iaddr:$src))]>; +def LWZ8 : DForm_1<32, (outs g8rc:$rD), (ins memri:$src), + "lwz $rD, $src", IIC_LdStLoad, + [(set i64:$rD, (zextloadi32 iaddr:$src))]>, isPPC64; + +def LBZX8 : XForm_1_memOp<31, 87, (outs g8rc:$rD), (ins memrr:$src), + "lbzx $rD, $src", IIC_LdStLoad, + [(set i64:$rD, (zextloadi8 xaddr:$src))]>; +def LHZX8 : XForm_1_memOp<31, 279, (outs g8rc:$rD), (ins memrr:$src), + "lhzx $rD, $src", IIC_LdStLoad, + [(set i64:$rD, (zextloadi16 xaddr:$src))]>; +def LWZX8 : XForm_1_memOp<31, 23, (outs g8rc:$rD), (ins memrr:$src), + "lwzx $rD, $src", IIC_LdStLoad, + [(set i64:$rD, (zextloadi32 xaddr:$src))]>; + + +// Update forms. 
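+// Update-form loads also write the computed effective address back into the
+// base register; the RegConstraint/NoEncode pairs below tie $ea_result to the
+// address base for exactly that purpose. Assembly sketch:
+//   lbzu 3, 1(4)   ; r3 = *(r4 + 1); then r4 = r4 + 1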
+let mayLoad = 1, hasSideEffects = 0 in {
+def LBZU8 : DForm_1<35, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
+                    (ins memri:$addr),
+                    "lbzu $rD, $addr", IIC_LdStLoadUpd,
+                    []>, RegConstraint<"$addr.reg = $ea_result">,
+                    NoEncode<"$ea_result">;
+def LHZU8 : DForm_1<41, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
+                    (ins memri:$addr),
+                    "lhzu $rD, $addr", IIC_LdStLoadUpd,
+                    []>, RegConstraint<"$addr.reg = $ea_result">,
+                    NoEncode<"$ea_result">;
+def LWZU8 : DForm_1<33, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
+                    (ins memri:$addr),
+                    "lwzu $rD, $addr", IIC_LdStLoadUpd,
+                    []>, RegConstraint<"$addr.reg = $ea_result">,
+                    NoEncode<"$ea_result">;
+
+def LBZUX8 : XForm_1_memOp<31, 119, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
+                           (ins memrr:$addr),
+                           "lbzux $rD, $addr", IIC_LdStLoadUpdX,
+                           []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+                           NoEncode<"$ea_result">;
+def LHZUX8 : XForm_1_memOp<31, 311, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
+                           (ins memrr:$addr),
+                           "lhzux $rD, $addr", IIC_LdStLoadUpdX,
+                           []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+                           NoEncode<"$ea_result">;
+def LWZUX8 : XForm_1_memOp<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
+                           (ins memrr:$addr),
+                           "lwzux $rD, $addr", IIC_LdStLoadUpdX,
+                           []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+                           NoEncode<"$ea_result">;
+}
+}
+} // Interpretation64Bit
+
+
+// Full 8-byte loads.
+let PPC970_Unit = 2 in {
+def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
+                  "ld $rD, $src", IIC_LdStLD,
+                  [(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64;
+// The following four definitions are selected for small code model only.
+// Otherwise, we need to create two instructions to form a 32-bit offset,
+// so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
+def LDtoc: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+                  "#LDtoc",
+                  [(set i64:$rD,
+                        (PPCtoc_entry tglobaladdr:$disp, i64:$reg))]>, isPPC64;
+def LDtocJTI: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+                     "#LDtocJTI",
+                     [(set i64:$rD,
+                           (PPCtoc_entry tjumptable:$disp, i64:$reg))]>, isPPC64;
+def LDtocCPT: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+                     "#LDtocCPT",
+                     [(set i64:$rD,
+                           (PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64;
+def LDtocBA: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+                    "#LDtocBA",
+                    [(set i64:$rD,
+                          (PPCtoc_entry tblockaddress:$disp, i64:$reg))]>, isPPC64;
+
+def LDX : XForm_1_memOp<31, 21, (outs g8rc:$rD), (ins memrr:$src),
+                        "ldx $rD, $src", IIC_LdStLD,
+                        [(set i64:$rD, (load xaddr:$src))]>, isPPC64;
+def LDBRX : XForm_1_memOp<31, 532, (outs g8rc:$rD), (ins memrr:$src),
+                          "ldbrx $rD, $src", IIC_LdStLoad,
+                          [(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64;
+
+let mayLoad = 1, hasSideEffects = 0, isCodeGenOnly = 1 in {
+def LHBRX8 : XForm_1_memOp<31, 790, (outs g8rc:$rD), (ins memrr:$src),
+                           "lhbrx $rD, $src", IIC_LdStLoad, []>;
+def LWBRX8 : XForm_1_memOp<31, 534, (outs g8rc:$rD), (ins memrr:$src),
+                           "lwbrx $rD, $src", IIC_LdStLoad, []>;
+}
+
+let mayLoad = 1, hasSideEffects = 0 in {
+def LDU : DSForm_1<58, 1, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
+                   (ins memrix:$addr),
+                   "ldu $rD, $addr", IIC_LdStLDU,
+                   []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64,
+                   NoEncode<"$ea_result">;
+
+def LDUX : XForm_1_memOp<31, 53, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
+                         (ins memrr:$addr),
+                         "ldux $rD, $addr", IIC_LdStLDUX,
+                         []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+                         NoEncode<"$ea_result">, isPPC64;
+
+def LDMX : XForm_1<31, 309, (outs g8rc:$rD), (ins memrr:$src),
+                   "ldmx $rD, $src", IIC_LdStLD, []>, isPPC64,
+                   Requires<[IsISA3_0]>;
+}
+}
+
+// Support for medium and large code model.
+let hasSideEffects = 0 in {
+let isReMaterializable = 1 in {
+def ADDIStocHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
+                       "#ADDIStocHA", []>, isPPC64;
+def ADDItocL: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
+                     "#ADDItocL", []>, isPPC64;
+}
+let mayLoad = 1 in
+def LDtocL: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg),
+                   "#LDtocL", []>, isPPC64;
+}
+
+// Support for thread-local storage.
+def ADDISgotTprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+                            "#ADDISgotTprelHA",
+                            [(set i64:$rD,
+                                  (PPCaddisGotTprelHA i64:$reg,
+                                                      tglobaltlsaddr:$disp))]>,
+                     isPPC64;
+def LDgotTprelL: Pseudo<(outs g8rc:$rD), (ins s16imm64:$disp, g8rc_nox0:$reg),
+                        "#LDgotTprelL",
+                        [(set i64:$rD,
+                              (PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>,
+                 isPPC64;
+
+let isPseudo = 1, Defs = [CR7], Itinerary = IIC_LdStSync in
+def CFENCE8 : Pseudo<(outs), (ins g8rc:$cr), "#CFENCE8", []>;
+
+def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g),
+          (ADD8TLS $in, tglobaltlsaddr:$g)>;
+def ADDIStlsgdHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+                         "#ADDIStlsgdHA",
+                         [(set i64:$rD,
+                               (PPCaddisTlsgdHA i64:$reg, tglobaltlsaddr:$disp))]>,
+                  isPPC64;
+def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+                        "#ADDItlsgdL",
+                        [(set i64:$rD,
+                              (PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
+                 isPPC64;
+// LR8 is a true define, while the rest of the Defs are clobbers. X3 is
+// explicitly defined when this op is created, so not mentioned here.
+// This is lowered to BL8_NOP_TLS by the assembly printer, so the size must be
+// correct because the branch select pass is relying on it.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Size = 8,
+    Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
+def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
+                        "#GETtlsADDR",
+                        [(set i64:$rD,
+                              (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
+                 isPPC64;
+// Combined op for ADDItlsgdL and GETtlsADDR, late expanded. X3 and LR8
+// are true defines while the rest of the Defs are clobbers.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+    Defs = [X0,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7]
+    in
+def ADDItlsgdLADDR : Pseudo<(outs g8rc:$rD),
+                            (ins g8rc_nox0:$reg, s16imm64:$disp, tlsgd:$sym),
+                            "#ADDItlsgdLADDR",
+                            [(set i64:$rD,
+                                  (PPCaddiTlsgdLAddr i64:$reg,
+                                                     tglobaltlsaddr:$disp,
+                                                     tglobaltlsaddr:$sym))]>,
+                     isPPC64;
+def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+                         "#ADDIStlsldHA",
+                         [(set i64:$rD,
+                               (PPCaddisTlsldHA i64:$reg, tglobaltlsaddr:$disp))]>,
+                  isPPC64;
+def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+                        "#ADDItlsldL",
+                        [(set i64:$rD,
+                              (PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
+                 isPPC64;
+// LR8 is a true define, while the rest of the Defs are clobbers. X3 is
+// explicitly defined when this op is created, so not mentioned here.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+    Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
+def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
+                          "#GETtlsldADDR",
+                          [(set i64:$rD,
+                                (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
+                   isPPC64;
+// Combined op for ADDItlsldL and GETtlsldADDR, late expanded.
X3 and LR8 +// are true defines, while the rest of the Defs are clobbers. +let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, + Defs = [X0,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] + in +def ADDItlsldLADDR : Pseudo<(outs g8rc:$rD), + (ins g8rc_nox0:$reg, s16imm64:$disp, tlsgd:$sym), + "#ADDItlsldLADDR", + [(set i64:$rD, + (PPCaddiTlsldLAddr i64:$reg, + tglobaltlsaddr:$disp, + tglobaltlsaddr:$sym))]>, + isPPC64; +def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), + "#ADDISdtprelHA", + [(set i64:$rD, + (PPCaddisDtprelHA i64:$reg, + tglobaltlsaddr:$disp))]>, + isPPC64; +def ADDIdtprelL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), + "#ADDIdtprelL", + [(set i64:$rD, + (PPCaddiDtprelL i64:$reg, tglobaltlsaddr:$disp))]>, + isPPC64; + +let PPC970_Unit = 2 in { +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { +// Truncating stores. +def STB8 : DForm_1<38, (outs), (ins g8rc:$rS, memri:$src), + "stb $rS, $src", IIC_LdStStore, + [(truncstorei8 i64:$rS, iaddr:$src)]>; +def STH8 : DForm_1<44, (outs), (ins g8rc:$rS, memri:$src), + "sth $rS, $src", IIC_LdStStore, + [(truncstorei16 i64:$rS, iaddr:$src)]>; +def STW8 : DForm_1<36, (outs), (ins g8rc:$rS, memri:$src), + "stw $rS, $src", IIC_LdStStore, + [(truncstorei32 i64:$rS, iaddr:$src)]>; +def STBX8 : XForm_8_memOp<31, 215, (outs), (ins g8rc:$rS, memrr:$dst), + "stbx $rS, $dst", IIC_LdStStore, + [(truncstorei8 i64:$rS, xaddr:$dst)]>, + PPC970_DGroup_Cracked; +def STHX8 : XForm_8_memOp<31, 407, (outs), (ins g8rc:$rS, memrr:$dst), + "sthx $rS, $dst", IIC_LdStStore, + [(truncstorei16 i64:$rS, xaddr:$dst)]>, + PPC970_DGroup_Cracked; +def STWX8 : XForm_8_memOp<31, 151, (outs), (ins g8rc:$rS, memrr:$dst), + "stwx $rS, $dst", IIC_LdStStore, + [(truncstorei32 i64:$rS, xaddr:$dst)]>, + PPC970_DGroup_Cracked; +} // Interpretation64Bit + +// Normal 8-byte stores. +def STD : DSForm_1<62, 0, (outs), (ins g8rc:$rS, memrix:$dst), + "std $rS, $dst", IIC_LdStSTD, + [(aligned4store i64:$rS, ixaddr:$dst)]>, isPPC64; +def STDX : XForm_8_memOp<31, 149, (outs), (ins g8rc:$rS, memrr:$dst), + "stdx $rS, $dst", IIC_LdStSTD, + [(store i64:$rS, xaddr:$dst)]>, isPPC64, + PPC970_DGroup_Cracked; +def STDBRX: XForm_8_memOp<31, 660, (outs), (ins g8rc:$rS, memrr:$dst), + "stdbrx $rS, $dst", IIC_LdStStore, + [(PPCstbrx i64:$rS, xoaddr:$dst, i64)]>, isPPC64, + PPC970_DGroup_Cracked; +} + +// Stores with Update (pre-inc). 
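+// Like the update-form loads above, these store to EA = base + offset and
+// then write EA back into the base register, so e.g. "stdu r1, -112(r1)"
+// allocates 112 bytes of stack and saves the old r1 in a single instruction.
+// The $ea_res output models the base write-back.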
+let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in { +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { +def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst), + "stbu $rS, $dst", IIC_LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst), + "sthu $rS, $dst", IIC_LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst), + "stwu $rS, $dst", IIC_LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; + +def STBUX8: XForm_8_memOp<31, 247, (outs ptr_rc_nor0:$ea_res), + (ins g8rc:$rS, memrr:$dst), + "stbux $rS, $dst", IIC_LdStStoreUpd, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, + NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STHUX8: XForm_8_memOp<31, 439, (outs ptr_rc_nor0:$ea_res), + (ins g8rc:$rS, memrr:$dst), + "sthux $rS, $dst", IIC_LdStStoreUpd, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, + NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STWUX8: XForm_8_memOp<31, 183, (outs ptr_rc_nor0:$ea_res), + (ins g8rc:$rS, memrr:$dst), + "stwux $rS, $dst", IIC_LdStStoreUpd, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, + NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +} // Interpretation64Bit + +def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), + (ins g8rc:$rS, memrix:$dst), + "stdu $rS, $dst", IIC_LdStSTDU, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">, + isPPC64; + +def STDUX : XForm_8_memOp<31, 181, (outs ptr_rc_nor0:$ea_res), + (ins g8rc:$rS, memrr:$dst), + "stdux $rS, $dst", IIC_LdStSTDUX, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, + NoEncode<"$ea_res">, + PPC970_DGroup_Cracked, isPPC64; +} + +// Patterns to match the pre-inc stores. We can't put the patterns on +// the instruction definitions directly as ISel wants the address base +// and offset to be separate operands, not a single complex operand. +def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STBU8 $rS, iaddroff:$ptroff, $ptrreg)>; +def : Pat<(pre_truncsti16 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STHU8 $rS, iaddroff:$ptroff, $ptrreg)>; +def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STWU8 $rS, iaddroff:$ptroff, $ptrreg)>; +def : Pat<(aligned4pre_store i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STDU $rS, iaddroff:$ptroff, $ptrreg)>; + +def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STBUX8 $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_truncsti16 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STHUX8 $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STWUX8 $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_store i64:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STDUX $rS, $ptrreg, $ptroff)>; + + +//===----------------------------------------------------------------------===// +// Floating point instructions. +// + + +let PPC970_Unit = 3, hasSideEffects = 0, + Uses = [RM] in { // FPU Operations. 
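+// RM models the FPSCR rounding-mode field: conversions such as fcfid round
+// according to the currently selected mode, so every instruction in this
+// block carries an implicit use of RM.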
+defm FCFID : XForm_26r<63, 846, (outs f8rc:$frD), (ins f8rc:$frB), + "fcfid", "$frD, $frB", IIC_FPGeneral, + [(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64; +defm FCTID : XForm_26r<63, 814, (outs f8rc:$frD), (ins f8rc:$frB), + "fctid", "$frD, $frB", IIC_FPGeneral, + []>, isPPC64; +defm FCTIDU : XForm_26r<63, 942, (outs f8rc:$frD), (ins f8rc:$frB), + "fctidu", "$frD, $frB", IIC_FPGeneral, + []>, isPPC64; +defm FCTIDZ : XForm_26r<63, 815, (outs f8rc:$frD), (ins f8rc:$frB), + "fctidz", "$frD, $frB", IIC_FPGeneral, + [(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64; + +defm FCFIDU : XForm_26r<63, 974, (outs f8rc:$frD), (ins f8rc:$frB), + "fcfidu", "$frD, $frB", IIC_FPGeneral, + [(set f64:$frD, (PPCfcfidu f64:$frB))]>, isPPC64; +defm FCFIDS : XForm_26r<59, 846, (outs f4rc:$frD), (ins f8rc:$frB), + "fcfids", "$frD, $frB", IIC_FPGeneral, + [(set f32:$frD, (PPCfcfids f64:$frB))]>, isPPC64; +defm FCFIDUS : XForm_26r<59, 974, (outs f4rc:$frD), (ins f8rc:$frB), + "fcfidus", "$frD, $frB", IIC_FPGeneral, + [(set f32:$frD, (PPCfcfidus f64:$frB))]>, isPPC64; +defm FCTIDUZ : XForm_26r<63, 943, (outs f8rc:$frD), (ins f8rc:$frB), + "fctiduz", "$frD, $frB", IIC_FPGeneral, + [(set f64:$frD, (PPCfctiduz f64:$frB))]>, isPPC64; +defm FCTIWUZ : XForm_26r<63, 143, (outs f8rc:$frD), (ins f8rc:$frB), + "fctiwuz", "$frD, $frB", IIC_FPGeneral, + [(set f64:$frD, (PPCfctiwuz f64:$frB))]>, isPPC64; +} + + +//===----------------------------------------------------------------------===// +// Instruction Patterns +// + +// Extensions and truncates to/from 32-bit regs. +def : Pat<(i64 (zext i32:$in)), + (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32), + 0, 32)>; +def : Pat<(i64 (anyext i32:$in)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32)>; +def : Pat<(i32 (trunc i64:$in)), + (EXTRACT_SUBREG $in, sub_32)>; + +// Implement the 'not' operation with the NOR instruction. +// (we could use the default xori pattern, but nor has lower latency on some +// cores (such as the A2)). +def i64not : OutPatFrag<(ops node:$in), + (NOR8 $in, $in)>; +def : Pat<(not i64:$in), + (i64not $in)>; + +// Extending loads with i64 targets. +def : Pat<(zextloadi1 iaddr:$src), + (LBZ8 iaddr:$src)>; +def : Pat<(zextloadi1 xaddr:$src), + (LBZX8 xaddr:$src)>; +def : Pat<(extloadi1 iaddr:$src), + (LBZ8 iaddr:$src)>; +def : Pat<(extloadi1 xaddr:$src), + (LBZX8 xaddr:$src)>; +def : Pat<(extloadi8 iaddr:$src), + (LBZ8 iaddr:$src)>; +def : Pat<(extloadi8 xaddr:$src), + (LBZX8 xaddr:$src)>; +def : Pat<(extloadi16 iaddr:$src), + (LHZ8 iaddr:$src)>; +def : Pat<(extloadi16 xaddr:$src), + (LHZX8 xaddr:$src)>; +def : Pat<(extloadi32 iaddr:$src), + (LWZ8 iaddr:$src)>; +def : Pat<(extloadi32 xaddr:$src), + (LWZX8 xaddr:$src)>; + +// Standard shifts. These are represented separately from the real shifts above +// so that we can distinguish between shifts that allow 6-bit and 7-bit shift +// amounts. +def : Pat<(sra i64:$rS, i32:$rB), + (SRAD $rS, $rB)>; +def : Pat<(srl i64:$rS, i32:$rB), + (SRD $rS, $rB)>; +def : Pat<(shl i64:$rS, i32:$rB), + (SLD $rS, $rB)>; + +// SUBFIC +def : Pat<(sub imm64SExt16:$imm, i64:$in), + (SUBFIC8 $in, imm:$imm)>; + +// SHL/SRL +def : Pat<(shl i64:$in, (i32 imm:$imm)), + (RLDICR $in, imm:$imm, (SHL64 imm:$imm))>; +def : Pat<(srl i64:$in, (i32 imm:$imm)), + (RLDICL $in, (SRL64 imm:$imm), imm:$imm)>; + +// ROTL +def : Pat<(rotl i64:$in, i32:$sh), + (RLDCL $in, $sh, 0)>; +def : Pat<(rotl i64:$in, (i32 imm:$imm)), + (RLDICL $in, imm:$imm, 0)>; + +// Hi and Lo for Darwin Global Addresses. 
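+// A symbol's address is materialized in two 16-bit halves: PPChi selects to
+// lis/addis (the high-adjusted part) and PPClo to li/addi (the low part),
+// e.g. "lis r3, sym@ha" followed by "addi r3, r3, sym@l" rebuilds the full
+// address.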
+def : Pat<(PPChi tglobaladdr:$in, 0), (LIS8 tglobaladdr:$in)>; +def : Pat<(PPClo tglobaladdr:$in, 0), (LI8 tglobaladdr:$in)>; +def : Pat<(PPChi tconstpool:$in , 0), (LIS8 tconstpool:$in)>; +def : Pat<(PPClo tconstpool:$in , 0), (LI8 tconstpool:$in)>; +def : Pat<(PPChi tjumptable:$in , 0), (LIS8 tjumptable:$in)>; +def : Pat<(PPClo tjumptable:$in , 0), (LI8 tjumptable:$in)>; +def : Pat<(PPChi tblockaddress:$in, 0), (LIS8 tblockaddress:$in)>; +def : Pat<(PPClo tblockaddress:$in, 0), (LI8 tblockaddress:$in)>; +def : Pat<(PPChi tglobaltlsaddr:$g, i64:$in), + (ADDIS8 $in, tglobaltlsaddr:$g)>; +def : Pat<(PPClo tglobaltlsaddr:$g, i64:$in), + (ADDI8 $in, tglobaltlsaddr:$g)>; +def : Pat<(add i64:$in, (PPChi tglobaladdr:$g, 0)), + (ADDIS8 $in, tglobaladdr:$g)>; +def : Pat<(add i64:$in, (PPChi tconstpool:$g, 0)), + (ADDIS8 $in, tconstpool:$g)>; +def : Pat<(add i64:$in, (PPChi tjumptable:$g, 0)), + (ADDIS8 $in, tjumptable:$g)>; +def : Pat<(add i64:$in, (PPChi tblockaddress:$g, 0)), + (ADDIS8 $in, tblockaddress:$g)>; + +// Patterns to match r+r indexed loads and stores for +// addresses without at least 4-byte alignment. +def : Pat<(i64 (unaligned4sextloadi32 xoaddr:$src)), + (LWAX xoaddr:$src)>; +def : Pat<(i64 (unaligned4load xoaddr:$src)), + (LDX xoaddr:$src)>; +def : Pat<(unaligned4store i64:$rS, xoaddr:$dst), + (STDX $rS, xoaddr:$dst)>; + +// 64-bits atomic loads and stores +def : Pat<(atomic_load_64 ixaddr:$src), (LD memrix:$src)>; +def : Pat<(atomic_load_64 xaddr:$src), (LDX memrr:$src)>; + +def : Pat<(atomic_store_64 ixaddr:$ptr, i64:$val), (STD g8rc:$val, memrix:$ptr)>; +def : Pat<(atomic_store_64 xaddr:$ptr, i64:$val), (STDX g8rc:$val, memrr:$ptr)>; + +let Predicates = [IsISA3_0] in { + +class X_L1_RA5_RB5<bits<6> opcode, bits<10> xo, string opc, RegisterOperand ty, + InstrItinClass itin, list<dag> pattern> + : X_L1_RS5_RS5<opcode, xo, (outs), (ins ty:$rA, ty:$rB, u1imm:$L), + !strconcat(opc, " $rA, $rB, $L"), itin, pattern>; + +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { +def CP_COPY8 : X_L1_RA5_RB5<31, 774, "copy" , g8rc, IIC_LdStCOPY, []>; +def CP_PASTE8 : X_L1_RA5_RB5<31, 902, "paste" , g8rc, IIC_LdStPASTE, []>; +def CP_PASTE8o : X_L1_RA5_RB5<31, 902, "paste.", g8rc, IIC_LdStPASTE, []>,isDOT; +} + +// SLB Invalidate Entry Global +def SLBIEG : XForm_26<31, 466, (outs), (ins gprc:$RS, gprc:$RB), + "slbieg $RS, $RB", IIC_SprSLBIEG, []>; +// SLB Synchronize +def SLBSYNC : XForm_0<31, 338, (outs), (ins), "slbsync", IIC_SprSLBSYNC, []>; + +} // IsISA3_0 diff --git a/capstone/suite/synctools/tablegen/PPC/PPCInstrAltivec.td b/capstone/suite/synctools/tablegen/PPC/PPCInstrAltivec.td new file mode 100644 index 000000000..24969d7ef --- /dev/null +++ b/capstone/suite/synctools/tablegen/PPC/PPCInstrAltivec.td @@ -0,0 +1,1507 @@ +//===-- PPCInstrAltivec.td - The PowerPC Altivec Extension -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Altivec extension to the PowerPC instruction set. +// +//===----------------------------------------------------------------------===// + +// *********************************** NOTE *********************************** +// ** For POWER8 Little Endian, the VSX swap optimization relies on knowing ** +// ** which VMX and VSX instructions are lane-sensitive and which are not. 
** +// ** A lane-sensitive instruction relies, implicitly or explicitly, on ** +// ** whether lanes are numbered from left to right. An instruction like ** +// ** VADDFP is not lane-sensitive, because each lane of the result vector ** +// ** relies only on the corresponding lane of the source vectors. However, ** +// ** an instruction like VMULESB is lane-sensitive, because "even" and ** +// ** "odd" lanes are different for big-endian and little-endian numbering. ** +// ** ** +// ** When adding new VMX and VSX instructions, please consider whether they ** +// ** are lane-sensitive. If so, they must be added to a switch statement ** +// ** in PPCVSXSwapRemoval::gatherVectorInstructions(). ** +// **************************************************************************** + + +//===----------------------------------------------------------------------===// +// Altivec transformation functions and pattern fragments. +// + +// Since we canonicalize buildvectors to v16i8, all vnots "-1" operands will be +// of that type. +def vnot_ppc : PatFrag<(ops node:$in), + (xor node:$in, (bitconvert (v16i8 immAllOnesV)))>; + +def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG); +}]>; +def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG); +}]>; +def vpkudum_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVPKUDUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG); +}]>; +def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG); +}]>; +def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG); +}]>; +def vpkudum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVPKUDUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG); +}]>; + +// These fragments are provided for little-endian, where the inputs must be +// swapped for correct semantics. 
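+// The third argument to the isVPKU*UMShuffleMask predicates selects the
+// variant being matched: 0 is the natural big-endian operand order, 1 the
+// unary vpk*(x,x) form, and 2 this swapped little-endian form. On LE
+// subtargets shuffle masks use reversed lane numbering, so a pack only
+// matches the BE-defined instruction once its two inputs are exchanged
+// (see the *_swapped Pats near the end of the file).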
+def vpkuhum_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 2, *CurDAG); +}]>; +def vpkuwum_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 2, *CurDAG); +}]>; +def vpkudum_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVPKUDUMShuffleMask(cast<ShuffleVectorSDNode>(N), 2, *CurDAG); +}]>; + +def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 0, *CurDAG); +}]>; +def vmrglh_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 0, *CurDAG); +}]>; +def vmrglw_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 0, *CurDAG); +}]>; +def vmrghb_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 0, *CurDAG); +}]>; +def vmrghh_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 0, *CurDAG); +}]>; +def vmrghw_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 0, *CurDAG); +}]>; + + +def vmrglb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 1, *CurDAG); +}]>; +def vmrglh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 1, *CurDAG); +}]>; +def vmrglw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 1, *CurDAG); +}]>; +def vmrghb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 1, *CurDAG); +}]>; +def vmrghh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 1, *CurDAG); +}]>; +def vmrghw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 1, *CurDAG); +}]>; + + +// These fragments are provided for little-endian, where the inputs must be +// swapped for correct semantics. 
+def vmrglb_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 2, *CurDAG); +}]>; +def vmrglh_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 2, *CurDAG); +}]>; +def vmrglw_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 2, *CurDAG); +}]>; +def vmrghb_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 2, *CurDAG); +}]>; +def vmrghh_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 2, *CurDAG); +}]>; +def vmrghw_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 2, *CurDAG); +}]>; + + +def vmrgew_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), true, 0, *CurDAG); +}]>; +def vmrgow_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), false, 0, *CurDAG); +}]>; +def vmrgew_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), true, 1, *CurDAG); +}]>; +def vmrgow_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), false, 1, *CurDAG); +}]>; +def vmrgew_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), true, 2, *CurDAG); +}]>; +def vmrgow_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), false, 2, *CurDAG); +}]>; + + + +def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{ + return getI32Imm(PPC::isVSLDOIShuffleMask(N, 0, *CurDAG), SDLoc(N)); +}]>; +def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVSLDOIShuffleMask(N, 0, *CurDAG) != -1; +}], VSLDOI_get_imm>; + + +/// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into +/// vector_shuffle(X,undef,mask) by the dag combiner. +def VSLDOI_unary_get_imm : SDNodeXForm<vector_shuffle, [{ + return getI32Imm(PPC::isVSLDOIShuffleMask(N, 1, *CurDAG), SDLoc(N)); +}]>; +def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVSLDOIShuffleMask(N, 1, *CurDAG) != -1; +}], VSLDOI_unary_get_imm>; + + +/// VSLDOI_swapped* - These fragments are provided for little-endian, where +/// the inputs must be swapped for correct semantics. 
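+/// For reference: vsldoi vD, vA, vB, SH extracts bytes SH..SH+15 of the
+/// 32-byte concatenation vA||vB, so a v16i8 shuffle mask of <1, 2, ..., 16>
+/// corresponds to SH = 1. isVSLDOIShuffleMask returns that shift amount
+/// (or -1 if the mask is not a vsldoi), and the SDNodeXForms here convert
+/// it into the immediate operand of the emitted VSLDOI.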
+def VSLDOI_swapped_get_imm : SDNodeXForm<vector_shuffle, [{ + return getI32Imm(PPC::isVSLDOIShuffleMask(N, 2, *CurDAG), SDLoc(N)); +}]>; +def vsldoi_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVSLDOIShuffleMask(N, 2, *CurDAG) != -1; +}], VSLDOI_get_imm>; + + +// VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm. +def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{ + return getI32Imm(PPC::getVSPLTImmediate(N, 1, *CurDAG), SDLoc(N)); +}]>; +def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 1); +}], VSPLTB_get_imm>; +def VSPLTH_get_imm : SDNodeXForm<vector_shuffle, [{ + return getI32Imm(PPC::getVSPLTImmediate(N, 2, *CurDAG), SDLoc(N)); +}]>; +def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 2); +}], VSPLTH_get_imm>; +def VSPLTW_get_imm : SDNodeXForm<vector_shuffle, [{ + return getI32Imm(PPC::getVSPLTImmediate(N, 4, *CurDAG), SDLoc(N)); +}]>; +def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 4); +}], VSPLTW_get_imm>; + + +// VSPLTISB_get_imm xform function: convert build_vector to VSPLTISB imm. +def VSPLTISB_get_imm : SDNodeXForm<build_vector, [{ + return PPC::get_VSPLTI_elt(N, 1, *CurDAG); +}]>; +def vecspltisb : PatLeaf<(build_vector), [{ + return PPC::get_VSPLTI_elt(N, 1, *CurDAG).getNode() != nullptr; +}], VSPLTISB_get_imm>; + +// VSPLTISH_get_imm xform function: convert build_vector to VSPLTISH imm. +def VSPLTISH_get_imm : SDNodeXForm<build_vector, [{ + return PPC::get_VSPLTI_elt(N, 2, *CurDAG); +}]>; +def vecspltish : PatLeaf<(build_vector), [{ + return PPC::get_VSPLTI_elt(N, 2, *CurDAG).getNode() != nullptr; +}], VSPLTISH_get_imm>; + +// VSPLTISW_get_imm xform function: convert build_vector to VSPLTISW imm. +def VSPLTISW_get_imm : SDNodeXForm<build_vector, [{ + return PPC::get_VSPLTI_elt(N, 4, *CurDAG); +}]>; +def vecspltisw : PatLeaf<(build_vector), [{ + return PPC::get_VSPLTI_elt(N, 4, *CurDAG).getNode() != nullptr; +}], VSPLTISW_get_imm>; + +//===----------------------------------------------------------------------===// +// Helpers for defining instructions that directly correspond to intrinsics. + +// VA1a_Int_Ty - A VAForm_1a intrinsic definition of specific type. +class VA1a_Int_Ty<bits<6> xo, string opc, Intrinsic IntID, ValueType Ty> + : VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC), + !strconcat(opc, " $vD, $vA, $vB, $vC"), IIC_VecFP, + [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB, Ty:$vC))]>; + +// VA1a_Int_Ty2 - A VAForm_1a intrinsic definition where the type of the +// inputs doesn't match the type of the output. +class VA1a_Int_Ty2<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy, + ValueType InTy> + : VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC), + !strconcat(opc, " $vD, $vA, $vB, $vC"), IIC_VecFP, + [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB, InTy:$vC))]>; + +// VA1a_Int_Ty3 - A VAForm_1a intrinsic definition where there are two +// input types and an output type. 
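+// In1Ty types both $vA and $vB, In2Ty the third operand. For example,
+// VMSUMUBM below is VA1a_Int_Ty3<36, ..., v4i32, v16i8, v4i32>: two v16i8
+// multiplicand inputs, a v4i32 accumulator, and a v4i32 result.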
+class VA1a_Int_Ty3<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy, + ValueType In1Ty, ValueType In2Ty> + : VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC), + !strconcat(opc, " $vD, $vA, $vB, $vC"), IIC_VecFP, + [(set OutTy:$vD, + (IntID In1Ty:$vA, In1Ty:$vB, In2Ty:$vC))]>; + +// VX1_Int_Ty - A VXForm_1 intrinsic definition of specific type. +class VX1_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty> + : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP, + [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB))]>; + +// VX1_Int_Ty2 - A VXForm_1 intrinsic definition where the type of the +// inputs doesn't match the type of the output. +class VX1_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy, + ValueType InTy> + : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP, + [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB))]>; + +// VX1_Int_Ty3 - A VXForm_1 intrinsic definition where there are two +// input types and an output type. +class VX1_Int_Ty3<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy, + ValueType In1Ty, ValueType In2Ty> + : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP, + [(set OutTy:$vD, (IntID In1Ty:$vA, In2Ty:$vB))]>; + +// VX2_Int_SP - A VXForm_2 intrinsic definition of vector single-precision type. +class VX2_Int_SP<bits<11> xo, string opc, Intrinsic IntID> + : VXForm_2<xo, (outs vrrc:$vD), (ins vrrc:$vB), + !strconcat(opc, " $vD, $vB"), IIC_VecFP, + [(set v4f32:$vD, (IntID v4f32:$vB))]>; + +// VX2_Int_Ty2 - A VXForm_2 intrinsic definition where the type of the +// inputs doesn't match the type of the output. +class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy, + ValueType InTy> + : VXForm_2<xo, (outs vrrc:$vD), (ins vrrc:$vB), + !strconcat(opc, " $vD, $vB"), IIC_VecFP, + [(set OutTy:$vD, (IntID InTy:$vB))]>; + +class VXBX_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty> + : VXForm_BX<xo, (outs vrrc:$vD), (ins vrrc:$vA), + !strconcat(opc, " $vD, $vA"), IIC_VecFP, + [(set Ty:$vD, (IntID Ty:$vA))]>; + +class VXCR_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty> + : VXForm_CR<xo, (outs vrrc:$vD), (ins vrrc:$vA, u1imm:$ST, u4imm:$SIX), + !strconcat(opc, " $vD, $vA, $ST, $SIX"), IIC_VecFP, + [(set Ty:$vD, (IntID Ty:$vA, imm:$ST, imm:$SIX))]>; + +//===----------------------------------------------------------------------===// +// Instruction Definitions. 
+ +def HasAltivec : Predicate<"PPCSubTarget->hasAltivec()">; +let Predicates = [HasAltivec] in { + +def DSS : DSS_Form<0, 822, (outs), (ins u5imm:$STRM), + "dss $STRM", IIC_LdStLoad /*FIXME*/, [(int_ppc_altivec_dss imm:$STRM)]>, + Deprecated<DeprecatedDST> { + let A = 0; + let B = 0; +} + +def DSSALL : DSS_Form<1, 822, (outs), (ins), + "dssall", IIC_LdStLoad /*FIXME*/, [(int_ppc_altivec_dssall)]>, + Deprecated<DeprecatedDST> { + let STRM = 0; + let A = 0; + let B = 0; +} + +def DST : DSS_Form<0, 342, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB), + "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, + [(int_ppc_altivec_dst i32:$rA, i32:$rB, imm:$STRM)]>, + Deprecated<DeprecatedDST>; + +def DSTT : DSS_Form<1, 342, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB), + "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, + [(int_ppc_altivec_dstt i32:$rA, i32:$rB, imm:$STRM)]>, + Deprecated<DeprecatedDST>; + +def DSTST : DSS_Form<0, 374, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB), + "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, + [(int_ppc_altivec_dstst i32:$rA, i32:$rB, imm:$STRM)]>, + Deprecated<DeprecatedDST>; + +def DSTSTT : DSS_Form<1, 374, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB), + "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, + [(int_ppc_altivec_dststt i32:$rA, i32:$rB, imm:$STRM)]>, + Deprecated<DeprecatedDST>; + +let isCodeGenOnly = 1 in { + // The very same instructions as above, but formally matching 64bit registers. + def DST64 : DSS_Form<0, 342, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB), + "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, + [(int_ppc_altivec_dst i64:$rA, i32:$rB, imm:$STRM)]>, + Deprecated<DeprecatedDST>; + + def DSTT64 : DSS_Form<1, 342, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB), + "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, + [(int_ppc_altivec_dstt i64:$rA, i32:$rB, imm:$STRM)]>, + Deprecated<DeprecatedDST>; + + def DSTST64 : DSS_Form<0, 374, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB), + "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, + [(int_ppc_altivec_dstst i64:$rA, i32:$rB, + imm:$STRM)]>, + Deprecated<DeprecatedDST>; + + def DSTSTT64 : DSS_Form<1, 374, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB), + "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, + [(int_ppc_altivec_dststt i64:$rA, i32:$rB, + imm:$STRM)]>, + Deprecated<DeprecatedDST>; +} + +def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins), + "mfvscr $vD", IIC_LdStStore, + [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>; +def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB), + "mtvscr $vB", IIC_LdStLoad, + [(int_ppc_altivec_mtvscr v4i32:$vB)]>; + +let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in { // Loads. 
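+// lvebx/lvehx/lvewx load a single element into the lane the effective
+// address falls in (the remaining lanes are undefined); lvx/lvxl load a
+// full quadword and ignore the low four bits of the EA. AltiVec has no
+// unaligned vector load, which is why the lvsl/lvsr permute-control loads
+// just below exist.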
+def LVEBX: XForm_1_memOp<31, 7, (outs vrrc:$vD), (ins memrr:$src), + "lvebx $vD, $src", IIC_LdStLoad, + [(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>; +def LVEHX: XForm_1_memOp<31, 39, (outs vrrc:$vD), (ins memrr:$src), + "lvehx $vD, $src", IIC_LdStLoad, + [(set v8i16:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>; +def LVEWX: XForm_1_memOp<31, 71, (outs vrrc:$vD), (ins memrr:$src), + "lvewx $vD, $src", IIC_LdStLoad, + [(set v4i32:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>; +def LVX : XForm_1_memOp<31, 103, (outs vrrc:$vD), (ins memrr:$src), + "lvx $vD, $src", IIC_LdStLoad, + [(set v4i32:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>; +def LVXL : XForm_1_memOp<31, 359, (outs vrrc:$vD), (ins memrr:$src), + "lvxl $vD, $src", IIC_LdStLoad, + [(set v4i32:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>; +} + +def LVSL : XForm_1_memOp<31, 6, (outs vrrc:$vD), (ins memrr:$src), + "lvsl $vD, $src", IIC_LdStLoad, + [(set v16i8:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>, + PPC970_Unit_LSU; +def LVSR : XForm_1_memOp<31, 38, (outs vrrc:$vD), (ins memrr:$src), + "lvsr $vD, $src", IIC_LdStLoad, + [(set v16i8:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>, + PPC970_Unit_LSU; + +let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in { // Stores. +def STVEBX: XForm_8_memOp<31, 135, (outs), (ins vrrc:$rS, memrr:$dst), + "stvebx $rS, $dst", IIC_LdStStore, + [(int_ppc_altivec_stvebx v16i8:$rS, xoaddr:$dst)]>; +def STVEHX: XForm_8_memOp<31, 167, (outs), (ins vrrc:$rS, memrr:$dst), + "stvehx $rS, $dst", IIC_LdStStore, + [(int_ppc_altivec_stvehx v8i16:$rS, xoaddr:$dst)]>; +def STVEWX: XForm_8_memOp<31, 199, (outs), (ins vrrc:$rS, memrr:$dst), + "stvewx $rS, $dst", IIC_LdStStore, + [(int_ppc_altivec_stvewx v4i32:$rS, xoaddr:$dst)]>; +def STVX : XForm_8_memOp<31, 231, (outs), (ins vrrc:$rS, memrr:$dst), + "stvx $rS, $dst", IIC_LdStStore, + [(int_ppc_altivec_stvx v4i32:$rS, xoaddr:$dst)]>; +def STVXL : XForm_8_memOp<31, 487, (outs), (ins vrrc:$rS, memrr:$dst), + "stvxl $rS, $dst", IIC_LdStStore, + [(int_ppc_altivec_stvxl v4i32:$rS, xoaddr:$dst)]>; +} + +let PPC970_Unit = 5 in { // VALU Operations. +// VA-Form instructions. 3-input AltiVec ops. +let isCommutable = 1 in { +def VMADDFP : VAForm_1<46, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB), + "vmaddfp $vD, $vA, $vC, $vB", IIC_VecFP, + [(set v4f32:$vD, + (fma v4f32:$vA, v4f32:$vC, v4f32:$vB))]>; + +// FIXME: The fma+fneg pattern won't match because fneg is not legal. +def VNMSUBFP: VAForm_1<47, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB), + "vnmsubfp $vD, $vA, $vC, $vB", IIC_VecFP, + [(set v4f32:$vD, (fneg (fma v4f32:$vA, v4f32:$vC, + (fneg v4f32:$vB))))]>; + +def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>; +def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs, + v8i16>; +def VMLADDUHM : VA1a_Int_Ty<34, "vmladduhm", int_ppc_altivec_vmladduhm, v8i16>; +} // isCommutable + +def VPERM : VA1a_Int_Ty3<43, "vperm", int_ppc_altivec_vperm, + v4i32, v4i32, v16i8>; +def VSEL : VA1a_Int_Ty<42, "vsel", int_ppc_altivec_vsel, v4i32>; + +// Shuffles. +def VSLDOI : VAForm_2<44, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u4imm:$SH), + "vsldoi $vD, $vA, $vB, $SH", IIC_VecFP, + [(set v16i8:$vD, + (PPCvecshl v16i8:$vA, v16i8:$vB, imm32SExt16:$SH))]>; + +// VX-Form instructions. AltiVec arithmetic ops. 
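+// The modulo (wrapping) forms below map directly onto the generic
+// add/sub/fadd DAG nodes, while the saturating forms (vaddsbs etc.) also
+// set VSCR[SAT] and are therefore only reachable through their target
+// intrinsics.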
+let isCommutable = 1 in { +def VADDFP : VXForm_1<10, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vaddfp $vD, $vA, $vB", IIC_VecFP, + [(set v4f32:$vD, (fadd v4f32:$vA, v4f32:$vB))]>; + +def VADDUBM : VXForm_1<0, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vaddubm $vD, $vA, $vB", IIC_VecGeneral, + [(set v16i8:$vD, (add v16i8:$vA, v16i8:$vB))]>; +def VADDUHM : VXForm_1<64, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vadduhm $vD, $vA, $vB", IIC_VecGeneral, + [(set v8i16:$vD, (add v8i16:$vA, v8i16:$vB))]>; +def VADDUWM : VXForm_1<128, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vadduwm $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (add v4i32:$vA, v4i32:$vB))]>; + +def VADDCUW : VX1_Int_Ty<384, "vaddcuw", int_ppc_altivec_vaddcuw, v4i32>; +def VADDSBS : VX1_Int_Ty<768, "vaddsbs", int_ppc_altivec_vaddsbs, v16i8>; +def VADDSHS : VX1_Int_Ty<832, "vaddshs", int_ppc_altivec_vaddshs, v8i16>; +def VADDSWS : VX1_Int_Ty<896, "vaddsws", int_ppc_altivec_vaddsws, v4i32>; +def VADDUBS : VX1_Int_Ty<512, "vaddubs", int_ppc_altivec_vaddubs, v16i8>; +def VADDUHS : VX1_Int_Ty<576, "vadduhs", int_ppc_altivec_vadduhs, v8i16>; +def VADDUWS : VX1_Int_Ty<640, "vadduws", int_ppc_altivec_vadduws, v4i32>; +} // isCommutable + +let isCommutable = 1 in +def VAND : VXForm_1<1028, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vand $vD, $vA, $vB", IIC_VecFP, + [(set v4i32:$vD, (and v4i32:$vA, v4i32:$vB))]>; +def VANDC : VXForm_1<1092, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vandc $vD, $vA, $vB", IIC_VecFP, + [(set v4i32:$vD, (and v4i32:$vA, + (vnot_ppc v4i32:$vB)))]>; + +def VCFSX : VXForm_1<842, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), + "vcfsx $vD, $vB, $UIMM", IIC_VecFP, + [(set v4f32:$vD, + (int_ppc_altivec_vcfsx v4i32:$vB, imm:$UIMM))]>; +def VCFUX : VXForm_1<778, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), + "vcfux $vD, $vB, $UIMM", IIC_VecFP, + [(set v4f32:$vD, + (int_ppc_altivec_vcfux v4i32:$vB, imm:$UIMM))]>; +def VCTSXS : VXForm_1<970, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), + "vctsxs $vD, $vB, $UIMM", IIC_VecFP, + [(set v4i32:$vD, + (int_ppc_altivec_vctsxs v4f32:$vB, imm:$UIMM))]>; +def VCTUXS : VXForm_1<906, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), + "vctuxs $vD, $vB, $UIMM", IIC_VecFP, + [(set v4i32:$vD, + (int_ppc_altivec_vctuxs v4f32:$vB, imm:$UIMM))]>; + +// Defines with the UIM field set to 0 for floating-point +// to integer (fp_to_sint/fp_to_uint) conversions and integer +// to floating-point (sint_to_fp/uint_to_fp) conversions. 
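+// vcfsx/vcfux scale the converted value by 2^-UIMM, and vctsxs/vctuxs by
+// 2^UIMM, so with UIMM = 0 they are plain conversions. These clones give
+// ISel a zero-immediate form to select for the generic conversion nodes;
+// see the Pats under "Float to integer and integer to float conversions"
+// further down.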
+let isCodeGenOnly = 1, VA = 0 in { +def VCFSX_0 : VXForm_1<842, (outs vrrc:$vD), (ins vrrc:$vB), + "vcfsx $vD, $vB, 0", IIC_VecFP, + [(set v4f32:$vD, + (int_ppc_altivec_vcfsx v4i32:$vB, 0))]>; +def VCTUXS_0 : VXForm_1<906, (outs vrrc:$vD), (ins vrrc:$vB), + "vctuxs $vD, $vB, 0", IIC_VecFP, + [(set v4i32:$vD, + (int_ppc_altivec_vctuxs v4f32:$vB, 0))]>; +def VCFUX_0 : VXForm_1<778, (outs vrrc:$vD), (ins vrrc:$vB), + "vcfux $vD, $vB, 0", IIC_VecFP, + [(set v4f32:$vD, + (int_ppc_altivec_vcfux v4i32:$vB, 0))]>; +def VCTSXS_0 : VXForm_1<970, (outs vrrc:$vD), (ins vrrc:$vB), + "vctsxs $vD, $vB, 0", IIC_VecFP, + [(set v4i32:$vD, + (int_ppc_altivec_vctsxs v4f32:$vB, 0))]>; +} +def VEXPTEFP : VX2_Int_SP<394, "vexptefp", int_ppc_altivec_vexptefp>; +def VLOGEFP : VX2_Int_SP<458, "vlogefp", int_ppc_altivec_vlogefp>; + +let isCommutable = 1 in { +def VAVGSB : VX1_Int_Ty<1282, "vavgsb", int_ppc_altivec_vavgsb, v16i8>; +def VAVGSH : VX1_Int_Ty<1346, "vavgsh", int_ppc_altivec_vavgsh, v8i16>; +def VAVGSW : VX1_Int_Ty<1410, "vavgsw", int_ppc_altivec_vavgsw, v4i32>; +def VAVGUB : VX1_Int_Ty<1026, "vavgub", int_ppc_altivec_vavgub, v16i8>; +def VAVGUH : VX1_Int_Ty<1090, "vavguh", int_ppc_altivec_vavguh, v8i16>; +def VAVGUW : VX1_Int_Ty<1154, "vavguw", int_ppc_altivec_vavguw, v4i32>; + +def VMAXFP : VX1_Int_Ty<1034, "vmaxfp", int_ppc_altivec_vmaxfp, v4f32>; +def VMAXSB : VX1_Int_Ty< 258, "vmaxsb", int_ppc_altivec_vmaxsb, v16i8>; +def VMAXSH : VX1_Int_Ty< 322, "vmaxsh", int_ppc_altivec_vmaxsh, v8i16>; +def VMAXSW : VX1_Int_Ty< 386, "vmaxsw", int_ppc_altivec_vmaxsw, v4i32>; +def VMAXUB : VX1_Int_Ty< 2, "vmaxub", int_ppc_altivec_vmaxub, v16i8>; +def VMAXUH : VX1_Int_Ty< 66, "vmaxuh", int_ppc_altivec_vmaxuh, v8i16>; +def VMAXUW : VX1_Int_Ty< 130, "vmaxuw", int_ppc_altivec_vmaxuw, v4i32>; +def VMINFP : VX1_Int_Ty<1098, "vminfp", int_ppc_altivec_vminfp, v4f32>; +def VMINSB : VX1_Int_Ty< 770, "vminsb", int_ppc_altivec_vminsb, v16i8>; +def VMINSH : VX1_Int_Ty< 834, "vminsh", int_ppc_altivec_vminsh, v8i16>; +def VMINSW : VX1_Int_Ty< 898, "vminsw", int_ppc_altivec_vminsw, v4i32>; +def VMINUB : VX1_Int_Ty< 514, "vminub", int_ppc_altivec_vminub, v16i8>; +def VMINUH : VX1_Int_Ty< 578, "vminuh", int_ppc_altivec_vminuh, v8i16>; +def VMINUW : VX1_Int_Ty< 642, "vminuw", int_ppc_altivec_vminuw, v4i32>; +} // isCommutable + +def VMRGHB : VXForm_1< 12, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmrghb $vD, $vA, $vB", IIC_VecFP, + [(set v16i8:$vD, (vmrghb_shuffle v16i8:$vA, v16i8:$vB))]>; +def VMRGHH : VXForm_1< 76, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmrghh $vD, $vA, $vB", IIC_VecFP, + [(set v16i8:$vD, (vmrghh_shuffle v16i8:$vA, v16i8:$vB))]>; +def VMRGHW : VXForm_1<140, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmrghw $vD, $vA, $vB", IIC_VecFP, + [(set v16i8:$vD, (vmrghw_shuffle v16i8:$vA, v16i8:$vB))]>; +def VMRGLB : VXForm_1<268, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmrglb $vD, $vA, $vB", IIC_VecFP, + [(set v16i8:$vD, (vmrglb_shuffle v16i8:$vA, v16i8:$vB))]>; +def VMRGLH : VXForm_1<332, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmrglh $vD, $vA, $vB", IIC_VecFP, + [(set v16i8:$vD, (vmrglh_shuffle v16i8:$vA, v16i8:$vB))]>; +def VMRGLW : VXForm_1<396, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmrglw $vD, $vA, $vB", IIC_VecFP, + [(set v16i8:$vD, (vmrglw_shuffle v16i8:$vA, v16i8:$vB))]>; + +def VMSUMMBM : VA1a_Int_Ty3<37, "vmsummbm", int_ppc_altivec_vmsummbm, + v4i32, v16i8, v4i32>; +def VMSUMSHM : VA1a_Int_Ty3<40, "vmsumshm", int_ppc_altivec_vmsumshm, + v4i32, v8i16, v4i32>; +def VMSUMSHS : 
VA1a_Int_Ty3<41, "vmsumshs", int_ppc_altivec_vmsumshs, + v4i32, v8i16, v4i32>; +def VMSUMUBM : VA1a_Int_Ty3<36, "vmsumubm", int_ppc_altivec_vmsumubm, + v4i32, v16i8, v4i32>; +def VMSUMUHM : VA1a_Int_Ty3<38, "vmsumuhm", int_ppc_altivec_vmsumuhm, + v4i32, v8i16, v4i32>; +def VMSUMUHS : VA1a_Int_Ty3<39, "vmsumuhs", int_ppc_altivec_vmsumuhs, + v4i32, v8i16, v4i32>; + +let isCommutable = 1 in { +def VMULESB : VX1_Int_Ty2<776, "vmulesb", int_ppc_altivec_vmulesb, + v8i16, v16i8>; +def VMULESH : VX1_Int_Ty2<840, "vmulesh", int_ppc_altivec_vmulesh, + v4i32, v8i16>; +def VMULEUB : VX1_Int_Ty2<520, "vmuleub", int_ppc_altivec_vmuleub, + v8i16, v16i8>; +def VMULEUH : VX1_Int_Ty2<584, "vmuleuh", int_ppc_altivec_vmuleuh, + v4i32, v8i16>; +def VMULOSB : VX1_Int_Ty2<264, "vmulosb", int_ppc_altivec_vmulosb, + v8i16, v16i8>; +def VMULOSH : VX1_Int_Ty2<328, "vmulosh", int_ppc_altivec_vmulosh, + v4i32, v8i16>; +def VMULOUB : VX1_Int_Ty2< 8, "vmuloub", int_ppc_altivec_vmuloub, + v8i16, v16i8>; +def VMULOUH : VX1_Int_Ty2< 72, "vmulouh", int_ppc_altivec_vmulouh, + v4i32, v8i16>; +} // isCommutable + +def VREFP : VX2_Int_SP<266, "vrefp", int_ppc_altivec_vrefp>; +def VRFIM : VX2_Int_SP<714, "vrfim", int_ppc_altivec_vrfim>; +def VRFIN : VX2_Int_SP<522, "vrfin", int_ppc_altivec_vrfin>; +def VRFIP : VX2_Int_SP<650, "vrfip", int_ppc_altivec_vrfip>; +def VRFIZ : VX2_Int_SP<586, "vrfiz", int_ppc_altivec_vrfiz>; +def VRSQRTEFP : VX2_Int_SP<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>; + +def VSUBCUW : VX1_Int_Ty<1408, "vsubcuw", int_ppc_altivec_vsubcuw, v4i32>; + +def VSUBFP : VXForm_1<74, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vsubfp $vD, $vA, $vB", IIC_VecGeneral, + [(set v4f32:$vD, (fsub v4f32:$vA, v4f32:$vB))]>; +def VSUBUBM : VXForm_1<1024, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vsububm $vD, $vA, $vB", IIC_VecGeneral, + [(set v16i8:$vD, (sub v16i8:$vA, v16i8:$vB))]>; +def VSUBUHM : VXForm_1<1088, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vsubuhm $vD, $vA, $vB", IIC_VecGeneral, + [(set v8i16:$vD, (sub v8i16:$vA, v8i16:$vB))]>; +def VSUBUWM : VXForm_1<1152, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vsubuwm $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (sub v4i32:$vA, v4i32:$vB))]>; + +def VSUBSBS : VX1_Int_Ty<1792, "vsubsbs" , int_ppc_altivec_vsubsbs, v16i8>; +def VSUBSHS : VX1_Int_Ty<1856, "vsubshs" , int_ppc_altivec_vsubshs, v8i16>; +def VSUBSWS : VX1_Int_Ty<1920, "vsubsws" , int_ppc_altivec_vsubsws, v4i32>; +def VSUBUBS : VX1_Int_Ty<1536, "vsububs" , int_ppc_altivec_vsububs, v16i8>; +def VSUBUHS : VX1_Int_Ty<1600, "vsubuhs" , int_ppc_altivec_vsubuhs, v8i16>; +def VSUBUWS : VX1_Int_Ty<1664, "vsubuws" , int_ppc_altivec_vsubuws, v4i32>; + +def VSUMSWS : VX1_Int_Ty<1928, "vsumsws" , int_ppc_altivec_vsumsws, v4i32>; +def VSUM2SWS: VX1_Int_Ty<1672, "vsum2sws", int_ppc_altivec_vsum2sws, v4i32>; + +def VSUM4SBS: VX1_Int_Ty3<1800, "vsum4sbs", int_ppc_altivec_vsum4sbs, + v4i32, v16i8, v4i32>; +def VSUM4SHS: VX1_Int_Ty3<1608, "vsum4shs", int_ppc_altivec_vsum4shs, + v4i32, v8i16, v4i32>; +def VSUM4UBS: VX1_Int_Ty3<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs, + v4i32, v16i8, v4i32>; + +def VNOR : VXForm_1<1284, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vnor $vD, $vA, $vB", IIC_VecFP, + [(set v4i32:$vD, (vnot_ppc (or v4i32:$vA, + v4i32:$vB)))]>; +let isCommutable = 1 in { +def VOR : VXForm_1<1156, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vor $vD, $vA, $vB", IIC_VecFP, + [(set v4i32:$vD, (or v4i32:$vA, v4i32:$vB))]>; +def VXOR : VXForm_1<1220, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vxor 
$vD, $vA, $vB", IIC_VecFP, + [(set v4i32:$vD, (xor v4i32:$vA, v4i32:$vB))]>; +} // isCommutable + +def VRLB : VX1_Int_Ty< 4, "vrlb", int_ppc_altivec_vrlb, v16i8>; +def VRLH : VX1_Int_Ty< 68, "vrlh", int_ppc_altivec_vrlh, v8i16>; +def VRLW : VX1_Int_Ty< 132, "vrlw", int_ppc_altivec_vrlw, v4i32>; + +def VSL : VX1_Int_Ty< 452, "vsl" , int_ppc_altivec_vsl, v4i32 >; +def VSLO : VX1_Int_Ty<1036, "vslo", int_ppc_altivec_vslo, v4i32>; + +def VSLB : VX1_Int_Ty< 260, "vslb", int_ppc_altivec_vslb, v16i8>; +def VSLH : VX1_Int_Ty< 324, "vslh", int_ppc_altivec_vslh, v8i16>; +def VSLW : VX1_Int_Ty< 388, "vslw", int_ppc_altivec_vslw, v4i32>; + +def VSPLTB : VXForm_1<524, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), + "vspltb $vD, $vB, $UIMM", IIC_VecPerm, + [(set v16i8:$vD, + (vspltb_shuffle:$UIMM v16i8:$vB, (undef)))]>; +def VSPLTH : VXForm_1<588, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), + "vsplth $vD, $vB, $UIMM", IIC_VecPerm, + [(set v16i8:$vD, + (vsplth_shuffle:$UIMM v16i8:$vB, (undef)))]>; +def VSPLTW : VXForm_1<652, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), + "vspltw $vD, $vB, $UIMM", IIC_VecPerm, + [(set v16i8:$vD, + (vspltw_shuffle:$UIMM v16i8:$vB, (undef)))]>; +let isCodeGenOnly = 1 in { + def VSPLTBs : VXForm_1<524, (outs vrrc:$vD), (ins u5imm:$UIMM, vfrc:$vB), + "vspltb $vD, $vB, $UIMM", IIC_VecPerm, []>; + def VSPLTHs : VXForm_1<588, (outs vrrc:$vD), (ins u5imm:$UIMM, vfrc:$vB), + "vsplth $vD, $vB, $UIMM", IIC_VecPerm, []>; +} + +def VSR : VX1_Int_Ty< 708, "vsr" , int_ppc_altivec_vsr, v4i32>; +def VSRO : VX1_Int_Ty<1100, "vsro" , int_ppc_altivec_vsro, v4i32>; + +def VSRAB : VX1_Int_Ty< 772, "vsrab", int_ppc_altivec_vsrab, v16i8>; +def VSRAH : VX1_Int_Ty< 836, "vsrah", int_ppc_altivec_vsrah, v8i16>; +def VSRAW : VX1_Int_Ty< 900, "vsraw", int_ppc_altivec_vsraw, v4i32>; +def VSRB : VX1_Int_Ty< 516, "vsrb" , int_ppc_altivec_vsrb , v16i8>; +def VSRH : VX1_Int_Ty< 580, "vsrh" , int_ppc_altivec_vsrh , v8i16>; +def VSRW : VX1_Int_Ty< 644, "vsrw" , int_ppc_altivec_vsrw , v4i32>; + + +def VSPLTISB : VXForm_3<780, (outs vrrc:$vD), (ins s5imm:$SIMM), + "vspltisb $vD, $SIMM", IIC_VecPerm, + [(set v16i8:$vD, (v16i8 vecspltisb:$SIMM))]>; +def VSPLTISH : VXForm_3<844, (outs vrrc:$vD), (ins s5imm:$SIMM), + "vspltish $vD, $SIMM", IIC_VecPerm, + [(set v8i16:$vD, (v8i16 vecspltish:$SIMM))]>; +def VSPLTISW : VXForm_3<908, (outs vrrc:$vD), (ins s5imm:$SIMM), + "vspltisw $vD, $SIMM", IIC_VecPerm, + [(set v4i32:$vD, (v4i32 vecspltisw:$SIMM))]>; + +// Vector Pack. +def VPKPX : VX1_Int_Ty2<782, "vpkpx", int_ppc_altivec_vpkpx, + v8i16, v4i32>; +def VPKSHSS : VX1_Int_Ty2<398, "vpkshss", int_ppc_altivec_vpkshss, + v16i8, v8i16>; +def VPKSHUS : VX1_Int_Ty2<270, "vpkshus", int_ppc_altivec_vpkshus, + v16i8, v8i16>; +def VPKSWSS : VX1_Int_Ty2<462, "vpkswss", int_ppc_altivec_vpkswss, + v8i16, v4i32>; +def VPKSWUS : VX1_Int_Ty2<334, "vpkswus", int_ppc_altivec_vpkswus, + v8i16, v4i32>; +def VPKUHUM : VXForm_1<14, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vpkuhum $vD, $vA, $vB", IIC_VecFP, + [(set v16i8:$vD, + (vpkuhum_shuffle v16i8:$vA, v16i8:$vB))]>; +def VPKUHUS : VX1_Int_Ty2<142, "vpkuhus", int_ppc_altivec_vpkuhus, + v16i8, v8i16>; +def VPKUWUM : VXForm_1<78, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vpkuwum $vD, $vA, $vB", IIC_VecFP, + [(set v16i8:$vD, + (vpkuwum_shuffle v16i8:$vA, v16i8:$vB))]>; +def VPKUWUS : VX1_Int_Ty2<206, "vpkuwus", int_ppc_altivec_vpkuwus, + v8i16, v4i32>; + +// Vector Unpack. 
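+// Each unpack widens one half of its source, which is why these use
+// VX2_Int_Ty2 with a narrower input type: vupkhsb sign-extends the high
+// eight bytes to halfwords, vupklsh the low four halfwords to words, and
+// vupkhpx/vupklpx expand 1/5/5/5 pixels into 32-bit 8/8/8/8 form.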
+def VUPKHPX : VX2_Int_Ty2<846, "vupkhpx", int_ppc_altivec_vupkhpx,
+                          v4i32, v8i16>;
+def VUPKHSB : VX2_Int_Ty2<526, "vupkhsb", int_ppc_altivec_vupkhsb,
+                          v8i16, v16i8>;
+def VUPKHSH : VX2_Int_Ty2<590, "vupkhsh", int_ppc_altivec_vupkhsh,
+                          v4i32, v8i16>;
+def VUPKLPX : VX2_Int_Ty2<974, "vupklpx", int_ppc_altivec_vupklpx,
+                          v4i32, v8i16>;
+def VUPKLSB : VX2_Int_Ty2<654, "vupklsb", int_ppc_altivec_vupklsb,
+                          v8i16, v16i8>;
+def VUPKLSH : VX2_Int_Ty2<718, "vupklsh", int_ppc_altivec_vupklsh,
+                          v4i32, v8i16>;
+
+
+// Altivec Comparisons.
+
+class VCMP<bits<10> xo, string asmstr, ValueType Ty>
+  : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), asmstr,
+              IIC_VecFPCompare,
+              [(set Ty:$vD, (Ty (PPCvcmp Ty:$vA, Ty:$vB, xo)))]>;
+class VCMPo<bits<10> xo, string asmstr, ValueType Ty>
+  : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), asmstr,
+              IIC_VecFPCompare,
+              [(set Ty:$vD, (Ty (PPCvcmp_o Ty:$vA, Ty:$vB, xo)))]> {
+  let Defs = [CR6];
+  let RC = 1;
+}
+
+// f32 element comparisons.
+def VCMPBFP : VCMP <966, "vcmpbfp $vD, $vA, $vB" , v4f32>;
+def VCMPBFPo : VCMPo<966, "vcmpbfp. $vD, $vA, $vB" , v4f32>;
+def VCMPEQFP : VCMP <198, "vcmpeqfp $vD, $vA, $vB" , v4f32>;
+def VCMPEQFPo : VCMPo<198, "vcmpeqfp. $vD, $vA, $vB", v4f32>;
+def VCMPGEFP : VCMP <454, "vcmpgefp $vD, $vA, $vB" , v4f32>;
+def VCMPGEFPo : VCMPo<454, "vcmpgefp. $vD, $vA, $vB", v4f32>;
+def VCMPGTFP : VCMP <710, "vcmpgtfp $vD, $vA, $vB" , v4f32>;
+def VCMPGTFPo : VCMPo<710, "vcmpgtfp. $vD, $vA, $vB", v4f32>;
+
+// i8 element comparisons.
+def VCMPEQUB : VCMP < 6, "vcmpequb $vD, $vA, $vB" , v16i8>;
+def VCMPEQUBo : VCMPo< 6, "vcmpequb. $vD, $vA, $vB", v16i8>;
+def VCMPGTSB : VCMP <774, "vcmpgtsb $vD, $vA, $vB" , v16i8>;
+def VCMPGTSBo : VCMPo<774, "vcmpgtsb. $vD, $vA, $vB", v16i8>;
+def VCMPGTUB : VCMP <518, "vcmpgtub $vD, $vA, $vB" , v16i8>;
+def VCMPGTUBo : VCMPo<518, "vcmpgtub. $vD, $vA, $vB", v16i8>;
+
+// i16 element comparisons.
+def VCMPEQUH : VCMP < 70, "vcmpequh $vD, $vA, $vB" , v8i16>;
+def VCMPEQUHo : VCMPo< 70, "vcmpequh. $vD, $vA, $vB", v8i16>;
+def VCMPGTSH : VCMP <838, "vcmpgtsh $vD, $vA, $vB" , v8i16>;
+def VCMPGTSHo : VCMPo<838, "vcmpgtsh. $vD, $vA, $vB", v8i16>;
+def VCMPGTUH : VCMP <582, "vcmpgtuh $vD, $vA, $vB" , v8i16>;
+def VCMPGTUHo : VCMPo<582, "vcmpgtuh. $vD, $vA, $vB", v8i16>;
+
+// i32 element comparisons.
+def VCMPEQUW : VCMP <134, "vcmpequw $vD, $vA, $vB" , v4i32>;
+def VCMPEQUWo : VCMPo<134, "vcmpequw. $vD, $vA, $vB", v4i32>;
+def VCMPGTSW : VCMP <902, "vcmpgtsw $vD, $vA, $vB" , v4i32>;
+def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
+def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
+def VCMPGTUWo : VCMPo<646, "vcmpgtuw.
$vD, $vA, $vB", v4i32>; + +let isCodeGenOnly = 1 in { +def V_SET0B : VXForm_setzero<1220, (outs vrrc:$vD), (ins), + "vxor $vD, $vD, $vD", IIC_VecFP, + [(set v16i8:$vD, (v16i8 immAllZerosV))]>; +def V_SET0H : VXForm_setzero<1220, (outs vrrc:$vD), (ins), + "vxor $vD, $vD, $vD", IIC_VecFP, + [(set v8i16:$vD, (v8i16 immAllZerosV))]>; +def V_SET0 : VXForm_setzero<1220, (outs vrrc:$vD), (ins), + "vxor $vD, $vD, $vD", IIC_VecFP, + [(set v4i32:$vD, (v4i32 immAllZerosV))]>; + +let IMM=-1 in { +def V_SETALLONESB : VXForm_3<908, (outs vrrc:$vD), (ins), + "vspltisw $vD, -1", IIC_VecFP, + [(set v16i8:$vD, (v16i8 immAllOnesV))]>; +def V_SETALLONESH : VXForm_3<908, (outs vrrc:$vD), (ins), + "vspltisw $vD, -1", IIC_VecFP, + [(set v8i16:$vD, (v8i16 immAllOnesV))]>; +def V_SETALLONES : VXForm_3<908, (outs vrrc:$vD), (ins), + "vspltisw $vD, -1", IIC_VecFP, + [(set v4i32:$vD, (v4i32 immAllOnesV))]>; +} +} +} // VALU Operations. + +//===----------------------------------------------------------------------===// +// Additional Altivec Patterns +// + +// Extended mnemonics +def : InstAlias<"vmr $vD, $vA", (VOR vrrc:$vD, vrrc:$vA, vrrc:$vA)>; +def : InstAlias<"vnot $vD, $vA", (VNOR vrrc:$vD, vrrc:$vA, vrrc:$vA)>; + +// Loads. +def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>; + +// Stores. +def : Pat<(store v4i32:$rS, xoaddr:$dst), + (STVX $rS, xoaddr:$dst)>; + +// Bit conversions. +def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>; +def : Pat<(v16i8 (bitconvert (v4i32 VRRC:$src))), (v16i8 VRRC:$src)>; +def : Pat<(v16i8 (bitconvert (v4f32 VRRC:$src))), (v16i8 VRRC:$src)>; +def : Pat<(v16i8 (bitconvert (v2i64 VRRC:$src))), (v16i8 VRRC:$src)>; +def : Pat<(v16i8 (bitconvert (v1i128 VRRC:$src))), (v16i8 VRRC:$src)>; + +def : Pat<(v8i16 (bitconvert (v16i8 VRRC:$src))), (v8i16 VRRC:$src)>; +def : Pat<(v8i16 (bitconvert (v4i32 VRRC:$src))), (v8i16 VRRC:$src)>; +def : Pat<(v8i16 (bitconvert (v4f32 VRRC:$src))), (v8i16 VRRC:$src)>; +def : Pat<(v8i16 (bitconvert (v2i64 VRRC:$src))), (v8i16 VRRC:$src)>; +def : Pat<(v8i16 (bitconvert (v1i128 VRRC:$src))), (v8i16 VRRC:$src)>; + +def : Pat<(v4i32 (bitconvert (v16i8 VRRC:$src))), (v4i32 VRRC:$src)>; +def : Pat<(v4i32 (bitconvert (v8i16 VRRC:$src))), (v4i32 VRRC:$src)>; +def : Pat<(v4i32 (bitconvert (v4f32 VRRC:$src))), (v4i32 VRRC:$src)>; +def : Pat<(v4i32 (bitconvert (v2i64 VRRC:$src))), (v4i32 VRRC:$src)>; +def : Pat<(v4i32 (bitconvert (v1i128 VRRC:$src))), (v4i32 VRRC:$src)>; + +def : Pat<(v4f32 (bitconvert (v16i8 VRRC:$src))), (v4f32 VRRC:$src)>; +def : Pat<(v4f32 (bitconvert (v8i16 VRRC:$src))), (v4f32 VRRC:$src)>; +def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>; +def : Pat<(v4f32 (bitconvert (v2i64 VRRC:$src))), (v4f32 VRRC:$src)>; +def : Pat<(v4f32 (bitconvert (v1i128 VRRC:$src))), (v4f32 VRRC:$src)>; + +def : Pat<(v2i64 (bitconvert (v16i8 VRRC:$src))), (v2i64 VRRC:$src)>; +def : Pat<(v2i64 (bitconvert (v8i16 VRRC:$src))), (v2i64 VRRC:$src)>; +def : Pat<(v2i64 (bitconvert (v4i32 VRRC:$src))), (v2i64 VRRC:$src)>; +def : Pat<(v2i64 (bitconvert (v4f32 VRRC:$src))), (v2i64 VRRC:$src)>; +def : Pat<(v2i64 (bitconvert (v1i128 VRRC:$src))), (v2i64 VRRC:$src)>; + +def : Pat<(v1i128 (bitconvert (v16i8 VRRC:$src))), (v1i128 VRRC:$src)>; +def : Pat<(v1i128 (bitconvert (v8i16 VRRC:$src))), (v1i128 VRRC:$src)>; +def : Pat<(v1i128 (bitconvert (v4i32 VRRC:$src))), (v1i128 VRRC:$src)>; +def : Pat<(v1i128 (bitconvert (v4f32 VRRC:$src))), (v1i128 VRRC:$src)>; +def : Pat<(v1i128 (bitconvert (v2i64 VRRC:$src))), (v1i128 VRRC:$src)>; + +// 
Shuffles. + +// Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x) +def:Pat<(vsldoi_unary_shuffle:$in v16i8:$vA, undef), + (VSLDOI $vA, $vA, (VSLDOI_unary_get_imm $in))>; +def:Pat<(vpkuwum_unary_shuffle v16i8:$vA, undef), + (VPKUWUM $vA, $vA)>; +def:Pat<(vpkuhum_unary_shuffle v16i8:$vA, undef), + (VPKUHUM $vA, $vA)>; +def:Pat<(vsldoi_shuffle:$SH v16i8:$vA, v16i8:$vB), + (VSLDOI v16i8:$vA, v16i8:$vB, (VSLDOI_get_imm $SH))>; + + +// Match vsldoi(y,x), vpkuwum(y,x), vpkuhum(y,x), i.e., swapped operands. +// These fragments are matched for little-endian, where the inputs must +// be swapped for correct semantics. +def:Pat<(vsldoi_swapped_shuffle:$in v16i8:$vA, v16i8:$vB), + (VSLDOI $vB, $vA, (VSLDOI_swapped_get_imm $in))>; +def:Pat<(vpkuwum_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VPKUWUM $vB, $vA)>; +def:Pat<(vpkuhum_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VPKUHUM $vB, $vA)>; + +// Match vmrg*(x,x) +def:Pat<(vmrglb_unary_shuffle v16i8:$vA, undef), + (VMRGLB $vA, $vA)>; +def:Pat<(vmrglh_unary_shuffle v16i8:$vA, undef), + (VMRGLH $vA, $vA)>; +def:Pat<(vmrglw_unary_shuffle v16i8:$vA, undef), + (VMRGLW $vA, $vA)>; +def:Pat<(vmrghb_unary_shuffle v16i8:$vA, undef), + (VMRGHB $vA, $vA)>; +def:Pat<(vmrghh_unary_shuffle v16i8:$vA, undef), + (VMRGHH $vA, $vA)>; +def:Pat<(vmrghw_unary_shuffle v16i8:$vA, undef), + (VMRGHW $vA, $vA)>; + +// Match vmrg*(y,x), i.e., swapped operands. These fragments +// are matched for little-endian, where the inputs must be +// swapped for correct semantics. +def:Pat<(vmrglb_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VMRGLB $vB, $vA)>; +def:Pat<(vmrglh_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VMRGLH $vB, $vA)>; +def:Pat<(vmrglw_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VMRGLW $vB, $vA)>; +def:Pat<(vmrghb_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VMRGHB $vB, $vA)>; +def:Pat<(vmrghh_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VMRGHH $vB, $vA)>; +def:Pat<(vmrghw_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VMRGHW $vB, $vA)>; + +// Logical Operations +def : Pat<(vnot_ppc v4i32:$vA), (VNOR $vA, $vA)>; + +def : Pat<(vnot_ppc (or v4i32:$A, v4i32:$B)), + (VNOR $A, $B)>; +def : Pat<(and v4i32:$A, (vnot_ppc v4i32:$B)), + (VANDC $A, $B)>; + +def : Pat<(fmul v4f32:$vA, v4f32:$vB), + (VMADDFP $vA, $vB, + (v4i32 (VSLW (v4i32 (V_SETALLONES)), (v4i32 (V_SETALLONES)))))>; + +// Fused multiply add and multiply sub for packed float. 
These are represented +// separately from the real instructions above, for operations that must have +// the additional precision, such as Newton-Raphson (used by divide, sqrt) +def : Pat<(PPCvmaddfp v4f32:$A, v4f32:$B, v4f32:$C), + (VMADDFP $A, $B, $C)>; +def : Pat<(PPCvnmsubfp v4f32:$A, v4f32:$B, v4f32:$C), + (VNMSUBFP $A, $B, $C)>; + +def : Pat<(int_ppc_altivec_vmaddfp v4f32:$A, v4f32:$B, v4f32:$C), + (VMADDFP $A, $B, $C)>; +def : Pat<(int_ppc_altivec_vnmsubfp v4f32:$A, v4f32:$B, v4f32:$C), + (VNMSUBFP $A, $B, $C)>; + +def : Pat<(PPCvperm v16i8:$vA, v16i8:$vB, v16i8:$vC), + (VPERM $vA, $vB, $vC)>; + +def : Pat<(PPCfre v4f32:$A), (VREFP $A)>; +def : Pat<(PPCfrsqrte v4f32:$A), (VRSQRTEFP $A)>; + +// Vector shifts +def : Pat<(v16i8 (shl v16i8:$vA, v16i8:$vB)), + (v16i8 (VSLB $vA, $vB))>; +def : Pat<(v8i16 (shl v8i16:$vA, v8i16:$vB)), + (v8i16 (VSLH $vA, $vB))>; +def : Pat<(v4i32 (shl v4i32:$vA, v4i32:$vB)), + (v4i32 (VSLW $vA, $vB))>; +def : Pat<(v1i128 (shl v1i128:$vA, v1i128:$vB)), + (v1i128 (VSL (v16i8 (VSLO $vA, $vB)), (v16i8 (VSPLTB 15, $vB))))>; +def : Pat<(v16i8 (PPCshl v16i8:$vA, v16i8:$vB)), + (v16i8 (VSLB $vA, $vB))>; +def : Pat<(v8i16 (PPCshl v8i16:$vA, v8i16:$vB)), + (v8i16 (VSLH $vA, $vB))>; +def : Pat<(v4i32 (PPCshl v4i32:$vA, v4i32:$vB)), + (v4i32 (VSLW $vA, $vB))>; +def : Pat<(v1i128 (PPCshl v1i128:$vA, v1i128:$vB)), + (v1i128 (VSL (v16i8 (VSLO $vA, $vB)), (v16i8 (VSPLTB 15, $vB))))>; + +def : Pat<(v16i8 (srl v16i8:$vA, v16i8:$vB)), + (v16i8 (VSRB $vA, $vB))>; +def : Pat<(v8i16 (srl v8i16:$vA, v8i16:$vB)), + (v8i16 (VSRH $vA, $vB))>; +def : Pat<(v4i32 (srl v4i32:$vA, v4i32:$vB)), + (v4i32 (VSRW $vA, $vB))>; +def : Pat<(v1i128 (srl v1i128:$vA, v1i128:$vB)), + (v1i128 (VSR (v16i8 (VSRO $vA, $vB)), (v16i8 (VSPLTB 15, $vB))))>; +def : Pat<(v16i8 (PPCsrl v16i8:$vA, v16i8:$vB)), + (v16i8 (VSRB $vA, $vB))>; +def : Pat<(v8i16 (PPCsrl v8i16:$vA, v8i16:$vB)), + (v8i16 (VSRH $vA, $vB))>; +def : Pat<(v4i32 (PPCsrl v4i32:$vA, v4i32:$vB)), + (v4i32 (VSRW $vA, $vB))>; +def : Pat<(v1i128 (PPCsrl v1i128:$vA, v1i128:$vB)), + (v1i128 (VSR (v16i8 (VSRO $vA, $vB)), (v16i8 (VSPLTB 15, $vB))))>; + +def : Pat<(v16i8 (sra v16i8:$vA, v16i8:$vB)), + (v16i8 (VSRAB $vA, $vB))>; +def : Pat<(v8i16 (sra v8i16:$vA, v8i16:$vB)), + (v8i16 (VSRAH $vA, $vB))>; +def : Pat<(v4i32 (sra v4i32:$vA, v4i32:$vB)), + (v4i32 (VSRAW $vA, $vB))>; +def : Pat<(v16i8 (PPCsra v16i8:$vA, v16i8:$vB)), + (v16i8 (VSRAB $vA, $vB))>; +def : Pat<(v8i16 (PPCsra v8i16:$vA, v8i16:$vB)), + (v8i16 (VSRAH $vA, $vB))>; +def : Pat<(v4i32 (PPCsra v4i32:$vA, v4i32:$vB)), + (v4i32 (VSRAW $vA, $vB))>; + +// Float to integer and integer to float conversions +def : Pat<(v4i32 (fp_to_sint v4f32:$vA)), + (VCTSXS_0 $vA)>; +def : Pat<(v4i32 (fp_to_uint v4f32:$vA)), + (VCTUXS_0 $vA)>; +def : Pat<(v4f32 (sint_to_fp v4i32:$vA)), + (VCFSX_0 $vA)>; +def : Pat<(v4f32 (uint_to_fp v4i32:$vA)), + (VCFUX_0 $vA)>; + +// Floating-point rounding +def : Pat<(v4f32 (ffloor v4f32:$vA)), + (VRFIM $vA)>; +def : Pat<(v4f32 (fceil v4f32:$vA)), + (VRFIP $vA)>; +def : Pat<(v4f32 (ftrunc v4f32:$vA)), + (VRFIZ $vA)>; +def : Pat<(v4f32 (fnearbyint v4f32:$vA)), + (VRFIN $vA)>; + +} // end HasAltivec + +def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">; +def HasP8Crypto : Predicate<"PPCSubTarget->hasP8Crypto()">; +let Predicates = [HasP8Altivec] in { + +let isCommutable = 1 in { +def VMULESW : VX1_Int_Ty2<904, "vmulesw", int_ppc_altivec_vmulesw, + v2i64, v4i32>; +def VMULEUW : VX1_Int_Ty2<648, "vmuleuw", int_ppc_altivec_vmuleuw, + v2i64, v4i32>; +def VMULOSW : 
VX1_Int_Ty2<392, "vmulosw", int_ppc_altivec_vmulosw, + v2i64, v4i32>; +def VMULOUW : VX1_Int_Ty2<136, "vmulouw", int_ppc_altivec_vmulouw, + v2i64, v4i32>; +def VMULUWM : VXForm_1<137, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmuluwm $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (mul v4i32:$vA, v4i32:$vB))]>; +def VMAXSD : VX1_Int_Ty<450, "vmaxsd", int_ppc_altivec_vmaxsd, v2i64>; +def VMAXUD : VX1_Int_Ty<194, "vmaxud", int_ppc_altivec_vmaxud, v2i64>; +def VMINSD : VX1_Int_Ty<962, "vminsd", int_ppc_altivec_vminsd, v2i64>; +def VMINUD : VX1_Int_Ty<706, "vminud", int_ppc_altivec_vminud, v2i64>; +} // isCommutable + +// Vector merge +def VMRGEW : VXForm_1<1932, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmrgew $vD, $vA, $vB", IIC_VecFP, + [(set v16i8:$vD, + (v16i8 (vmrgew_shuffle v16i8:$vA, v16i8:$vB)))]>; +def VMRGOW : VXForm_1<1676, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmrgow $vD, $vA, $vB", IIC_VecFP, + [(set v16i8:$vD, + (v16i8 (vmrgow_shuffle v16i8:$vA, v16i8:$vB)))]>; + +// Match vmrgew(x,x) and vmrgow(x,x) +def:Pat<(vmrgew_unary_shuffle v16i8:$vA, undef), + (VMRGEW $vA, $vA)>; +def:Pat<(vmrgow_unary_shuffle v16i8:$vA, undef), + (VMRGOW $vA, $vA)>; + +// Match vmrgew(y,x) and vmrgow(y,x), i.e., swapped operands. These fragments +// are matched for little-endian, where the inputs must be swapped for correct +// semantics. +def:Pat<(vmrgew_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VMRGEW $vB, $vA)>; +def:Pat<(vmrgow_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VMRGOW $vB, $vA)>; + + +// Vector shifts +def VRLD : VX1_Int_Ty<196, "vrld", int_ppc_altivec_vrld, v2i64>; +def VSLD : VXForm_1<1476, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vsld $vD, $vA, $vB", IIC_VecGeneral, []>; +def VSRD : VXForm_1<1732, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vsrd $vD, $vA, $vB", IIC_VecGeneral, []>; +def VSRAD : VXForm_1<964, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vsrad $vD, $vA, $vB", IIC_VecGeneral, []>; + +def : Pat<(v2i64 (shl v2i64:$vA, v2i64:$vB)), + (v2i64 (VSLD $vA, $vB))>; +def : Pat<(v2i64 (PPCshl v2i64:$vA, v2i64:$vB)), + (v2i64 (VSLD $vA, $vB))>; +def : Pat<(v2i64 (srl v2i64:$vA, v2i64:$vB)), + (v2i64 (VSRD $vA, $vB))>; +def : Pat<(v2i64 (PPCsrl v2i64:$vA, v2i64:$vB)), + (v2i64 (VSRD $vA, $vB))>; +def : Pat<(v2i64 (sra v2i64:$vA, v2i64:$vB)), + (v2i64 (VSRAD $vA, $vB))>; +def : Pat<(v2i64 (PPCsra v2i64:$vA, v2i64:$vB)), + (v2i64 (VSRAD $vA, $vB))>; + +// Vector Integer Arithmetic Instructions +let isCommutable = 1 in { +def VADDUDM : VXForm_1<192, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vaddudm $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (add v2i64:$vA, v2i64:$vB))]>; +def VADDUQM : VXForm_1<256, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vadduqm $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (add v1i128:$vA, v1i128:$vB))]>; +} // isCommutable + +// Vector Quadword Add +def VADDEUQM : VA1a_Int_Ty<60, "vaddeuqm", int_ppc_altivec_vaddeuqm, v1i128>; +def VADDCUQ : VX1_Int_Ty<320, "vaddcuq", int_ppc_altivec_vaddcuq, v1i128>; +def VADDECUQ : VA1a_Int_Ty<61, "vaddecuq", int_ppc_altivec_vaddecuq, v1i128>; + +// Vector Doubleword Subtract +def VSUBUDM : VXForm_1<1216, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vsubudm $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (sub v2i64:$vA, v2i64:$vB))]>; + +// Vector Quadword Subtract +def VSUBUQM : VXForm_1<1280, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vsubuqm $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (sub v1i128:$vA, v1i128:$vB))]>; +def VSUBEUQM : VA1a_Int_Ty<62, "vsubeuqm", 
int_ppc_altivec_vsubeuqm, v1i128>; +def VSUBCUQ : VX1_Int_Ty<1344, "vsubcuq", int_ppc_altivec_vsubcuq, v1i128>; +def VSUBECUQ : VA1a_Int_Ty<63, "vsubecuq", int_ppc_altivec_vsubecuq, v1i128>; + +// Count Leading Zeros +def VCLZB : VXForm_2<1794, (outs vrrc:$vD), (ins vrrc:$vB), + "vclzb $vD, $vB", IIC_VecGeneral, + [(set v16i8:$vD, (ctlz v16i8:$vB))]>; +def VCLZH : VXForm_2<1858, (outs vrrc:$vD), (ins vrrc:$vB), + "vclzh $vD, $vB", IIC_VecGeneral, + [(set v8i16:$vD, (ctlz v8i16:$vB))]>; +def VCLZW : VXForm_2<1922, (outs vrrc:$vD), (ins vrrc:$vB), + "vclzw $vD, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (ctlz v4i32:$vB))]>; +def VCLZD : VXForm_2<1986, (outs vrrc:$vD), (ins vrrc:$vB), + "vclzd $vD, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (ctlz v2i64:$vB))]>; + +// Population Count +def VPOPCNTB : VXForm_2<1795, (outs vrrc:$vD), (ins vrrc:$vB), + "vpopcntb $vD, $vB", IIC_VecGeneral, + [(set v16i8:$vD, (ctpop v16i8:$vB))]>; +def VPOPCNTH : VXForm_2<1859, (outs vrrc:$vD), (ins vrrc:$vB), + "vpopcnth $vD, $vB", IIC_VecGeneral, + [(set v8i16:$vD, (ctpop v8i16:$vB))]>; +def VPOPCNTW : VXForm_2<1923, (outs vrrc:$vD), (ins vrrc:$vB), + "vpopcntw $vD, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (ctpop v4i32:$vB))]>; +def VPOPCNTD : VXForm_2<1987, (outs vrrc:$vD), (ins vrrc:$vB), + "vpopcntd $vD, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (ctpop v2i64:$vB))]>; + +let isCommutable = 1 in { +// FIXME: Use AddedComplexity > 400 to ensure these patterns match before the +// VSX equivalents. We need to fix this up at some point. Two possible +// solutions for this problem: +// 1. Disable Altivec patterns that compete with VSX patterns using the +// !HasVSX predicate. This essentially favours VSX over Altivec, in +// hopes of reducing register pressure (larger register set using VSX +// instructions than VMX instructions) +// 2. Employ a more disciplined use of AddedComplexity, which would provide +// more fine-grained control than option 1. This would be beneficial +// if we find situations where Altivec is really preferred over VSX. +def VEQV : VXForm_1<1668, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "veqv $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (vnot_ppc (xor v4i32:$vA, v4i32:$vB)))]>; +def VNAND : VXForm_1<1412, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vnand $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (vnot_ppc (and v4i32:$vA, v4i32:$vB)))]>; +} // isCommutable + +def VORC : VXForm_1<1348, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vorc $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (or v4i32:$vA, + (vnot_ppc v4i32:$vB)))]>; + +// i64 element comparisons. +def VCMPEQUD : VCMP <199, "vcmpequd $vD, $vA, $vB" , v2i64>; +def VCMPEQUDo : VCMPo<199, "vcmpequd. $vD, $vA, $vB", v2i64>; +def VCMPGTSD : VCMP <967, "vcmpgtsd $vD, $vA, $vB" , v2i64>; +def VCMPGTSDo : VCMPo<967, "vcmpgtsd. $vD, $vA, $vB", v2i64>; +def VCMPGTUD : VCMP <711, "vcmpgtud $vD, $vA, $vB" , v2i64>; +def VCMPGTUDo : VCMPo<711, "vcmpgtud. $vD, $vA, $vB", v2i64>; + +// The cryptography instructions that do not require Category:Vector.Crypto +def VPMSUMB : VX1_Int_Ty<1032, "vpmsumb", + int_ppc_altivec_crypto_vpmsumb, v16i8>; +def VPMSUMH : VX1_Int_Ty<1096, "vpmsumh", + int_ppc_altivec_crypto_vpmsumh, v8i16>; +def VPMSUMW : VX1_Int_Ty<1160, "vpmsumw", + int_ppc_altivec_crypto_vpmsumw, v4i32>; +def VPMSUMD : VX1_Int_Ty<1224, "vpmsumd", + int_ppc_altivec_crypto_vpmsumd, v2i64>; +def VPERMXOR : VA1a_Int_Ty<45, "vpermxor", + int_ppc_altivec_crypto_vpermxor, v16i8>; + +// Vector doubleword integer pack and unpack. 
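+// Mnemonic key (a reading aid, following the ISA naming): "sd"/"ud" = signed/
+// unsigned doubleword source; "ss"/"us" = signed/unsigned saturation; "um" =
+// unsigned modulo, i.e. truncating. vupkhsw/vupklsw sign-extend the high/low
+// words of VB to doublewords.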
+def VPKSDSS : VX1_Int_Ty2<1486, "vpksdss", int_ppc_altivec_vpksdss, + v4i32, v2i64>; +def VPKSDUS : VX1_Int_Ty2<1358, "vpksdus", int_ppc_altivec_vpksdus, + v4i32, v2i64>; +def VPKUDUM : VXForm_1<1102, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vpkudum $vD, $vA, $vB", IIC_VecFP, + [(set v16i8:$vD, + (vpkudum_shuffle v16i8:$vA, v16i8:$vB))]>; +def VPKUDUS : VX1_Int_Ty2<1230, "vpkudus", int_ppc_altivec_vpkudus, + v4i32, v2i64>; +def VUPKHSW : VX2_Int_Ty2<1614, "vupkhsw", int_ppc_altivec_vupkhsw, + v2i64, v4i32>; +def VUPKLSW : VX2_Int_Ty2<1742, "vupklsw", int_ppc_altivec_vupklsw, + v2i64, v4i32>; + +// Shuffle patterns for unary and swapped (LE) vector pack modulo. +def:Pat<(vpkudum_unary_shuffle v16i8:$vA, undef), + (VPKUDUM $vA, $vA)>; +def:Pat<(vpkudum_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VPKUDUM $vB, $vA)>; + +def VGBBD : VX2_Int_Ty2<1292, "vgbbd", int_ppc_altivec_vgbbd, v16i8, v16i8>; +def VBPERMQ : VX1_Int_Ty2<1356, "vbpermq", int_ppc_altivec_vbpermq, + v2i64, v16i8>; +} // end HasP8Altivec + +// Crypto instructions (from builtins) +let Predicates = [HasP8Crypto] in { +def VSHASIGMAW : VXCR_Int_Ty<1666, "vshasigmaw", + int_ppc_altivec_crypto_vshasigmaw, v4i32>; +def VSHASIGMAD : VXCR_Int_Ty<1730, "vshasigmad", + int_ppc_altivec_crypto_vshasigmad, v2i64>; +def VCIPHER : VX1_Int_Ty<1288, "vcipher", int_ppc_altivec_crypto_vcipher, + v2i64>; +def VCIPHERLAST : VX1_Int_Ty<1289, "vcipherlast", + int_ppc_altivec_crypto_vcipherlast, v2i64>; +def VNCIPHER : VX1_Int_Ty<1352, "vncipher", + int_ppc_altivec_crypto_vncipher, v2i64>; +def VNCIPHERLAST : VX1_Int_Ty<1353, "vncipherlast", + int_ppc_altivec_crypto_vncipherlast, v2i64>; +def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>; +} // HasP8Crypto + +// The following altivec instructions were introduced in Power ISA 3.0 +def HasP9Altivec : Predicate<"PPCSubTarget->hasP9Altivec()">; +let Predicates = [HasP9Altivec] in { + +// i8 element comparisons. +def VCMPNEB : VCMP < 7, "vcmpneb $vD, $vA, $vB" , v16i8>; +def VCMPNEBo : VCMPo < 7, "vcmpneb. $vD, $vA, $vB" , v16i8>; +def VCMPNEZB : VCMP <263, "vcmpnezb $vD, $vA, $vB" , v16i8>; +def VCMPNEZBo : VCMPo<263, "vcmpnezb. $vD, $vA, $vB", v16i8>; + +// i16 element comparisons. +def VCMPNEH : VCMP < 71, "vcmpneh $vD, $vA, $vB" , v8i16>; +def VCMPNEHo : VCMPo< 71, "vcmpneh. $vD, $vA, $vB" , v8i16>; +def VCMPNEZH : VCMP <327, "vcmpnezh $vD, $vA, $vB" , v8i16>; +def VCMPNEZHo : VCMPo<327, "vcmpnezh. $vD, $vA, $vB", v8i16>; + +// i32 element comparisons. +def VCMPNEW : VCMP <135, "vcmpnew $vD, $vA, $vB" , v4i32>; +def VCMPNEWo : VCMPo<135, "vcmpnew. $vD, $vA, $vB" , v4i32>; +def VCMPNEZW : VCMP <391, "vcmpnezw $vD, $vA, $vB" , v4i32>; +def VCMPNEZWo : VCMPo<391, "vcmpnezw. $vD, $vA, $vB", v4i32>; + +// VX-Form: [PO VRT / UIM VRB XO]. 
+// We use VXForm_1 to implement it, that is, we use "VRA" (5 bit) to represent +// "/ UIM" (1 + 4 bit) +class VX1_VT5_UIM5_VB5<bits<11> xo, string opc, list<dag> pattern> + : VXForm_1<xo, (outs vrrc:$vD), (ins u4imm:$UIMM, vrrc:$vB), + !strconcat(opc, " $vD, $vB, $UIMM"), IIC_VecGeneral, pattern>; + +class VX1_RT5_RA5_VB5<bits<11> xo, string opc, list<dag> pattern> + : VXForm_1<xo, (outs g8rc:$rD), (ins g8rc:$rA, vrrc:$vB), + !strconcat(opc, " $rD, $rA, $vB"), IIC_VecGeneral, pattern>; + +// Vector Extract Unsigned +def VEXTRACTUB : VX1_VT5_UIM5_VB5<525, "vextractub", []>; +def VEXTRACTUH : VX1_VT5_UIM5_VB5<589, "vextractuh", []>; +def VEXTRACTUW : VX1_VT5_UIM5_VB5<653, "vextractuw", []>; +def VEXTRACTD : VX1_VT5_UIM5_VB5<717, "vextractd" , []>; + +// Vector Extract Unsigned Byte/Halfword/Word Left/Right-Indexed +def VEXTUBLX : VX1_RT5_RA5_VB5<1549, "vextublx", []>; +def VEXTUBRX : VX1_RT5_RA5_VB5<1805, "vextubrx", []>; +def VEXTUHLX : VX1_RT5_RA5_VB5<1613, "vextuhlx", []>; +def VEXTUHRX : VX1_RT5_RA5_VB5<1869, "vextuhrx", []>; +def VEXTUWLX : VX1_RT5_RA5_VB5<1677, "vextuwlx", []>; +def VEXTUWRX : VX1_RT5_RA5_VB5<1933, "vextuwrx", []>; + +// Vector Insert Element Instructions +def VINSERTB : VXForm_1<781, (outs vrrc:$vD), + (ins vrrc:$vDi, u4imm:$UIM, vrrc:$vB), + "vinsertb $vD, $vB, $UIM", IIC_VecGeneral, + [(set v16i8:$vD, (PPCvecinsert v16i8:$vDi, v16i8:$vB, + imm32SExt16:$UIM))]>, + RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; +def VINSERTH : VXForm_1<845, (outs vrrc:$vD), + (ins vrrc:$vDi, u4imm:$UIM, vrrc:$vB), + "vinserth $vD, $vB, $UIM", IIC_VecGeneral, + [(set v8i16:$vD, (PPCvecinsert v8i16:$vDi, v8i16:$vB, + imm32SExt16:$UIM))]>, + RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; +def VINSERTW : VX1_VT5_UIM5_VB5<909, "vinsertw", []>; +def VINSERTD : VX1_VT5_UIM5_VB5<973, "vinsertd", []>; + +class VX_VT5_EO5_VB5<bits<11> xo, bits<5> eo, string opc, list<dag> pattern> + : VXForm_RD5_XO5_RS5<xo, eo, (outs vrrc:$vD), (ins vrrc:$vB), + !strconcat(opc, " $vD, $vB"), IIC_VecGeneral, pattern>; +class VX_VT5_EO5_VB5s<bits<11> xo, bits<5> eo, string opc, list<dag> pattern> + : VXForm_RD5_XO5_RS5<xo, eo, (outs vfrc:$vD), (ins vfrc:$vB), + !strconcat(opc, " $vD, $vB"), IIC_VecGeneral, pattern>; + +// Vector Count Leading/Trailing Zero LSB. 
Result is placed into GPR[rD] +def VCLZLSBB : VXForm_RD5_XO5_RS5<1538, 0, (outs gprc:$rD), (ins vrrc:$vB), + "vclzlsbb $rD, $vB", IIC_VecGeneral, + [(set i32:$rD, (int_ppc_altivec_vclzlsbb + v16i8:$vB))]>; +def VCTZLSBB : VXForm_RD5_XO5_RS5<1538, 1, (outs gprc:$rD), (ins vrrc:$vB), + "vctzlsbb $rD, $vB", IIC_VecGeneral, + [(set i32:$rD, (int_ppc_altivec_vctzlsbb + v16i8:$vB))]>; +// Vector Count Trailing Zeros +def VCTZB : VX_VT5_EO5_VB5<1538, 28, "vctzb", + [(set v16i8:$vD, (cttz v16i8:$vB))]>; +def VCTZH : VX_VT5_EO5_VB5<1538, 29, "vctzh", + [(set v8i16:$vD, (cttz v8i16:$vB))]>; +def VCTZW : VX_VT5_EO5_VB5<1538, 30, "vctzw", + [(set v4i32:$vD, (cttz v4i32:$vB))]>; +def VCTZD : VX_VT5_EO5_VB5<1538, 31, "vctzd", + [(set v2i64:$vD, (cttz v2i64:$vB))]>; + +// Vector Extend Sign +def VEXTSB2W : VX_VT5_EO5_VB5<1538, 16, "vextsb2w", []>; +def VEXTSH2W : VX_VT5_EO5_VB5<1538, 17, "vextsh2w", []>; +def VEXTSB2D : VX_VT5_EO5_VB5<1538, 24, "vextsb2d", []>; +def VEXTSH2D : VX_VT5_EO5_VB5<1538, 25, "vextsh2d", []>; +def VEXTSW2D : VX_VT5_EO5_VB5<1538, 26, "vextsw2d", []>; +let isCodeGenOnly = 1 in { + def VEXTSB2Ws : VX_VT5_EO5_VB5s<1538, 16, "vextsb2w", []>; + def VEXTSH2Ws : VX_VT5_EO5_VB5s<1538, 17, "vextsh2w", []>; + def VEXTSB2Ds : VX_VT5_EO5_VB5s<1538, 24, "vextsb2d", []>; + def VEXTSH2Ds : VX_VT5_EO5_VB5s<1538, 25, "vextsh2d", []>; + def VEXTSW2Ds : VX_VT5_EO5_VB5s<1538, 26, "vextsw2d", []>; +} + +// Vector Integer Negate +def VNEGW : VX_VT5_EO5_VB5<1538, 6, "vnegw", + [(set v4i32:$vD, + (sub (v4i32 immAllZerosV), v4i32:$vB))]>; + +def VNEGD : VX_VT5_EO5_VB5<1538, 7, "vnegd", + [(set v2i64:$vD, + (sub (v2i64 (bitconvert (v4i32 immAllZerosV))), + v2i64:$vB))]>; + +// Vector Parity Byte +def VPRTYBW : VX_VT5_EO5_VB5<1538, 8, "vprtybw", [(set v4i32:$vD, + (int_ppc_altivec_vprtybw v4i32:$vB))]>; +def VPRTYBD : VX_VT5_EO5_VB5<1538, 9, "vprtybd", [(set v2i64:$vD, + (int_ppc_altivec_vprtybd v2i64:$vB))]>; +def VPRTYBQ : VX_VT5_EO5_VB5<1538, 10, "vprtybq", [(set v1i128:$vD, + (int_ppc_altivec_vprtybq v1i128:$vB))]>; + +// Vector (Bit) Permute (Right-indexed) +def VBPERMD : VXForm_1<1484, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vbpermd $vD, $vA, $vB", IIC_VecFP, []>; +def VPERMR : VAForm_1a<59, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC), + "vpermr $vD, $vA, $vB, $vC", IIC_VecFP, []>; + +class VX1_VT5_VA5_VB5<bits<11> xo, string opc, list<dag> pattern> + : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP, pattern>; + +// Vector Rotate Left Mask/Mask-Insert +def VRLWNM : VX1_VT5_VA5_VB5<389, "vrlwnm", + [(set v4i32:$vD, + (int_ppc_altivec_vrlwnm v4i32:$vA, + v4i32:$vB))]>; +def VRLWMI : VXForm_1<133, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vDi), + "vrlwmi $vD, $vA, $vB", IIC_VecFP, + [(set v4i32:$vD, + (int_ppc_altivec_vrlwmi v4i32:$vA, v4i32:$vB, + v4i32:$vDi))]>, + RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; +def VRLDNM : VX1_VT5_VA5_VB5<453, "vrldnm", + [(set v2i64:$vD, + (int_ppc_altivec_vrldnm v2i64:$vA, + v2i64:$vB))]>; +def VRLDMI : VXForm_1<197, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vDi), + "vrldmi $vD, $vA, $vB", IIC_VecFP, + [(set v2i64:$vD, + (int_ppc_altivec_vrldmi v2i64:$vA, v2i64:$vB, + v2i64:$vDi))]>, + RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; + +// Vector Shift Left/Right +def VSLV : VX1_VT5_VA5_VB5<1860, "vslv", + [(set v16i8 : $vD, (int_ppc_altivec_vslv v16i8 : $vA, v16i8 : $vB))]>; +def VSRV : VX1_VT5_VA5_VB5<1796, "vsrv", + [(set v16i8 : $vD, (int_ppc_altivec_vsrv v16i8 : $vA, v16i8 : 
$vB))]>; + +// Vector Multiply-by-10 (& Write Carry) Unsigned Quadword +def VMUL10UQ : VXForm_BX<513, (outs vrrc:$vD), (ins vrrc:$vA), + "vmul10uq $vD, $vA", IIC_VecFP, []>; +def VMUL10CUQ : VXForm_BX< 1, (outs vrrc:$vD), (ins vrrc:$vA), + "vmul10cuq $vD, $vA", IIC_VecFP, []>; + +// Vector Multiply-by-10 Extended (& Write Carry) Unsigned Quadword +def VMUL10EUQ : VX1_VT5_VA5_VB5<577, "vmul10euq" , []>; +def VMUL10ECUQ : VX1_VT5_VA5_VB5< 65, "vmul10ecuq", []>; + +// Decimal Integer Format Conversion Instructions + +// [PO VRT EO VRB 1 PS XO], "_o" means CR6 is set. +class VX_VT5_EO5_VB5_PS1_XO9_o<bits<5> eo, bits<9> xo, string opc, + list<dag> pattern> + : VX_RD5_EO5_RS5_PS1_XO9<eo, xo, (outs vrrc:$vD), (ins vrrc:$vB, u1imm:$PS), + !strconcat(opc, " $vD, $vB, $PS"), IIC_VecFP, pattern> { + let Defs = [CR6]; +} + +// [PO VRT EO VRB 1 / XO] +class VX_VT5_EO5_VB5_XO9_o<bits<5> eo, bits<9> xo, string opc, + list<dag> pattern> + : VX_RD5_EO5_RS5_PS1_XO9<eo, xo, (outs vrrc:$vD), (ins vrrc:$vB), + !strconcat(opc, " $vD, $vB"), IIC_VecFP, pattern> { + let Defs = [CR6]; + let PS = 0; +} + +// Decimal Convert From/to National/Zoned/Signed-QWord +def BCDCFNo : VX_VT5_EO5_VB5_PS1_XO9_o<7, 385, "bcdcfn." , []>; +def BCDCFZo : VX_VT5_EO5_VB5_PS1_XO9_o<6, 385, "bcdcfz." , []>; +def BCDCTNo : VX_VT5_EO5_VB5_XO9_o <5, 385, "bcdctn." , []>; +def BCDCTZo : VX_VT5_EO5_VB5_PS1_XO9_o<4, 385, "bcdctz." , []>; +def BCDCFSQo : VX_VT5_EO5_VB5_PS1_XO9_o<2, 385, "bcdcfsq.", []>; +def BCDCTSQo : VX_VT5_EO5_VB5_XO9_o <0, 385, "bcdctsq.", []>; + +// Decimal Copy-Sign/Set-Sign +let Defs = [CR6] in +def BCDCPSGNo : VX1_VT5_VA5_VB5<833, "bcdcpsgn.", []>; + +def BCDSETSGNo : VX_VT5_EO5_VB5_PS1_XO9_o<31, 385, "bcdsetsgn.", []>; + +// [PO VRT VRA VRB 1 PS XO], "_o" means CR6 is set. +class VX_VT5_VA5_VB5_PS1_XO9_o<bits<9> xo, string opc, list<dag> pattern> + : VX_RD5_RSp5_PS1_XO9<xo, + (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u1imm:$PS), + !strconcat(opc, " $vD, $vA, $vB, $PS"), IIC_VecFP, pattern> { + let Defs = [CR6]; +} + +// [PO VRT VRA VRB 1 / XO] +class VX_VT5_VA5_VB5_XO9_o<bits<9> xo, string opc, list<dag> pattern> + : VX_RD5_RSp5_PS1_XO9<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP, pattern> { + let Defs = [CR6]; + let PS = 0; +} + +// Decimal Shift/Unsigned-Shift/Shift-and-Round +def BCDSo : VX_VT5_VA5_VB5_PS1_XO9_o<193, "bcds." , []>; +def BCDUSo : VX_VT5_VA5_VB5_XO9_o <129, "bcdus.", []>; +def BCDSRo : VX_VT5_VA5_VB5_PS1_XO9_o<449, "bcdsr.", []>; + +// Decimal (Unsigned) Truncate +def BCDTRUNCo : VX_VT5_VA5_VB5_PS1_XO9_o<257, "bcdtrunc." 
, []>; +def BCDUTRUNCo : VX_VT5_VA5_VB5_XO9_o <321, "bcdutrunc.", []>; + +// Absolute Difference +def VABSDUB : VXForm_1<1027, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vabsdub $vD, $vA, $vB", IIC_VecGeneral, + [(set v16i8:$vD, (int_ppc_altivec_vabsdub v16i8:$vA, v16i8:$vB))]>; +def VABSDUH : VXForm_1<1091, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vabsduh $vD, $vA, $vB", IIC_VecGeneral, + [(set v8i16:$vD, (int_ppc_altivec_vabsduh v8i16:$vA, v8i16:$vB))]>; +def VABSDUW : VXForm_1<1155, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vabsduw $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (int_ppc_altivec_vabsduw v4i32:$vA, v4i32:$vB))]>; + +} // end HasP9Altivec diff --git a/capstone/suite/synctools/tablegen/PPC/PPCInstrFormats.td b/capstone/suite/synctools/tablegen/PPC/PPCInstrFormats.td new file mode 100644 index 000000000..f5f4b4634 --- /dev/null +++ b/capstone/suite/synctools/tablegen/PPC/PPCInstrFormats.td @@ -0,0 +1,2167 @@ +//===- PowerPCInstrFormats.td - PowerPC Instruction Formats --*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// +// PowerPC instruction formats + +class I<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin> + : Instruction { + field bits<32> Inst; + field bits<32> SoftFail = 0; + let Size = 4; + + bit PPC64 = 0; // Default value, override with isPPC64 + + let Namespace = "PPC"; + let Inst{0-5} = opcode; + let OutOperandList = OOL; + let InOperandList = IOL; + let AsmString = asmstr; + let Itinerary = itin; + + bits<1> PPC970_First = 0; + bits<1> PPC970_Single = 0; + bits<1> PPC970_Cracked = 0; + bits<3> PPC970_Unit = 0; + + /// These fields correspond to the fields in PPCInstrInfo.h. Any changes to + /// these must be reflected there! See comments there for what these are. + let TSFlags{0} = PPC970_First; + let TSFlags{1} = PPC970_Single; + let TSFlags{2} = PPC970_Cracked; + let TSFlags{5-3} = PPC970_Unit; + + /// Indicate that the VSX instruction is to use VSX numbering/encoding. + /// Since ISA 3.0, there are scalar instructions that use the upper + /// half of the VSX register set only. Rather than adding further complexity + /// to the register class set, the VSX registers just include the Altivec + /// registers and this flag decides the numbering to be used for them. + bits<1> UseVSXReg = 0; + let TSFlags{6} = UseVSXReg; + + // Indicate that this instruction is of type X-Form Load or Store + bits<1> XFormMemOp = 0; + let TSFlags{7} = XFormMemOp; + + // Fields used for relation models. + string BaseName = ""; + + // For cases where multiple instruction definitions really represent the + // same underlying instruction but with one definition for 64-bit arguments + // and one for 32-bit arguments, this bit breaks the degeneracy between + // the two forms and allows TableGen to generate mapping tables. 
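+  // For example, ADD4 (32-bit) and ADD8 (64-bit) describe the same underlying
+  // add; this bit distinguishes the two records so the generated mapping
+  // tables remain well-formed.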
+ bit Interpretation64Bit = 0; +} + +class PPC970_DGroup_First { bits<1> PPC970_First = 1; } +class PPC970_DGroup_Single { bits<1> PPC970_Single = 1; } +class PPC970_DGroup_Cracked { bits<1> PPC970_Cracked = 1; } +class PPC970_MicroCode; + +class PPC970_Unit_Pseudo { bits<3> PPC970_Unit = 0; } +class PPC970_Unit_FXU { bits<3> PPC970_Unit = 1; } +class PPC970_Unit_LSU { bits<3> PPC970_Unit = 2; } +class PPC970_Unit_FPU { bits<3> PPC970_Unit = 3; } +class PPC970_Unit_CRU { bits<3> PPC970_Unit = 4; } +class PPC970_Unit_VALU { bits<3> PPC970_Unit = 5; } +class PPC970_Unit_VPERM { bits<3> PPC970_Unit = 6; } +class PPC970_Unit_BRU { bits<3> PPC970_Unit = 7; } + +class UseVSXReg { bits<1> UseVSXReg = 1; } +class XFormMemOp { bits<1> XFormMemOp = 1; } + +// Two joined instructions; used to emit two adjacent instructions as one. +// The itinerary from the first instruction is used for scheduling and +// classification. +class I2<bits<6> opcode1, bits<6> opcode2, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : Instruction { + field bits<64> Inst; + field bits<64> SoftFail = 0; + let Size = 8; + + bit PPC64 = 0; // Default value, override with isPPC64 + + let Namespace = "PPC"; + let Inst{0-5} = opcode1; + let Inst{32-37} = opcode2; + let OutOperandList = OOL; + let InOperandList = IOL; + let AsmString = asmstr; + let Itinerary = itin; + + bits<1> PPC970_First = 0; + bits<1> PPC970_Single = 0; + bits<1> PPC970_Cracked = 0; + bits<3> PPC970_Unit = 0; + + /// These fields correspond to the fields in PPCInstrInfo.h. Any changes to + /// these must be reflected there! See comments there for what these are. + let TSFlags{0} = PPC970_First; + let TSFlags{1} = PPC970_Single; + let TSFlags{2} = PPC970_Cracked; + let TSFlags{5-3} = PPC970_Unit; + + // Fields used for relation models. + string BaseName = ""; + bit Interpretation64Bit = 0; +} + +// Base class for all X-Form memory instructions +class IXFormMemOp<bits<6> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + :I<opcode, OOL, IOL, asmstr, itin>, XFormMemOp; + +// 1.7.1 I-Form +class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + let Pattern = pattern; + bits<24> LI; + + let Inst{6-29} = LI; + let Inst{30} = aa; + let Inst{31} = lk; +} + +// 1.7.2 B-Form +class BForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr> + : I<opcode, OOL, IOL, asmstr, IIC_BrB> { + bits<7> BIBO; // 2 bits of BI and 5 bits of BO. 
+ bits<3> CR; + bits<14> BD; + + bits<5> BI; + let BI{0-1} = BIBO{5-6}; + let BI{2-4} = CR{0-2}; + + let Inst{6-10} = BIBO{4-0}; + let Inst{11-15} = BI; + let Inst{16-29} = BD; + let Inst{30} = aa; + let Inst{31} = lk; +} + +class BForm_1<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL, + string asmstr> + : BForm<opcode, aa, lk, OOL, IOL, asmstr> { + let BIBO{4-0} = bo; + let BIBO{6-5} = 0; + let CR = 0; +} + +class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk, + dag OOL, dag IOL, string asmstr> + : I<opcode, OOL, IOL, asmstr, IIC_BrB> { + bits<14> BD; + + let Inst{6-10} = bo; + let Inst{11-15} = bi; + let Inst{16-29} = BD; + let Inst{30} = aa; + let Inst{31} = lk; +} + +class BForm_3<bits<6> opcode, bit aa, bit lk, + dag OOL, dag IOL, string asmstr> + : I<opcode, OOL, IOL, asmstr, IIC_BrB> { + bits<5> BO; + bits<5> BI; + bits<14> BD; + + let Inst{6-10} = BO; + let Inst{11-15} = BI; + let Inst{16-29} = BD; + let Inst{30} = aa; + let Inst{31} = lk; +} + +class BForm_3_at<bits<6> opcode, bit aa, bit lk, + dag OOL, dag IOL, string asmstr> + : I<opcode, OOL, IOL, asmstr, IIC_BrB> { + bits<5> BO; + bits<2> at; + bits<5> BI; + bits<14> BD; + + let Inst{6-8} = BO{4-2}; + let Inst{9-10} = at; + let Inst{11-15} = BI; + let Inst{16-29} = BD; + let Inst{30} = aa; + let Inst{31} = lk; +} + +class BForm_4<bits<6> opcode, bits<5> bo, bit aa, bit lk, + dag OOL, dag IOL, string asmstr> + : I<opcode, OOL, IOL, asmstr, IIC_BrB> { + bits<5> BI; + bits<14> BD; + + let Inst{6-10} = bo; + let Inst{11-15} = BI; + let Inst{16-29} = BD; + let Inst{30} = aa; + let Inst{31} = lk; +} + +// 1.7.3 SC-Form +class SCForm<bits<6> opcode, bits<1> xo, + dag OOL, dag IOL, string asmstr, InstrItinClass itin, + list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<7> LEV; + + let Pattern = pattern; + + let Inst{20-26} = LEV; + let Inst{30} = xo; +} + +// 1.7.4 D-Form +class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> A; + bits<5> B; + bits<16> C; + + let Pattern = pattern; + + let Inst{6-10} = A; + let Inst{11-15} = B; + let Inst{16-31} = C; +} + +class DForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> A; + bits<21> Addr; + + let Pattern = pattern; + + let Inst{6-10} = A; + let Inst{11-15} = Addr{20-16}; // Base Reg + let Inst{16-31} = Addr{15-0}; // Displacement +} + +class DForm_1a<bits<6> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> A; + bits<16> C; + bits<5> B; + + let Pattern = pattern; + + let Inst{6-10} = A; + let Inst{11-15} = B; + let Inst{16-31} = C; +} + + +class DForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : DForm_base<opcode, OOL, IOL, asmstr, itin, pattern> { + + // Even though ADDICo does not really have an RC bit, provide + // the declaration of one here so that isDOT has something to set. 
+ bit RC = 0; +} + +class DForm_2_r0<bits<6> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> A; + bits<16> B; + + let Pattern = pattern; + + let Inst{6-10} = A; + let Inst{11-15} = 0; + let Inst{16-31} = B; +} + +class DForm_4<bits<6> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> B; + bits<5> A; + bits<16> C; + + let Pattern = pattern; + + let Inst{6-10} = A; + let Inst{11-15} = B; + let Inst{16-31} = C; +} + +class DForm_4_zero<bits<6> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : DForm_1<opcode, OOL, IOL, asmstr, itin, pattern> { + let A = 0; + let Addr = 0; +} + +class DForm_4_fixedreg_zero<bits<6> opcode, bits<5> R, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, + list<dag> pattern> + : DForm_4<opcode, OOL, IOL, asmstr, itin, pattern> { + let A = R; + let B = R; + let C = 0; +} + +class IForm_and_DForm_1<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2, + dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I2<opcode1, opcode2, OOL, IOL, asmstr, itin> { + bits<5> A; + bits<21> Addr; + + let Pattern = pattern; + bits<24> LI; + + let Inst{6-29} = LI; + let Inst{30} = aa; + let Inst{31} = lk; + + let Inst{38-42} = A; + let Inst{43-47} = Addr{20-16}; // Base Reg + let Inst{48-63} = Addr{15-0}; // Displacement +} + +// This is used to emit BL8+NOP. +class IForm_and_DForm_4_zero<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2, + dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : IForm_and_DForm_1<opcode1, aa, lk, opcode2, + OOL, IOL, asmstr, itin, pattern> { + let A = 0; + let Addr = 0; +} + +class DForm_5<bits<6> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<3> BF; + bits<1> L; + bits<5> RA; + bits<16> I; + + let Inst{6-8} = BF; + let Inst{9} = 0; + let Inst{10} = L; + let Inst{11-15} = RA; + let Inst{16-31} = I; +} + +class DForm_5_ext<bits<6> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : DForm_5<opcode, OOL, IOL, asmstr, itin> { + let L = PPC64; +} + +class DForm_6<bits<6> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : DForm_5<opcode, OOL, IOL, asmstr, itin>; + +class DForm_6_ext<bits<6> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : DForm_6<opcode, OOL, IOL, asmstr, itin> { + let L = PPC64; +} + + +// 1.7.5 DS-Form +class DSForm_1<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RST; + bits<19> DS_RA; + + let Pattern = pattern; + + let Inst{6-10} = RST; + let Inst{11-15} = DS_RA{18-14}; // Register # + let Inst{16-29} = DS_RA{13-0}; // Displacement. 
+ let Inst{30-31} = xo; +} + +// ISA V3.0B 1.6.6 DX-Form +class DXForm<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RT; + bits<16> D; + + let Pattern = pattern; + + let Inst{6-10} = RT; + let Inst{11-15} = D{5-1}; // d1 + let Inst{16-25} = D{15-6}; // d0 + let Inst{26-30} = xo; + let Inst{31} = D{0}; // d2 +} + +// DQ-Form: [PO T RA DQ TX XO] or [PO S RA DQ SX XO] +class DQ_RD6_RS5_DQ12<bits<6> opcode, bits<3> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<6> XT; + bits<17> DS_RA; + + let Pattern = pattern; + + let Inst{6-10} = XT{4-0}; + let Inst{11-15} = DS_RA{16-12}; // Register # + let Inst{16-27} = DS_RA{11-0}; // Displacement. + let Inst{28} = XT{5}; + let Inst{29-31} = xo; +} + +// 1.7.6 X-Form +class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RST; + bits<5> A; + bits<5> B; + + let Pattern = pattern; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = RST; + let Inst{11-15} = A; + let Inst{16-20} = B; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + +class XForm_base_r3xo_memOp<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, + list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern>, XFormMemOp; + +class XForm_tlb<bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> : XForm_base_r3xo<31, xo, OOL, IOL, asmstr, itin, []> { + let RST = 0; +} + +class XForm_attn<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I<opcode, OOL, IOL, asmstr, itin> { + let Inst{21-30} = xo; +} + +// This is the same as XForm_base_r3xo, but the first two operands are swapped +// when code is emitted. 
+class XForm_base_r3xo_swapped + <bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> A; + bits<5> RST; + bits<5> B; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = RST; + let Inst{11-15} = A; + let Inst{16-20} = B; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + + +class XForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern>; + +class XForm_1_memOp<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo_memOp<opcode, xo, OOL, IOL, asmstr, itin, pattern>; + +class XForm_1a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let RST = 0; +} + +class XForm_rs<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let A = 0; + let B = 0; +} + +class XForm_tlbws<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RST; + bits<5> A; + bits<1> WS; + + let Pattern = pattern; + + let Inst{6-10} = RST; + let Inst{11-15} = A; + let Inst{20} = WS; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XForm_6<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> { + let Pattern = pattern; +} + +class XForm_8<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern>; + +class XForm_8_memOp<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo_memOp<opcode, xo, OOL, IOL, asmstr, itin, pattern>; + +class XForm_10<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> { + let Pattern = pattern; +} + +class XForm_11<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> { + let B = 0; + let Pattern = pattern; +} + +class XForm_16<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<3> BF; + bits<1> L; + bits<5> RA; + bits<5> RB; + + let Inst{6-8} = BF; + let Inst{9} = 0; + let Inst{10} = L; + let Inst{11-15} = RA; + let Inst{16-20} = RB; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XForm_icbt<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<4> CT; + bits<5> RA; + bits<5> RB; + + let Inst{6} = 0; + let Inst{7-10} = CT; + let Inst{11-15} = RA; + let Inst{16-20} = RB; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XForm_sr<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RS; + bits<4> SR; + + let Inst{6-10} = RS; + let Inst{12-15} = SR; + let Inst{21-30} = xo; +} + +class XForm_mbar<bits<6> opcode, bits<10> xo, dag OOL, dag 
IOL, string asmstr, + InstrItinClass itin> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> MO; + + let Inst{6-10} = MO; + let Inst{21-30} = xo; +} + +class XForm_srin<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RS; + bits<5> RB; + + let Inst{6-10} = RS; + let Inst{16-20} = RB; + let Inst{21-30} = xo; +} + +class XForm_mtmsr<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RS; + bits<1> L; + + let Inst{6-10} = RS; + let Inst{15} = L; + let Inst{21-30} = xo; +} + +class XForm_16_ext<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : XForm_16<opcode, xo, OOL, IOL, asmstr, itin> { + let L = PPC64; +} + +class XForm_17<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<3> BF; + bits<5> FRA; + bits<5> FRB; + + let Inst{6-8} = BF; + let Inst{9-10} = 0; + let Inst{11-15} = FRA; + let Inst{16-20} = FRB; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XForm_17a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : XForm_17<opcode, xo, OOL, IOL, asmstr, itin > { + let FRA = 0; +} + +// Used for QPX +class XForm_18<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> FRT; + bits<5> FRA; + bits<5> FRB; + + let Pattern = pattern; + + let Inst{6-10} = FRT; + let Inst{11-15} = FRA; + let Inst{16-20} = FRB; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XForm_19<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_18<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let FRA = 0; +} + +class XForm_20<bits<6> opcode, bits<6> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> FRT; + bits<5> FRA; + bits<5> FRB; + bits<4> tttt; + + let Pattern = pattern; + + let Inst{6-10} = FRT; + let Inst{11-15} = FRA; + let Inst{16-20} = FRB; + let Inst{21-24} = tttt; + let Inst{25-30} = xo; + let Inst{31} = 0; +} + +class XForm_24<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + let Pattern = pattern; + let Inst{6-10} = 31; + let Inst{11-15} = 0; + let Inst{16-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XForm_24_sync<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<2> L; + + let Pattern = pattern; + let Inst{6-8} = 0; + let Inst{9-10} = L; + let Inst{11-15} = 0; + let Inst{16-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XForm_24_eieio<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : XForm_24_sync<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let L = 0; +} + +class XForm_25<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { +} + +class XForm_25_memOp<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo_memOp<opcode, xo, OOL, IOL, asmstr, itin, pattern> { +} + 
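+// XForm_26 below hard-wires the unused A field to zero, as used by
+// single-source operations (e.g. the FP negate/abs family).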
+class XForm_26<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let A = 0; +} + +class XForm_28_memOp<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo_memOp<opcode, xo, OOL, IOL, asmstr, itin, pattern> { +} + +class XForm_28<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { +} + +// This is used for MFFS, MTFSB0, MTFSB1. 42 is arbitrary; this series of +// numbers presumably relates to some document, but I haven't found it. +class XForm_42<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let Pattern = pattern; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = RST; + let Inst{11-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = RC; +} +class XForm_43<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let Pattern = pattern; + bits<5> FM; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = FM; + let Inst{11-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + +class XForm_44<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RT; + bits<3> BFA; + + let Inst{6-10} = RT; + let Inst{11-13} = BFA; + let Inst{14-15} = 0; + let Inst{16-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XForm_45<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RT; + bits<2> L; + + let Inst{6-10} = RT; + let Inst{11-13} = 0; + let Inst{14-15} = L; + let Inst{16-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class X_FRT5_XO2_XO3_XO10<bits<6> opcode, bits<2> xo1, bits<3> xo2, bits<10> xo, + dag OOL, dag IOL, string asmstr, InstrItinClass itin, + list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let Pattern = pattern; + + let Inst{6-10} = RST; + let Inst{11-12} = xo1; + let Inst{13-15} = xo2; + let Inst{16-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class X_FRT5_XO2_XO3_FRB5_XO10<bits<6> opcode, bits<2> xo1, bits<3> xo2, + bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let Pattern = pattern; + bits<5> FRB; + + let Inst{6-10} = RST; + let Inst{11-12} = xo1; + let Inst{13-15} = xo2; + let Inst{16-20} = FRB; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class X_FRT5_XO2_XO3_DRM3_XO10<bits<6> opcode, bits<2> xo1, bits<3> xo2, + bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let Pattern = pattern; + bits<3> DRM; + + let Inst{6-10} = RST; + let Inst{11-12} = xo1; + let Inst{13-15} = xo2; + let Inst{16-17} = 0; + let Inst{18-20} = DRM; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class X_FRT5_XO2_XO3_RM2_X10<bits<6> opcode, bits<2> xo1, bits<3> xo2, + bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, 
pattern> { + let Pattern = pattern; + bits<2> RM; + + let Inst{6-10} = RST; + let Inst{11-12} = xo1; + let Inst{13-15} = xo2; + let Inst{16-18} = 0; + let Inst{19-20} = RM; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + + +class XForm_0<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let RST = 0; + let A = 0; + let B = 0; +} + +class XForm_16b<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let RST = 0; + let A = 0; +} + +class XForm_htm0<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bit R; + + bit RC = 1; + + let Inst{6-9} = 0; + let Inst{10} = R; + let Inst{11-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + +class XForm_htm1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bit A; + + bit RC = 1; + + let Inst{6} = A; + let Inst{7-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + +class XForm_htm2<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bit L; + + bit RC = 0; // set by isDOT + + let Inst{7-9} = 0; + let Inst{10} = L; + let Inst{11-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + +class XForm_htm3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<3> BF; + + bit RC = 0; + + let Inst{6-8} = BF; + let Inst{9-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + +// [PO RT RA RB XO /] +class X_BF3_L1_RS5_RS5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<3> BF; + bits<1> L; + bits<5> RA; + bits<5> RB; + + let Pattern = pattern; + + let Inst{6-8} = BF; + let Inst{9} = 0; + let Inst{10} = L; + let Inst{11-15} = RA; + let Inst{16-20} = RB; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +// Same as XForm_17 but with GPR's and new naming convention +class X_BF3_RS5_RS5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<3> BF; + bits<5> RA; + bits<5> RB; + + let Pattern = pattern; + + let Inst{6-8} = BF; + let Inst{9-10} = 0; + let Inst{11-15} = RA; + let Inst{16-20} = RB; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +// e.g. 
[PO VRT XO VRB XO /] or [PO VRT XO VRB XO RO] +class X_RD5_XO5_RS5<bits<6> opcode, bits<5> xo2, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let A = xo2; +} + +class X_BF3_DCMX7_RS5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<3> BF; + bits<7> DCMX; + bits<5> VB; + + let Pattern = pattern; + + let Inst{6-8} = BF; + let Inst{9-15} = DCMX; + let Inst{16-20} = VB; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class X_RD6_IMM8<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<6> XT; + bits<8> IMM8; + + let Pattern = pattern; + + let Inst{6-10} = XT{4-0}; + let Inst{11-12} = 0; + let Inst{13-20} = IMM8; + let Inst{21-30} = xo; + let Inst{31} = XT{5}; +} + +// XForm_base_r3xo for instructions such as P9 atomics where we don't want +// to specify an SDAG pattern for matching. +class X_RD5_RS5_IM5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin> + : XForm_base_r3xo_memOp<opcode, xo, OOL, IOL, asmstr, itin, []> { +} + +class X_BF3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : XForm_17<opcode, xo, OOL, IOL, asmstr, itin> { + let FRA = 0; + let FRB = 0; +} + +// [PO /// L RA RB XO /] +class X_L1_RS5_RS5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : XForm_16<opcode, xo, OOL, IOL, asmstr, itin> { + let BF = 0; + let Pattern = pattern; + + bit RC = 0; + let Inst{31} = RC; +} + +// XX*-Form (VSX) +class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<6> XT; + bits<5> A; + bits<5> B; + + let Pattern = pattern; + + let Inst{6-10} = XT{4-0}; + let Inst{11-15} = A; + let Inst{16-20} = B; + let Inst{21-30} = xo; + let Inst{31} = XT{5}; +} + +class XX1Form_memOp<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : XX1Form<opcode, xo, OOL, IOL, asmstr, itin, pattern>, XFormMemOp; + +class XX1_RS6_RD5_XO<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : XX1Form<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let B = 0; +} + +class XX2Form<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<6> XT; + bits<6> XB; + + let Pattern = pattern; + + let Inst{6-10} = XT{4-0}; + let Inst{11-15} = 0; + let Inst{16-20} = XB{4-0}; + let Inst{21-29} = xo; + let Inst{30} = XB{5}; + let Inst{31} = XT{5}; +} + +class XX2Form_1<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<3> CR; + bits<6> XB; + + let Pattern = pattern; + + let Inst{6-8} = CR; + let Inst{9-15} = 0; + let Inst{16-20} = XB{4-0}; + let Inst{21-29} = xo; + let Inst{30} = XB{5}; + let Inst{31} = 0; +} + +class XX2Form_2<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<6> XT; + bits<6> XB; + bits<2> D; + + let Pattern = pattern; + + let Inst{6-10} = XT{4-0}; + let Inst{11-13} = 0; + let 
Inst{14-15} = D; + let Inst{16-20} = XB{4-0}; + let Inst{21-29} = xo; + let Inst{30} = XB{5}; + let Inst{31} = XT{5}; +} + +class XX2_RD6_UIM5_RS6<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<6> XT; + bits<6> XB; + bits<5> UIM5; + + let Pattern = pattern; + + let Inst{6-10} = XT{4-0}; + let Inst{11-15} = UIM5; + let Inst{16-20} = XB{4-0}; + let Inst{21-29} = xo; + let Inst{30} = XB{5}; + let Inst{31} = XT{5}; +} + +// [PO T XO B XO BX /] +class XX2_RD5_XO5_RS6<bits<6> opcode, bits<5> xo2, bits<9> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RT; + bits<6> XB; + + let Pattern = pattern; + + let Inst{6-10} = RT; + let Inst{11-15} = xo2; + let Inst{16-20} = XB{4-0}; + let Inst{21-29} = xo; + let Inst{30} = XB{5}; + let Inst{31} = 0; +} + +// [PO T XO B XO BX TX] +class XX2_RD6_XO5_RS6<bits<6> opcode, bits<5> xo2, bits<9> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<6> XT; + bits<6> XB; + + let Pattern = pattern; + + let Inst{6-10} = XT{4-0}; + let Inst{11-15} = xo2; + let Inst{16-20} = XB{4-0}; + let Inst{21-29} = xo; + let Inst{30} = XB{5}; + let Inst{31} = XT{5}; +} + +class XX2_BF3_DCMX7_RS6<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<3> BF; + bits<7> DCMX; + bits<6> XB; + + let Pattern = pattern; + + let Inst{6-8} = BF; + let Inst{9-15} = DCMX; + let Inst{16-20} = XB{4-0}; + let Inst{21-29} = xo; + let Inst{30} = XB{5}; + let Inst{31} = 0; +} + +class XX2_RD6_DCMX7_RS6<bits<6> opcode, bits<4> xo1, bits<3> xo2, + dag OOL, dag IOL, string asmstr, InstrItinClass itin, + list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<6> XT; + bits<7> DCMX; + bits<6> XB; + + let Pattern = pattern; + + let Inst{6-10} = XT{4-0}; + let Inst{11-15} = DCMX{4-0}; + let Inst{16-20} = XB{4-0}; + let Inst{21-24} = xo1; + let Inst{25} = DCMX{5}; + let Inst{26-28} = xo2; + let Inst{29} = DCMX{6}; + let Inst{30} = XB{5}; + let Inst{31} = XT{5}; +} + +class XX3Form<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<6> XT; + bits<6> XA; + bits<6> XB; + + let Pattern = pattern; + + let Inst{6-10} = XT{4-0}; + let Inst{11-15} = XA{4-0}; + let Inst{16-20} = XB{4-0}; + let Inst{21-28} = xo; + let Inst{29} = XA{5}; + let Inst{30} = XB{5}; + let Inst{31} = XT{5}; +} + +class XX3Form_Zero<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XX3Form<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let XA = XT; + let XB = XT; +} + +class XX3Form_SetZero<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XX3Form<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let XB = XT; + let XA = XT; +} + +class XX3Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<3> CR; + bits<6> XA; + bits<6> XB; + + let Pattern = pattern; + + let Inst{6-8} = CR; + let Inst{9-10} = 0; + let Inst{11-15} = XA{4-0}; + let Inst{16-20} = XB{4-0}; + let Inst{21-28} = xo; + let Inst{29} = XA{5}; + let Inst{30} = XB{5}; + let Inst{31} = 0; +} + +class XX3Form_2<bits<6> 
opcode, bits<5> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<6> XT; + bits<6> XA; + bits<6> XB; + bits<2> D; + + let Pattern = pattern; + + let Inst{6-10} = XT{4-0}; + let Inst{11-15} = XA{4-0}; + let Inst{16-20} = XB{4-0}; + let Inst{21} = 0; + let Inst{22-23} = D; + let Inst{24-28} = xo; + let Inst{29} = XA{5}; + let Inst{30} = XB{5}; + let Inst{31} = XT{5}; +} + +class XX3Form_Rc<bits<6> opcode, bits<7> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<6> XT; + bits<6> XA; + bits<6> XB; + + let Pattern = pattern; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = XT{4-0}; + let Inst{11-15} = XA{4-0}; + let Inst{16-20} = XB{4-0}; + let Inst{21} = RC; + let Inst{22-28} = xo; + let Inst{29} = XA{5}; + let Inst{30} = XB{5}; + let Inst{31} = XT{5}; +} + +class XX4Form<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<6> XT; + bits<6> XA; + bits<6> XB; + bits<6> XC; + + let Pattern = pattern; + + let Inst{6-10} = XT{4-0}; + let Inst{11-15} = XA{4-0}; + let Inst{16-20} = XB{4-0}; + let Inst{21-25} = XC{4-0}; + let Inst{26-27} = xo; + let Inst{28} = XC{5}; + let Inst{29} = XA{5}; + let Inst{30} = XB{5}; + let Inst{31} = XT{5}; +} + +// DCB_Form - Form X instruction, used for dcb* instructions. +class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<31, OOL, IOL, asmstr, itin> { + bits<5> A; + bits<5> B; + + let Pattern = pattern; + + let Inst{6-10} = immfield; + let Inst{11-15} = A; + let Inst{16-20} = B; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class DCB_Form_hint<bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<31, OOL, IOL, asmstr, itin> { + bits<5> TH; + bits<5> A; + bits<5> B; + + let Pattern = pattern; + + let Inst{6-10} = TH; + let Inst{11-15} = A; + let Inst{16-20} = B; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +// DSS_Form - Form X instruction, used for altivec dss* instructions. 
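+// The 2-bit STRM field selects one of the four data streams; T = 1 marks the
+// "all streams" variant (dssall).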
+class DSS_Form<bits<1> T, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<31, OOL, IOL, asmstr, itin> { + bits<2> STRM; + bits<5> A; + bits<5> B; + + let Pattern = pattern; + + let Inst{6} = T; + let Inst{7-8} = 0; + let Inst{9-10} = STRM; + let Inst{11-15} = A; + let Inst{16-20} = B; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +// 1.7.7 XL-Form +class XLForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> CRD; + bits<5> CRA; + bits<5> CRB; + + let Pattern = pattern; + + let Inst{6-10} = CRD; + let Inst{11-15} = CRA; + let Inst{16-20} = CRB; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XLForm_1_np<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XLForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let CRD = 0; + let CRA = 0; + let CRB = 0; +} + +class XLForm_1_gen<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XLForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + bits<5> RT; + bits<5> RB; + + let CRD = RT; + let CRA = 0; + let CRB = RB; +} + +class XLForm_1_ext<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> CRD; + + let Pattern = pattern; + + let Inst{6-10} = CRD; + let Inst{11-15} = CRD; + let Inst{16-20} = CRD; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XLForm_2<bits<6> opcode, bits<10> xo, bit lk, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> BO; + bits<5> BI; + bits<2> BH; + + let Pattern = pattern; + + let Inst{6-10} = BO; + let Inst{11-15} = BI; + let Inst{16-18} = 0; + let Inst{19-20} = BH; + let Inst{21-30} = xo; + let Inst{31} = lk; +} + +class XLForm_2_br<bits<6> opcode, bits<10> xo, bit lk, + dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> + : XLForm_2<opcode, xo, lk, OOL, IOL, asmstr, itin, pattern> { + bits<7> BIBO; // 2 bits of BI and 5 bits of BO. 
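+ // BIBO{4-0} carries BO; BIBO{5-6} supplies BI{0-1}, and BI{2-4} is filled
+ // from the CR field below.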
+ bits<3> CR; + + let BO = BIBO{4-0}; + let BI{0-1} = BIBO{5-6}; + let BI{2-4} = CR{0-2}; + let BH = 0; +} + +class XLForm_2_br2<bits<6> opcode, bits<10> xo, bits<5> bo, bit lk, + dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> + : XLForm_2<opcode, xo, lk, OOL, IOL, asmstr, itin, pattern> { + let BO = bo; + let BH = 0; +} + +class XLForm_2_ext<bits<6> opcode, bits<10> xo, bits<5> bo, bits<5> bi, bit lk, + dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> + : XLForm_2<opcode, xo, lk, OOL, IOL, asmstr, itin, pattern> { + let BO = bo; + let BI = bi; + let BH = 0; +} + +class XLForm_3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<3> BF; + bits<3> BFA; + + let Inst{6-8} = BF; + let Inst{9-10} = 0; + let Inst{11-13} = BFA; + let Inst{14-15} = 0; + let Inst{16-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XLForm_4<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<3> BF; + bit W; + bits<4> U; + + bit RC = 0; + + let Inst{6-8} = BF; + let Inst{9-10} = 0; + let Inst{11-14} = 0; + let Inst{15} = W; + let Inst{16-19} = U; + let Inst{20} = 0; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + +class XLForm_S<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<1> S; + + let Pattern = pattern; + + let Inst{6-19} = 0; + let Inst{20} = S; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XLForm_2_and_DSForm_1<bits<6> opcode1, bits<10> xo1, bit lk, + bits<6> opcode2, bits<2> xo2, + dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I2<opcode1, opcode2, OOL, IOL, asmstr, itin> { + bits<5> BO; + bits<5> BI; + bits<2> BH; + + bits<5> RST; + bits<19> DS_RA; + + let Pattern = pattern; + + let Inst{6-10} = BO; + let Inst{11-15} = BI; + let Inst{16-18} = 0; + let Inst{19-20} = BH; + let Inst{21-30} = xo1; + let Inst{31} = lk; + + let Inst{38-42} = RST; + let Inst{43-47} = DS_RA{18-14}; // Register # + let Inst{48-61} = DS_RA{13-0}; // Displacement. 
+ let Inst{62-63} = xo2; +} + +class XLForm_2_ext_and_DSForm_1<bits<6> opcode1, bits<10> xo1, + bits<5> bo, bits<5> bi, bit lk, + bits<6> opcode2, bits<2> xo2, + dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XLForm_2_and_DSForm_1<opcode1, xo1, lk, opcode2, xo2, + OOL, IOL, asmstr, itin, pattern> { + let BO = bo; + let BI = bi; + let BH = 0; +} + +// 1.7.8 XFX-Form +class XFXForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RT; + bits<10> SPR; + + let Inst{6-10} = RT; + let Inst{11} = SPR{4}; + let Inst{12} = SPR{3}; + let Inst{13} = SPR{2}; + let Inst{14} = SPR{1}; + let Inst{15} = SPR{0}; + let Inst{16} = SPR{9}; + let Inst{17} = SPR{8}; + let Inst{18} = SPR{7}; + let Inst{19} = SPR{6}; + let Inst{20} = SPR{5}; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XFXForm_1_ext<bits<6> opcode, bits<10> xo, bits<10> spr, + dag OOL, dag IOL, string asmstr, InstrItinClass itin> + : XFXForm_1<opcode, xo, OOL, IOL, asmstr, itin> { + let SPR = spr; +} + +class XFXForm_3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RT; + + let Inst{6-10} = RT; + let Inst{11-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XFXForm_3p<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RT; + bits<10> Entry; + let Pattern = pattern; + + let Inst{6-10} = RT; + let Inst{11-20} = Entry; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XFXForm_5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<8> FXM; + bits<5> rS; + + let Inst{6-10} = rS; + let Inst{11} = 0; + let Inst{12-19} = FXM; + let Inst{20} = 0; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XFXForm_5a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> ST; + bits<8> FXM; + + let Inst{6-10} = ST; + let Inst{11} = 1; + let Inst{12-19} = FXM; + let Inst{20} = 0; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XFXForm_7<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : XFXForm_1<opcode, xo, OOL, IOL, asmstr, itin>; + +class XFXForm_7_ext<bits<6> opcode, bits<10> xo, bits<10> spr, + dag OOL, dag IOL, string asmstr, InstrItinClass itin> + : XFXForm_7<opcode, xo, OOL, IOL, asmstr, itin> { + let SPR = spr; +} + +// XFL-Form - MTFSF +// This is probably 1.7.9, but I don't have the reference that uses this +// numbering scheme... 
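+// (In practice XFL-Form is used only by the mtfsf / mtfsf. instructions.)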
+class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag>pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<8> FM; + bits<5> rT; + + bit RC = 0; // set by isDOT + let Pattern = pattern; + + let Inst{6} = 0; + let Inst{7-14} = FM; + let Inst{15} = 0; + let Inst{16-20} = rT; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + +class XFLForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag>pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bit L; + bits<8> FLM; + bit W; + bits<5> FRB; + + bit RC = 0; // set by isDOT + let Pattern = pattern; + + let Inst{6} = L; + let Inst{7-14} = FLM; + let Inst{15} = W; + let Inst{16-20} = FRB; + let Inst{21-30} = xo; + let Inst{31} = RC; +} + +// 1.7.10 XS-Form - SRADI. +class XSForm_1<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> A; + bits<5> RS; + bits<6> SH; + + bit RC = 0; // set by isDOT + let Pattern = pattern; + + let Inst{6-10} = RS; + let Inst{11-15} = A; + let Inst{16-20} = SH{4,3,2,1,0}; + let Inst{21-29} = xo; + let Inst{30} = SH{5}; + let Inst{31} = RC; +} + +// 1.7.11 XO-Form +class XOForm_1<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RT; + bits<5> RA; + bits<5> RB; + + let Pattern = pattern; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = RT; + let Inst{11-15} = RA; + let Inst{16-20} = RB; + let Inst{21} = oe; + let Inst{22-30} = xo; + let Inst{31} = RC; +} + +class XOForm_3<bits<6> opcode, bits<9> xo, bit oe, + dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> + : XOForm_1<opcode, xo, oe, OOL, IOL, asmstr, itin, pattern> { + let RB = 0; +} + +// 1.7.12 A-Form +class AForm_1<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> FRT; + bits<5> FRA; + bits<5> FRC; + bits<5> FRB; + + let Pattern = pattern; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = FRT; + let Inst{11-15} = FRA; + let Inst{16-20} = FRB; + let Inst{21-25} = FRC; + let Inst{26-30} = xo; + let Inst{31} = RC; +} + +class AForm_2<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let FRC = 0; +} + +class AForm_3<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let FRB = 0; +} + +class AForm_4<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RT; + bits<5> RA; + bits<5> RB; + bits<5> COND; + + let Pattern = pattern; + + let Inst{6-10} = RT; + let Inst{11-15} = RA; + let Inst{16-20} = RB; + let Inst{21-25} = COND; + let Inst{26-30} = xo; + let Inst{31} = 0; +} + +// Used for QPX +class AForm_4a<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let FRA = 0; + let FRC = 0; +} + +// 1.7.13 M-Form +class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RA; + bits<5> RS; + bits<5> RB; + bits<5> MB; + 
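+ // MB and ME (below) select the first and last bit of the 32-bit rotate mask.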
bits<5> ME; + + let Pattern = pattern; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = RS; + let Inst{11-15} = RA; + let Inst{16-20} = RB; + let Inst{21-25} = MB; + let Inst{26-30} = ME; + let Inst{31} = RC; +} + +class MForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : MForm_1<opcode, OOL, IOL, asmstr, itin, pattern> { +} + +// 1.7.14 MD-Form +class MDForm_1<bits<6> opcode, bits<3> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RA; + bits<5> RS; + bits<6> SH; + bits<6> MBE; + + let Pattern = pattern; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = RS; + let Inst{11-15} = RA; + let Inst{16-20} = SH{4,3,2,1,0}; + let Inst{21-26} = MBE{4,3,2,1,0,5}; + let Inst{27-29} = xo; + let Inst{30} = SH{5}; + let Inst{31} = RC; +} + +class MDSForm_1<bits<6> opcode, bits<4> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RA; + bits<5> RS; + bits<5> RB; + bits<6> MBE; + + let Pattern = pattern; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = RS; + let Inst{11-15} = RA; + let Inst{16-20} = RB; + let Inst{21-26} = MBE{4,3,2,1,0,5}; + let Inst{27-30} = xo; + let Inst{31} = RC; +} + + +// E-1 VA-Form + +// VAForm_1 - DACB ordering. +class VAForm_1<bits<6> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VD; + bits<5> VA; + bits<5> VC; + bits<5> VB; + + let Pattern = pattern; + + let Inst{6-10} = VD; + let Inst{11-15} = VA; + let Inst{16-20} = VB; + let Inst{21-25} = VC; + let Inst{26-31} = xo; +} + +// VAForm_1a - DABC ordering. +class VAForm_1a<bits<6> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VD; + bits<5> VA; + bits<5> VB; + bits<5> VC; + + let Pattern = pattern; + + let Inst{6-10} = VD; + let Inst{11-15} = VA; + let Inst{16-20} = VB; + let Inst{21-25} = VC; + let Inst{26-31} = xo; +} + +class VAForm_2<bits<6> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VD; + bits<5> VA; + bits<5> VB; + bits<4> SH; + + let Pattern = pattern; + + let Inst{6-10} = VD; + let Inst{11-15} = VA; + let Inst{16-20} = VB; + let Inst{21} = 0; + let Inst{22-25} = SH; + let Inst{26-31} = xo; +} + +// E-2 VX-Form +class VXForm_1<bits<11> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VD; + bits<5> VA; + bits<5> VB; + + let Pattern = pattern; + + let Inst{6-10} = VD; + let Inst{11-15} = VA; + let Inst{16-20} = VB; + let Inst{21-31} = xo; +} + +class VXForm_setzero<bits<11> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : VXForm_1<xo, OOL, IOL, asmstr, itin, pattern> { + let VA = VD; + let VB = VD; +} + + +class VXForm_2<bits<11> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VD; + bits<5> VB; + + let Pattern = pattern; + + let Inst{6-10} = VD; + let Inst{11-15} = 0; + let Inst{16-20} = VB; + let Inst{21-31} = xo; +} + +class VXForm_3<bits<11> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VD; + bits<5> IMM; + + let Pattern = pattern; + + let Inst{6-10} = VD; + let Inst{11-15} = IMM; + let Inst{16-20} = 0; + let 
Inst{21-31} = xo; +} + +/// VXForm_4 - VX instructions with "VD,0,0" register fields, like mfvscr. +class VXForm_4<bits<11> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VD; + + let Pattern = pattern; + + let Inst{6-10} = VD; + let Inst{11-15} = 0; + let Inst{16-20} = 0; + let Inst{21-31} = xo; +} + +/// VXForm_5 - VX instructions with "0,0,VB" register fields, like mtvscr. +class VXForm_5<bits<11> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VB; + + let Pattern = pattern; + + let Inst{6-10} = 0; + let Inst{11-15} = 0; + let Inst{16-20} = VB; + let Inst{21-31} = xo; +} + +// e.g. [PO VRT EO VRB XO] +class VXForm_RD5_XO5_RS5<bits<11> xo, bits<5> eo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> RD; + bits<5> VB; + + let Pattern = pattern; + + let Inst{6-10} = RD; + let Inst{11-15} = eo; + let Inst{16-20} = VB; + let Inst{21-31} = xo; +} + +/// VXForm_CR - VX crypto instructions with "VRT, VRA, ST, SIX" +class VXForm_CR<bits<11> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VD; + bits<5> VA; + bits<1> ST; + bits<4> SIX; + + let Pattern = pattern; + + let Inst{6-10} = VD; + let Inst{11-15} = VA; + let Inst{16} = ST; + let Inst{17-20} = SIX; + let Inst{21-31} = xo; +} + +/// VXForm_BX - VX crypto instructions with "VRT, VRA, 0 - like vsbox" +class VXForm_BX<bits<11> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VD; + bits<5> VA; + + let Pattern = pattern; + + let Inst{6-10} = VD; + let Inst{11-15} = VA; + let Inst{16-20} = 0; + let Inst{21-31} = xo; +} + +// E-4 VXR-Form +class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VD; + bits<5> VA; + bits<5> VB; + bit RC = 0; + + let Pattern = pattern; + + let Inst{6-10} = VD; + let Inst{11-15} = VA; + let Inst{16-20} = VB; + let Inst{21} = RC; + let Inst{22-31} = xo; +} + +// VX-Form: [PO VRT EO VRB 1 PS XO] +class VX_RD5_EO5_RS5_PS1_XO9<bits<5> eo, bits<9> xo, + dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VD; + bits<5> VB; + bit PS; + + let Pattern = pattern; + + let Inst{6-10} = VD; + let Inst{11-15} = eo; + let Inst{16-20} = VB; + let Inst{21} = 1; + let Inst{22} = PS; + let Inst{23-31} = xo; +} + +// VX-Form: [PO VRT VRA VRB 1 PS XO] or [PO VRT VRA VRB 1 / XO] +class VX_RD5_RSp5_PS1_XO9<bits<9> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VD; + bits<5> VA; + bits<5> VB; + bit PS; + + let Pattern = pattern; + + let Inst{6-10} = VD; + let Inst{11-15} = VA; + let Inst{16-20} = VB; + let Inst{21} = 1; + let Inst{22} = PS; + let Inst{23-31} = xo; +} + +// Z23-Form (used by QPX) +class Z23Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> FRT; + bits<5> FRA; + bits<5> FRB; + bits<2> idx; + + let Pattern = pattern; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = FRT; + let Inst{11-15} = FRA; + let Inst{16-20} = FRB; + let Inst{21-22} = idx; + let Inst{23-30} = xo; + let Inst{31} = RC; +} + +class Z23Form_2<bits<6> 
opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : Z23Form_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let FRB = 0; +} + +class Z23Form_3<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> FRT; + bits<12> idx; + + let Pattern = pattern; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = FRT; + let Inst{11-22} = idx; + let Inst{23-30} = xo; + let Inst{31} = RC; +} + +class Z23Form_8<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> VRT; + bit R; + bits<5> VRB; + bits<2> idx; + + let Pattern = pattern; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = VRT; + let Inst{11-14} = 0; + let Inst{15} = R; + let Inst{16-20} = VRB; + let Inst{21-22} = idx; + let Inst{23-30} = xo; + let Inst{31} = RC; +} + +//===----------------------------------------------------------------------===// +class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern> + : I<0, OOL, IOL, asmstr, NoItinerary> { + let isCodeGenOnly = 1; + let PPC64 = 0; + let Pattern = pattern; + let Inst{31-0} = 0; + let hasNoSchedulingInfo = 1; +} + +class PseudoXFormMemOp<dag OOL, dag IOL, string asmstr, list<dag> pattern> + : Pseudo<OOL, IOL, asmstr, pattern>, XFormMemOp; + diff --git a/capstone/suite/synctools/tablegen/PPC/PPCInstrHTM.td b/capstone/suite/synctools/tablegen/PPC/PPCInstrHTM.td new file mode 100644 index 000000000..f9c4f42bd --- /dev/null +++ b/capstone/suite/synctools/tablegen/PPC/PPCInstrHTM.td @@ -0,0 +1,170 @@ +//===-- PPCInstrHTM.td - The PowerPC Hardware Transactional Memory -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hardware Transactional Memory extension to the +// PowerPC instruction set. +// +//===----------------------------------------------------------------------===// + + + +def HasHTM : Predicate<"PPCSubTarget->hasHTM()">; + +def HTM_get_imm : SDNodeXForm<imm, [{ + return getI32Imm (N->getZExtValue(), SDLoc(N)); +}]>; + +let hasSideEffects = 1, usesCustomInserter = 1 in { +def TCHECK_RET : Pseudo<(outs gprc:$out), (ins), "#TCHECK_RET", []>; +def TBEGIN_RET : PPCCustomInserterPseudo<(outs gprc:$out), (ins u1imm:$R), "#TBEGIN_RET", []>; +} + + +let Predicates = [HasHTM] in { + +let Defs = [CR0] in { +def TBEGIN : XForm_htm0 <31, 654, + (outs), (ins u1imm:$R), "tbegin. $R", IIC_SprMTSPR, []>; + +def TEND : XForm_htm1 <31, 686, + (outs), (ins u1imm:$A), "tend. $A", IIC_SprMTSPR, []>; + +def TABORT : XForm_base_r3xo <31, 910, + (outs), (ins gprc:$A), "tabort. $A", IIC_SprMTSPR, + []>, isDOT { + let RST = 0; + let B = 0; +} + +def TABORTWC : XForm_base_r3xo <31, 782, + (outs), (ins u5imm:$RTS, gprc:$A, gprc:$B), + "tabortwc. $RTS, $A, $B", IIC_SprMTSPR, []>, + isDOT; + +def TABORTWCI : XForm_base_r3xo <31, 846, + (outs), (ins u5imm:$RTS, gprc:$A, u5imm:$B), + "tabortwci. $RTS, $A, $B", IIC_SprMTSPR, []>, + isDOT; + +def TABORTDC : XForm_base_r3xo <31, 814, + (outs), (ins u5imm:$RTS, gprc:$A, gprc:$B), + "tabortdc. $RTS, $A, $B", IIC_SprMTSPR, []>, + isDOT; + +def TABORTDCI : XForm_base_r3xo <31, 878, + (outs), (ins u5imm:$RTS, gprc:$A, u5imm:$B), + "tabortdci. 
$RTS, $A, $B", IIC_SprMTSPR, []>, + isDOT; + +def TSR : XForm_htm2 <31, 750, + (outs), (ins u1imm:$L), "tsr. $L", IIC_SprMTSPR, []>, + isDOT; + +def TRECLAIM : XForm_base_r3xo <31, 942, + (outs), (ins gprc:$A), "treclaim. $A", + IIC_SprMTSPR, []>, + isDOT { + let RST = 0; + let B = 0; +} + +def TRECHKPT : XForm_base_r3xo <31, 1006, + (outs), (ins), "trechkpt.", IIC_SprMTSPR, []>, + isDOT { + let RST = 0; + let A = 0; + let B = 0; +} + +}//Defs = [CR0] + +def TCHECK : XForm_htm3 <31, 718, + (outs crrc:$BF), (ins), "tcheck $BF", IIC_SprMTSPR, []>; +// Builtins + +// All HTM instructions, with the exception of tcheck, set CR0 with the +// value of the MSR Transaction State (TS) bits that exist before the +// instruction is executed. For tbegin., the EQ bit in CR0 can be used +// to determine whether the transaction was successfully started (0) or +// failed (1). We use an XORI pattern to 'flip' the bit to match the +// tbegin builtin API which defines a return value of 1 as success. + +def : Pat<(int_ppc_tbegin i32:$R), + (XORI (TBEGIN_RET(HTM_get_imm imm:$R)), 1)>; + +def : Pat<(int_ppc_tend i32:$R), + (TEND (HTM_get_imm imm:$R))>; + +def : Pat<(int_ppc_tabort i32:$R), + (TABORT $R)>; + +def : Pat<(int_ppc_tabortwc i32:$TO, i32:$RA, i32:$RB), + (TABORTWC (HTM_get_imm imm:$TO), $RA, $RB)>; + +def : Pat<(int_ppc_tabortwci i32:$TO, i32:$RA, i32:$SI), + (TABORTWCI (HTM_get_imm imm:$TO), $RA, (HTM_get_imm imm:$SI))>; + +def : Pat<(int_ppc_tabortdc i32:$TO, i32:$RA, i32:$RB), + (TABORTDC (HTM_get_imm imm:$TO), $RA, $RB)>; + +def : Pat<(int_ppc_tabortdci i32:$TO, i32:$RA, i32:$SI), + (TABORTDCI (HTM_get_imm imm:$TO), $RA, (HTM_get_imm imm:$SI))>; + +def : Pat<(int_ppc_tcheck), + (TCHECK_RET)>; + +def : Pat<(int_ppc_treclaim i32:$RA), + (TRECLAIM $RA)>; + +def : Pat<(int_ppc_trechkpt), + (TRECHKPT)>; + +def : Pat<(int_ppc_tsr i32:$L), + (TSR (HTM_get_imm imm:$L))>; + +def : Pat<(int_ppc_get_texasr), + (MFSPR8 130)>; + +def : Pat<(int_ppc_get_texasru), + (MFSPR8 131)>; + +def : Pat<(int_ppc_get_tfhar), + (MFSPR8 128)>; + +def : Pat<(int_ppc_get_tfiar), + (MFSPR8 129)>; + + +def : Pat<(int_ppc_set_texasr i64:$V), + (MTSPR8 130, $V)>; + +def : Pat<(int_ppc_set_texasru i64:$V), + (MTSPR8 131, $V)>; + +def : Pat<(int_ppc_set_tfhar i64:$V), + (MTSPR8 128, $V)>; + +def : Pat<(int_ppc_set_tfiar i64:$V), + (MTSPR8 129, $V)>; + + +// Extended mnemonics +def : Pat<(int_ppc_tendall), + (TEND 1)>; + +def : Pat<(int_ppc_tresume), + (TSR 1)>; + +def : Pat<(int_ppc_tsuspend), + (TSR 0)>; + +def : Pat<(i64 (int_ppc_ttest)), + (RLDICL (i64 (COPY (TABORTWCI 0, ZERO, 0))), 36, 28)>; + +} // [HasHTM] diff --git a/capstone/suite/synctools/tablegen/PPC/PPCInstrInfo.td b/capstone/suite/synctools/tablegen/PPC/PPCInstrInfo.td new file mode 100644 index 000000000..863e13ed9 --- /dev/null +++ b/capstone/suite/synctools/tablegen/PPC/PPCInstrInfo.td @@ -0,0 +1,4948 @@ +//===-- PPCInstrInfo.td - The PowerPC Instruction Set ------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the subset of the 32-bit PowerPC instruction set, as used +// by the PowerPC instruction selector. 
+// +//===----------------------------------------------------------------------===// + +include "PPCInstrFormats.td" + +//===----------------------------------------------------------------------===// +// PowerPC specific type constraints. +// +def SDT_PPCstfiwx : SDTypeProfile<0, 2, [ // stfiwx + SDTCisVT<0, f64>, SDTCisPtrTy<1> +]>; +def SDT_PPClfiwx : SDTypeProfile<1, 1, [ // lfiw[az]x + SDTCisVT<0, f64>, SDTCisPtrTy<1> +]>; +def SDT_PPCLxsizx : SDTypeProfile<1, 2, [ + SDTCisVT<0, f64>, SDTCisPtrTy<1>, SDTCisPtrTy<2> +]>; +def SDT_PPCstxsix : SDTypeProfile<0, 3, [ + SDTCisVT<0, f64>, SDTCisPtrTy<1>, SDTCisPtrTy<2> +]>; +def SDT_PPCcv_fp_to_int : SDTypeProfile<1, 1, [ + SDTCisFP<0>, SDTCisFP<1> + ]>; +def SDT_PPCstore_scal_int_from_vsr : SDTypeProfile<0, 3, [ + SDTCisVT<0, f64>, SDTCisPtrTy<1>, SDTCisPtrTy<2> +]>; +def SDT_PPCVexts : SDTypeProfile<1, 2, [ + SDTCisVT<0, f64>, SDTCisVT<1, f64>, SDTCisPtrTy<2> +]>; +def SDT_PPCSExtVElems : SDTypeProfile<1, 1, [ + SDTCisVec<0>, SDTCisVec<1> +]>; + +def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>, + SDTCisVT<1, i32> ]>; +def SDT_PPCCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, + SDTCisVT<1, i32> ]>; +def SDT_PPCvperm : SDTypeProfile<1, 3, [ + SDTCisVT<3, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2> +]>; + +def SDT_PPCVecSplat : SDTypeProfile<1, 2, [ SDTCisVec<0>, + SDTCisVec<1>, SDTCisInt<2> +]>; + +def SDT_PPCVecShift : SDTypeProfile<1, 3, [ SDTCisVec<0>, + SDTCisVec<1>, SDTCisVec<2>, SDTCisPtrTy<3> +]>; + +def SDT_PPCVecInsert : SDTypeProfile<1, 3, [ SDTCisVec<0>, + SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3> +]>; + +def SDT_PPCVecReverse: SDTypeProfile<1, 1, [ SDTCisVec<0>, + SDTCisVec<1> +]>; + +def SDT_PPCxxpermdi: SDTypeProfile<1, 3, [ SDTCisVec<0>, + SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3> +]>; + +def SDT_PPCvcmp : SDTypeProfile<1, 3, [ + SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32> +]>; + +def SDT_PPCcondbr : SDTypeProfile<0, 3, [ + SDTCisVT<0, i32>, SDTCisVT<2, OtherVT> +]>; + +def SDT_PPClbrx : SDTypeProfile<1, 2, [ + SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT> +]>; +def SDT_PPCstbrx : SDTypeProfile<0, 3, [ + SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT> +]>; + +def SDT_PPCTC_ret : SDTypeProfile<0, 2, [ + SDTCisPtrTy<0>, SDTCisVT<1, i32> +]>; + +def tocentry32 : Operand<iPTR> { + let MIOperandInfo = (ops i32imm:$imm); +} + +def SDT_PPCqvfperm : SDTypeProfile<1, 3, [ + SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVec<3> +]>; +def SDT_PPCqvgpci : SDTypeProfile<1, 1, [ + SDTCisVec<0>, SDTCisInt<1> +]>; +def SDT_PPCqvaligni : SDTypeProfile<1, 3, [ + SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<3> +]>; +def SDT_PPCqvesplati : SDTypeProfile<1, 2, [ + SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2> +]>; + +def SDT_PPCqbflt : SDTypeProfile<1, 1, [ + SDTCisVec<0>, SDTCisVec<1> +]>; + +def SDT_PPCqvlfsb : SDTypeProfile<1, 1, [ + SDTCisVec<0>, SDTCisPtrTy<1> +]>; + +//===----------------------------------------------------------------------===// +// PowerPC specific DAG Nodes. 
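+// (Each PPCISD::* name below corresponds to an entry of the PPCISD enum
+// declared in PPCISelLowering.h.)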
+// + +def PPCfre : SDNode<"PPCISD::FRE", SDTFPUnaryOp, []>; +def PPCfrsqrte: SDNode<"PPCISD::FRSQRTE", SDTFPUnaryOp, []>; + +def PPCfcfid : SDNode<"PPCISD::FCFID", SDTFPUnaryOp, []>; +def PPCfcfidu : SDNode<"PPCISD::FCFIDU", SDTFPUnaryOp, []>; +def PPCfcfids : SDNode<"PPCISD::FCFIDS", SDTFPRoundOp, []>; +def PPCfcfidus: SDNode<"PPCISD::FCFIDUS", SDTFPRoundOp, []>; +def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>; +def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>; +def PPCfctiduz: SDNode<"PPCISD::FCTIDUZ",SDTFPUnaryOp, []>; +def PPCfctiwuz: SDNode<"PPCISD::FCTIWUZ",SDTFPUnaryOp, []>; + +def PPCcv_fp_to_uint_in_vsr: + SDNode<"PPCISD::FP_TO_UINT_IN_VSR", SDT_PPCcv_fp_to_int, []>; +def PPCcv_fp_to_sint_in_vsr: + SDNode<"PPCISD::FP_TO_SINT_IN_VSR", SDT_PPCcv_fp_to_int, []>; +def PPCstore_scal_int_from_vsr: + SDNode<"PPCISD::ST_VSR_SCAL_INT", SDT_PPCstore_scal_int_from_vsr, + [SDNPHasChain, SDNPMayStore]>; +def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx, + [SDNPHasChain, SDNPMayStore]>; +def PPClfiwax : SDNode<"PPCISD::LFIWAX", SDT_PPClfiwx, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def PPClfiwzx : SDNode<"PPCISD::LFIWZX", SDT_PPClfiwx, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def PPClxsizx : SDNode<"PPCISD::LXSIZX", SDT_PPCLxsizx, + [SDNPHasChain, SDNPMayLoad]>; +def PPCstxsix : SDNode<"PPCISD::STXSIX", SDT_PPCstxsix, + [SDNPHasChain, SDNPMayStore]>; +def PPCVexts : SDNode<"PPCISD::VEXTS", SDT_PPCVexts, []>; +def PPCSExtVElems : SDNode<"PPCISD::SExtVElems", SDT_PPCSExtVElems, []>; + +// Extract FPSCR (not modeled at the DAG level). +def PPCmffs : SDNode<"PPCISD::MFFS", + SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, []>; + +// Perform FADD in round-to-zero mode. +def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>; + + +def PPCfsel : SDNode<"PPCISD::FSEL", + // Type constraint for fsel. 
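+ // (The result, FRC and FRB share one floating-point type; the comparison
+ // operand FRA is always f64.)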
+ SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, + SDTCisFP<0>, SDTCisVT<1, f64>]>, []>; + +def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>; +def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>; +def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, + [SDNPMayLoad, SDNPMemOperand]>; +def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>; +def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>; + +def PPCppc32GOT : SDNode<"PPCISD::PPC32_GOT", SDTIntLeaf, []>; + +def PPCaddisGotTprelHA : SDNode<"PPCISD::ADDIS_GOT_TPREL_HA", SDTIntBinOp>; +def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp, + [SDNPMayLoad]>; +def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>; +def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>; +def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>; +def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>; +def PPCaddiTlsgdLAddr : SDNode<"PPCISD::ADDI_TLSGD_L_ADDR", + SDTypeProfile<1, 3, [ + SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>; +def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>; +def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>; +def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>; +def PPCaddiTlsldLAddr : SDNode<"PPCISD::ADDI_TLSLD_L_ADDR", + SDTypeProfile<1, 3, [ + SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>; +def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp>; +def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>; + +def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; +def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>; +def PPCvecinsert : SDNode<"PPCISD::VECINSERT", SDT_PPCVecInsert, []>; +def PPCxxreverse : SDNode<"PPCISD::XXREVERSE", SDT_PPCVecReverse, []>; +def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>; +def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>; + +def PPCqvfperm : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>; +def PPCqvgpci : SDNode<"PPCISD::QVGPCI", SDT_PPCqvgpci, []>; +def PPCqvaligni : SDNode<"PPCISD::QVALIGNI", SDT_PPCqvaligni, []>; +def PPCqvesplati : SDNode<"PPCISD::QVESPLATI", SDT_PPCqvesplati, []>; + +def PPCqbflt : SDNode<"PPCISD::QBFLT", SDT_PPCqbflt, []>; + +def PPCqvlfsb : SDNode<"PPCISD::QVLFSb", SDT_PPCqvlfsb, + [SDNPHasChain, SDNPMayLoad]>; + +def PPCcmpb : SDNode<"PPCISD::CMPB", SDTIntBinOp, []>; + +// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift +// amounts. These nodes are generated by the multi-precision shift code. +def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>; +def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>; +def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>; + +// Move 2 i64 values into a VSX register +def PPCbuild_fp128: SDNode<"PPCISD::BUILD_FP128", + SDTypeProfile<1, 2, + [SDTCisFP<0>, SDTCisSameSizeAs<1,2>, + SDTCisSameAs<1,2>]>, + []>; + +// These are target-independent nodes, but have target-specific formats. 
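+// (CALLSEQ_START and CALLSEQ_END bracket the stack adjustment made around
+// each call.)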
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart, + [SDNPHasChain, SDNPOutGlue]>; +def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeqEnd, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + +def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>; +def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; +def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; +def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; +def PPCbctrl_load_toc : SDNode<"PPCISD::BCTRL_LOAD_TOC", + SDTypeProfile<0, 1, []>, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; + +def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +def PPCeh_sjlj_setjmp : SDNode<"PPCISD::EH_SJLJ_SETJMP", + SDTypeProfile<1, 1, [SDTCisInt<0>, + SDTCisPtrTy<1>]>, + [SDNPHasChain, SDNPSideEffect]>; +def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP", + SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, + [SDNPHasChain, SDNPSideEffect]>; + +def SDT_PPCsc : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def PPCsc : SDNode<"PPCISD::SC", SDT_PPCsc, + [SDNPHasChain, SDNPSideEffect]>; + +def PPCclrbhrb : SDNode<"PPCISD::CLRBHRB", SDTNone, + [SDNPHasChain, SDNPSideEffect]>; +def PPCmfbhrbe : SDNode<"PPCISD::MFBHRBE", SDTIntBinOp, [SDNPHasChain]>; +def PPCrfebb : SDNode<"PPCISD::RFEBB", SDT_PPCsc, + [SDNPHasChain, SDNPSideEffect]>; + +def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>; +def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>; + +def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr, + [SDNPHasChain, SDNPOptInGlue]>; + +// PPC-specific atomic operations. +def PPCatomicCmpSwap_8 : + SDNode<"PPCISD::ATOMIC_CMP_SWAP_8", SDTAtomic3, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +def PPCatomicCmpSwap_16 : + SDNode<"PPCISD::ATOMIC_CMP_SWAP_16", SDTAtomic3, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx, + [SDNPHasChain, SDNPMayStore]>; + +// Instructions to set/unset CR bit 6 for SVR4 vararg calls +def PPCcr6set : SDNode<"PPCISD::CR6SET", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + +// Instructions to support dynamic alloca. +def SDTDynOp : SDTypeProfile<1, 2, []>; +def SDTDynAreaOp : SDTypeProfile<1, 1, []>; +def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>; +def PPCdynareaoffset : SDNode<"PPCISD::DYNAREAOFFSET", SDTDynAreaOp, [SDNPHasChain]>; + +//===----------------------------------------------------------------------===// +// PowerPC specific transformation functions and pattern fragments. +// + +def SHL32 : SDNodeXForm<imm, [{ + // Transformation function: 31 - imm + return getI32Imm(31 - N->getZExtValue(), SDLoc(N)); +}]>; + +def SRL32 : SDNodeXForm<imm, [{ + // Transformation function: 32 - imm + return N->getZExtValue() ? 
getI32Imm(32 - N->getZExtValue(), SDLoc(N)) + : getI32Imm(0, SDLoc(N)); +}]>; + +def LO16 : SDNodeXForm<imm, [{ + // Transformation function: get the low 16 bits. + return getI32Imm((unsigned short)N->getZExtValue(), SDLoc(N)); +}]>; + +def HI16 : SDNodeXForm<imm, [{ + // Transformation function: shift the immediate value down into the low bits. + return getI32Imm((unsigned)N->getZExtValue() >> 16, SDLoc(N)); +}]>; + +def HA16 : SDNodeXForm<imm, [{ + // Transformation function: shift the immediate value down into the low bits. + long Val = N->getZExtValue(); + return getI32Imm((Val - (signed short)Val) >> 16, SDLoc(N)); +}]>; +def MB : SDNodeXForm<imm, [{ + // Transformation function: get the start bit of a mask + unsigned mb = 0, me; + (void)isRunOfOnes((unsigned)N->getZExtValue(), mb, me); + return getI32Imm(mb, SDLoc(N)); +}]>; + +def ME : SDNodeXForm<imm, [{ + // Transformation function: get the end bit of a mask + unsigned mb, me = 0; + (void)isRunOfOnes((unsigned)N->getZExtValue(), mb, me); + return getI32Imm(me, SDLoc(N)); +}]>; +def maskimm32 : PatLeaf<(imm), [{ + // maskImm predicate - True if immediate is a run of ones. + unsigned mb, me; + if (N->getValueType(0) == MVT::i32) + return isRunOfOnes((unsigned)N->getZExtValue(), mb, me); + else + return false; +}]>; + +def imm32SExt16 : Operand<i32>, ImmLeaf<i32, [{ + // imm32SExt16 predicate - True if the i32 immediate fits in a 16-bit + // sign extended field. Used by instructions like 'addi'. + return (int32_t)Imm == (short)Imm; +}]>; +def imm64SExt16 : Operand<i64>, ImmLeaf<i64, [{ + // imm64SExt16 predicate - True if the i64 immediate fits in a 16-bit + // sign extended field. Used by instructions like 'addi'. + return (int64_t)Imm == (short)Imm; +}]>; +def immZExt16 : PatLeaf<(imm), [{ + // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended + // field. Used by instructions like 'ori'. + return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue(); +}], LO16>; +def immAnyExt8 : ImmLeaf<i32, [{ return isInt<8>(Imm) || isUInt<8>(Imm); }]>; +def immSExt5NonZero : ImmLeaf<i32, [{ return Imm && isInt<5>(Imm); }]>; + +// imm16Shifted* - These match immediates where the low 16-bits are zero. There +// are two forms: imm16ShiftedSExt and imm16ShiftedZExt. These two forms are +// identical in 32-bit mode, but in 64-bit mode, they return true if the +// immediate fits into a sign/zero extended 32-bit immediate (with the low bits +// clear). +def imm16ShiftedZExt : PatLeaf<(imm), [{ + // imm16ShiftedZExt predicate - True if only bits in the top 16-bits of the + // immediate are set. Used by instructions like 'xoris'. + return (N->getZExtValue() & ~uint64_t(0xFFFF0000)) == 0; +}], HI16>; + +def imm16ShiftedSExt : PatLeaf<(imm), [{ + // imm16ShiftedSExt predicate - True if only bits in the top 16-bits of the + // immediate are set. Used by instructions like 'addis'. Identical to + // imm16ShiftedZExt in 32-bit mode. + if (N->getZExtValue() & 0xFFFF) return false; + if (N->getValueType(0) == MVT::i32) + return true; + // For 64-bit, make sure it is sext right. + return N->getZExtValue() == (uint64_t)(int)N->getZExtValue(); +}], HI16>; + +def imm64ZExt32 : Operand<i64>, ImmLeaf<i64, [{ + // imm64ZExt32 predicate - True if the i64 immediate fits in a 32-bit + // zero extended field. + return isUInt<32>(Imm); +}]>; + +// Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require +// restricted memrix (4-aligned) constants are alignment sensitive. 
If these +// offsets are hidden behind TOC entries then the values of the lower-order +// bits cannot be checked directly. As a result, we need to also incorporate +// an alignment check into the relevant patterns. + +def aligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned4store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned4pre_store : PatFrag< + (ops node:$val, node:$base, node:$offset), + (pre_store node:$val, node:$base, node:$offset), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 4; +}]>; + +def unaligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() < 4; +}]>; +def unaligned4store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() < 4; +}]>; +def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() < 4; +}]>; + +// This is a somewhat weaker condition than actually checking for 16-byte +// alignment. It is simply checking that the displacement can be represented +// as an immediate that is a multiple of 16 (i.e. the requirements for DQ-Form +// instructions). +def quadwOffsetLoad : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return isOffsetMultipleOf(N, 16); +}]>; +def quadwOffsetStore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return isOffsetMultipleOf(N, 16); +}]>; +def nonQuadwOffsetLoad : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return !isOffsetMultipleOf(N, 16); +}]>; +def nonQuadwOffsetStore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return !isOffsetMultipleOf(N, 16); +}]>; + +//===----------------------------------------------------------------------===// +// PowerPC Flag Definitions. + +class isPPC64 { bit PPC64 = 1; } +class isDOT { bit RC = 1; } + +class RegConstraint<string C> { + string Constraints = C; +} +class NoEncode<string E> { + string DisableEncoding = E; +} + + +//===----------------------------------------------------------------------===// +// PowerPC Operand Definitions. + +// In the default PowerPC assembler syntax, registers are specified simply +// by number, so they cannot be distinguished from immediate values (without +// looking at the opcode). This means that the default operand matching logic +// for the asm parser does not work, and we need to specify custom matchers. +// Since those can only be specified with RegisterOperand classes and not +// directly on the RegisterClass, all instruction patterns used by the asm +// parser need to use a RegisterOperand (instead of a RegisterClass) for +// all their register operands. +// For this purpose, we define one RegisterOperand for each RegisterClass, +// using the same name as the class, just in lower case.
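+// For example, the GPRC register class is exposed to the asm parser as the
+// 'gprc' operand defined below.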
+ +def PPCRegGPRCAsmOperand : AsmOperandClass { + let Name = "RegGPRC"; let PredicateMethod = "isRegNumber"; +} +def gprc : RegisterOperand<GPRC> { + let ParserMatchClass = PPCRegGPRCAsmOperand; +} +def PPCRegG8RCAsmOperand : AsmOperandClass { + let Name = "RegG8RC"; let PredicateMethod = "isRegNumber"; +} +def g8rc : RegisterOperand<G8RC> { + let ParserMatchClass = PPCRegG8RCAsmOperand; +} +def PPCRegGPRCNoR0AsmOperand : AsmOperandClass { + let Name = "RegGPRCNoR0"; let PredicateMethod = "isRegNumber"; +} +def gprc_nor0 : RegisterOperand<GPRC_NOR0> { + let ParserMatchClass = PPCRegGPRCNoR0AsmOperand; +} +def PPCRegG8RCNoX0AsmOperand : AsmOperandClass { + let Name = "RegG8RCNoX0"; let PredicateMethod = "isRegNumber"; +} +def g8rc_nox0 : RegisterOperand<G8RC_NOX0> { + let ParserMatchClass = PPCRegG8RCNoX0AsmOperand; +} +def PPCRegF8RCAsmOperand : AsmOperandClass { + let Name = "RegF8RC"; let PredicateMethod = "isRegNumber"; +} +def f8rc : RegisterOperand<F8RC> { + let ParserMatchClass = PPCRegF8RCAsmOperand; +} +def PPCRegF4RCAsmOperand : AsmOperandClass { + let Name = "RegF4RC"; let PredicateMethod = "isRegNumber"; +} +def f4rc : RegisterOperand<F4RC> { + let ParserMatchClass = PPCRegF4RCAsmOperand; +} +def PPCRegVRRCAsmOperand : AsmOperandClass { + let Name = "RegVRRC"; let PredicateMethod = "isRegNumber"; +} +def vrrc : RegisterOperand<VRRC> { + let ParserMatchClass = PPCRegVRRCAsmOperand; +} +def PPCRegVFRCAsmOperand : AsmOperandClass { + let Name = "RegVFRC"; let PredicateMethod = "isRegNumber"; +} +def vfrc : RegisterOperand<VFRC> { + let ParserMatchClass = PPCRegVFRCAsmOperand; +} +def PPCRegCRBITRCAsmOperand : AsmOperandClass { + let Name = "RegCRBITRC"; let PredicateMethod = "isCRBitNumber"; +} +def crbitrc : RegisterOperand<CRBITRC> { + let ParserMatchClass = PPCRegCRBITRCAsmOperand; +} +def PPCRegCRRCAsmOperand : AsmOperandClass { + let Name = "RegCRRC"; let PredicateMethod = "isCCRegNumber"; +} +def crrc : RegisterOperand<CRRC> { + let ParserMatchClass = PPCRegCRRCAsmOperand; +} +def PPCRegSPERCAsmOperand : AsmOperandClass { + let Name = "RegSPERC"; let PredicateMethod = "isRegNumber"; +} +def sperc : RegisterOperand<SPERC> { + let ParserMatchClass = PPCRegSPERCAsmOperand; +} +def PPCRegSPE4RCAsmOperand : AsmOperandClass { + let Name = "RegSPE4RC"; let PredicateMethod = "isRegNumber"; +} +def spe4rc : RegisterOperand<SPE4RC> { + let ParserMatchClass = PPCRegSPE4RCAsmOperand; +} + +def PPCU1ImmAsmOperand : AsmOperandClass { + let Name = "U1Imm"; let PredicateMethod = "isU1Imm"; + let RenderMethod = "addImmOperands"; +} +def u1imm : Operand<i32> { + let PrintMethod = "printU1ImmOperand"; + let ParserMatchClass = PPCU1ImmAsmOperand; +} + +def PPCU2ImmAsmOperand : AsmOperandClass { + let Name = "U2Imm"; let PredicateMethod = "isU2Imm"; + let RenderMethod = "addImmOperands"; +} +def u2imm : Operand<i32> { + let PrintMethod = "printU2ImmOperand"; + let ParserMatchClass = PPCU2ImmAsmOperand; +} + +def PPCATBitsAsHintAsmOperand : AsmOperandClass { + let Name = "ATBitsAsHint"; let PredicateMethod = "isATBitsAsHint"; + let RenderMethod = "addImmOperands"; // Irrelevant, predicate always fails. 
+} +def atimm : Operand<i32> { + let PrintMethod = "printATBitsAsHint"; + let ParserMatchClass = PPCATBitsAsHintAsmOperand; +} + +def PPCU3ImmAsmOperand : AsmOperandClass { + let Name = "U3Imm"; let PredicateMethod = "isU3Imm"; + let RenderMethod = "addImmOperands"; +} +def u3imm : Operand<i32> { + let PrintMethod = "printU3ImmOperand"; + let ParserMatchClass = PPCU3ImmAsmOperand; +} + +def PPCU4ImmAsmOperand : AsmOperandClass { + let Name = "U4Imm"; let PredicateMethod = "isU4Imm"; + let RenderMethod = "addImmOperands"; +} +def u4imm : Operand<i32> { + let PrintMethod = "printU4ImmOperand"; + let ParserMatchClass = PPCU4ImmAsmOperand; +} +def PPCS5ImmAsmOperand : AsmOperandClass { + let Name = "S5Imm"; let PredicateMethod = "isS5Imm"; + let RenderMethod = "addImmOperands"; +} +def s5imm : Operand<i32> { + let PrintMethod = "printS5ImmOperand"; + let ParserMatchClass = PPCS5ImmAsmOperand; + let DecoderMethod = "decodeSImmOperand<5>"; +} +def PPCU5ImmAsmOperand : AsmOperandClass { + let Name = "U5Imm"; let PredicateMethod = "isU5Imm"; + let RenderMethod = "addImmOperands"; +} +def u5imm : Operand<i32> { + let PrintMethod = "printU5ImmOperand"; + let ParserMatchClass = PPCU5ImmAsmOperand; + let DecoderMethod = "decodeUImmOperand<5>"; +} +def PPCU6ImmAsmOperand : AsmOperandClass { + let Name = "U6Imm"; let PredicateMethod = "isU6Imm"; + let RenderMethod = "addImmOperands"; +} +def u6imm : Operand<i32> { + let PrintMethod = "printU6ImmOperand"; + let ParserMatchClass = PPCU6ImmAsmOperand; + let DecoderMethod = "decodeUImmOperand<6>"; +} +def PPCU7ImmAsmOperand : AsmOperandClass { + let Name = "U7Imm"; let PredicateMethod = "isU7Imm"; + let RenderMethod = "addImmOperands"; +} +def u7imm : Operand<i32> { + let PrintMethod = "printU7ImmOperand"; + let ParserMatchClass = PPCU7ImmAsmOperand; + let DecoderMethod = "decodeUImmOperand<7>"; +} +def PPCU8ImmAsmOperand : AsmOperandClass { + let Name = "U8Imm"; let PredicateMethod = "isU8Imm"; + let RenderMethod = "addImmOperands"; +} +def u8imm : Operand<i32> { + let PrintMethod = "printU8ImmOperand"; + let ParserMatchClass = PPCU8ImmAsmOperand; + let DecoderMethod = "decodeUImmOperand<8>"; +} +def PPCU10ImmAsmOperand : AsmOperandClass { + let Name = "U10Imm"; let PredicateMethod = "isU10Imm"; + let RenderMethod = "addImmOperands"; +} +def u10imm : Operand<i32> { + let PrintMethod = "printU10ImmOperand"; + let ParserMatchClass = PPCU10ImmAsmOperand; + let DecoderMethod = "decodeUImmOperand<10>"; +} +def PPCU12ImmAsmOperand : AsmOperandClass { + let Name = "U12Imm"; let PredicateMethod = "isU12Imm"; + let RenderMethod = "addImmOperands"; +} +def u12imm : Operand<i32> { + let PrintMethod = "printU12ImmOperand"; + let ParserMatchClass = PPCU12ImmAsmOperand; + let DecoderMethod = "decodeUImmOperand<12>"; +} +def PPCS16ImmAsmOperand : AsmOperandClass { + let Name = "S16Imm"; let PredicateMethod = "isS16Imm"; + let RenderMethod = "addS16ImmOperands"; +} +def s16imm : Operand<i32> { + let PrintMethod = "printS16ImmOperand"; + let EncoderMethod = "getImm16Encoding"; + let ParserMatchClass = PPCS16ImmAsmOperand; + let DecoderMethod = "decodeSImmOperand<16>"; +} +def PPCU16ImmAsmOperand : AsmOperandClass { + let Name = "U16Imm"; let PredicateMethod = "isU16Imm"; + let RenderMethod = "addU16ImmOperands"; +} +def u16imm : Operand<i32> { + let PrintMethod = "printU16ImmOperand"; + let EncoderMethod = "getImm16Encoding"; + let ParserMatchClass = PPCU16ImmAsmOperand; + let DecoderMethod = "decodeUImmOperand<16>"; +} +def PPCS17ImmAsmOperand : AsmOperandClass { + let 
Name = "S17Imm"; let PredicateMethod = "isS17Imm"; + let RenderMethod = "addS16ImmOperands"; +} +def s17imm : Operand<i32> { + // This operand type is used for addis/lis to allow the assembler parser + // to accept immediates in the range -65536..65535 for compatibility with + // the GNU assembler. The operand is treated as 16-bit otherwise. + let PrintMethod = "printS16ImmOperand"; + let EncoderMethod = "getImm16Encoding"; + let ParserMatchClass = PPCS17ImmAsmOperand; + let DecoderMethod = "decodeSImmOperand<16>"; +} + +def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>; + +def PPCDirectBrAsmOperand : AsmOperandClass { + let Name = "DirectBr"; let PredicateMethod = "isDirectBr"; + let RenderMethod = "addBranchTargetOperands"; +} +def directbrtarget : Operand<OtherVT> { + let PrintMethod = "printBranchOperand"; + let EncoderMethod = "getDirectBrEncoding"; + let ParserMatchClass = PPCDirectBrAsmOperand; +} +def absdirectbrtarget : Operand<OtherVT> { + let PrintMethod = "printAbsBranchOperand"; + let EncoderMethod = "getAbsDirectBrEncoding"; + let ParserMatchClass = PPCDirectBrAsmOperand; +} +def PPCCondBrAsmOperand : AsmOperandClass { + let Name = "CondBr"; let PredicateMethod = "isCondBr"; + let RenderMethod = "addBranchTargetOperands"; +} +def condbrtarget : Operand<OtherVT> { + let PrintMethod = "printBranchOperand"; + let EncoderMethod = "getCondBrEncoding"; + let ParserMatchClass = PPCCondBrAsmOperand; +} +def abscondbrtarget : Operand<OtherVT> { + let PrintMethod = "printAbsBranchOperand"; + let EncoderMethod = "getAbsCondBrEncoding"; + let ParserMatchClass = PPCCondBrAsmOperand; +} +def calltarget : Operand<iPTR> { + let PrintMethod = "printBranchOperand"; + let EncoderMethod = "getDirectBrEncoding"; + let ParserMatchClass = PPCDirectBrAsmOperand; +} +def abscalltarget : Operand<iPTR> { + let PrintMethod = "printAbsBranchOperand"; + let EncoderMethod = "getAbsDirectBrEncoding"; + let ParserMatchClass = PPCDirectBrAsmOperand; +} +def PPCCRBitMaskOperand : AsmOperandClass { + let Name = "CRBitMask"; let PredicateMethod = "isCRBitMask"; +} +def crbitm: Operand<i8> { + let PrintMethod = "printcrbitm"; + let EncoderMethod = "get_crbitm_encoding"; + let DecoderMethod = "decodeCRBitMOperand"; + let ParserMatchClass = PPCCRBitMaskOperand; +} +// Address operands +// A version of ptr_rc which excludes R0 (or X0 in 64-bit mode). +def PPCRegGxRCNoR0Operand : AsmOperandClass { + let Name = "RegGxRCNoR0"; let PredicateMethod = "isRegNumber"; +} +def ptr_rc_nor0 : Operand<iPTR>, PointerLikeRegClass<1> { + let ParserMatchClass = PPCRegGxRCNoR0Operand; +} +// A version of ptr_rc usable with the asm parser. 
+def PPCRegGxRCOperand : AsmOperandClass { + let Name = "RegGxRC"; let PredicateMethod = "isRegNumber"; +} +def ptr_rc_idx : Operand<iPTR>, PointerLikeRegClass<0> { + let ParserMatchClass = PPCRegGxRCOperand; +} + +def PPCDispRIOperand : AsmOperandClass { + let Name = "DispRI"; let PredicateMethod = "isS16Imm"; + let RenderMethod = "addS16ImmOperands"; +} +def dispRI : Operand<iPTR> { + let ParserMatchClass = PPCDispRIOperand; +} +def PPCDispRIXOperand : AsmOperandClass { + let Name = "DispRIX"; let PredicateMethod = "isS16ImmX4"; + let RenderMethod = "addImmOperands"; +} +def dispRIX : Operand<iPTR> { + let ParserMatchClass = PPCDispRIXOperand; +} +def PPCDispRIX16Operand : AsmOperandClass { + let Name = "DispRIX16"; let PredicateMethod = "isS16ImmX16"; + let RenderMethod = "addImmOperands"; +} +def dispRIX16 : Operand<iPTR> { + let ParserMatchClass = PPCDispRIX16Operand; +} +def PPCDispSPE8Operand : AsmOperandClass { + let Name = "DispSPE8"; let PredicateMethod = "isU8ImmX8"; + let RenderMethod = "addImmOperands"; +} +def dispSPE8 : Operand<iPTR> { + let ParserMatchClass = PPCDispSPE8Operand; +} +def PPCDispSPE4Operand : AsmOperandClass { + let Name = "DispSPE4"; let PredicateMethod = "isU7ImmX4"; + let RenderMethod = "addImmOperands"; +} +def dispSPE4 : Operand<iPTR> { + let ParserMatchClass = PPCDispSPE4Operand; +} +def PPCDispSPE2Operand : AsmOperandClass { + let Name = "DispSPE2"; let PredicateMethod = "isU6ImmX2"; + let RenderMethod = "addImmOperands"; +} +def dispSPE2 : Operand<iPTR> { + let ParserMatchClass = PPCDispSPE2Operand; +} + +def memri : Operand<iPTR> { + let PrintMethod = "printMemRegImm"; + let MIOperandInfo = (ops dispRI:$imm, ptr_rc_nor0:$reg); + let EncoderMethod = "getMemRIEncoding"; + let DecoderMethod = "decodeMemRIOperands"; +} +def memrr : Operand<iPTR> { + let PrintMethod = "printMemRegReg"; + let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc_idx:$offreg); +} +def memrix : Operand<iPTR> { // memri where the imm is 4-aligned. + let PrintMethod = "printMemRegImm"; + let MIOperandInfo = (ops dispRIX:$imm, ptr_rc_nor0:$reg); + let EncoderMethod = "getMemRIXEncoding"; + let DecoderMethod = "decodeMemRIXOperands"; +} +def memrix16 : Operand<iPTR> { // memri, imm is 16-aligned, 12-bit, Inst{16:27} + let PrintMethod = "printMemRegImm"; + let MIOperandInfo = (ops dispRIX16:$imm, ptr_rc_nor0:$reg); + let EncoderMethod = "getMemRIX16Encoding"; + let DecoderMethod = "decodeMemRIX16Operands"; +} +def spe8dis : Operand<iPTR> { // SPE displacement where the imm is 8-aligned. + let PrintMethod = "printMemRegImm"; + let MIOperandInfo = (ops dispSPE8:$imm, ptr_rc_nor0:$reg); + let EncoderMethod = "getSPE8DisEncoding"; + let DecoderMethod = "decodeSPE8Operands"; +} +def spe4dis : Operand<iPTR> { // SPE displacement where the imm is 4-aligned. + let PrintMethod = "printMemRegImm"; + let MIOperandInfo = (ops dispSPE4:$imm, ptr_rc_nor0:$reg); + let EncoderMethod = "getSPE4DisEncoding"; + let DecoderMethod = "decodeSPE4Operands"; +} +def spe2dis : Operand<iPTR> { // SPE displacement where the imm is 2-aligned. + let PrintMethod = "printMemRegImm"; + let MIOperandInfo = (ops dispSPE2:$imm, ptr_rc_nor0:$reg); + let EncoderMethod = "getSPE2DisEncoding"; + let DecoderMethod = "decodeSPE2Operands"; +} + +// A single-register address. This is used with the SjLj +// pseudo-instructions which translate to LD/LWZ. These instructions require +// G8RC_NOX0 registers.
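+// (The SjLj pseudos in question are the EH setjmp/longjmp expansions defined
+// later in this file.)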
+def memr : Operand<iPTR> { + let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg); +} +def PPCTLSRegOperand : AsmOperandClass { + let Name = "TLSReg"; let PredicateMethod = "isTLSReg"; + let RenderMethod = "addTLSRegOperands"; +} +def tlsreg32 : Operand<i32> { + let EncoderMethod = "getTLSRegEncoding"; + let ParserMatchClass = PPCTLSRegOperand; +} +def tlsgd32 : Operand<i32> {} +def tlscall32 : Operand<i32> { + let PrintMethod = "printTLSCall"; + let MIOperandInfo = (ops calltarget:$func, tlsgd32:$sym); + let EncoderMethod = "getTLSCallEncoding"; +} + +// PowerPC Predicate operand. +def pred : Operand<OtherVT> { + let PrintMethod = "printPredicateOperand"; + let MIOperandInfo = (ops i32imm:$bibo, crrc:$reg); +} + +// Define PowerPC specific addressing mode. +def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>; +def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>; +def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>; +def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX4", [], []>; // "std" +def iqaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX16", [], []>; // "stxv" + +// The address in a single register. This is used with the SjLj +// pseudo-instructions. +def addr : ComplexPattern<iPTR, 1, "SelectAddr",[], []>; + +/// This is just the offset part of iaddr, used for preinc. +def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>; + +//===----------------------------------------------------------------------===// +// PowerPC Instruction Predicate Definitions. +def In32BitMode : Predicate<"!PPCSubTarget->isPPC64()">; +def In64BitMode : Predicate<"PPCSubTarget->isPPC64()">; +def IsBookE : Predicate<"PPCSubTarget->isBookE()">; +def IsNotBookE : Predicate<"!PPCSubTarget->isBookE()">; +def HasOnlyMSYNC : Predicate<"PPCSubTarget->hasOnlyMSYNC()">; +def HasSYNC : Predicate<"!PPCSubTarget->hasOnlyMSYNC()">; +def IsPPC4xx : Predicate<"PPCSubTarget->isPPC4xx()">; +def IsPPC6xx : Predicate<"PPCSubTarget->isPPC6xx()">; +def IsE500 : Predicate<"PPCSubTarget->isE500()">; +def HasSPE : Predicate<"PPCSubTarget->hasSPE()">; +def HasICBT : Predicate<"PPCSubTarget->hasICBT()">; +def HasPartwordAtomics : Predicate<"PPCSubTarget->hasPartwordAtomics()">; +def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">; +def NaNsFPMath : Predicate<"!TM.Options.NoNaNsFPMath">; +def HasBPERMD : Predicate<"PPCSubTarget->hasBPERMD()">; +def HasExtDiv : Predicate<"PPCSubTarget->hasExtDiv()">; +def IsISA3_0 : Predicate<"PPCSubTarget->isISA3_0()">; +def HasFPU : Predicate<"PPCSubTarget->hasFPU()">; + +//===----------------------------------------------------------------------===// +// PowerPC Multiclass Definitions. + +multiclass XForm_6r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : XForm_6<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR0] in + def o : XForm_6<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass XForm_6rc<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + let Defs = [CARRY] in + def NAME : XForm_6<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CARRY, CR0] in + def o : XForm_6<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". 
", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass XForm_10rc<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + let Defs = [CARRY] in + def NAME : XForm_10<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CARRY, CR0] in + def o : XForm_10<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass XForm_11r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : XForm_11<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR0] in + def o : XForm_11<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass XOForm_1r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : XOForm_1<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR0] in + def o : XOForm_1<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +// Multiclass for instructions for which the non record form is not cracked +// and the record form is cracked (i.e. divw, mullw, etc.) +multiclass XOForm_1rcr<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : XOForm_1<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR0] in + def o : XOForm_1<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel, PPC970_DGroup_First, + PPC970_DGroup_Cracked; + } +} + +multiclass XOForm_1rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + let Defs = [CARRY] in + def NAME : XOForm_1<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CARRY, CR0] in + def o : XOForm_1<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass XOForm_3r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : XOForm_3<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR0] in + def o : XOForm_3<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass XOForm_3rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + let Defs = [CARRY] in + def NAME : XOForm_3<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CARRY, CR0] in + def o : XOForm_3<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(". 
", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass MForm_2r<bits<6> opcode, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : MForm_2<opcode, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR0] in + def o : MForm_2<opcode, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass MDForm_1r<bits<6> opcode, bits<3> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : MDForm_1<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR0] in + def o : MDForm_1<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass MDSForm_1r<bits<6> opcode, bits<4> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : MDSForm_1<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR0] in + def o : MDSForm_1<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass XSForm_1rc<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + let Defs = [CARRY] in + def NAME : XSForm_1<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CARRY, CR0] in + def o : XSForm_1<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass XSForm_1r<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : XSForm_1<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR0] in + def o : XSForm_1<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass XForm_26r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : XForm_26<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR1] in + def o : XForm_26<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass XForm_28r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : XForm_28<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR1] in + def o : XForm_28<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". 
", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass AForm_1r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : AForm_1<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR1] in + def o : AForm_1<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass AForm_2r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : AForm_2<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR1] in + def o : AForm_2<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass AForm_3r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : AForm_3<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR1] in + def o : AForm_3<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +//===----------------------------------------------------------------------===// +// PowerPC Instruction Definitions. + +// Pseudo-instructions: + +let hasCtrlDep = 1 in { +let Defs = [R1], Uses = [R1] in { +def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), + "#ADJCALLSTACKDOWN $amt1 $amt2", + [(callseq_start timm:$amt1, timm:$amt2)]>; +def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), + "#ADJCALLSTACKUP $amt1 $amt2", + [(callseq_end timm:$amt1, timm:$amt2)]>; +} + +def UPDATE_VRSAVE : Pseudo<(outs gprc:$rD), (ins gprc:$rS), + "UPDATE_VRSAVE $rD, $rS", []>; +} + +let Defs = [R1], Uses = [R1] in +def DYNALLOC : Pseudo<(outs gprc:$result), (ins gprc:$negsize, memri:$fpsi), "#DYNALLOC", + [(set i32:$result, + (PPCdynalloc i32:$negsize, iaddr:$fpsi))]>; +def DYNAREAOFFSET : Pseudo<(outs i32imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET", + [(set i32:$result, (PPCdynareaoffset iaddr:$fpsi))]>; + +// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after +// instruction selection into a branch sequence. +let usesCustomInserter = 1, // Expanded after instruction selection. + PPC970_Single = 1 in { + // Note that SELECT_CC_I4 and SELECT_CC_I8 use the no-r0 register classes + // because either operand might become the first operand in an isel, and + // that operand cannot be r0. 
+ def SELECT_CC_I4 : Pseudo<(outs gprc:$dst), (ins crrc:$cond, + gprc_nor0:$T, gprc_nor0:$F, + i32imm:$BROPC), "#SELECT_CC_I4", + []>; + def SELECT_CC_I8 : Pseudo<(outs g8rc:$dst), (ins crrc:$cond, + g8rc_nox0:$T, g8rc_nox0:$F, + i32imm:$BROPC), "#SELECT_CC_I8", + []>; + def SELECT_CC_F4 : Pseudo<(outs f4rc:$dst), (ins crrc:$cond, f4rc:$T, f4rc:$F, + i32imm:$BROPC), "#SELECT_CC_F4", + []>; + def SELECT_CC_F8 : Pseudo<(outs f8rc:$dst), (ins crrc:$cond, f8rc:$T, f8rc:$F, + i32imm:$BROPC), "#SELECT_CC_F8", + []>; + def SELECT_CC_F16 : Pseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F, + i32imm:$BROPC), "#SELECT_CC_F16", + []>; + def SELECT_CC_VRRC: Pseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F, + i32imm:$BROPC), "#SELECT_CC_VRRC", + []>; + + // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition + // register bit directly. + def SELECT_I4 : Pseudo<(outs gprc:$dst), (ins crbitrc:$cond, + gprc_nor0:$T, gprc_nor0:$F), "#SELECT_I4", + [(set i32:$dst, (select i1:$cond, i32:$T, i32:$F))]>; + def SELECT_I8 : Pseudo<(outs g8rc:$dst), (ins crbitrc:$cond, + g8rc_nox0:$T, g8rc_nox0:$F), "#SELECT_I8", + [(set i64:$dst, (select i1:$cond, i64:$T, i64:$F))]>; +let Predicates = [HasFPU] in { + def SELECT_F4 : Pseudo<(outs f4rc:$dst), (ins crbitrc:$cond, + f4rc:$T, f4rc:$F), "#SELECT_F4", + [(set f32:$dst, (select i1:$cond, f32:$T, f32:$F))]>; + def SELECT_F8 : Pseudo<(outs f8rc:$dst), (ins crbitrc:$cond, + f8rc:$T, f8rc:$F), "#SELECT_F8", + [(set f64:$dst, (select i1:$cond, f64:$T, f64:$F))]>; + def SELECT_F16 : Pseudo<(outs vrrc:$dst), (ins crbitrc:$cond, + vrrc:$T, vrrc:$F), "#SELECT_F16", + [(set f128:$dst, (select i1:$cond, f128:$T, f128:$F))]>; +} + def SELECT_VRRC: Pseudo<(outs vrrc:$dst), (ins crbitrc:$cond, + vrrc:$T, vrrc:$F), "#SELECT_VRRC", + [(set v4i32:$dst, + (select i1:$cond, v4i32:$T, v4i32:$F))]>; +} + +// SPILL_CR - Indicate that we're dumping the CR register, so we'll need to +// scavenge a register for it. +let mayStore = 1 in { +def SPILL_CR : Pseudo<(outs), (ins crrc:$cond, memri:$F), + "#SPILL_CR", []>; +def SPILL_CRBIT : Pseudo<(outs), (ins crbitrc:$cond, memri:$F), + "#SPILL_CRBIT", []>; +} + +// RESTORE_CR - Indicate that we're restoring the CR register (previously +// spilled), so we'll need to scavenge a register for it. 
+let mayLoad = 1 in { +def RESTORE_CR : Pseudo<(outs crrc:$cond), (ins memri:$F), + "#RESTORE_CR", []>; +def RESTORE_CRBIT : Pseudo<(outs crbitrc:$cond), (ins memri:$F), + "#RESTORE_CRBIT", []>; +} + +let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { + let isReturn = 1, Uses = [LR, RM] in + def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", IIC_BrB, + [(retflag)]>, Requires<[In32BitMode]>; + let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in { + def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB, + []>; + + let isCodeGenOnly = 1 in { + def BCCCTR : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond), + "b${cond:cc}ctr${cond:pm} ${cond:reg}", IIC_BrB, + []>; + + def BCCTR : XLForm_2_br2<19, 528, 12, 0, (outs), (ins crbitrc:$bi), + "bcctr 12, $bi, 0", IIC_BrB, []>; + def BCCTRn : XLForm_2_br2<19, 528, 4, 0, (outs), (ins crbitrc:$bi), + "bcctr 4, $bi, 0", IIC_BrB, []>; + } + } +} + +let Defs = [LR] in + def MovePCtoLR : Pseudo<(outs), (ins), "#MovePCtoLR", []>, + PPC970_Unit_BRU; +let Defs = [LR] in + def MoveGOTtoLR : Pseudo<(outs), (ins), "#MoveGOTtoLR", []>, + PPC970_Unit_BRU; + +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { + let isBarrier = 1 in { + def B : IForm<18, 0, 0, (outs), (ins directbrtarget:$dst), + "b $dst", IIC_BrB, + [(br bb:$dst)]>; + def BA : IForm<18, 1, 0, (outs), (ins absdirectbrtarget:$dst), + "ba $dst", IIC_BrB, []>; + } + + // BCC represents an arbitrary conditional branch on a predicate. + // FIXME: should be able to write a pattern for PPCcondbranch, but can't use + // a two-value operand where a dag node expects two operands. :( + let isCodeGenOnly = 1 in { + class BCC_class : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst), + "b${cond:cc}${cond:pm} ${cond:reg}, $dst" + /*[(PPCcondbranch crrc:$crS, imm:$opc, bb:$dst)]*/>; + def BCC : BCC_class; + + // The same as BCC, except that it's not a terminator. Used for introducing + // control flow dependency without creating new blocks. 
+ let isTerminator = 0 in def CTRL_DEP : BCC_class; + + def BCCA : BForm<16, 1, 0, (outs), (ins pred:$cond, abscondbrtarget:$dst), + "b${cond:cc}a${cond:pm} ${cond:reg}, $dst">; + + let isReturn = 1, Uses = [LR, RM] in + def BCCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond), + "b${cond:cc}lr${cond:pm} ${cond:reg}", IIC_BrB, []>; + } + + let isCodeGenOnly = 1 in { + let Pattern = [(brcond i1:$bi, bb:$dst)] in + def BC : BForm_4<16, 12, 0, 0, (outs), (ins crbitrc:$bi, condbrtarget:$dst), + "bc 12, $bi, $dst">; + + let Pattern = [(brcond (not i1:$bi), bb:$dst)] in + def BCn : BForm_4<16, 4, 0, 0, (outs), (ins crbitrc:$bi, condbrtarget:$dst), + "bc 4, $bi, $dst">; + + let isReturn = 1, Uses = [LR, RM] in + def BCLR : XLForm_2_br2<19, 16, 12, 0, (outs), (ins crbitrc:$bi), + "bclr 12, $bi, 0", IIC_BrB, []>; + def BCLRn : XLForm_2_br2<19, 16, 4, 0, (outs), (ins crbitrc:$bi), + "bclr 4, $bi, 0", IIC_BrB, []>; + } + + let isReturn = 1, Defs = [CTR], Uses = [CTR, LR, RM] in { + def BDZLR : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins), + "bdzlr", IIC_BrB, []>; + def BDNZLR : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins), + "bdnzlr", IIC_BrB, []>; + def BDZLRp : XLForm_2_ext<19, 16, 27, 0, 0, (outs), (ins), + "bdzlr+", IIC_BrB, []>; + def BDNZLRp: XLForm_2_ext<19, 16, 25, 0, 0, (outs), (ins), + "bdnzlr+", IIC_BrB, []>; + def BDZLRm : XLForm_2_ext<19, 16, 26, 0, 0, (outs), (ins), + "bdzlr-", IIC_BrB, []>; + def BDNZLRm: XLForm_2_ext<19, 16, 24, 0, 0, (outs), (ins), + "bdnzlr-", IIC_BrB, []>; + } + + let Defs = [CTR], Uses = [CTR] in { + def BDZ : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst), + "bdz $dst">; + def BDNZ : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst), + "bdnz $dst">; + def BDZA : BForm_1<16, 18, 1, 0, (outs), (ins abscondbrtarget:$dst), + "bdza $dst">; + def BDNZA : BForm_1<16, 16, 1, 0, (outs), (ins abscondbrtarget:$dst), + "bdnza $dst">; + def BDZp : BForm_1<16, 27, 0, 0, (outs), (ins condbrtarget:$dst), + "bdz+ $dst">; + def BDNZp: BForm_1<16, 25, 0, 0, (outs), (ins condbrtarget:$dst), + "bdnz+ $dst">; + def BDZAp : BForm_1<16, 27, 1, 0, (outs), (ins abscondbrtarget:$dst), + "bdza+ $dst">; + def BDNZAp: BForm_1<16, 25, 1, 0, (outs), (ins abscondbrtarget:$dst), + "bdnza+ $dst">; + def BDZm : BForm_1<16, 26, 0, 0, (outs), (ins condbrtarget:$dst), + "bdz- $dst">; + def BDNZm: BForm_1<16, 24, 0, 0, (outs), (ins condbrtarget:$dst), + "bdnz- $dst">; + def BDZAm : BForm_1<16, 26, 1, 0, (outs), (ins abscondbrtarget:$dst), + "bdza- $dst">; + def BDNZAm: BForm_1<16, 24, 1, 0, (outs), (ins abscondbrtarget:$dst), + "bdnza- $dst">; + } +} + +// The unconditional BCL used by the SjLj setjmp code. +let isCall = 1, hasCtrlDep = 1, isCodeGenOnly = 1, PPC970_Unit = 7 in { + let Defs = [LR], Uses = [RM] in { + def BCLalways : BForm_2<16, 20, 31, 0, 1, (outs), (ins condbrtarget:$dst), + "bcl 20, 31, $dst">; + } +} + +let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { + // Convenient aliases for call instructions + let Uses = [RM] in { + def BL : IForm<18, 0, 1, (outs), (ins calltarget:$func), + "bl $func", IIC_BrB, []>; // See Pat patterns below. 
+ def BLA : IForm<18, 1, 1, (outs), (ins abscalltarget:$func), + "bla $func", IIC_BrB, [(PPCcall (i32 imm:$func))]>; + + let isCodeGenOnly = 1 in { + def BL_TLS : IForm<18, 0, 1, (outs), (ins tlscall32:$func), + "bl $func", IIC_BrB, []>; + def BCCL : BForm<16, 0, 1, (outs), (ins pred:$cond, condbrtarget:$dst), + "b${cond:cc}l${cond:pm} ${cond:reg}, $dst">; + def BCCLA : BForm<16, 1, 1, (outs), (ins pred:$cond, abscondbrtarget:$dst), + "b${cond:cc}la${cond:pm} ${cond:reg}, $dst">; + + def BCL : BForm_4<16, 12, 0, 1, (outs), + (ins crbitrc:$bi, condbrtarget:$dst), + "bcl 12, $bi, $dst">; + def BCLn : BForm_4<16, 4, 0, 1, (outs), + (ins crbitrc:$bi, condbrtarget:$dst), + "bcl 4, $bi, $dst">; + } + } + let Uses = [CTR, RM] in { + def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins), + "bctrl", IIC_BrB, [(PPCbctrl)]>, + Requires<[In32BitMode]>; + + let isCodeGenOnly = 1 in { + def BCCCTRL : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond), + "b${cond:cc}ctrl${cond:pm} ${cond:reg}", IIC_BrB, + []>; + + def BCCTRL : XLForm_2_br2<19, 528, 12, 1, (outs), (ins crbitrc:$bi), + "bcctrl 12, $bi, 0", IIC_BrB, []>; + def BCCTRLn : XLForm_2_br2<19, 528, 4, 1, (outs), (ins crbitrc:$bi), + "bcctrl 4, $bi, 0", IIC_BrB, []>; + } + } + let Uses = [LR, RM] in { + def BLRL : XLForm_2_ext<19, 16, 20, 0, 1, (outs), (ins), + "blrl", IIC_BrB, []>; + + let isCodeGenOnly = 1 in { + def BCCLRL : XLForm_2_br<19, 16, 1, (outs), (ins pred:$cond), + "b${cond:cc}lrl${cond:pm} ${cond:reg}", IIC_BrB, + []>; + + def BCLRL : XLForm_2_br2<19, 16, 12, 1, (outs), (ins crbitrc:$bi), + "bclrl 12, $bi, 0", IIC_BrB, []>; + def BCLRLn : XLForm_2_br2<19, 16, 4, 1, (outs), (ins crbitrc:$bi), + "bclrl 4, $bi, 0", IIC_BrB, []>; + } + } + let Defs = [CTR], Uses = [CTR, RM] in { + def BDZL : BForm_1<16, 18, 0, 1, (outs), (ins condbrtarget:$dst), + "bdzl $dst">; + def BDNZL : BForm_1<16, 16, 0, 1, (outs), (ins condbrtarget:$dst), + "bdnzl $dst">; + def BDZLA : BForm_1<16, 18, 1, 1, (outs), (ins abscondbrtarget:$dst), + "bdzla $dst">; + def BDNZLA : BForm_1<16, 16, 1, 1, (outs), (ins abscondbrtarget:$dst), + "bdnzla $dst">; + def BDZLp : BForm_1<16, 27, 0, 1, (outs), (ins condbrtarget:$dst), + "bdzl+ $dst">; + def BDNZLp: BForm_1<16, 25, 0, 1, (outs), (ins condbrtarget:$dst), + "bdnzl+ $dst">; + def BDZLAp : BForm_1<16, 27, 1, 1, (outs), (ins abscondbrtarget:$dst), + "bdzla+ $dst">; + def BDNZLAp: BForm_1<16, 25, 1, 1, (outs), (ins abscondbrtarget:$dst), + "bdnzla+ $dst">; + def BDZLm : BForm_1<16, 26, 0, 1, (outs), (ins condbrtarget:$dst), + "bdzl- $dst">; + def BDNZLm: BForm_1<16, 24, 0, 1, (outs), (ins condbrtarget:$dst), + "bdnzl- $dst">; + def BDZLAm : BForm_1<16, 26, 1, 1, (outs), (ins abscondbrtarget:$dst), + "bdzla- $dst">; + def BDNZLAm: BForm_1<16, 24, 1, 1, (outs), (ins abscondbrtarget:$dst), + "bdnzla- $dst">; + } + let Defs = [CTR], Uses = [CTR, LR, RM] in { + def BDZLRL : XLForm_2_ext<19, 16, 18, 0, 1, (outs), (ins), + "bdzlrl", IIC_BrB, []>; + def BDNZLRL : XLForm_2_ext<19, 16, 16, 0, 1, (outs), (ins), + "bdnzlrl", IIC_BrB, []>; + def BDZLRLp : XLForm_2_ext<19, 16, 27, 0, 1, (outs), (ins), + "bdzlrl+", IIC_BrB, []>; + def BDNZLRLp: XLForm_2_ext<19, 16, 25, 0, 1, (outs), (ins), + "bdnzlrl+", IIC_BrB, []>; + def BDZLRLm : XLForm_2_ext<19, 16, 26, 0, 1, (outs), (ins), + "bdzlrl-", IIC_BrB, []>; + def BDNZLRLm: XLForm_2_ext<19, 16, 24, 0, 1, (outs), (ins), + "bdnzlrl-", IIC_BrB, []>; + } +} + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in +def TCRETURNdi :Pseudo< (outs), + (ins calltarget:$dst, 
i32imm:$offset), + "#TC_RETURNd $dst $offset", + []>; + + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in +def TCRETURNai :Pseudo<(outs), (ins abscalltarget:$func, i32imm:$offset), + "#TC_RETURNa $func $offset", + [(PPCtc_return (i32 imm:$func), imm:$offset)]>; + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in +def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset), + "#TC_RETURNr $dst $offset", + []>; + + +let isCodeGenOnly = 1 in { + +let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1, + isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM] in +def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB, + []>, Requires<[In32BitMode]>; + +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, + isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in +def TAILB : IForm<18, 0, 0, (outs), (ins calltarget:$dst), + "b $dst", IIC_BrB, + []>; + +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, + isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in +def TAILBA : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst), + "ba $dst", IIC_BrB, + []>; + +} + +let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { + let Defs = [CTR] in + def EH_SjLj_SetJmp32 : Pseudo<(outs gprc:$dst), (ins memr:$buf), + "#EH_SJLJ_SETJMP32", + [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>, + Requires<[In32BitMode]>; + let isTerminator = 1 in + def EH_SjLj_LongJmp32 : Pseudo<(outs), (ins memr:$buf), + "#EH_SJLJ_LONGJMP32", + [(PPCeh_sjlj_longjmp addr:$buf)]>, + Requires<[In32BitMode]>; +} + +// This pseudo is never removed from the function, as it serves as +// a terminator. Size is set to 0 to prevent the builtin assembler +// from emitting it. +let isBranch = 1, isTerminator = 1, Size = 0 in { + def EH_SjLj_Setup : Pseudo<(outs), (ins directbrtarget:$dst), + "#EH_SjLj_Setup\t$dst", []>; +} + +// System call. +let PPC970_Unit = 7 in { + def SC : SCForm<17, 1, (outs), (ins i32imm:$lev), + "sc $lev", IIC_BrB, [(PPCsc (i32 imm:$lev))]>; +} + +// Branch history rolling buffer. +def CLRBHRB : XForm_0<31, 430, (outs), (ins), "clrbhrb", IIC_BrB, + [(PPCclrbhrb)]>, + PPC970_DGroup_Single; +// The $dmy argument used for MFBHRBE is not needed; however, including +// it avoids automatic generation of PPCFastISel::fastEmit_i(), which +// interferes with necessary special handling (see PPCFastISel.cpp). +def MFBHRBE : XFXForm_3p<31, 302, (outs gprc:$rD), + (ins u10imm:$imm, u10imm:$dmy), + "mfbhrbe $rD, $imm", IIC_BrB, + [(set i32:$rD, + (PPCmfbhrbe imm:$imm, imm:$dmy))]>, + PPC970_DGroup_First; + +def RFEBB : XLForm_S<19, 146, (outs), (ins u1imm:$imm), "rfebb $imm", + IIC_BrB, [(PPCrfebb (i32 imm:$imm))]>, + PPC970_DGroup_Single; + +// DCB* instructions. 
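+// (Data cache block operations. Each takes a memrr effective address and
+// operates on the cache block containing it; dcbt/dcbtst are pure hints,
+// which is why they are modeled with hasSideEffects = 0 below.)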
+def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst), "dcba $dst", + IIC_LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>, + PPC970_DGroup_Single; +def DCBI : DCB_Form<470, 0, (outs), (ins memrr:$dst), "dcbi $dst", + IIC_LdStDCBF, [(int_ppc_dcbi xoaddr:$dst)]>, + PPC970_DGroup_Single; +def DCBST : DCB_Form<54, 0, (outs), (ins memrr:$dst), "dcbst $dst", + IIC_LdStDCBF, [(int_ppc_dcbst xoaddr:$dst)]>, + PPC970_DGroup_Single; +def DCBZ : DCB_Form<1014, 0, (outs), (ins memrr:$dst), "dcbz $dst", + IIC_LdStDCBF, [(int_ppc_dcbz xoaddr:$dst)]>, + PPC970_DGroup_Single; +def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst), "dcbzl $dst", + IIC_LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>, + PPC970_DGroup_Single; + +def DCBF : DCB_Form_hint<86, (outs), (ins u5imm:$TH, memrr:$dst), + "dcbf $dst, $TH", IIC_LdStDCBF, []>, + PPC970_DGroup_Single; + +let hasSideEffects = 0, mayLoad = 1, mayStore = 1 in { +def DCBT : DCB_Form_hint<278, (outs), (ins u5imm:$TH, memrr:$dst), + "dcbt $dst, $TH", IIC_LdStDCBF, []>, + PPC970_DGroup_Single; +def DCBTST : DCB_Form_hint<246, (outs), (ins u5imm:$TH, memrr:$dst), + "dcbtst $dst, $TH", IIC_LdStDCBF, []>, + PPC970_DGroup_Single; +} // hasSideEffects = 0 + +def ICBLC : XForm_icbt<31, 230, (outs), (ins u4imm:$CT, memrr:$src), + "icblc $CT, $src", IIC_LdStStore>, Requires<[HasICBT]>; +def ICBLQ : XForm_icbt<31, 198, (outs), (ins u4imm:$CT, memrr:$src), + "icblq. $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>; +def ICBT : XForm_icbt<31, 22, (outs), (ins u4imm:$CT, memrr:$src), + "icbt $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>; +def ICBTLS : XForm_icbt<31, 486, (outs), (ins u4imm:$CT, memrr:$src), + "icbtls $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>; + +def : Pat<(int_ppc_dcbt xoaddr:$dst), + (DCBT 0, xoaddr:$dst)>; +def : Pat<(int_ppc_dcbtst xoaddr:$dst), + (DCBTST 0, xoaddr:$dst)>; +def : Pat<(int_ppc_dcbf xoaddr:$dst), + (DCBF 0, xoaddr:$dst)>; + +def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)), + (DCBT 0, xoaddr:$dst)>; // data prefetch for loads +def : Pat<(prefetch xoaddr:$dst, (i32 1), imm, (i32 1)), + (DCBTST 0, xoaddr:$dst)>; // data prefetch for stores +def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 0)), + (ICBT 0, xoaddr:$dst)>, Requires<[HasICBT]>; // inst prefetch (for read) + +// Atomic operations +// FIXME: some of these might be used with constant operands. This will result +// in constant materialization instructions that may be redundant. We currently +// clean this up in PPCMIPeephole with calls to +// PPCInstrInfo::convertToImmediateForm() but we should probably not emit them +// in the first place. 
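+// With usesCustomInserter set, each pseudo below is expanded after selection
+// into a load-reserved/store-conditional retry loop; roughly, for the
+// word-sized add (sketch only; the byte/halfword forms shift and mask the
+// operand within its containing word):
+//   loop:
+//     lwarx  rD, 0, rPtr
+//     add    rTmp, rD, rIncr
+//     stwcx. rTmp, 0, rPtr
+//     bne-   loop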
+let usesCustomInserter = 1 in {
+  let Defs = [CR0] in {
+    def ATOMIC_LOAD_ADD_I8 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I8",
+      [(set i32:$dst, (atomic_load_add_8 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_SUB_I8 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I8",
+      [(set i32:$dst, (atomic_load_sub_8 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_AND_I8 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I8",
+      [(set i32:$dst, (atomic_load_and_8 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_OR_I8 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I8",
+      [(set i32:$dst, (atomic_load_or_8 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_XOR_I8 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I8",
+      [(set i32:$dst, (atomic_load_xor_8 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_NAND_I8 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I8",
+      [(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_MIN_I8 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I8",
+      [(set i32:$dst, (atomic_load_min_8 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_MAX_I8 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I8",
+      [(set i32:$dst, (atomic_load_max_8 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_UMIN_I8 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I8",
+      [(set i32:$dst, (atomic_load_umin_8 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_UMAX_I8 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I8",
+      [(set i32:$dst, (atomic_load_umax_8 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_ADD_I16 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I16",
+      [(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_SUB_I16 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I16",
+      [(set i32:$dst, (atomic_load_sub_16 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_AND_I16 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I16",
+      [(set i32:$dst, (atomic_load_and_16 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_OR_I16 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I16",
+      [(set i32:$dst, (atomic_load_or_16 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_XOR_I16 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I16",
+      [(set i32:$dst, (atomic_load_xor_16 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_NAND_I16 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I16",
+      [(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_MIN_I16 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I16",
+      [(set i32:$dst, (atomic_load_min_16 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_MAX_I16 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I16",
+      [(set i32:$dst, (atomic_load_max_16 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_UMIN_I16 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I16",
+      [(set i32:$dst, (atomic_load_umin_16 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_UMAX_I16 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I16",
+      [(set i32:$dst, (atomic_load_umax_16 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_ADD_I32 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I32",
+      [(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_SUB_I32 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I32",
+      [(set i32:$dst, (atomic_load_sub_32 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_AND_I32 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I32",
+      [(set i32:$dst, (atomic_load_and_32 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_OR_I32 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I32",
+      [(set i32:$dst, (atomic_load_or_32 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_XOR_I32 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I32",
+      [(set i32:$dst, (atomic_load_xor_32 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_NAND_I32 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I32",
+      [(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_MIN_I32 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I32",
+      [(set i32:$dst, (atomic_load_min_32 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_MAX_I32 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I32",
+      [(set i32:$dst, (atomic_load_max_32 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_UMIN_I32 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I32",
+      [(set i32:$dst, (atomic_load_umin_32 xoaddr:$ptr, i32:$incr))]>;
+    def ATOMIC_LOAD_UMAX_I32 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I32",
+      [(set i32:$dst, (atomic_load_umax_32 xoaddr:$ptr, i32:$incr))]>;
+
+    def ATOMIC_CMP_SWAP_I8 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I8",
+      [(set i32:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, i32:$old, i32:$new))]>;
+    def ATOMIC_CMP_SWAP_I16 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new",
+      [(set i32:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, i32:$old, i32:$new))]>;
+    def ATOMIC_CMP_SWAP_I32 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new",
+      [(set i32:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, i32:$old, i32:$new))]>;
+
+    def ATOMIC_SWAP_I8 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I8",
+      [(set i32:$dst, (atomic_swap_8 xoaddr:$ptr, i32:$new))]>;
+    def ATOMIC_SWAP_I16 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I16",
+      [(set i32:$dst, (atomic_swap_16 xoaddr:$ptr, i32:$new))]>;
+    def ATOMIC_SWAP_I32 : Pseudo<
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I32",
+      [(set i32:$dst, (atomic_swap_32 xoaddr:$ptr, i32:$new))]>;
+  }
+}
+
+def : Pat<(PPCatomicCmpSwap_8 xoaddr:$ptr, i32:$old, i32:$new),
+          (ATOMIC_CMP_SWAP_I8 xoaddr:$ptr, i32:$old, i32:$new)>;
+def : Pat<(PPCatomicCmpSwap_16 xoaddr:$ptr, i32:$old, i32:$new),
+          (ATOMIC_CMP_SWAP_I16 xoaddr:$ptr, i32:$old, i32:$new)>;
+
+// Instructions to support atomic operations
+let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
+def LBARX : XForm_1_memOp<31, 52, (outs gprc:$rD), (ins memrr:$src),
+                          "lbarx $rD, $src", IIC_LdStLWARX, []>,
+            Requires<[HasPartwordAtomics]>;
+
+def LHARX : XForm_1_memOp<31, 116, (outs gprc:$rD), (ins memrr:$src),
+                          "lharx $rD, $src", IIC_LdStLWARX, []>,
+            Requires<[HasPartwordAtomics]>;
+
+def LWARX
: XForm_1_memOp<31, 20, (outs gprc:$rD), (ins memrr:$src), + "lwarx $rD, $src", IIC_LdStLWARX, []>; + +// Instructions to support lock versions of atomics +// (EH=1 - see Power ISA 2.07 Book II 4.4.2) +def LBARXL : XForm_1_memOp<31, 52, (outs gprc:$rD), (ins memrr:$src), + "lbarx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT, + Requires<[HasPartwordAtomics]>; + +def LHARXL : XForm_1_memOp<31, 116, (outs gprc:$rD), (ins memrr:$src), + "lharx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT, + Requires<[HasPartwordAtomics]>; + +def LWARXL : XForm_1_memOp<31, 20, (outs gprc:$rD), (ins memrr:$src), + "lwarx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT; + +// The atomic instructions use the destination register as well as the next one +// or two registers in order (modulo 31). +let hasExtraSrcRegAllocReq = 1 in +def LWAT : X_RD5_RS5_IM5<31, 582, (outs gprc:$rD), (ins gprc:$rA, u5imm:$FC), + "lwat $rD, $rA, $FC", IIC_LdStLoad>, + Requires<[IsISA3_0]>; +} + +let Defs = [CR0], mayStore = 1, mayLoad = 0, hasSideEffects = 0 in { +def STBCX : XForm_1_memOp<31, 694, (outs), (ins gprc:$rS, memrr:$dst), + "stbcx. $rS, $dst", IIC_LdStSTWCX, []>, + isDOT, Requires<[HasPartwordAtomics]>; + +def STHCX : XForm_1_memOp<31, 726, (outs), (ins gprc:$rS, memrr:$dst), + "sthcx. $rS, $dst", IIC_LdStSTWCX, []>, + isDOT, Requires<[HasPartwordAtomics]>; + +def STWCX : XForm_1_memOp<31, 150, (outs), (ins gprc:$rS, memrr:$dst), + "stwcx. $rS, $dst", IIC_LdStSTWCX, []>, isDOT; +} + +let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in +def STWAT : X_RD5_RS5_IM5<31, 710, (outs), (ins gprc:$rS, gprc:$rA, u5imm:$FC), + "stwat $rS, $rA, $FC", IIC_LdStStore>, + Requires<[IsISA3_0]>; + +let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in +def TRAP : XForm_24<31, 4, (outs), (ins), "trap", IIC_LdStLoad, [(trap)]>; + +def TWI : DForm_base<3, (outs), (ins u5imm:$to, gprc:$rA, s16imm:$imm), + "twi $to, $rA, $imm", IIC_IntTrapW, []>; +def TW : XForm_1<31, 4, (outs), (ins u5imm:$to, gprc:$rA, gprc:$rB), + "tw $to, $rA, $rB", IIC_IntTrapW, []>; +def TDI : DForm_base<2, (outs), (ins u5imm:$to, g8rc:$rA, s16imm:$imm), + "tdi $to, $rA, $imm", IIC_IntTrapD, []>; +def TD : XForm_1<31, 68, (outs), (ins u5imm:$to, g8rc:$rA, g8rc:$rB), + "td $to, $rA, $rB", IIC_IntTrapD, []>; + +//===----------------------------------------------------------------------===// +// PPC32 Load Instructions. +// + +// Unindexed (r+i) Loads. +let PPC970_Unit = 2 in { +def LBZ : DForm_1<34, (outs gprc:$rD), (ins memri:$src), + "lbz $rD, $src", IIC_LdStLoad, + [(set i32:$rD, (zextloadi8 iaddr:$src))]>; +def LHA : DForm_1<42, (outs gprc:$rD), (ins memri:$src), + "lha $rD, $src", IIC_LdStLHA, + [(set i32:$rD, (sextloadi16 iaddr:$src))]>, + PPC970_DGroup_Cracked; +def LHZ : DForm_1<40, (outs gprc:$rD), (ins memri:$src), + "lhz $rD, $src", IIC_LdStLoad, + [(set i32:$rD, (zextloadi16 iaddr:$src))]>; +def LWZ : DForm_1<32, (outs gprc:$rD), (ins memri:$src), + "lwz $rD, $src", IIC_LdStLoad, + [(set i32:$rD, (load iaddr:$src))]>; + +let Predicates = [HasFPU] in { +def LFS : DForm_1<48, (outs f4rc:$rD), (ins memri:$src), + "lfs $rD, $src", IIC_LdStLFD, + [(set f32:$rD, (load iaddr:$src))]>; +def LFD : DForm_1<50, (outs f8rc:$rD), (ins memri:$src), + "lfd $rD, $src", IIC_LdStLFD, + [(set f64:$rD, (load iaddr:$src))]>; +} + + +// Unindexed (r+i) Loads with Update (preinc). 
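+// (Each update form also produces the new effective address in $ea_result,
+// tied to the base register of the address operand via RegConstraint and
+// left out of the encoding via NoEncode.)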
+let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in { +def LBZU : DForm_1<35, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), + "lbzu $rD, $addr", IIC_LdStLoadUpd, + []>, RegConstraint<"$addr.reg = $ea_result">, + NoEncode<"$ea_result">; + +def LHAU : DForm_1<43, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), + "lhau $rD, $addr", IIC_LdStLHAU, + []>, RegConstraint<"$addr.reg = $ea_result">, + NoEncode<"$ea_result">; + +def LHZU : DForm_1<41, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), + "lhzu $rD, $addr", IIC_LdStLoadUpd, + []>, RegConstraint<"$addr.reg = $ea_result">, + NoEncode<"$ea_result">; + +def LWZU : DForm_1<33, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), + "lwzu $rD, $addr", IIC_LdStLoadUpd, + []>, RegConstraint<"$addr.reg = $ea_result">, + NoEncode<"$ea_result">; + +let Predicates = [HasFPU] in { +def LFSU : DForm_1<49, (outs f4rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), + "lfsu $rD, $addr", IIC_LdStLFDU, + []>, RegConstraint<"$addr.reg = $ea_result">, + NoEncode<"$ea_result">; + +def LFDU : DForm_1<51, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), + "lfdu $rD, $addr", IIC_LdStLFDU, + []>, RegConstraint<"$addr.reg = $ea_result">, + NoEncode<"$ea_result">; +} + + +// Indexed (r+r) Loads with Update (preinc). +def LBZUX : XForm_1_memOp<31, 119, (outs gprc:$rD, ptr_rc_nor0:$ea_result), + (ins memrr:$addr), + "lbzux $rD, $addr", IIC_LdStLoadUpdX, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, + NoEncode<"$ea_result">; + +def LHAUX : XForm_1_memOp<31, 375, (outs gprc:$rD, ptr_rc_nor0:$ea_result), + (ins memrr:$addr), + "lhaux $rD, $addr", IIC_LdStLHAUX, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, + NoEncode<"$ea_result">; + +def LHZUX : XForm_1_memOp<31, 311, (outs gprc:$rD, ptr_rc_nor0:$ea_result), + (ins memrr:$addr), + "lhzux $rD, $addr", IIC_LdStLoadUpdX, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, + NoEncode<"$ea_result">; + +def LWZUX : XForm_1_memOp<31, 55, (outs gprc:$rD, ptr_rc_nor0:$ea_result), + (ins memrr:$addr), + "lwzux $rD, $addr", IIC_LdStLoadUpdX, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, + NoEncode<"$ea_result">; + +let Predicates = [HasFPU] in { +def LFSUX : XForm_1_memOp<31, 567, (outs f4rc:$rD, ptr_rc_nor0:$ea_result), + (ins memrr:$addr), + "lfsux $rD, $addr", IIC_LdStLFDUX, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, + NoEncode<"$ea_result">; + +def LFDUX : XForm_1_memOp<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), + (ins memrr:$addr), + "lfdux $rD, $addr", IIC_LdStLFDUX, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, + NoEncode<"$ea_result">; +} +} +} + +// Indexed (r+r) Loads. 
+// +let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in { +def LBZX : XForm_1_memOp<31, 87, (outs gprc:$rD), (ins memrr:$src), + "lbzx $rD, $src", IIC_LdStLoad, + [(set i32:$rD, (zextloadi8 xaddr:$src))]>; +def LHAX : XForm_1_memOp<31, 343, (outs gprc:$rD), (ins memrr:$src), + "lhax $rD, $src", IIC_LdStLHA, + [(set i32:$rD, (sextloadi16 xaddr:$src))]>, + PPC970_DGroup_Cracked; +def LHZX : XForm_1_memOp<31, 279, (outs gprc:$rD), (ins memrr:$src), + "lhzx $rD, $src", IIC_LdStLoad, + [(set i32:$rD, (zextloadi16 xaddr:$src))]>; +def LWZX : XForm_1_memOp<31, 23, (outs gprc:$rD), (ins memrr:$src), + "lwzx $rD, $src", IIC_LdStLoad, + [(set i32:$rD, (load xaddr:$src))]>; +def LHBRX : XForm_1_memOp<31, 790, (outs gprc:$rD), (ins memrr:$src), + "lhbrx $rD, $src", IIC_LdStLoad, + [(set i32:$rD, (PPClbrx xoaddr:$src, i16))]>; +def LWBRX : XForm_1_memOp<31, 534, (outs gprc:$rD), (ins memrr:$src), + "lwbrx $rD, $src", IIC_LdStLoad, + [(set i32:$rD, (PPClbrx xoaddr:$src, i32))]>; + +let Predicates = [HasFPU] in { +def LFSX : XForm_25_memOp<31, 535, (outs f4rc:$frD), (ins memrr:$src), + "lfsx $frD, $src", IIC_LdStLFD, + [(set f32:$frD, (load xaddr:$src))]>; +def LFDX : XForm_25_memOp<31, 599, (outs f8rc:$frD), (ins memrr:$src), + "lfdx $frD, $src", IIC_LdStLFD, + [(set f64:$frD, (load xaddr:$src))]>; + +def LFIWAX : XForm_25_memOp<31, 855, (outs f8rc:$frD), (ins memrr:$src), + "lfiwax $frD, $src", IIC_LdStLFD, + [(set f64:$frD, (PPClfiwax xoaddr:$src))]>; +def LFIWZX : XForm_25_memOp<31, 887, (outs f8rc:$frD), (ins memrr:$src), + "lfiwzx $frD, $src", IIC_LdStLFD, + [(set f64:$frD, (PPClfiwzx xoaddr:$src))]>; +} +} + +// Load Multiple +def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src), + "lmw $rD, $src", IIC_LdStLMW, []>; + +//===----------------------------------------------------------------------===// +// PPC32 Store Instructions. +// + +// Unindexed (r+i) Stores. +let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in { +def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$src), + "stb $rS, $src", IIC_LdStStore, + [(truncstorei8 i32:$rS, iaddr:$src)]>; +def STH : DForm_1<44, (outs), (ins gprc:$rS, memri:$src), + "sth $rS, $src", IIC_LdStStore, + [(truncstorei16 i32:$rS, iaddr:$src)]>; +def STW : DForm_1<36, (outs), (ins gprc:$rS, memri:$src), + "stw $rS, $src", IIC_LdStStore, + [(store i32:$rS, iaddr:$src)]>; +let Predicates = [HasFPU] in { +def STFS : DForm_1<52, (outs), (ins f4rc:$rS, memri:$dst), + "stfs $rS, $dst", IIC_LdStSTFD, + [(store f32:$rS, iaddr:$dst)]>; +def STFD : DForm_1<54, (outs), (ins f8rc:$rS, memri:$dst), + "stfd $rS, $dst", IIC_LdStSTFD, + [(store f64:$rS, iaddr:$dst)]>; +} +} + +// Unindexed (r+i) Stores with Update (preinc). 
+let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in { +def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst), + "stbu $rS, $dst", IIC_LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst), + "sthu $rS, $dst", IIC_LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst), + "stwu $rS, $dst", IIC_LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +let Predicates = [HasFPU] in { +def STFSU : DForm_1<53, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memri:$dst), + "stfsu $rS, $dst", IIC_LdStSTFDU, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STFDU : DForm_1<55, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$rS, memri:$dst), + "stfdu $rS, $dst", IIC_LdStSTFDU, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +} +} + +// Patterns to match the pre-inc stores. We can't put the patterns on +// the instruction definitions directly as ISel wants the address base +// and offset to be separate operands, not a single complex operand. +def : Pat<(pre_truncsti8 i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STBU $rS, iaddroff:$ptroff, $ptrreg)>; +def : Pat<(pre_truncsti16 i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STHU $rS, iaddroff:$ptroff, $ptrreg)>; +def : Pat<(pre_store i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STWU $rS, iaddroff:$ptroff, $ptrreg)>; +def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STFSU $rS, iaddroff:$ptroff, $ptrreg)>; +def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STFDU $rS, iaddroff:$ptroff, $ptrreg)>; + +// Indexed (r+r) Stores. +let PPC970_Unit = 2 in { +def STBX : XForm_8_memOp<31, 215, (outs), (ins gprc:$rS, memrr:$dst), + "stbx $rS, $dst", IIC_LdStStore, + [(truncstorei8 i32:$rS, xaddr:$dst)]>, + PPC970_DGroup_Cracked; +def STHX : XForm_8_memOp<31, 407, (outs), (ins gprc:$rS, memrr:$dst), + "sthx $rS, $dst", IIC_LdStStore, + [(truncstorei16 i32:$rS, xaddr:$dst)]>, + PPC970_DGroup_Cracked; +def STWX : XForm_8_memOp<31, 151, (outs), (ins gprc:$rS, memrr:$dst), + "stwx $rS, $dst", IIC_LdStStore, + [(store i32:$rS, xaddr:$dst)]>, + PPC970_DGroup_Cracked; + +def STHBRX: XForm_8_memOp<31, 918, (outs), (ins gprc:$rS, memrr:$dst), + "sthbrx $rS, $dst", IIC_LdStStore, + [(PPCstbrx i32:$rS, xoaddr:$dst, i16)]>, + PPC970_DGroup_Cracked; +def STWBRX: XForm_8_memOp<31, 662, (outs), (ins gprc:$rS, memrr:$dst), + "stwbrx $rS, $dst", IIC_LdStStore, + [(PPCstbrx i32:$rS, xoaddr:$dst, i32)]>, + PPC970_DGroup_Cracked; + +let Predicates = [HasFPU] in { +def STFIWX: XForm_28_memOp<31, 983, (outs), (ins f8rc:$frS, memrr:$dst), + "stfiwx $frS, $dst", IIC_LdStSTFD, + [(PPCstfiwx f64:$frS, xoaddr:$dst)]>; + +def STFSX : XForm_28_memOp<31, 663, (outs), (ins f4rc:$frS, memrr:$dst), + "stfsx $frS, $dst", IIC_LdStSTFD, + [(store f32:$frS, xaddr:$dst)]>; +def STFDX : XForm_28_memOp<31, 727, (outs), (ins f8rc:$frS, memrr:$dst), + "stfdx $frS, $dst", IIC_LdStSTFD, + [(store f64:$frS, xaddr:$dst)]>; +} +} + +// Indexed (r+r) Stores with Update (preinc). 
+let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in { +def STBUX : XForm_8_memOp<31, 247, (outs ptr_rc_nor0:$ea_res), + (ins gprc:$rS, memrr:$dst), + "stbux $rS, $dst", IIC_LdStStoreUpd, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, + NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STHUX : XForm_8_memOp<31, 439, (outs ptr_rc_nor0:$ea_res), + (ins gprc:$rS, memrr:$dst), + "sthux $rS, $dst", IIC_LdStStoreUpd, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, + NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STWUX : XForm_8_memOp<31, 183, (outs ptr_rc_nor0:$ea_res), + (ins gprc:$rS, memrr:$dst), + "stwux $rS, $dst", IIC_LdStStoreUpd, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, + NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +let Predicates = [HasFPU] in { +def STFSUX: XForm_8_memOp<31, 695, (outs ptr_rc_nor0:$ea_res), + (ins f4rc:$rS, memrr:$dst), + "stfsux $rS, $dst", IIC_LdStSTFDU, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, + NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STFDUX: XForm_8_memOp<31, 759, (outs ptr_rc_nor0:$ea_res), + (ins f8rc:$rS, memrr:$dst), + "stfdux $rS, $dst", IIC_LdStSTFDU, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, + NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +} +} + +// Patterns to match the pre-inc stores. We can't put the patterns on +// the instruction definitions directly as ISel wants the address base +// and offset to be separate operands, not a single complex operand. +def : Pat<(pre_truncsti8 i32:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STBUX $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_truncsti16 i32:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STHUX $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_store i32:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STWUX $rS, $ptrreg, $ptroff)>; +let Predicates = [HasFPU] in { +def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STFSUX $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STFDUX $rS, $ptrreg, $ptroff)>; +} + +// Store Multiple +def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst), + "stmw $rS, $dst", IIC_LdStLMW, []>; + +def SYNC : XForm_24_sync<31, 598, (outs), (ins i32imm:$L), + "sync $L", IIC_LdStSync, []>; + +let isCodeGenOnly = 1 in { + def MSYNC : XForm_24_sync<31, 598, (outs), (ins), + "msync", IIC_LdStSync, []> { + let L = 0; + } +} + +def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[HasSYNC]>; +def : Pat<(int_ppc_lwsync), (SYNC 1)>, Requires<[HasSYNC]>; +def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[HasOnlyMSYNC]>; +def : Pat<(int_ppc_lwsync), (MSYNC)>, Requires<[HasOnlyMSYNC]>; + +//===----------------------------------------------------------------------===// +// PPC32 Arithmetic Instructions. +// + +let PPC970_Unit = 1 in { // FXU Operations. +def ADDI : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$imm), + "addi $rD, $rA, $imm", IIC_IntSimple, + [(set i32:$rD, (add i32:$rA, imm32SExt16:$imm))]>; +let BaseName = "addic" in { +let Defs = [CARRY] in +def ADDIC : DForm_2<12, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), + "addic $rD, $rA, $imm", IIC_IntGeneral, + [(set i32:$rD, (addc i32:$rA, imm32SExt16:$imm))]>, + RecFormRel, PPC970_DGroup_Cracked; +let Defs = [CARRY, CR0] in +def ADDICo : DForm_2<13, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), + "addic. 
$rD, $rA, $imm", IIC_IntGeneral, + []>, isDOT, RecFormRel; +} +def ADDIS : DForm_2<15, (outs gprc:$rD), (ins gprc_nor0:$rA, s17imm:$imm), + "addis $rD, $rA, $imm", IIC_IntSimple, + [(set i32:$rD, (add i32:$rA, imm16ShiftedSExt:$imm))]>; +let isCodeGenOnly = 1 in +def LA : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$sym), + "la $rD, $sym($rA)", IIC_IntGeneral, + [(set i32:$rD, (add i32:$rA, + (PPClo tglobaladdr:$sym, 0)))]>; +def MULLI : DForm_2< 7, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), + "mulli $rD, $rA, $imm", IIC_IntMulLI, + [(set i32:$rD, (mul i32:$rA, imm32SExt16:$imm))]>; +let Defs = [CARRY] in +def SUBFIC : DForm_2< 8, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), + "subfic $rD, $rA, $imm", IIC_IntGeneral, + [(set i32:$rD, (subc imm32SExt16:$imm, i32:$rA))]>; + +let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { + def LI : DForm_2_r0<14, (outs gprc:$rD), (ins s16imm:$imm), + "li $rD, $imm", IIC_IntSimple, + [(set i32:$rD, imm32SExt16:$imm)]>; + def LIS : DForm_2_r0<15, (outs gprc:$rD), (ins s17imm:$imm), + "lis $rD, $imm", IIC_IntSimple, + [(set i32:$rD, imm16ShiftedSExt:$imm)]>; +} +} + +let PPC970_Unit = 1 in { // FXU Operations. +let Defs = [CR0] in { +def ANDIo : DForm_4<28, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), + "andi. $dst, $src1, $src2", IIC_IntGeneral, + [(set i32:$dst, (and i32:$src1, immZExt16:$src2))]>, + isDOT; +def ANDISo : DForm_4<29, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), + "andis. $dst, $src1, $src2", IIC_IntGeneral, + [(set i32:$dst, (and i32:$src1, imm16ShiftedZExt:$src2))]>, + isDOT; +} +def ORI : DForm_4<24, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), + "ori $dst, $src1, $src2", IIC_IntSimple, + [(set i32:$dst, (or i32:$src1, immZExt16:$src2))]>; +def ORIS : DForm_4<25, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), + "oris $dst, $src1, $src2", IIC_IntSimple, + [(set i32:$dst, (or i32:$src1, imm16ShiftedZExt:$src2))]>; +def XORI : DForm_4<26, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), + "xori $dst, $src1, $src2", IIC_IntSimple, + [(set i32:$dst, (xor i32:$src1, immZExt16:$src2))]>; +def XORIS : DForm_4<27, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), + "xoris $dst, $src1, $src2", IIC_IntSimple, + [(set i32:$dst, (xor i32:$src1, imm16ShiftedZExt:$src2))]>; + +def NOP : DForm_4_zero<24, (outs), (ins), "nop", IIC_IntSimple, + []>; +let isCodeGenOnly = 1 in { +// The POWER6 and POWER7 have special group-terminating nops. +def NOP_GT_PWR6 : DForm_4_fixedreg_zero<24, 1, (outs), (ins), + "ori 1, 1, 0", IIC_IntSimple, []>; +def NOP_GT_PWR7 : DForm_4_fixedreg_zero<24, 2, (outs), (ins), + "ori 2, 2, 0", IIC_IntSimple, []>; +} + +let isCompare = 1, hasSideEffects = 0 in { + def CMPWI : DForm_5_ext<11, (outs crrc:$crD), (ins gprc:$rA, s16imm:$imm), + "cmpwi $crD, $rA, $imm", IIC_IntCompare>; + def CMPLWI : DForm_6_ext<10, (outs crrc:$dst), (ins gprc:$src1, u16imm:$src2), + "cmplwi $dst, $src1, $src2", IIC_IntCompare>; + def CMPRB : X_BF3_L1_RS5_RS5<31, 192, (outs crbitrc:$BF), + (ins u1imm:$L, g8rc:$rA, g8rc:$rB), + "cmprb $BF, $L, $rA, $rB", IIC_IntCompare, []>, + Requires<[IsISA3_0]>; +} +} + +let PPC970_Unit = 1, hasSideEffects = 0 in { // FXU Operations. 
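+// Each defm below instantiates one of the record-form multiclasses defined
+// earlier; e.g. "defm AND : XForm_6r<...>" should yield both AND
+// ("and $rA, $rS, $rB") and ANDo ("and. $rA, $rS, $rB", with Defs = [CR0]
+// and isDOT), tied together through BaseName/RecFormRel.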
+let isCommutable = 1 in { +defm NAND : XForm_6r<31, 476, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "nand", "$rA, $rS, $rB", IIC_IntSimple, + [(set i32:$rA, (not (and i32:$rS, i32:$rB)))]>; +defm AND : XForm_6r<31, 28, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "and", "$rA, $rS, $rB", IIC_IntSimple, + [(set i32:$rA, (and i32:$rS, i32:$rB))]>; +} // isCommutable +defm ANDC : XForm_6r<31, 60, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "andc", "$rA, $rS, $rB", IIC_IntSimple, + [(set i32:$rA, (and i32:$rS, (not i32:$rB)))]>; +let isCommutable = 1 in { +defm OR : XForm_6r<31, 444, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "or", "$rA, $rS, $rB", IIC_IntSimple, + [(set i32:$rA, (or i32:$rS, i32:$rB))]>; +defm NOR : XForm_6r<31, 124, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "nor", "$rA, $rS, $rB", IIC_IntSimple, + [(set i32:$rA, (not (or i32:$rS, i32:$rB)))]>; +} // isCommutable +defm ORC : XForm_6r<31, 412, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "orc", "$rA, $rS, $rB", IIC_IntSimple, + [(set i32:$rA, (or i32:$rS, (not i32:$rB)))]>; +let isCommutable = 1 in { +defm EQV : XForm_6r<31, 284, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "eqv", "$rA, $rS, $rB", IIC_IntSimple, + [(set i32:$rA, (not (xor i32:$rS, i32:$rB)))]>; +defm XOR : XForm_6r<31, 316, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "xor", "$rA, $rS, $rB", IIC_IntSimple, + [(set i32:$rA, (xor i32:$rS, i32:$rB))]>; +} // isCommutable +defm SLW : XForm_6r<31, 24, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "slw", "$rA, $rS, $rB", IIC_IntGeneral, + [(set i32:$rA, (PPCshl i32:$rS, i32:$rB))]>; +defm SRW : XForm_6r<31, 536, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "srw", "$rA, $rS, $rB", IIC_IntGeneral, + [(set i32:$rA, (PPCsrl i32:$rS, i32:$rB))]>; +defm SRAW : XForm_6rc<31, 792, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "sraw", "$rA, $rS, $rB", IIC_IntShift, + [(set i32:$rA, (PPCsra i32:$rS, i32:$rB))]>; +} + +let PPC970_Unit = 1 in { // FXU Operations. +let hasSideEffects = 0 in { +defm SRAWI : XForm_10rc<31, 824, (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH), + "srawi", "$rA, $rS, $SH", IIC_IntShift, + [(set i32:$rA, (sra i32:$rS, (i32 imm:$SH)))]>; +defm CNTLZW : XForm_11r<31, 26, (outs gprc:$rA), (ins gprc:$rS), + "cntlzw", "$rA, $rS", IIC_IntGeneral, + [(set i32:$rA, (ctlz i32:$rS))]>; +defm CNTTZW : XForm_11r<31, 538, (outs gprc:$rA), (ins gprc:$rS), + "cnttzw", "$rA, $rS", IIC_IntGeneral, + [(set i32:$rA, (cttz i32:$rS))]>, Requires<[IsISA3_0]>; +defm EXTSB : XForm_11r<31, 954, (outs gprc:$rA), (ins gprc:$rS), + "extsb", "$rA, $rS", IIC_IntSimple, + [(set i32:$rA, (sext_inreg i32:$rS, i8))]>; +defm EXTSH : XForm_11r<31, 922, (outs gprc:$rA), (ins gprc:$rS), + "extsh", "$rA, $rS", IIC_IntSimple, + [(set i32:$rA, (sext_inreg i32:$rS, i16))]>; + +let isCommutable = 1 in +def CMPB : XForm_6<31, 508, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "cmpb $rA, $rS, $rB", IIC_IntGeneral, + [(set i32:$rA, (PPCcmpb i32:$rS, i32:$rB))]>; +} +let isCompare = 1, hasSideEffects = 0 in { + def CMPW : XForm_16_ext<31, 0, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB), + "cmpw $crD, $rA, $rB", IIC_IntCompare>; + def CMPLW : XForm_16_ext<31, 32, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB), + "cmplw $crD, $rA, $rB", IIC_IntCompare>; +} +} +let PPC970_Unit = 3, Predicates = [HasFPU] in { // FPU Operations. 
+//def FCMPO  : XForm_17<63, 32, (outs CRRC:$crD), (ins FPRC:$fA, FPRC:$fB),
+//                      "fcmpo $crD, $fA, $fB", IIC_FPCompare>;
+let isCompare = 1, hasSideEffects = 0 in {
+  def FCMPUS : XForm_17<63, 0, (outs crrc:$crD), (ins f4rc:$fA, f4rc:$fB),
+                        "fcmpu $crD, $fA, $fB", IIC_FPCompare>;
+  let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+  def FCMPUD : XForm_17<63, 0, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB),
+                        "fcmpu $crD, $fA, $fB", IIC_FPCompare>;
+}
+
+def FTDIV: XForm_17<63, 128, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB),
+                    "ftdiv $crD, $fA, $fB", IIC_FPCompare>;
+def FTSQRT: XForm_17a<63, 160, (outs crrc:$crD), (ins f8rc:$fB),
+                      "ftsqrt $crD, $fB", IIC_FPCompare>;
+
+let Uses = [RM] in {
+  let hasSideEffects = 0 in {
+  defm FCTIW  : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB),
+                          "fctiw", "$frD, $frB", IIC_FPGeneral,
+                          []>;
+  defm FCTIWU : XForm_26r<63, 142, (outs f8rc:$frD), (ins f8rc:$frB),
+                          "fctiwu", "$frD, $frB", IIC_FPGeneral,
+                          []>;
+  defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB),
+                          "fctiwz", "$frD, $frB", IIC_FPGeneral,
+                          [(set f64:$frD, (PPCfctiwz f64:$frB))]>;
+
+  defm FRSP   : XForm_26r<63, 12, (outs f4rc:$frD), (ins f8rc:$frB),
+                          "frsp", "$frD, $frB", IIC_FPGeneral,
+                          [(set f32:$frD, (fpround f64:$frB))]>;
+
+  let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+  defm FRIND  : XForm_26r<63, 392, (outs f8rc:$frD), (ins f8rc:$frB),
+                          "frin", "$frD, $frB", IIC_FPGeneral,
+                          [(set f64:$frD, (fround f64:$frB))]>;
+  defm FRINS  : XForm_26r<63, 392, (outs f4rc:$frD), (ins f4rc:$frB),
+                          "frin", "$frD, $frB", IIC_FPGeneral,
+                          [(set f32:$frD, (fround f32:$frB))]>;
+  }
+
+  let hasSideEffects = 0 in {
+  let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+  defm FRIPD  : XForm_26r<63, 456, (outs f8rc:$frD), (ins f8rc:$frB),
+                          "frip", "$frD, $frB", IIC_FPGeneral,
+                          [(set f64:$frD, (fceil f64:$frB))]>;
+  defm FRIPS  : XForm_26r<63, 456, (outs f4rc:$frD), (ins f4rc:$frB),
+                          "frip", "$frD, $frB", IIC_FPGeneral,
+                          [(set f32:$frD, (fceil f32:$frB))]>;
+  let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+  defm FRIZD  : XForm_26r<63, 424, (outs f8rc:$frD), (ins f8rc:$frB),
+                          "friz", "$frD, $frB", IIC_FPGeneral,
+                          [(set f64:$frD, (ftrunc f64:$frB))]>;
+  defm FRIZS  : XForm_26r<63, 424, (outs f4rc:$frD), (ins f4rc:$frB),
+                          "friz", "$frD, $frB", IIC_FPGeneral,
+                          [(set f32:$frD, (ftrunc f32:$frB))]>;
+  let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+  defm FRIMD  : XForm_26r<63, 488, (outs f8rc:$frD), (ins f8rc:$frB),
+                          "frim", "$frD, $frB", IIC_FPGeneral,
+                          [(set f64:$frD, (ffloor f64:$frB))]>;
+  defm FRIMS  : XForm_26r<63, 488, (outs f4rc:$frD), (ins f4rc:$frB),
+                          "frim", "$frD, $frB", IIC_FPGeneral,
+                          [(set f32:$frD, (ffloor f32:$frB))]>;
+
+  defm FSQRT  : XForm_26r<63, 22, (outs f8rc:$frD), (ins f8rc:$frB),
+                          "fsqrt", "$frD, $frB", IIC_FPSqrtD,
+                          [(set f64:$frD, (fsqrt f64:$frB))]>;
+  defm FSQRTS : XForm_26r<59, 22, (outs f4rc:$frD), (ins f4rc:$frB),
+                          "fsqrts", "$frD, $frB", IIC_FPSqrtS,
+                          [(set f32:$frD, (fsqrt f32:$frB))]>;
+  }
+  }
+}
+
+/// Note that FMR is defined as a pseudo-op on the PPC970 because it is often
+/// coalesced away, and we don't want the dispatch group builder to think it
+/// will fill a slot (which could cause the load of an LSU reject to sneak
+/// into a d-group with a store).
+let hasSideEffects = 0, Predicates = [HasFPU] in +defm FMR : XForm_26r<63, 72, (outs f4rc:$frD), (ins f4rc:$frB), + "fmr", "$frD, $frB", IIC_FPGeneral, + []>, // (set f32:$frD, f32:$frB) + PPC970_Unit_Pseudo; + +let PPC970_Unit = 3, hasSideEffects = 0, Predicates = [HasFPU] in { // FPU Operations. +// These are artificially split into two different forms, for 4/8 byte FP. +defm FABSS : XForm_26r<63, 264, (outs f4rc:$frD), (ins f4rc:$frB), + "fabs", "$frD, $frB", IIC_FPGeneral, + [(set f32:$frD, (fabs f32:$frB))]>; +let Interpretation64Bit = 1, isCodeGenOnly = 1 in +defm FABSD : XForm_26r<63, 264, (outs f8rc:$frD), (ins f8rc:$frB), + "fabs", "$frD, $frB", IIC_FPGeneral, + [(set f64:$frD, (fabs f64:$frB))]>; +defm FNABSS : XForm_26r<63, 136, (outs f4rc:$frD), (ins f4rc:$frB), + "fnabs", "$frD, $frB", IIC_FPGeneral, + [(set f32:$frD, (fneg (fabs f32:$frB)))]>; +let Interpretation64Bit = 1, isCodeGenOnly = 1 in +defm FNABSD : XForm_26r<63, 136, (outs f8rc:$frD), (ins f8rc:$frB), + "fnabs", "$frD, $frB", IIC_FPGeneral, + [(set f64:$frD, (fneg (fabs f64:$frB)))]>; +defm FNEGS : XForm_26r<63, 40, (outs f4rc:$frD), (ins f4rc:$frB), + "fneg", "$frD, $frB", IIC_FPGeneral, + [(set f32:$frD, (fneg f32:$frB))]>; +let Interpretation64Bit = 1, isCodeGenOnly = 1 in +defm FNEGD : XForm_26r<63, 40, (outs f8rc:$frD), (ins f8rc:$frB), + "fneg", "$frD, $frB", IIC_FPGeneral, + [(set f64:$frD, (fneg f64:$frB))]>; + +defm FCPSGNS : XForm_28r<63, 8, (outs f4rc:$frD), (ins f4rc:$frA, f4rc:$frB), + "fcpsgn", "$frD, $frA, $frB", IIC_FPGeneral, + [(set f32:$frD, (fcopysign f32:$frB, f32:$frA))]>; +let Interpretation64Bit = 1, isCodeGenOnly = 1 in +defm FCPSGND : XForm_28r<63, 8, (outs f8rc:$frD), (ins f8rc:$frA, f8rc:$frB), + "fcpsgn", "$frD, $frA, $frB", IIC_FPGeneral, + [(set f64:$frD, (fcopysign f64:$frB, f64:$frA))]>; + +// Reciprocal estimates. +defm FRE : XForm_26r<63, 24, (outs f8rc:$frD), (ins f8rc:$frB), + "fre", "$frD, $frB", IIC_FPGeneral, + [(set f64:$frD, (PPCfre f64:$frB))]>; +defm FRES : XForm_26r<59, 24, (outs f4rc:$frD), (ins f4rc:$frB), + "fres", "$frD, $frB", IIC_FPGeneral, + [(set f32:$frD, (PPCfre f32:$frB))]>; +defm FRSQRTE : XForm_26r<63, 26, (outs f8rc:$frD), (ins f8rc:$frB), + "frsqrte", "$frD, $frB", IIC_FPGeneral, + [(set f64:$frD, (PPCfrsqrte f64:$frB))]>; +defm FRSQRTES : XForm_26r<59, 26, (outs f4rc:$frD), (ins f4rc:$frB), + "frsqrtes", "$frD, $frB", IIC_FPGeneral, + [(set f32:$frD, (PPCfrsqrte f32:$frB))]>; +} + +// XL-Form instructions. condition register logical ops. +// +let hasSideEffects = 0 in +def MCRF : XLForm_3<19, 0, (outs crrc:$BF), (ins crrc:$BFA), + "mcrf $BF, $BFA", IIC_BrMCR>, + PPC970_DGroup_First, PPC970_Unit_CRU; + +// FIXME: According to the ISA (section 2.5.1 of version 2.06), the +// condition-register logical instructions have preferred forms. Specifically, +// it is preferred that the bit specified by the BT field be in the same +// condition register as that specified by the bit BB. We might want to account +// for this via hinting the register allocator and anti-dep breakers, or we +// could constrain the register class to force this constraint and then loosen +// it during register allocation via convertToThreeAddress or some similar +// mechanism. 
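+// For example (illustrative), "crand 4*cr5+lt, 4*cr2+gt, 4*cr5+eq" is a
+// preferred form (the BT and BB bits both lie in cr5), while
+// "crand 4*cr2+lt, 4*cr5+gt, 4*cr5+eq" is not (BT is in cr2, BB in cr5).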
+ +let isCommutable = 1 in { +def CRAND : XLForm_1<19, 257, (outs crbitrc:$CRD), + (ins crbitrc:$CRA, crbitrc:$CRB), + "crand $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (and i1:$CRA, i1:$CRB))]>; + +def CRNAND : XLForm_1<19, 225, (outs crbitrc:$CRD), + (ins crbitrc:$CRA, crbitrc:$CRB), + "crnand $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (not (and i1:$CRA, i1:$CRB)))]>; + +def CROR : XLForm_1<19, 449, (outs crbitrc:$CRD), + (ins crbitrc:$CRA, crbitrc:$CRB), + "cror $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (or i1:$CRA, i1:$CRB))]>; + +def CRXOR : XLForm_1<19, 193, (outs crbitrc:$CRD), + (ins crbitrc:$CRA, crbitrc:$CRB), + "crxor $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (xor i1:$CRA, i1:$CRB))]>; + +def CRNOR : XLForm_1<19, 33, (outs crbitrc:$CRD), + (ins crbitrc:$CRA, crbitrc:$CRB), + "crnor $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (not (or i1:$CRA, i1:$CRB)))]>; + +def CREQV : XLForm_1<19, 289, (outs crbitrc:$CRD), + (ins crbitrc:$CRA, crbitrc:$CRB), + "creqv $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (not (xor i1:$CRA, i1:$CRB)))]>; +} // isCommutable + +def CRANDC : XLForm_1<19, 129, (outs crbitrc:$CRD), + (ins crbitrc:$CRA, crbitrc:$CRB), + "crandc $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (and i1:$CRA, (not i1:$CRB)))]>; + +def CRORC : XLForm_1<19, 417, (outs crbitrc:$CRD), + (ins crbitrc:$CRA, crbitrc:$CRB), + "crorc $CRD, $CRA, $CRB", IIC_BrCR, + [(set i1:$CRD, (or i1:$CRA, (not i1:$CRB)))]>; + +let isCodeGenOnly = 1 in { +def CRSET : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins), + "creqv $dst, $dst, $dst", IIC_BrCR, + [(set i1:$dst, 1)]>; + +def CRUNSET: XLForm_1_ext<19, 193, (outs crbitrc:$dst), (ins), + "crxor $dst, $dst, $dst", IIC_BrCR, + [(set i1:$dst, 0)]>; + +let Defs = [CR1EQ], CRD = 6 in { +def CR6SET : XLForm_1_ext<19, 289, (outs), (ins), + "creqv 6, 6, 6", IIC_BrCR, + [(PPCcr6set)]>; + +def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins), + "crxor 6, 6, 6", IIC_BrCR, + [(PPCcr6unset)]>; +} +} + +// XFX-Form instructions. Instructions that deal with SPRs. +// + +def MFSPR : XFXForm_1<31, 339, (outs gprc:$RT), (ins i32imm:$SPR), + "mfspr $RT, $SPR", IIC_SprMFSPR>; +def MTSPR : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, gprc:$RT), + "mtspr $SPR, $RT", IIC_SprMTSPR>; + +def MFTB : XFXForm_1<31, 371, (outs gprc:$RT), (ins i32imm:$SPR), + "mftb $RT, $SPR", IIC_SprMFTB>; + +def MFPMR : XFXForm_1<31, 334, (outs gprc:$RT), (ins i32imm:$SPR), + "mfpmr $RT, $SPR", IIC_SprMFPMR>; + +def MTPMR : XFXForm_1<31, 462, (outs), (ins i32imm:$SPR, gprc:$RT), + "mtpmr $SPR, $RT", IIC_SprMTPMR>; + + +// A pseudo-instruction used to implement the read of the 64-bit cycle counter +// on a 32-bit target. 
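+// A typical expansion (an illustrative sketch, not the literal emitted code)
+// re-reads the upper half to guard against a carry between the two reads:
+//   1: mftbu rHI       // read time-base upper
+//      mftb  rLO       // read time-base lower
+//      mftbu rTMP      // re-read upper
+//      cmpw  rTMP, rHI
+//      bne   1b        // retry if TBU ticked over between the reads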
+let hasSideEffects = 1, usesCustomInserter = 1 in +def ReadTB : Pseudo<(outs gprc:$lo, gprc:$hi), (ins), + "#ReadTB", []>; + +let Uses = [CTR] in { +def MFCTR : XFXForm_1_ext<31, 339, 9, (outs gprc:$rT), (ins), + "mfctr $rT", IIC_SprMFSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} +let Defs = [CTR], Pattern = [(PPCmtctr i32:$rS)] in { +def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS), + "mtctr $rS", IIC_SprMTSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} +let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [CTR] in { +let Pattern = [(int_ppc_mtctr i32:$rS)] in +def MTCTRloop : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS), + "mtctr $rS", IIC_SprMTSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} + +let Defs = [LR] in { +def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins gprc:$rS), + "mtlr $rS", IIC_SprMTSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} +let Uses = [LR] in { +def MFLR : XFXForm_1_ext<31, 339, 8, (outs gprc:$rT), (ins), + "mflr $rT", IIC_SprMFSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} + +let isCodeGenOnly = 1 in { + // Move to/from VRSAVE: despite being a SPR, the VRSAVE register is renamed + // like a GPR on the PPC970. As such, copies in and out have the same + // performance characteristics as an OR instruction. + def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins gprc:$rS), + "mtspr 256, $rS", IIC_IntGeneral>, + PPC970_DGroup_Single, PPC970_Unit_FXU; + def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT), (ins), + "mfspr $rT, 256", IIC_IntGeneral>, + PPC970_DGroup_First, PPC970_Unit_FXU; + + def MTVRSAVEv : XFXForm_7_ext<31, 467, 256, + (outs VRSAVERC:$reg), (ins gprc:$rS), + "mtspr 256, $rS", IIC_IntGeneral>, + PPC970_DGroup_Single, PPC970_Unit_FXU; + def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT), + (ins VRSAVERC:$reg), + "mfspr $rT, 256", IIC_IntGeneral>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} + +// Aliases for mtvrsave/mfvrsave to mfspr/mtspr. +def : InstAlias<"mtvrsave $rS", (MTVRSAVE gprc:$rS)>; +def : InstAlias<"mfvrsave $rS", (MFVRSAVE gprc:$rS)>; + +// SPILL_VRSAVE - Indicate that we're dumping the VRSAVE register, +// so we'll need to scavenge a register for it. +let mayStore = 1 in +def SPILL_VRSAVE : Pseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F), + "#SPILL_VRSAVE", []>; + +// RESTORE_VRSAVE - Indicate that we're restoring the VRSAVE register (previously +// spilled), so we'll need to scavenge a register for it. +let mayLoad = 1 in +def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F), + "#RESTORE_VRSAVE", []>; + +let hasSideEffects = 0 in { +// mtocrf's input needs to be prepared by shifting by an amount dependent +// on the cr register selected. Thus, post-ra anti-dep breaking must not +// later change that register assignment. +let hasExtraDefRegAllocReq = 1 in { +def MTOCRF: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins gprc:$ST), + "mtocrf $FXM, $ST", IIC_BrMCRX>, + PPC970_DGroup_First, PPC970_Unit_CRU; + +// Similarly to mtocrf, the mask for mtcrf must be prepared in a way that +// is dependent on the cr fields being set. +def MTCRF : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, gprc:$rS), + "mtcrf $FXM, $rS", IIC_BrMCRX>, + PPC970_MicroCode, PPC970_Unit_CRU; +} // hasExtraDefRegAllocReq = 1 + +// mfocrf's input needs to be prepared by shifting by an amount dependent +// on the cr register selected. Thus, post-ra anti-dep breaking must not +// later change that register assignment. 
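+// For example (illustrative), after "mfocrf r3, 2" (the FXM bit selecting
+// cr6) the four cr6 bits sit at their field position within r3 and must be
+// shifted down before use.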
+let hasExtraSrcRegAllocReq = 1 in { +def MFOCRF: XFXForm_5a<31, 19, (outs gprc:$rT), (ins crbitm:$FXM), + "mfocrf $rT, $FXM", IIC_SprMFCRF>, + PPC970_DGroup_First, PPC970_Unit_CRU; + +// Similarly to mfocrf, the mask for mfcrf must be prepared in a way that +// is dependent on the cr fields being copied. +def MFCR : XFXForm_3<31, 19, (outs gprc:$rT), (ins), + "mfcr $rT", IIC_SprMFCR>, + PPC970_MicroCode, PPC970_Unit_CRU; +} // hasExtraSrcRegAllocReq = 1 + +def MCRXRX : X_BF3<31, 576, (outs crrc:$BF), (ins), + "mcrxrx $BF", IIC_BrMCRX>, Requires<[IsISA3_0]>; +} // hasSideEffects = 0 + +let Predicates = [HasFPU] in { +// Pseudo instruction to perform FADD in round-to-zero mode. +let usesCustomInserter = 1, Uses = [RM] in { + def FADDrtz: Pseudo<(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "", + [(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>; +} + +// The above pseudo gets expanded to make use of the following instructions +// to manipulate FPSCR. Note that FPSCR is not modeled at the DAG level. +let Uses = [RM], Defs = [RM] in { + def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM), + "mtfsb0 $FM", IIC_IntMTFSB0, []>, + PPC970_DGroup_Single, PPC970_Unit_FPU; + def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM), + "mtfsb1 $FM", IIC_IntMTFSB0, []>, + PPC970_DGroup_Single, PPC970_Unit_FPU; + let isCodeGenOnly = 1 in + def MTFSFb : XFLForm<63, 711, (outs), (ins i32imm:$FM, f8rc:$rT), + "mtfsf $FM, $rT", IIC_IntMTFSB0, []>, + PPC970_DGroup_Single, PPC970_Unit_FPU; +} +let Uses = [RM] in { + def MFFS : XForm_42<63, 583, (outs f8rc:$rT), (ins), + "mffs $rT", IIC_IntMFFS, + [(set f64:$rT, (PPCmffs))]>, + PPC970_DGroup_Single, PPC970_Unit_FPU; + + let Defs = [CR1] in + def MFFSo : XForm_42<63, 583, (outs f8rc:$rT), (ins), + "mffs. $rT", IIC_IntMFFS, []>, isDOT; + + def MFFSCE : X_FRT5_XO2_XO3_XO10<63, 0, 1, 583, (outs f8rc:$rT), (ins), + "mffsce $rT", IIC_IntMFFS, []>, + PPC970_DGroup_Single, PPC970_Unit_FPU; + + def MFFSCDRN : X_FRT5_XO2_XO3_FRB5_XO10<63, 2, 4, 583, (outs f8rc:$rT), + (ins f8rc:$FRB), "mffscdrn $rT, $FRB", + IIC_IntMFFS, []>, + PPC970_DGroup_Single, PPC970_Unit_FPU; + + def MFFSCDRNI : X_FRT5_XO2_XO3_DRM3_XO10<63, 2, 5, 583, (outs f8rc:$rT), + (ins u3imm:$DRM), + "mffscdrni $rT, $DRM", + IIC_IntMFFS, []>, + PPC970_DGroup_Single, PPC970_Unit_FPU; + + def MFFSCRN : X_FRT5_XO2_XO3_FRB5_XO10<63, 2, 6, 583, (outs f8rc:$rT), + (ins f8rc:$FRB), "mffscrn $rT, $FRB", + IIC_IntMFFS, []>, + PPC970_DGroup_Single, PPC970_Unit_FPU; + + def MFFSCRNI : X_FRT5_XO2_XO3_RM2_X10<63, 2, 7, 583, (outs f8rc:$rT), + (ins u2imm:$RM), "mffscrni $rT, $RM", + IIC_IntMFFS, []>, + PPC970_DGroup_Single, PPC970_Unit_FPU; + + def MFFSL : X_FRT5_XO2_XO3_XO10<63, 3, 0, 583, (outs f8rc:$rT), (ins), + "mffsl $rT", IIC_IntMFFS, []>, + PPC970_DGroup_Single, PPC970_Unit_FPU; +} +} + +let Predicates = [IsISA3_0] in { +def MODSW : XForm_8<31, 779, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "modsw $rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (srem i32:$rA, i32:$rB))]>; +def MODUW : XForm_8<31, 267, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "moduw $rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (urem i32:$rA, i32:$rB))]>; +} + +let PPC970_Unit = 1, hasSideEffects = 0 in { // FXU Operations. +// XO-Form instructions. 
Arithmetic instructions that can set overflow bit +let isCommutable = 1 in +defm ADD4 : XOForm_1r<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "add", "$rT, $rA, $rB", IIC_IntSimple, + [(set i32:$rT, (add i32:$rA, i32:$rB))]>; +let isCodeGenOnly = 1 in +def ADD4TLS : XOForm_1<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, tlsreg32:$rB), + "add $rT, $rA, $rB", IIC_IntSimple, + [(set i32:$rT, (add i32:$rA, tglobaltlsaddr:$rB))]>; +let isCommutable = 1 in +defm ADDC : XOForm_1rc<31, 10, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "addc", "$rT, $rA, $rB", IIC_IntGeneral, + [(set i32:$rT, (addc i32:$rA, i32:$rB))]>, + PPC970_DGroup_Cracked; + +defm DIVW : XOForm_1rcr<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divw", "$rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>; +defm DIVWU : XOForm_1rcr<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divwu", "$rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>; +def DIVWE : XOForm_1<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divwe $rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (int_ppc_divwe gprc:$rA, gprc:$rB))]>, + Requires<[HasExtDiv]>; +let Defs = [CR0] in +def DIVWEo : XOForm_1<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divwe. $rT, $rA, $rB", IIC_IntDivW, + []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First, + Requires<[HasExtDiv]>; +def DIVWEU : XOForm_1<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divweu $rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (int_ppc_divweu gprc:$rA, gprc:$rB))]>, + Requires<[HasExtDiv]>; +let Defs = [CR0] in +def DIVWEUo : XOForm_1<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divweu. $rT, $rA, $rB", IIC_IntDivW, + []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First, + Requires<[HasExtDiv]>; +let isCommutable = 1 in { +defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "mulhw", "$rT, $rA, $rB", IIC_IntMulHW, + [(set i32:$rT, (mulhs i32:$rA, i32:$rB))]>; +defm MULHWU : XOForm_1r<31, 11, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "mulhwu", "$rT, $rA, $rB", IIC_IntMulHWU, + [(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>; +defm MULLW : XOForm_1r<31, 235, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "mullw", "$rT, $rA, $rB", IIC_IntMulHW, + [(set i32:$rT, (mul i32:$rA, i32:$rB))]>; +} // isCommutable +defm SUBF : XOForm_1r<31, 40, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "subf", "$rT, $rA, $rB", IIC_IntGeneral, + [(set i32:$rT, (sub i32:$rB, i32:$rA))]>; +defm SUBFC : XOForm_1rc<31, 8, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "subfc", "$rT, $rA, $rB", IIC_IntGeneral, + [(set i32:$rT, (subc i32:$rB, i32:$rA))]>, + PPC970_DGroup_Cracked; +defm NEG : XOForm_3r<31, 104, 0, (outs gprc:$rT), (ins gprc:$rA), + "neg", "$rT, $rA", IIC_IntSimple, + [(set i32:$rT, (ineg i32:$rA))]>; +let Uses = [CARRY] in { +let isCommutable = 1 in +defm ADDE : XOForm_1rc<31, 138, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "adde", "$rT, $rA, $rB", IIC_IntGeneral, + [(set i32:$rT, (adde i32:$rA, i32:$rB))]>; +defm ADDME : XOForm_3rc<31, 234, 0, (outs gprc:$rT), (ins gprc:$rA), + "addme", "$rT, $rA", IIC_IntGeneral, + [(set i32:$rT, (adde i32:$rA, -1))]>; +defm ADDZE : XOForm_3rc<31, 202, 0, (outs gprc:$rT), (ins gprc:$rA), + "addze", "$rT, $rA", IIC_IntGeneral, + [(set i32:$rT, (adde i32:$rA, 0))]>; +defm SUBFE : XOForm_1rc<31, 136, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "subfe", "$rT, $rA, $rB", IIC_IntGeneral, + [(set i32:$rT, (sube i32:$rB, i32:$rA))]>; 
+defm SUBFME : XOForm_3rc<31, 232, 0, (outs gprc:$rT), (ins gprc:$rA),
+ "subfme", "$rT, $rA", IIC_IntGeneral,
+ [(set i32:$rT, (sube -1, i32:$rA))]>;
+defm SUBFZE : XOForm_3rc<31, 200, 0, (outs gprc:$rT), (ins gprc:$rA),
+ "subfze", "$rT, $rA", IIC_IntGeneral,
+ [(set i32:$rT, (sube 0, i32:$rA))]>;
+}
+}
+
+// A-Form instructions. Most of the instructions executed in the FPU are of
+// this type.
+//
+let PPC970_Unit = 3, hasSideEffects = 0, Predicates = [HasFPU] in { // FPU Operations.
+let Uses = [RM] in {
+let isCommutable = 1 in {
+ defm FMADD : AForm_1r<63, 29,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
+ "fmadd", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set f64:$FRT, (fma f64:$FRA, f64:$FRC, f64:$FRB))]>;
+ defm FMADDS : AForm_1r<59, 29,
+ (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
+ "fmadds", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
+ [(set f32:$FRT, (fma f32:$FRA, f32:$FRC, f32:$FRB))]>;
+ defm FMSUB : AForm_1r<63, 28,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
+ "fmsub", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set f64:$FRT,
+ (fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>;
+ defm FMSUBS : AForm_1r<59, 28,
+ (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
+ "fmsubs", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
+ [(set f32:$FRT,
+ (fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>;
+ defm FNMADD : AForm_1r<63, 31,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
+ "fnmadd", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set f64:$FRT,
+ (fneg (fma f64:$FRA, f64:$FRC, f64:$FRB)))]>;
+ defm FNMADDS : AForm_1r<59, 31,
+ (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
+ "fnmadds", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
+ [(set f32:$FRT,
+ (fneg (fma f32:$FRA, f32:$FRC, f32:$FRB)))]>;
+ defm FNMSUB : AForm_1r<63, 30,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
+ "fnmsub", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC,
+ (fneg f64:$FRB))))]>;
+ defm FNMSUBS : AForm_1r<59, 30,
+ (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
+ "fnmsubs", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
+ [(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC,
+ (fneg f32:$FRB))))]>;
+} // isCommutable
+}
+// FSEL is artificially split into 4-byte and 8-byte forms for the result. To
+// avoid having four variants, force the comparison operand to always be an
+// 8-byte double (code should use an FMRSD if the input comparison value really
+// wants to be a float), and provide 4/8-byte forms only for the result and
+// value operands.
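+// Recall the fsel semantics: FRT = (FRA >= 0.0) ? FRC : FRB, where -0.0 is
+// treated as +0.0 and a NaN in FRA (for which the comparison is false)
+// selects FRB.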
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in +defm FSELD : AForm_1r<63, 23, + (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), + "fsel", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral, + [(set f64:$FRT, (PPCfsel f64:$FRA, f64:$FRC, f64:$FRB))]>; +defm FSELS : AForm_1r<63, 23, + (outs f4rc:$FRT), (ins f8rc:$FRA, f4rc:$FRC, f4rc:$FRB), + "fsel", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral, + [(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>; +let Uses = [RM] in { + let isCommutable = 1 in { + defm FADD : AForm_2r<63, 21, + (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), + "fadd", "$FRT, $FRA, $FRB", IIC_FPAddSub, + [(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>; + defm FADDS : AForm_2r<59, 21, + (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB), + "fadds", "$FRT, $FRA, $FRB", IIC_FPGeneral, + [(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>; + } // isCommutable + defm FDIV : AForm_2r<63, 18, + (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), + "fdiv", "$FRT, $FRA, $FRB", IIC_FPDivD, + [(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>; + defm FDIVS : AForm_2r<59, 18, + (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB), + "fdivs", "$FRT, $FRA, $FRB", IIC_FPDivS, + [(set f32:$FRT, (fdiv f32:$FRA, f32:$FRB))]>; + let isCommutable = 1 in { + defm FMUL : AForm_3r<63, 25, + (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC), + "fmul", "$FRT, $FRA, $FRC", IIC_FPFused, + [(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>; + defm FMULS : AForm_3r<59, 25, + (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC), + "fmuls", "$FRT, $FRA, $FRC", IIC_FPGeneral, + [(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>; + } // isCommutable + defm FSUB : AForm_2r<63, 20, + (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), + "fsub", "$FRT, $FRA, $FRB", IIC_FPAddSub, + [(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>; + defm FSUBS : AForm_2r<59, 20, + (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB), + "fsubs", "$FRT, $FRA, $FRB", IIC_FPGeneral, + [(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>; + } +} + +let hasSideEffects = 0 in { +let PPC970_Unit = 1 in { // FXU Operations. + let isSelect = 1 in + def ISEL : AForm_4<31, 15, + (outs gprc:$rT), (ins gprc_nor0:$rA, gprc:$rB, crbitrc:$cond), + "isel $rT, $rA, $rB, $cond", IIC_IntISEL, + []>; +} + +let PPC970_Unit = 1 in { // FXU Operations. +// M-Form instructions. rotate and mask instructions. +// +let isCommutable = 1 in { +// RLWIMI can be commuted if the rotate amount is zero. +defm RLWIMI : MForm_2r<20, (outs gprc:$rA), + (ins gprc:$rSi, gprc:$rS, u5imm:$SH, u5imm:$MB, + u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME", + IIC_IntRotate, []>, PPC970_DGroup_Cracked, + RegConstraint<"$rSi = $rA">, NoEncode<"$rSi">; +} +let BaseName = "rlwinm" in { +def RLWINM : MForm_2<21, + (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), + "rlwinm $rA, $rS, $SH, $MB, $ME", IIC_IntGeneral, + []>, RecFormRel; +let Defs = [CR0] in +def RLWINMo : MForm_2<21, + (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), + "rlwinm. $rA, $rS, $SH, $MB, $ME", IIC_IntGeneral, + []>, isDOT, RecFormRel, PPC970_DGroup_Cracked; +} +defm RLWNM : MForm_2r<23, (outs gprc:$rA), + (ins gprc:$rS, gprc:$rB, u5imm:$MB, u5imm:$ME), + "rlwnm", "$rA, $rS, $rB, $MB, $ME", IIC_IntGeneral, + []>; +} +} // hasSideEffects = 0 + +//===----------------------------------------------------------------------===// +// PowerPC Instruction Patterns +// + +// Arbitrary immediate support. Implement in terms of LIS/ORI. 
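+// For example, materializing 0x12345678 (illustrative):
+//   lis r3, 0x1234       // r3 = 0x12340000
+//   ori r3, r3, 0x5678   // r3 = 0x12345678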
+def : Pat<(i32 imm:$imm), + (ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>; + +// Implement the 'not' operation with the NOR instruction. +def i32not : OutPatFrag<(ops node:$in), + (NOR $in, $in)>; +def : Pat<(not i32:$in), + (i32not $in)>; + +// ADD an arbitrary immediate. +def : Pat<(add i32:$in, imm:$imm), + (ADDIS (ADDI $in, (LO16 imm:$imm)), (HA16 imm:$imm))>; +// OR an arbitrary immediate. +def : Pat<(or i32:$in, imm:$imm), + (ORIS (ORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>; +// XOR an arbitrary immediate. +def : Pat<(xor i32:$in, imm:$imm), + (XORIS (XORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>; +// SUBFIC +def : Pat<(sub imm32SExt16:$imm, i32:$in), + (SUBFIC $in, imm:$imm)>; + +// SHL/SRL +def : Pat<(shl i32:$in, (i32 imm:$imm)), + (RLWINM $in, imm:$imm, 0, (SHL32 imm:$imm))>; +def : Pat<(srl i32:$in, (i32 imm:$imm)), + (RLWINM $in, (SRL32 imm:$imm), imm:$imm, 31)>; + +// ROTL +def : Pat<(rotl i32:$in, i32:$sh), + (RLWNM $in, $sh, 0, 31)>; +def : Pat<(rotl i32:$in, (i32 imm:$imm)), + (RLWINM $in, imm:$imm, 0, 31)>; + +// RLWNM +def : Pat<(and (rotl i32:$in, i32:$sh), maskimm32:$imm), + (RLWNM $in, $sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>; + +// Calls +def : Pat<(PPCcall (i32 tglobaladdr:$dst)), + (BL tglobaladdr:$dst)>; +def : Pat<(PPCcall (i32 texternalsym:$dst)), + (BL texternalsym:$dst)>; + +def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm), + (TCRETURNdi tglobaladdr:$dst, imm:$imm)>; + +def : Pat<(PPCtc_return (i32 texternalsym:$dst), imm:$imm), + (TCRETURNdi texternalsym:$dst, imm:$imm)>; + +def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm), + (TCRETURNri CTRRC:$dst, imm:$imm)>; + + + +// Hi and Lo for Darwin Global Addresses. +def : Pat<(PPChi tglobaladdr:$in, 0), (LIS tglobaladdr:$in)>; +def : Pat<(PPClo tglobaladdr:$in, 0), (LI tglobaladdr:$in)>; +def : Pat<(PPChi tconstpool:$in, 0), (LIS tconstpool:$in)>; +def : Pat<(PPClo tconstpool:$in, 0), (LI tconstpool:$in)>; +def : Pat<(PPChi tjumptable:$in, 0), (LIS tjumptable:$in)>; +def : Pat<(PPClo tjumptable:$in, 0), (LI tjumptable:$in)>; +def : Pat<(PPChi tblockaddress:$in, 0), (LIS tblockaddress:$in)>; +def : Pat<(PPClo tblockaddress:$in, 0), (LI tblockaddress:$in)>; +def : Pat<(PPChi tglobaltlsaddr:$g, i32:$in), + (ADDIS $in, tglobaltlsaddr:$g)>; +def : Pat<(PPClo tglobaltlsaddr:$g, i32:$in), + (ADDI $in, tglobaltlsaddr:$g)>; +def : Pat<(add i32:$in, (PPChi tglobaladdr:$g, 0)), + (ADDIS $in, tglobaladdr:$g)>; +def : Pat<(add i32:$in, (PPChi tconstpool:$g, 0)), + (ADDIS $in, tconstpool:$g)>; +def : Pat<(add i32:$in, (PPChi tjumptable:$g, 0)), + (ADDIS $in, tjumptable:$g)>; +def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)), + (ADDIS $in, tblockaddress:$g)>; + +// Support for thread-local storage. +def PPC32GOT: Pseudo<(outs gprc:$rD), (ins), "#PPC32GOT", + [(set i32:$rD, (PPCppc32GOT))]>; + +// Get the _GLOBAL_OFFSET_TABLE_ in PIC mode. +// This uses two output registers, the first as the real output, the second as a +// temporary register, used internally in code generation. 
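+// (Illustrative: the late expansion obtains the current PC with a short
+// branch-and-link plus mflr into $rT, from which the _GLOBAL_OFFSET_TABLE_
+// address is computed into $rD; see the asm printer for the exact sequence.)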
+def PPC32PICGOT: Pseudo<(outs gprc:$rD, gprc:$rT), (ins), "#PPC32PICGOT",
+ []>, NoEncode<"$rT">;
+
+def LDgotTprelL32: Pseudo<(outs gprc:$rD), (ins s16imm:$disp, gprc_nor0:$reg),
+ "#LDgotTprelL32",
+ [(set i32:$rD,
+ (PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>;
+def : Pat<(PPCaddTls i32:$in, tglobaltlsaddr:$g),
+ (ADD4TLS $in, tglobaltlsaddr:$g)>;
+
+def ADDItlsgdL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+ "#ADDItlsgdL32",
+ [(set i32:$rD,
+ (PPCaddiTlsgdL i32:$reg, tglobaltlsaddr:$disp))]>;
+// LR is a true define, while the rest of the Defs are clobbers. R3 is
+// explicitly defined when this op is created, so not mentioned here.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [R0,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
+def GETtlsADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
+ "GETtlsADDR32",
+ [(set i32:$rD,
+ (PPCgetTlsAddr i32:$reg, tglobaltlsaddr:$sym))]>;
+// Combined op for ADDItlsgdL32 and GETtlsADDR32, late expanded. R3 and LR
+// are true defines while the rest of the Defs are clobbers.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
+def ADDItlsgdLADDR32 : Pseudo<(outs gprc:$rD),
+ (ins gprc_nor0:$reg, s16imm:$disp, tlsgd32:$sym),
+ "#ADDItlsgdLADDR32",
+ [(set i32:$rD,
+ (PPCaddiTlsgdLAddr i32:$reg,
+ tglobaltlsaddr:$disp,
+ tglobaltlsaddr:$sym))]>;
+def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+ "#ADDItlsldL32",
+ [(set i32:$rD,
+ (PPCaddiTlsldL i32:$reg, tglobaltlsaddr:$disp))]>;
+// LR is a true define, while the rest of the Defs are clobbers. R3 is
+// explicitly defined when this op is created, so not mentioned here.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [R0,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
+def GETtlsldADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
+ "GETtlsldADDR32",
+ [(set i32:$rD,
+ (PPCgetTlsldAddr i32:$reg,
+ tglobaltlsaddr:$sym))]>;
+// Combined op for ADDItlsldL32 and GETtlsldADDR32, late expanded. R3 and LR
+// are true defines while the rest of the Defs are clobbers.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
+def ADDItlsldLADDR32 : Pseudo<(outs gprc:$rD),
+ (ins gprc_nor0:$reg, s16imm:$disp, tlsgd32:$sym),
+ "#ADDItlsldLADDR32",
+ [(set i32:$rD,
+ (PPCaddiTlsldLAddr i32:$reg,
+ tglobaltlsaddr:$disp,
+ tglobaltlsaddr:$sym))]>;
+def ADDIdtprelL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+ "#ADDIdtprelL32",
+ [(set i32:$rD,
+ (PPCaddiDtprelL i32:$reg, tglobaltlsaddr:$disp))]>;
+def ADDISdtprelHA32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+ "#ADDISdtprelHA32",
+ [(set i32:$rD,
+ (PPCaddisDtprelHA i32:$reg,
+ tglobaltlsaddr:$disp))]>;
+
+// Support for Position-independent code
+def LWZtoc : Pseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg),
+ "#LWZtoc",
+ [(set i32:$rD,
+ (PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>;
+// Get Global (GOT) Base Register offset, from the word immediately preceding
+// the function label.
+def UpdateGBR : Pseudo<(outs gprc:$rD, gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>;
+
+
+// Standard shifts. These are represented separately from the real shifts above
+// so that we can distinguish between shifts that allow 5-bit and 6-bit shift
+// amounts.
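+// The register forms below (slw/srw/sraw) take a 6-bit amount from the low
+// bits of $rB, so shifts of 32-63 are well defined (slw/srw yield 0, sraw
+// yields all sign bits); the 5-bit immediate forms above can never encode
+// such an amount.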
+def : Pat<(sra i32:$rS, i32:$rB),
+ (SRAW $rS, $rB)>;
+def : Pat<(srl i32:$rS, i32:$rB),
+ (SRW $rS, $rB)>;
+def : Pat<(shl i32:$rS, i32:$rB),
+ (SLW $rS, $rB)>;
+
+def : Pat<(zextloadi1 iaddr:$src),
+ (LBZ iaddr:$src)>;
+def : Pat<(zextloadi1 xaddr:$src),
+ (LBZX xaddr:$src)>;
+def : Pat<(extloadi1 iaddr:$src),
+ (LBZ iaddr:$src)>;
+def : Pat<(extloadi1 xaddr:$src),
+ (LBZX xaddr:$src)>;
+def : Pat<(extloadi8 iaddr:$src),
+ (LBZ iaddr:$src)>;
+def : Pat<(extloadi8 xaddr:$src),
+ (LBZX xaddr:$src)>;
+def : Pat<(extloadi16 iaddr:$src),
+ (LHZ iaddr:$src)>;
+def : Pat<(extloadi16 xaddr:$src),
+ (LHZX xaddr:$src)>;
+let Predicates = [HasFPU] in {
+def : Pat<(f64 (extloadf32 iaddr:$src)),
+ (COPY_TO_REGCLASS (LFS iaddr:$src), F8RC)>;
+def : Pat<(f64 (extloadf32 xaddr:$src)),
+ (COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>;
+
+def : Pat<(f64 (fpextend f32:$src)),
+ (COPY_TO_REGCLASS $src, F8RC)>;
+}
+
+// Only seq_cst fences require the heavyweight sync (SYNC 0).
+// All others can use the lightweight sync (SYNC 1).
+// source: http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
+// The rule for seq_cst is duplicated so that it works with both the 64-bit
+// and 32-bit versions of Power.
+def : Pat<(atomic_fence (i64 7), (imm)), (SYNC 0)>, Requires<[HasSYNC]>;
+def : Pat<(atomic_fence (i32 7), (imm)), (SYNC 0)>, Requires<[HasSYNC]>;
+def : Pat<(atomic_fence (imm), (imm)), (SYNC 1)>, Requires<[HasSYNC]>;
+def : Pat<(atomic_fence (imm), (imm)), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
+
+let Predicates = [HasFPU] in {
+// Additional FNMSUB patterns: -a*c + b == -(a*c - b)
+def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
+ (FNMSUB $A, $C, $B)>;
+def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B),
+ (FNMSUB $A, $C, $B)>;
+def : Pat<(fma (fneg f32:$A), f32:$C, f32:$B),
+ (FNMSUBS $A, $C, $B)>;
+def : Pat<(fma f32:$A, (fneg f32:$C), f32:$B),
+ (FNMSUBS $A, $C, $B)>;
+
+// FCOPYSIGN's operand types need not agree.
+def : Pat<(fcopysign f64:$frB, f32:$frA),
+ (FCPSGND (COPY_TO_REGCLASS $frA, F8RC), $frB)>;
+def : Pat<(fcopysign f32:$frB, f64:$frA),
+ (FCPSGNS (COPY_TO_REGCLASS $frA, F4RC), $frB)>;
+}
+
+include "PPCInstrAltivec.td"
+include "PPCInstrSPE.td"
+include "PPCInstr64Bit.td"
+include "PPCInstrVSX.td"
+include "PPCInstrQPX.td"
+include "PPCInstrHTM.td"
+
+def crnot : OutPatFrag<(ops node:$in),
+ (CRNOR $in, $in)>;
+def : Pat<(not i1:$in),
+ (crnot $in)>;
+
+// Patterns for arithmetic i1 operations.
+def : Pat<(add i1:$a, i1:$b),
+ (CRXOR $a, $b)>;
+def : Pat<(sub i1:$a, i1:$b),
+ (CRXOR $a, $b)>;
+def : Pat<(mul i1:$a, i1:$b),
+ (CRAND $a, $b)>;
+
+// We're sometimes asked to materialize i1 -1, which is just 1 in this case
+// (-1 is used to mean all bits set).
+def : Pat<(i1 -1), (CRSET)>;
+
+// i1 extensions, implemented in terms of isel.
+def : Pat<(i32 (zext i1:$in)),
+ (SELECT_I4 $in, (LI 1), (LI 0))>;
+def : Pat<(i32 (sext i1:$in)),
+ (SELECT_I4 $in, (LI -1), (LI 0))>;
+
+def : Pat<(i64 (zext i1:$in)),
+ (SELECT_I8 $in, (LI8 1), (LI8 0))>;
+def : Pat<(i64 (sext i1:$in)),
+ (SELECT_I8 $in, (LI8 -1), (LI8 0))>;
+
+// FIXME: We should choose either a zext or a sext based on other constants
+// already around.
+def : Pat<(i32 (anyext i1:$in)),
+ (SELECT_I4 $in, (LI 1), (LI 0))>;
+def : Pat<(i64 (anyext i1:$in)),
+ (SELECT_I8 $in, (LI8 1), (LI8 0))>;
+
+// match setcc on i1 variables.
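+// In the truth tables below, the signed predicates view an i1 as the
+// sign-extended values -1/0, while the unsigned predicates view it as the
+// zero-extended values 1/0; each table shows the CR-bit op computing the
+// same function.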
+// CRANDC is: +// 1 1 : F +// 1 0 : T +// 0 1 : F +// 0 0 : F +// +// LT is: +// -1 -1 : F +// -1 0 : T +// 0 -1 : F +// 0 0 : F +// +// ULT is: +// 1 1 : F +// 1 0 : F +// 0 1 : T +// 0 0 : F +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETLT)), + (CRANDC $s1, $s2)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETULT)), + (CRANDC $s2, $s1)>; +// CRORC is: +// 1 1 : T +// 1 0 : T +// 0 1 : F +// 0 0 : T +// +// LE is: +// -1 -1 : T +// -1 0 : T +// 0 -1 : F +// 0 0 : T +// +// ULE is: +// 1 1 : T +// 1 0 : F +// 0 1 : T +// 0 0 : T +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETLE)), + (CRORC $s1, $s2)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETULE)), + (CRORC $s2, $s1)>; + +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETEQ)), + (CREQV $s1, $s2)>; + +// GE is: +// -1 -1 : T +// -1 0 : F +// 0 -1 : T +// 0 0 : T +// +// UGE is: +// 1 1 : T +// 1 0 : T +// 0 1 : F +// 0 0 : T +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETGE)), + (CRORC $s2, $s1)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETUGE)), + (CRORC $s1, $s2)>; + +// GT is: +// -1 -1 : F +// -1 0 : F +// 0 -1 : T +// 0 0 : F +// +// UGT is: +// 1 1 : F +// 1 0 : T +// 0 1 : F +// 0 0 : F +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETGT)), + (CRANDC $s2, $s1)>; +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETUGT)), + (CRANDC $s1, $s2)>; + +def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETNE)), + (CRXOR $s1, $s2)>; + +// match setcc on non-i1 (non-vector) variables. Note that SETUEQ, SETOGE, +// SETOLE, SETONE, SETULT and SETUGT should be expanded by legalize for +// floating-point types. + +multiclass CRNotPat<dag pattern, dag result> { + def : Pat<pattern, (crnot result)>; + def : Pat<(not pattern), result>; + + // We can also fold the crnot into an extension: + def : Pat<(i32 (zext pattern)), + (SELECT_I4 result, (LI 0), (LI 1))>; + def : Pat<(i32 (sext pattern)), + (SELECT_I4 result, (LI 0), (LI -1))>; + + // We can also fold the crnot into an extension: + def : Pat<(i64 (zext pattern)), + (SELECT_I8 result, (LI8 0), (LI8 1))>; + def : Pat<(i64 (sext pattern)), + (SELECT_I8 result, (LI8 0), (LI8 -1))>; + + // FIXME: We should choose either a zext or a sext based on other constants + // already around. + def : Pat<(i32 (anyext pattern)), + (SELECT_I4 result, (LI 0), (LI 1))>; + + def : Pat<(i64 (anyext pattern)), + (SELECT_I8 result, (LI8 0), (LI8 1))>; +} + +// FIXME: Because of what seems like a bug in TableGen's type-inference code, +// we need to write imm:$imm in the output patterns below, not just $imm, or +// else the resulting matcher will not correctly add the immediate operand +// (making it a register operand instead). + +// extended SETCC. 
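+// For example, the SETEQ-against-zero mapping below becomes (illustrative):
+//   cntlzw r4, r3               // 32 iff r3 == 0, else 0-31
+//   rlwinm r3, r4, 27, 31, 31   // (count >> 5) & 1, i.e. 1 iff r3 was 0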
+multiclass ExtSetCCPat<CondCode cc, PatFrag pfrag, + OutPatFrag rfrag, OutPatFrag rfrag8> { + def : Pat<(i32 (zext (i1 (pfrag i32:$s1, cc)))), + (rfrag $s1)>; + def : Pat<(i64 (zext (i1 (pfrag i64:$s1, cc)))), + (rfrag8 $s1)>; + def : Pat<(i64 (zext (i1 (pfrag i32:$s1, cc)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (rfrag $s1), sub_32)>; + def : Pat<(i32 (zext (i1 (pfrag i64:$s1, cc)))), + (EXTRACT_SUBREG (rfrag8 $s1), sub_32)>; + + def : Pat<(i32 (anyext (i1 (pfrag i32:$s1, cc)))), + (rfrag $s1)>; + def : Pat<(i64 (anyext (i1 (pfrag i64:$s1, cc)))), + (rfrag8 $s1)>; + def : Pat<(i64 (anyext (i1 (pfrag i32:$s1, cc)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (rfrag $s1), sub_32)>; + def : Pat<(i32 (anyext (i1 (pfrag i64:$s1, cc)))), + (EXTRACT_SUBREG (rfrag8 $s1), sub_32)>; +} + +// Note that we do all inversions below with i(32|64)not, instead of using +// (xori x, 1) because on the A2 nor has single-cycle latency while xori +// has 2-cycle latency. + +defm : ExtSetCCPat<SETEQ, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, 0, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (CNTLZW $in), 27, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (CNTLZD $in), 58, 63)> >; + +defm : ExtSetCCPat<SETNE, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, 0, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (i32not (CNTLZW $in)), 27, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (i64not (CNTLZD $in)), 58, 63)> >; + +defm : ExtSetCCPat<SETLT, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, 0, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM $in, 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL $in, 1, 63)> >; + +defm : ExtSetCCPat<SETGE, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, 0, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (i32not $in), 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (i64not $in), 1, 63)> >; + +defm : ExtSetCCPat<SETGT, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, 0, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (ANDC (NEG $in), $in), 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (ANDC8 (NEG8 $in), $in), 1, 63)> >; + +defm : ExtSetCCPat<SETLE, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, 0, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (ORC $in, (NEG $in)), 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (ORC8 $in, (NEG8 $in)), 1, 63)> >; + +defm : ExtSetCCPat<SETLT, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, -1, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (AND $in, (ADDI $in, 1)), 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (AND8 $in, (ADDI8 $in, 1)), 1, 63)> >; + +defm : ExtSetCCPat<SETGE, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, -1, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (NAND $in, (ADDI $in, 1)), 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (NAND8 $in, (ADDI8 $in, 1)), 1, 63)> >; + +defm : ExtSetCCPat<SETGT, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, -1, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM (i32not $in), 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL (i64not $in), 1, 63)> >; + +defm : ExtSetCCPat<SETLE, + PatFrag<(ops node:$in, node:$cc), + (setcc $in, -1, $cc)>, + OutPatFrag<(ops node:$in), + (RLWINM $in, 1, 31, 31)>, + OutPatFrag<(ops node:$in), + (RLDICL $in, 1, 63)> >; + +// An extended SETCC with shift amount. 
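+// For example, the SETNE pattern below tests bit $sa of $in, i.e.
+// "(($in >> $sa) & 1) != 0", by rotating the tested bit into the low
+// position and masking it off (illustrative): rlwnm $rD, $in, 32-$sa, 31, 31.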
+multiclass ExtSetCCShiftPat<CondCode cc, PatFrag pfrag, + OutPatFrag rfrag, OutPatFrag rfrag8> { + def : Pat<(i32 (zext (i1 (pfrag i32:$s1, i32:$sa, cc)))), + (rfrag $s1, $sa)>; + def : Pat<(i64 (zext (i1 (pfrag i64:$s1, i32:$sa, cc)))), + (rfrag8 $s1, $sa)>; + def : Pat<(i64 (zext (i1 (pfrag i32:$s1, i32:$sa, cc)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (rfrag $s1, $sa), sub_32)>; + def : Pat<(i32 (zext (i1 (pfrag i64:$s1, i32:$sa, cc)))), + (EXTRACT_SUBREG (rfrag8 $s1, $sa), sub_32)>; + + def : Pat<(i32 (anyext (i1 (pfrag i32:$s1, i32:$sa, cc)))), + (rfrag $s1, $sa)>; + def : Pat<(i64 (anyext (i1 (pfrag i64:$s1, i32:$sa, cc)))), + (rfrag8 $s1, $sa)>; + def : Pat<(i64 (anyext (i1 (pfrag i32:$s1, i32:$sa, cc)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (rfrag $s1, $sa), sub_32)>; + def : Pat<(i32 (anyext (i1 (pfrag i64:$s1, i32:$sa, cc)))), + (EXTRACT_SUBREG (rfrag8 $s1, $sa), sub_32)>; +} + +defm : ExtSetCCShiftPat<SETNE, + PatFrag<(ops node:$in, node:$sa, node:$cc), + (setcc (and $in, (shl 1, $sa)), 0, $cc)>, + OutPatFrag<(ops node:$in, node:$sa), + (RLWNM $in, (SUBFIC $sa, 32), 31, 31)>, + OutPatFrag<(ops node:$in, node:$sa), + (RLDCL $in, (SUBFIC $sa, 64), 63)> >; + +defm : ExtSetCCShiftPat<SETEQ, + PatFrag<(ops node:$in, node:$sa, node:$cc), + (setcc (and $in, (shl 1, $sa)), 0, $cc)>, + OutPatFrag<(ops node:$in, node:$sa), + (RLWNM (i32not $in), + (SUBFIC $sa, 32), 31, 31)>, + OutPatFrag<(ops node:$in, node:$sa), + (RLDCL (i64not $in), + (SUBFIC $sa, 64), 63)> >; + +// SETCC for i32. +def : Pat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULT)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>; +def : Pat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETLT)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_lt)>; +def : Pat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGT)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_gt)>; +def : Pat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETGT)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_gt)>; +def : Pat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETEQ)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_eq)>; +def : Pat<(i1 (setcc i32:$s1, immZExt16:$imm, SETEQ)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_eq)>; + +// For non-equality comparisons, the default code would materialize the +// constant, then compare against it, like this: +// lis r2, 4660 +// ori r2, r2, 22136 +// cmpw cr0, r3, r2 +// beq cr0,L6 +// Since we are just comparing for equality, we can emit this instead: +// xoris r0,r3,0x1234 +// cmplwi cr0,r0,0x5678 +// beq cr0,L6 + +def : Pat<(i1 (setcc i32:$s1, imm:$imm, SETEQ)), + (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)), + (LO16 imm:$imm)), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGE)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETGE)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULE)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETLE)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETNE)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_eq)>; +defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETNE)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i32:$s1, imm:$imm, SETNE)), + (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)), + (LO16 imm:$imm)), sub_eq)>; + +def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETULT)), + (EXTRACT_SUBREG (CMPLW 
$s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETLT)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETUGT)), + (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETGT)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETEQ)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETUGE)), + (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETGE)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETULE)), + (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETLE)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETNE)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>; + +// SETCC for i64. +def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETULT)), + (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_lt)>; +def : Pat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETLT)), + (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_lt)>; +def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETUGT)), + (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_gt)>; +def : Pat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETGT)), + (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_gt)>; +def : Pat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETEQ)), + (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_eq)>; +def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETEQ)), + (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_eq)>; + +// For non-equality comparisons, the default code would materialize the +// constant, then compare against it, like this: +// lis r2, 4660 +// ori r2, r2, 22136 +// cmpd cr0, r3, r2 +// beq cr0,L6 +// Since we are just comparing for equality, we can emit this instead: +// xoris r0,r3,0x1234 +// cmpldi cr0,r0,0x5678 +// beq cr0,L6 + +def : Pat<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETEQ)), + (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)), + (LO16 imm:$imm)), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETUGE)), + (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_lt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETGE)), + (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_lt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETULE)), + (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_gt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETLE)), + (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_gt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETNE)), + (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_eq)>; +defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETNE)), + (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETNE)), + (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)), + (LO16 imm:$imm)), sub_eq)>; + +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETULT)), + (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETLT)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETUGT)), + (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETGT)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETEQ)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETUGE)), + (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETGE)), + 
(EXTRACT_SUBREG (CMPD $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETULE)), + (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETLE)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETNE)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>; + +// SETCC for f32. +let Predicates = [HasFPU] in { +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOLT)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETLT)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOGT)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETGT)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOEQ)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETEQ)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETUO)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>; + +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUGE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETGE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETULE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETLE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUNE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETNE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETO)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>; + +// SETCC for f64. +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOLT)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETLT)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOGT)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETGT)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOEQ)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETEQ)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETUO)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>; + +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUGE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETGE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETULE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETLE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUNE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETNE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETO)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>; + +// SETCC for f128. 
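+// These use the ISA 3.0 xscmpuqp (VSX scalar compare unordered quad-precision)
+// and extract the resulting CR bits exactly as the f32/f64 patterns above do.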
+def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETOLT)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETLT)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETOGT)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETGT)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETOEQ)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>; +def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETEQ)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>; +def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETUO)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_un)>; + +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUGE)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETGE)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETULE)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETLE)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUNE)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETNE)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETO)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_un)>; + +} + +// This must be in this file because it relies on patterns defined in this file +// after the inclusion of the instruction sets. +let Predicates = [HasSPE] in { +// SETCC for f32. +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOLT)), + (EXTRACT_SUBREG (EFSCMPLT $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETLT)), + (EXTRACT_SUBREG (EFSCMPLT $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOGT)), + (EXTRACT_SUBREG (EFSCMPGT $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETGT)), + (EXTRACT_SUBREG (EFSCMPGT $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOEQ)), + (EXTRACT_SUBREG (EFSCMPEQ $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETEQ)), + (EXTRACT_SUBREG (EFSCMPEQ $s1, $s2), sub_gt)>; + +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUGE)), + (EXTRACT_SUBREG (EFSCMPLT $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETGE)), + (EXTRACT_SUBREG (EFSCMPLT $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETULE)), + (EXTRACT_SUBREG (EFSCMPGT $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETLE)), + (EXTRACT_SUBREG (EFSCMPGT $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUNE)), + (EXTRACT_SUBREG (EFSCMPEQ $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETNE)), + (EXTRACT_SUBREG (EFSCMPEQ $s1, $s2), sub_gt)>; + +// SETCC for f64. 
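+// SPE compares record their result in the GT bit of the target CR field,
+// which is why every pattern in this block (and for f32 above) extracts
+// sub_gt regardless of the predicate.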
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOLT)), + (EXTRACT_SUBREG (EFDCMPLT $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETLT)), + (EXTRACT_SUBREG (EFDCMPLT $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOGT)), + (EXTRACT_SUBREG (EFDCMPGT $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETGT)), + (EXTRACT_SUBREG (EFDCMPGT $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOEQ)), + (EXTRACT_SUBREG (EFDCMPEQ $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETEQ)), + (EXTRACT_SUBREG (EFDCMPEQ $s1, $s2), sub_gt)>; + +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUGE)), + (EXTRACT_SUBREG (EFDCMPLT $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETGE)), + (EXTRACT_SUBREG (EFDCMPLT $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETULE)), + (EXTRACT_SUBREG (EFDCMPGT $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETLE)), + (EXTRACT_SUBREG (EFDCMPGT $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUNE)), + (EXTRACT_SUBREG (EFDCMPEQ $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETNE)), + (EXTRACT_SUBREG (EFDCMPEQ $s1, $s2), sub_gt)>; +} +// match select on i1 variables: +def : Pat<(i1 (select i1:$cond, i1:$tval, i1:$fval)), + (CROR (CRAND $cond , $tval), + (CRAND (crnot $cond), $fval))>; + +// match selectcc on i1 variables: +// select (lhs == rhs), tval, fval is: +// ((lhs == rhs) & tval) | (!(lhs == rhs) & fval) +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETLT)), + (CROR (CRAND (CRANDC $lhs, $rhs), $tval), + (CRAND (CRORC $rhs, $lhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETULT)), + (CROR (CRAND (CRANDC $rhs, $lhs), $tval), + (CRAND (CRORC $lhs, $rhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETLE)), + (CROR (CRAND (CRORC $lhs, $rhs), $tval), + (CRAND (CRANDC $rhs, $lhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETULE)), + (CROR (CRAND (CRORC $rhs, $lhs), $tval), + (CRAND (CRANDC $lhs, $rhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETEQ)), + (CROR (CRAND (CREQV $lhs, $rhs), $tval), + (CRAND (CRXOR $lhs, $rhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETGE)), + (CROR (CRAND (CRORC $rhs, $lhs), $tval), + (CRAND (CRANDC $lhs, $rhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETUGE)), + (CROR (CRAND (CRORC $lhs, $rhs), $tval), + (CRAND (CRANDC $rhs, $lhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETGT)), + (CROR (CRAND (CRANDC $rhs, $lhs), $tval), + (CRAND (CRORC $lhs, $rhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETUGT)), + (CROR (CRAND (CRANDC $lhs, $rhs), $tval), + (CRAND (CRORC $rhs, $lhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETNE)), + (CROR (CRAND (CREQV $lhs, $rhs), $fval), + (CRAND (CRXOR $lhs, $rhs), $tval))>; + +// match selectcc on i1 variables with non-i1 output. 
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETLT)), + (SELECT_I4 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETULT)), + (SELECT_I4 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETLE)), + (SELECT_I4 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETULE)), + (SELECT_I4 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETEQ)), + (SELECT_I4 (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETGE)), + (SELECT_I4 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETUGE)), + (SELECT_I4 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETGT)), + (SELECT_I4 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETUGT)), + (SELECT_I4 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETNE)), + (SELECT_I4 (CRXOR $lhs, $rhs), $tval, $fval)>; + +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETLT)), + (SELECT_I8 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETULT)), + (SELECT_I8 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETLE)), + (SELECT_I8 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETULE)), + (SELECT_I8 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETEQ)), + (SELECT_I8 (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETGE)), + (SELECT_I8 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETUGE)), + (SELECT_I8 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETGT)), + (SELECT_I8 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETUGT)), + (SELECT_I8 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETNE)), + (SELECT_I8 (CRXOR $lhs, $rhs), $tval, $fval)>; + +let Predicates = [HasFPU] in { +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)), + (SELECT_F4 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)), + (SELECT_F4 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)), + (SELECT_F4 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULE)), + (SELECT_F4 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)), + (SELECT_F4 (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)), + (SELECT_F4 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGE)), + (SELECT_F4 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)), + (SELECT_F4 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f32 
(selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGT)), + (SELECT_F4 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)), + (SELECT_F4 (CRXOR $lhs, $rhs), $tval, $fval)>; + +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)), + (SELECT_F8 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULT)), + (SELECT_F8 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)), + (SELECT_F8 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULE)), + (SELECT_F8 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)), + (SELECT_F8 (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)), + (SELECT_F8 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGE)), + (SELECT_F8 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)), + (SELECT_F8 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)), + (SELECT_F8 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)), + (SELECT_F8 (CRXOR $lhs, $rhs), $tval, $fval)>; +} + +def : Pat<(f128 (selectcc i1:$lhs, i1:$rhs, f128:$tval, f128:$fval, SETLT)), + (SELECT_F16 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f128 (selectcc i1:$lhs, i1:$rhs, f128:$tval, f128:$fval, SETULT)), + (SELECT_F16 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f128 (selectcc i1:$lhs, i1:$rhs, f128:$tval, f128:$fval, SETLE)), + (SELECT_F16 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f128 (selectcc i1:$lhs, i1:$rhs, f128:$tval, f128:$fval, SETULE)), + (SELECT_F16 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f128 (selectcc i1:$lhs, i1:$rhs, f128:$tval, f128:$fval, SETEQ)), + (SELECT_F16 (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(f128 (selectcc i1:$lhs, i1:$rhs, f128:$tval, f128:$fval, SETGE)), + (SELECT_F16 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f128 (selectcc i1:$lhs, i1:$rhs, f128:$tval, f128:$fval, SETUGE)), + (SELECT_F16 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f128 (selectcc i1:$lhs, i1:$rhs, f128:$tval, f128:$fval, SETGT)), + (SELECT_F16 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f128 (selectcc i1:$lhs, i1:$rhs, f128:$tval, f128:$fval, SETUGT)), + (SELECT_F16 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f128 (selectcc i1:$lhs, i1:$rhs, f128:$tval, f128:$fval, SETNE)), + (SELECT_F16 (CRXOR $lhs, $rhs), $tval, $fval)>; + +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETLT)), + (SELECT_VRRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETULT)), + (SELECT_VRRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETLE)), + (SELECT_VRRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETULE)), + (SELECT_VRRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETEQ)), + (SELECT_VRRC (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETGE)), + (SELECT_VRRC (CRORC $rhs, 
$lhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETUGE)), + (SELECT_VRRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETGT)), + (SELECT_VRRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETUGT)), + (SELECT_VRRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETNE)), + (SELECT_VRRC (CRXOR $lhs, $rhs), $tval, $fval)>; + +let usesCustomInserter = 1 in { +def ANDIo_1_EQ_BIT : Pseudo<(outs crbitrc:$dst), (ins gprc:$in), + "#ANDIo_1_EQ_BIT", + [(set i1:$dst, (trunc (not i32:$in)))]>; +def ANDIo_1_GT_BIT : Pseudo<(outs crbitrc:$dst), (ins gprc:$in), + "#ANDIo_1_GT_BIT", + [(set i1:$dst, (trunc i32:$in))]>; + +def ANDIo_1_EQ_BIT8 : Pseudo<(outs crbitrc:$dst), (ins g8rc:$in), + "#ANDIo_1_EQ_BIT8", + [(set i1:$dst, (trunc (not i64:$in)))]>; +def ANDIo_1_GT_BIT8 : Pseudo<(outs crbitrc:$dst), (ins g8rc:$in), + "#ANDIo_1_GT_BIT8", + [(set i1:$dst, (trunc i64:$in))]>; +} + +def : Pat<(i1 (not (trunc i32:$in))), + (ANDIo_1_EQ_BIT $in)>; +def : Pat<(i1 (not (trunc i64:$in))), + (ANDIo_1_EQ_BIT8 $in)>; + +//===----------------------------------------------------------------------===// +// PowerPC Instructions used for assembler/disassembler only +// + +// FIXME: For B=0 or B > 8, the registers following RT are used. +// WARNING: Do not add patterns for this instruction without fixing this. +def LSWI : XForm_base_r3xo_memOp<31, 597, (outs gprc:$RT), + (ins gprc:$A, u5imm:$B), + "lswi $RT, $A, $B", IIC_LdStLoad, []>; + +// FIXME: For B=0 or B > 8, the registers following RT are used. +// WARNING: Do not add patterns for this instruction without fixing this. 
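+// (Per the Power ISA, lswi/stswi move ceil(n/4) consecutive GPRs starting at
+// RT, where n is the byte count and n = 32 when B = 0, so the single $RT
+// operand understates the registers actually read or written.)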
+def STSWI : XForm_base_r3xo_memOp<31, 725, (outs), + (ins gprc:$RT, gprc:$A, u5imm:$B), + "stswi $RT, $A, $B", IIC_LdStLoad, []>; + +def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins), + "isync", IIC_SprISYNC, []>; + +def ICBI : XForm_1a<31, 982, (outs), (ins memrr:$src), + "icbi $src", IIC_LdStICBI, []>; + +// We used to have EIEIO as value but E[0-9A-Z] is a reserved name +def EnforceIEIO : XForm_24_eieio<31, 854, (outs), (ins), + "eieio", IIC_LdStLoad, []>; + +def WAIT : XForm_24_sync<31, 30, (outs), (ins i32imm:$L), + "wait $L", IIC_LdStLoad, []>; + +def MBAR : XForm_mbar<31, 854, (outs), (ins u5imm:$MO), + "mbar $MO", IIC_LdStLoad>, Requires<[IsBookE]>; + +def MTSR: XForm_sr<31, 210, (outs), (ins gprc:$RS, u4imm:$SR), + "mtsr $SR, $RS", IIC_SprMTSR>; + +def MFSR: XForm_sr<31, 595, (outs gprc:$RS), (ins u4imm:$SR), + "mfsr $RS, $SR", IIC_SprMFSR>; + +def MTSRIN: XForm_srin<31, 242, (outs), (ins gprc:$RS, gprc:$RB), + "mtsrin $RS, $RB", IIC_SprMTSR>; + +def MFSRIN: XForm_srin<31, 659, (outs gprc:$RS), (ins gprc:$RB), + "mfsrin $RS, $RB", IIC_SprMFSR>; + +def MTMSR: XForm_mtmsr<31, 146, (outs), (ins gprc:$RS, i32imm:$L), + "mtmsr $RS, $L", IIC_SprMTMSR>; + +def WRTEE: XForm_mtmsr<31, 131, (outs), (ins gprc:$RS), + "wrtee $RS", IIC_SprMTMSR>, Requires<[IsBookE]> { + let L = 0; +} + +def WRTEEI: I<31, (outs), (ins i1imm:$E), "wrteei $E", IIC_SprMTMSR>, + Requires<[IsBookE]> { + bits<1> E; + + let Inst{16} = E; + let Inst{21-30} = 163; +} + +def DCCCI : XForm_tlb<454, (outs), (ins gprc:$A, gprc:$B), + "dccci $A, $B", IIC_LdStLoad>, Requires<[IsPPC4xx]>; +def ICCCI : XForm_tlb<966, (outs), (ins gprc:$A, gprc:$B), + "iccci $A, $B", IIC_LdStLoad>, Requires<[IsPPC4xx]>; + +def : InstAlias<"dci 0", (DCCCI R0, R0)>, Requires<[IsPPC4xx]>; +def : InstAlias<"dccci", (DCCCI R0, R0)>, Requires<[IsPPC4xx]>; +def : InstAlias<"ici 0", (ICCCI R0, R0)>, Requires<[IsPPC4xx]>; +def : InstAlias<"iccci", (ICCCI R0, R0)>, Requires<[IsPPC4xx]>; + +def MFMSR : XForm_rs<31, 83, (outs gprc:$RT), (ins), + "mfmsr $RT", IIC_SprMFMSR, []>; + +def MTMSRD : XForm_mtmsr<31, 178, (outs), (ins gprc:$RS, i32imm:$L), + "mtmsrd $RS, $L", IIC_SprMTMSRD>; + +def MCRFS : XLForm_3<63, 64, (outs crrc:$BF), (ins crrc:$BFA), + "mcrfs $BF, $BFA", IIC_BrMCR>; + +def MTFSFI : XLForm_4<63, 134, (outs crrc:$BF), (ins i32imm:$U, i32imm:$W), + "mtfsfi $BF, $U, $W", IIC_IntMFFS>; + +def MTFSFIo : XLForm_4<63, 134, (outs crrc:$BF), (ins i32imm:$U, i32imm:$W), + "mtfsfi. $BF, $U, $W", IIC_IntMFFS>, isDOT; + +def : InstAlias<"mtfsfi $BF, $U", (MTFSFI crrc:$BF, i32imm:$U, 0)>; +def : InstAlias<"mtfsfi. $BF, $U", (MTFSFIo crrc:$BF, i32imm:$U, 0)>; + +let Predicates = [HasFPU] in { +def MTFSF : XFLForm_1<63, 711, (outs), + (ins i32imm:$FLM, f8rc:$FRB, i32imm:$L, i32imm:$W), + "mtfsf $FLM, $FRB, $L, $W", IIC_IntMFFS, []>; +def MTFSFo : XFLForm_1<63, 711, (outs), + (ins i32imm:$FLM, f8rc:$FRB, i32imm:$L, i32imm:$W), + "mtfsf. $FLM, $FRB, $L, $W", IIC_IntMFFS, []>, isDOT; + +def : InstAlias<"mtfsf $FLM, $FRB", (MTFSF i32imm:$FLM, f8rc:$FRB, 0, 0)>; +def : InstAlias<"mtfsf. 
$FLM, $FRB", (MTFSFo i32imm:$FLM, f8rc:$FRB, 0, 0)>; +} + +def SLBIE : XForm_16b<31, 434, (outs), (ins gprc:$RB), + "slbie $RB", IIC_SprSLBIE, []>; + +def SLBMTE : XForm_26<31, 402, (outs), (ins gprc:$RS, gprc:$RB), + "slbmte $RS, $RB", IIC_SprSLBMTE, []>; + +def SLBMFEE : XForm_26<31, 915, (outs gprc:$RT), (ins gprc:$RB), + "slbmfee $RT, $RB", IIC_SprSLBMFEE, []>; + +def SLBMFEV : XLForm_1_gen<31, 851, (outs gprc:$RT), (ins gprc:$RB), + "slbmfev $RT, $RB", IIC_SprSLBMFEV, []>; + +def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", IIC_SprSLBIA, []>; + +def TLBIA : XForm_0<31, 370, (outs), (ins), + "tlbia", IIC_SprTLBIA, []>; + +def TLBSYNC : XForm_0<31, 566, (outs), (ins), + "tlbsync", IIC_SprTLBSYNC, []>; + +def TLBIEL : XForm_16b<31, 274, (outs), (ins gprc:$RB), + "tlbiel $RB", IIC_SprTLBIEL, []>; + +def TLBLD : XForm_16b<31, 978, (outs), (ins gprc:$RB), + "tlbld $RB", IIC_LdStLoad, []>, Requires<[IsPPC6xx]>; +def TLBLI : XForm_16b<31, 1010, (outs), (ins gprc:$RB), + "tlbli $RB", IIC_LdStLoad, []>, Requires<[IsPPC6xx]>; + +def TLBIE : XForm_26<31, 306, (outs), (ins gprc:$RS, gprc:$RB), + "tlbie $RB,$RS", IIC_SprTLBIE, []>; + +def TLBSX : XForm_tlb<914, (outs), (ins gprc:$A, gprc:$B), "tlbsx $A, $B", + IIC_LdStLoad>, Requires<[IsBookE]>; + +def TLBIVAX : XForm_tlb<786, (outs), (ins gprc:$A, gprc:$B), "tlbivax $A, $B", + IIC_LdStLoad>, Requires<[IsBookE]>; + +def TLBRE : XForm_24_eieio<31, 946, (outs), (ins), + "tlbre", IIC_LdStLoad, []>, Requires<[IsBookE]>; + +def TLBWE : XForm_24_eieio<31, 978, (outs), (ins), + "tlbwe", IIC_LdStLoad, []>, Requires<[IsBookE]>; + +def TLBRE2 : XForm_tlbws<31, 946, (outs gprc:$RS), (ins gprc:$A, i1imm:$WS), + "tlbre $RS, $A, $WS", IIC_LdStLoad, []>, Requires<[IsPPC4xx]>; + +def TLBWE2 : XForm_tlbws<31, 978, (outs), (ins gprc:$RS, gprc:$A, i1imm:$WS), + "tlbwe $RS, $A, $WS", IIC_LdStLoad, []>, Requires<[IsPPC4xx]>; + +def TLBSX2 : XForm_base_r3xo<31, 914, (outs), (ins gprc:$RST, gprc:$A, gprc:$B), + "tlbsx $RST, $A, $B", IIC_LdStLoad, []>, + Requires<[IsPPC4xx]>; +def TLBSX2D : XForm_base_r3xo<31, 914, (outs), + (ins gprc:$RST, gprc:$A, gprc:$B), + "tlbsx. 
$RST, $A, $B", IIC_LdStLoad, []>, + Requires<[IsPPC4xx]>, isDOT; + +def RFID : XForm_0<19, 18, (outs), (ins), "rfid", IIC_IntRFID, []>; + +def RFI : XForm_0<19, 50, (outs), (ins), "rfi", IIC_SprRFI, []>, + Requires<[IsBookE]>; +def RFCI : XForm_0<19, 51, (outs), (ins), "rfci", IIC_BrB, []>, + Requires<[IsBookE]>; + +def RFDI : XForm_0<19, 39, (outs), (ins), "rfdi", IIC_BrB, []>, + Requires<[IsE500]>; +def RFMCI : XForm_0<19, 38, (outs), (ins), "rfmci", IIC_BrB, []>, + Requires<[IsE500]>; + +def MFDCR : XFXForm_1<31, 323, (outs gprc:$RT), (ins i32imm:$SPR), + "mfdcr $RT, $SPR", IIC_SprMFSPR>, Requires<[IsPPC4xx]>; +def MTDCR : XFXForm_1<31, 451, (outs), (ins gprc:$RT, i32imm:$SPR), + "mtdcr $SPR, $RT", IIC_SprMTSPR>, Requires<[IsPPC4xx]>; + +def HRFID : XLForm_1_np<19, 274, (outs), (ins), "hrfid", IIC_BrB, []>; +def NAP : XLForm_1_np<19, 434, (outs), (ins), "nap", IIC_BrB, []>; + +def ATTN : XForm_attn<0, 256, (outs), (ins), "attn", IIC_BrB>; + +def LBZCIX : XForm_base_r3xo_memOp<31, 853, (outs gprc:$RST), + (ins gprc:$A, gprc:$B), + "lbzcix $RST, $A, $B", IIC_LdStLoad, []>; +def LHZCIX : XForm_base_r3xo_memOp<31, 821, (outs gprc:$RST), + (ins gprc:$A, gprc:$B), + "lhzcix $RST, $A, $B", IIC_LdStLoad, []>; +def LWZCIX : XForm_base_r3xo_memOp<31, 789, (outs gprc:$RST), + (ins gprc:$A, gprc:$B), + "lwzcix $RST, $A, $B", IIC_LdStLoad, []>; +def LDCIX : XForm_base_r3xo_memOp<31, 885, (outs gprc:$RST), + (ins gprc:$A, gprc:$B), + "ldcix $RST, $A, $B", IIC_LdStLoad, []>; + +def STBCIX : XForm_base_r3xo_memOp<31, 981, (outs), + (ins gprc:$RST, gprc:$A, gprc:$B), + "stbcix $RST, $A, $B", IIC_LdStLoad, []>; +def STHCIX : XForm_base_r3xo_memOp<31, 949, (outs), + (ins gprc:$RST, gprc:$A, gprc:$B), + "sthcix $RST, $A, $B", IIC_LdStLoad, []>; +def STWCIX : XForm_base_r3xo_memOp<31, 917, (outs), + (ins gprc:$RST, gprc:$A, gprc:$B), + "stwcix $RST, $A, $B", IIC_LdStLoad, []>; +def STDCIX : XForm_base_r3xo_memOp<31, 1013, (outs), + (ins gprc:$RST, gprc:$A, gprc:$B), + "stdcix $RST, $A, $B", IIC_LdStLoad, []>; + +// External PID Load Store Instructions + +def LBEPX : XForm_1<31, 95, (outs gprc:$rD), (ins memrr:$src), + "lbepx $rD, $src", IIC_LdStLoad, []>, + Requires<[IsE500]>; + +def LFDEPX : XForm_25<31, 607, (outs f8rc:$frD), (ins memrr:$src), + "lfdepx $frD, $src", IIC_LdStLFD, []>, + Requires<[IsE500]>; + +def LHEPX : XForm_1<31, 287, (outs gprc:$rD), (ins memrr:$src), + "lhepx $rD, $src", IIC_LdStLoad, []>, + Requires<[IsE500]>; + +def LWEPX : XForm_1<31, 31, (outs gprc:$rD), (ins memrr:$src), + "lwepx $rD, $src", IIC_LdStLoad, []>, + Requires<[IsE500]>; + +def STBEPX : XForm_8<31, 223, (outs), (ins gprc:$rS, memrr:$dst), + "stbepx $rS, $dst", IIC_LdStStore, []>, + Requires<[IsE500]>; + +def STFDEPX : XForm_28_memOp<31, 735, (outs), (ins f8rc:$frS, memrr:$dst), + "stfdepx $frS, $dst", IIC_LdStSTFD, []>, + Requires<[IsE500]>; + +def STHEPX : XForm_8<31, 415, (outs), (ins gprc:$rS, memrr:$dst), + "sthepx $rS, $dst", IIC_LdStStore, []>, + Requires<[IsE500]>; + +def STWEPX : XForm_8<31, 159, (outs), (ins gprc:$rS, memrr:$dst), + "stwepx $rS, $dst", IIC_LdStStore, []>, + Requires<[IsE500]>; + +def DCBFEP : DCB_Form<127, 0, (outs), (ins memrr:$dst), "dcbfep $dst", + IIC_LdStDCBF, []>, Requires<[IsE500]>; + +def DCBSTEP : DCB_Form<63, 0, (outs), (ins memrr:$dst), "dcbstep $dst", + IIC_LdStDCBF, []>, Requires<[IsE500]>; + +def DCBTEP : DCB_Form_hint<319, (outs), (ins memrr:$dst, u5imm:$TH), + "dcbtep $TH, $dst", IIC_LdStDCBF, []>, + Requires<[IsE500]>; + +def DCBTSTEP : DCB_Form_hint<255, (outs), (ins 
memrr:$dst, u5imm:$TH), + "dcbtstep $TH, $dst", IIC_LdStDCBF, []>, + Requires<[IsE500]>; + +def DCBZEP : DCB_Form<1023, 0, (outs), (ins memrr:$dst), "dcbzep $dst", + IIC_LdStDCBF, []>, Requires<[IsE500]>; + +def DCBZLEP : DCB_Form<1023, 1, (outs), (ins memrr:$dst), "dcbzlep $dst", + IIC_LdStDCBF, []>, Requires<[IsE500]>; + +def ICBIEP : XForm_1a<31, 991, (outs), (ins memrr:$src), "icbiep $src", + IIC_LdStICBI, []>, Requires<[IsE500]>; + +//===----------------------------------------------------------------------===// +// PowerPC Assembler Instruction Aliases +// + +// Pseudo-instructions for alternate assembly syntax (never used by codegen). +// These are aliases that require C++ handling to convert to the target +// instruction, while InstAliases can be handled directly by tblgen. +class PPCAsmPseudo<string asm, dag iops> + : Instruction { + let Namespace = "PPC"; + bit PPC64 = 0; // Default value, override with isPPC64 + + let OutOperandList = (outs); + let InOperandList = iops; + let Pattern = []; + let AsmString = asm; + let isAsmParserOnly = 1; + let isPseudo = 1; + let hasNoSchedulingInfo = 1; +} + +def : InstAlias<"sc", (SC 0)>; + +def : InstAlias<"sync", (SYNC 0)>, Requires<[HasSYNC]>; +def : InstAlias<"msync", (SYNC 0), 0>, Requires<[HasSYNC]>; +def : InstAlias<"lwsync", (SYNC 1)>, Requires<[HasSYNC]>; +def : InstAlias<"ptesync", (SYNC 2)>, Requires<[HasSYNC]>; + +def : InstAlias<"wait", (WAIT 0)>; +def : InstAlias<"waitrsv", (WAIT 1)>; +def : InstAlias<"waitimpl", (WAIT 2)>; + +def : InstAlias<"mbar", (MBAR 0)>, Requires<[IsBookE]>; + +def DCBTx : PPCAsmPseudo<"dcbt $dst", (ins memrr:$dst)>; +def DCBTSTx : PPCAsmPseudo<"dcbtst $dst", (ins memrr:$dst)>; + +def DCBTCT : PPCAsmPseudo<"dcbtct $dst, $TH", (ins memrr:$dst, u5imm:$TH)>; +def DCBTDS : PPCAsmPseudo<"dcbtds $dst, $TH", (ins memrr:$dst, u5imm:$TH)>; +def DCBTT : PPCAsmPseudo<"dcbtt $dst", (ins memrr:$dst)>; + +def DCBTSTCT : PPCAsmPseudo<"dcbtstct $dst, $TH", (ins memrr:$dst, u5imm:$TH)>; +def DCBTSTDS : PPCAsmPseudo<"dcbtstds $dst, $TH", (ins memrr:$dst, u5imm:$TH)>; +def DCBTSTT : PPCAsmPseudo<"dcbtstt $dst", (ins memrr:$dst)>; + +def DCBFx : PPCAsmPseudo<"dcbf $dst", (ins memrr:$dst)>; +def DCBFL : PPCAsmPseudo<"dcbfl $dst", (ins memrr:$dst)>; +def DCBFLP : PPCAsmPseudo<"dcbflp $dst", (ins memrr:$dst)>; + +def : InstAlias<"crset $bx", (CREQV crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>; +def : InstAlias<"crclr $bx", (CRXOR crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>; +def : InstAlias<"crmove $bx, $by", (CROR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>; +def : InstAlias<"crnot $bx, $by", (CRNOR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>; + +def : InstAlias<"mtxer $Rx", (MTSPR 1, gprc:$Rx)>; +def : InstAlias<"mfxer $Rx", (MFSPR gprc:$Rx, 1)>; + +def : InstAlias<"mfrtcu $Rx", (MFSPR gprc:$Rx, 4)>; +def : InstAlias<"mfrtcl $Rx", (MFSPR gprc:$Rx, 5)>; + +def : InstAlias<"mtdscr $Rx", (MTSPR 17, gprc:$Rx)>; +def : InstAlias<"mfdscr $Rx", (MFSPR gprc:$Rx, 17)>; + +def : InstAlias<"mtdsisr $Rx", (MTSPR 18, gprc:$Rx)>; +def : InstAlias<"mfdsisr $Rx", (MFSPR gprc:$Rx, 18)>; + +def : InstAlias<"mtdar $Rx", (MTSPR 19, gprc:$Rx)>; +def : InstAlias<"mfdar $Rx", (MFSPR gprc:$Rx, 19)>; + +def : InstAlias<"mtdec $Rx", (MTSPR 22, gprc:$Rx)>; +def : InstAlias<"mfdec $Rx", (MFSPR gprc:$Rx, 22)>; + +def : InstAlias<"mtsdr1 $Rx", (MTSPR 25, gprc:$Rx)>; +def : InstAlias<"mfsdr1 $Rx", (MFSPR gprc:$Rx, 25)>; + +def : InstAlias<"mtsrr0 $Rx", (MTSPR 26, gprc:$Rx)>; +def : InstAlias<"mfsrr0 $Rx", (MFSPR gprc:$Rx, 26)>; + +def : InstAlias<"mtsrr1 $Rx", (MTSPR 27, 
gprc:$Rx)>; +def : InstAlias<"mfsrr1 $Rx", (MFSPR gprc:$Rx, 27)>; + +def : InstAlias<"mtsrr2 $Rx", (MTSPR 990, gprc:$Rx)>, Requires<[IsPPC4xx]>; +def : InstAlias<"mfsrr2 $Rx", (MFSPR gprc:$Rx, 990)>, Requires<[IsPPC4xx]>; + +def : InstAlias<"mtsrr3 $Rx", (MTSPR 991, gprc:$Rx)>, Requires<[IsPPC4xx]>; +def : InstAlias<"mfsrr3 $Rx", (MFSPR gprc:$Rx, 991)>, Requires<[IsPPC4xx]>; + +def : InstAlias<"mtcfar $Rx", (MTSPR 28, gprc:$Rx)>; +def : InstAlias<"mfcfar $Rx", (MFSPR gprc:$Rx, 28)>; + +def : InstAlias<"mtamr $Rx", (MTSPR 29, gprc:$Rx)>; +def : InstAlias<"mfamr $Rx", (MFSPR gprc:$Rx, 29)>; + +def : InstAlias<"mtpid $Rx", (MTSPR 48, gprc:$Rx)>, Requires<[IsBookE]>; +def : InstAlias<"mfpid $Rx", (MFSPR gprc:$Rx, 48)>, Requires<[IsBookE]>; + +def : InstAlias<"mftb $Rx", (MFTB gprc:$Rx, 268)>; +def : InstAlias<"mftbl $Rx", (MFTB gprc:$Rx, 268)>; +def : InstAlias<"mftbu $Rx", (MFTB gprc:$Rx, 269)>; + +def : InstAlias<"mttbl $Rx", (MTSPR 284, gprc:$Rx)>; +def : InstAlias<"mttbu $Rx", (MTSPR 285, gprc:$Rx)>; + +def : InstAlias<"mftblo $Rx", (MFSPR gprc:$Rx, 989)>, Requires<[IsPPC4xx]>; +def : InstAlias<"mttblo $Rx", (MTSPR 989, gprc:$Rx)>, Requires<[IsPPC4xx]>; +def : InstAlias<"mftbhi $Rx", (MFSPR gprc:$Rx, 988)>, Requires<[IsPPC4xx]>; +def : InstAlias<"mttbhi $Rx", (MTSPR 988, gprc:$Rx)>, Requires<[IsPPC4xx]>; + +def : InstAlias<"xnop", (XORI R0, R0, 0)>; + +def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>; +def : InstAlias<"mr. $rA, $rB", (OR8o g8rc:$rA, g8rc:$rB, g8rc:$rB)>; + +def : InstAlias<"not $rA, $rB", (NOR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>; +def : InstAlias<"not. $rA, $rB", (NOR8o g8rc:$rA, g8rc:$rB, g8rc:$rB)>; + +def : InstAlias<"mtcr $rA", (MTCRF8 255, g8rc:$rA)>; + +foreach BATR = 0-3 in { + def : InstAlias<"mtdbatu "#BATR#", $Rx", + (MTSPR !add(BATR, !add(BATR, 536)), gprc:$Rx)>, + Requires<[IsPPC6xx]>; + def : InstAlias<"mfdbatu $Rx, "#BATR, + (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 536)))>, + Requires<[IsPPC6xx]>; + def : InstAlias<"mtdbatl "#BATR#", $Rx", + (MTSPR !add(BATR, !add(BATR, 537)), gprc:$Rx)>, + Requires<[IsPPC6xx]>; + def : InstAlias<"mfdbatl $Rx, "#BATR, + (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 537)))>, + Requires<[IsPPC6xx]>; + def : InstAlias<"mtibatu "#BATR#", $Rx", + (MTSPR !add(BATR, !add(BATR, 528)), gprc:$Rx)>, + Requires<[IsPPC6xx]>; + def : InstAlias<"mfibatu $Rx, "#BATR, + (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 528)))>, + Requires<[IsPPC6xx]>; + def : InstAlias<"mtibatl "#BATR#", $Rx", + (MTSPR !add(BATR, !add(BATR, 529)), gprc:$Rx)>, + Requires<[IsPPC6xx]>; + def : InstAlias<"mfibatl $Rx, "#BATR, + (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 529)))>, + Requires<[IsPPC6xx]>; +} + +foreach BR = 0-7 in { + def : InstAlias<"mfbr"#BR#" $Rx", + (MFDCR gprc:$Rx, !add(BR, 0x80))>, + Requires<[IsPPC4xx]>; + def : InstAlias<"mtbr"#BR#" $Rx", + (MTDCR gprc:$Rx, !add(BR, 0x80))>, + Requires<[IsPPC4xx]>; +} + +def : InstAlias<"mtdccr $Rx", (MTSPR 1018, gprc:$Rx)>, Requires<[IsPPC4xx]>; +def : InstAlias<"mfdccr $Rx", (MFSPR gprc:$Rx, 1018)>, Requires<[IsPPC4xx]>; + +def : InstAlias<"mticcr $Rx", (MTSPR 1019, gprc:$Rx)>, Requires<[IsPPC4xx]>; +def : InstAlias<"mficcr $Rx", (MFSPR gprc:$Rx, 1019)>, Requires<[IsPPC4xx]>; + +def : InstAlias<"mtdear $Rx", (MTSPR 981, gprc:$Rx)>, Requires<[IsPPC4xx]>; +def : InstAlias<"mfdear $Rx", (MFSPR gprc:$Rx, 981)>, Requires<[IsPPC4xx]>; + +def : InstAlias<"mtesr $Rx", (MTSPR 980, gprc:$Rx)>, Requires<[IsPPC4xx]>; +def : InstAlias<"mfesr $Rx", (MFSPR gprc:$Rx, 980)>, Requires<[IsPPC4xx]>; + +def : InstAlias<"mfspefscr $Rx", 
(MFSPR gprc:$Rx, 512)>; +def : InstAlias<"mtspefscr $Rx", (MTSPR 512, gprc:$Rx)>; + +def : InstAlias<"mttcr $Rx", (MTSPR 986, gprc:$Rx)>, Requires<[IsPPC4xx]>; +def : InstAlias<"mftcr $Rx", (MFSPR gprc:$Rx, 986)>, Requires<[IsPPC4xx]>; + +def LAx : PPCAsmPseudo<"la $rA, $addr", (ins gprc:$rA, memri:$addr)>; + +def SUBI : PPCAsmPseudo<"subi $rA, $rB, $imm", + (ins gprc:$rA, gprc:$rB, s16imm:$imm)>; +def SUBIS : PPCAsmPseudo<"subis $rA, $rB, $imm", + (ins gprc:$rA, gprc:$rB, s16imm:$imm)>; +def SUBIC : PPCAsmPseudo<"subic $rA, $rB, $imm", + (ins gprc:$rA, gprc:$rB, s16imm:$imm)>; +def SUBICo : PPCAsmPseudo<"subic. $rA, $rB, $imm", + (ins gprc:$rA, gprc:$rB, s16imm:$imm)>; + +def : InstAlias<"sub $rA, $rB, $rC", (SUBF8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>; +def : InstAlias<"sub. $rA, $rB, $rC", (SUBF8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>; +def : InstAlias<"subc $rA, $rB, $rC", (SUBFC8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>; +def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>; + +def : InstAlias<"mtmsrd $RS", (MTMSRD gprc:$RS, 0)>; +def : InstAlias<"mtmsr $RS", (MTMSR gprc:$RS, 0)>; + +def : InstAlias<"mfasr $RT", (MFSPR gprc:$RT, 280)>; +def : InstAlias<"mtasr $RT", (MTSPR 280, gprc:$RT)>; + +foreach SPRG = 0-3 in { + def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR gprc:$RT, !add(SPRG, 272))>; + def : InstAlias<"mfsprg"#SPRG#" $RT", (MFSPR gprc:$RT, !add(SPRG, 272))>; + def : InstAlias<"mtsprg "#SPRG#", $RT", (MTSPR !add(SPRG, 272), gprc:$RT)>; + def : InstAlias<"mtsprg"#SPRG#" $RT", (MTSPR !add(SPRG, 272), gprc:$RT)>; +} +foreach SPRG = 4-7 in { + def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR gprc:$RT, !add(SPRG, 256))>, + Requires<[IsBookE]>; + def : InstAlias<"mfsprg"#SPRG#" $RT", (MFSPR gprc:$RT, !add(SPRG, 256))>, + Requires<[IsBookE]>; + def : InstAlias<"mtsprg "#SPRG#", $RT", (MTSPR !add(SPRG, 256), gprc:$RT)>, + Requires<[IsBookE]>; + def : InstAlias<"mtsprg"#SPRG#" $RT", (MTSPR !add(SPRG, 256), gprc:$RT)>, + Requires<[IsBookE]>; +} + +def : InstAlias<"mtasr $RS", (MTSPR 280, gprc:$RS)>; + +def : InstAlias<"mfdec $RT", (MFSPR gprc:$RT, 22)>; +def : InstAlias<"mtdec $RT", (MTSPR 22, gprc:$RT)>; + +def : InstAlias<"mfpvr $RT", (MFSPR gprc:$RT, 287)>; + +def : InstAlias<"mfsdr1 $RT", (MFSPR gprc:$RT, 25)>; +def : InstAlias<"mtsdr1 $RT", (MTSPR 25, gprc:$RT)>; + +def : InstAlias<"mfsrr0 $RT", (MFSPR gprc:$RT, 26)>; +def : InstAlias<"mfsrr1 $RT", (MFSPR gprc:$RT, 27)>; +def : InstAlias<"mtsrr0 $RT", (MTSPR 26, gprc:$RT)>; +def : InstAlias<"mtsrr1 $RT", (MTSPR 27, gprc:$RT)>; + +def : InstAlias<"tlbie $RB", (TLBIE R0, gprc:$RB)>; + +def : InstAlias<"tlbrehi $RS, $A", (TLBRE2 gprc:$RS, gprc:$A, 0)>, + Requires<[IsPPC4xx]>; +def : InstAlias<"tlbrelo $RS, $A", (TLBRE2 gprc:$RS, gprc:$A, 1)>, + Requires<[IsPPC4xx]>; +def : InstAlias<"tlbwehi $RS, $A", (TLBWE2 gprc:$RS, gprc:$A, 0)>, + Requires<[IsPPC4xx]>; +def : InstAlias<"tlbwelo $RS, $A", (TLBWE2 gprc:$RS, gprc:$A, 1)>, + Requires<[IsPPC4xx]>; + +def EXTLWI : PPCAsmPseudo<"extlwi $rA, $rS, $n, $b", + (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; +def EXTLWIo : PPCAsmPseudo<"extlwi. $rA, $rS, $n, $b", + (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; +def EXTRWI : PPCAsmPseudo<"extrwi $rA, $rS, $n, $b", + (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; +def EXTRWIo : PPCAsmPseudo<"extrwi. $rA, $rS, $n, $b", + (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; +def INSLWI : PPCAsmPseudo<"inslwi $rA, $rS, $n, $b", + (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; +def INSLWIo : PPCAsmPseudo<"inslwi. 
$rA, $rS, $n, $b", + (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; +def INSRWI : PPCAsmPseudo<"insrwi $rA, $rS, $n, $b", + (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; +def INSRWIo : PPCAsmPseudo<"insrwi. $rA, $rS, $n, $b", + (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; +def ROTRWI : PPCAsmPseudo<"rotrwi $rA, $rS, $n", + (ins gprc:$rA, gprc:$rS, u5imm:$n)>; +def ROTRWIo : PPCAsmPseudo<"rotrwi. $rA, $rS, $n", + (ins gprc:$rA, gprc:$rS, u5imm:$n)>; +def SLWI : PPCAsmPseudo<"slwi $rA, $rS, $n", + (ins gprc:$rA, gprc:$rS, u5imm:$n)>; +def SLWIo : PPCAsmPseudo<"slwi. $rA, $rS, $n", + (ins gprc:$rA, gprc:$rS, u5imm:$n)>; +def SRWI : PPCAsmPseudo<"srwi $rA, $rS, $n", + (ins gprc:$rA, gprc:$rS, u5imm:$n)>; +def SRWIo : PPCAsmPseudo<"srwi. $rA, $rS, $n", + (ins gprc:$rA, gprc:$rS, u5imm:$n)>; +def CLRRWI : PPCAsmPseudo<"clrrwi $rA, $rS, $n", + (ins gprc:$rA, gprc:$rS, u5imm:$n)>; +def CLRRWIo : PPCAsmPseudo<"clrrwi. $rA, $rS, $n", + (ins gprc:$rA, gprc:$rS, u5imm:$n)>; +def CLRLSLWI : PPCAsmPseudo<"clrlslwi $rA, $rS, $b, $n", + (ins gprc:$rA, gprc:$rS, u5imm:$b, u5imm:$n)>; +def CLRLSLWIo : PPCAsmPseudo<"clrlslwi. $rA, $rS, $b, $n", + (ins gprc:$rA, gprc:$rS, u5imm:$b, u5imm:$n)>; + +def : InstAlias<"rotlwi $rA, $rS, $n", (RLWINM gprc:$rA, gprc:$rS, u5imm:$n, 0, 31)>; +def : InstAlias<"rotlwi. $rA, $rS, $n", (RLWINMo gprc:$rA, gprc:$rS, u5imm:$n, 0, 31)>; +def : InstAlias<"rotlw $rA, $rS, $rB", (RLWNM gprc:$rA, gprc:$rS, gprc:$rB, 0, 31)>; +def : InstAlias<"rotlw. $rA, $rS, $rB", (RLWNMo gprc:$rA, gprc:$rS, gprc:$rB, 0, 31)>; +def : InstAlias<"clrlwi $rA, $rS, $n", (RLWINM gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>; +def : InstAlias<"clrlwi. $rA, $rS, $n", (RLWINMo gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>; + +def : InstAlias<"cntlzw $rA, $rS", (CNTLZW gprc:$rA, gprc:$rS)>; +def : InstAlias<"cntlzw. $rA, $rS", (CNTLZWo gprc:$rA, gprc:$rS)>; +// The POWER variant +def : MnemonicAlias<"cntlz", "cntlzw">; +def : MnemonicAlias<"cntlz.", "cntlzw.">; + +def EXTLDI : PPCAsmPseudo<"extldi $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>; +def EXTLDIo : PPCAsmPseudo<"extldi. $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>; +def EXTRDI : PPCAsmPseudo<"extrdi $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>; +def EXTRDIo : PPCAsmPseudo<"extrdi. $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>; +def INSRDI : PPCAsmPseudo<"insrdi $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>; +def INSRDIo : PPCAsmPseudo<"insrdi. $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>; +def ROTRDI : PPCAsmPseudo<"rotrdi $rA, $rS, $n", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; +def ROTRDIo : PPCAsmPseudo<"rotrdi. $rA, $rS, $n", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; +def SLDI : PPCAsmPseudo<"sldi $rA, $rS, $n", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; +def SLDIo : PPCAsmPseudo<"sldi. $rA, $rS, $n", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; +def SRDI : PPCAsmPseudo<"srdi $rA, $rS, $n", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; +def SRDIo : PPCAsmPseudo<"srdi. $rA, $rS, $n", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; +def CLRRDI : PPCAsmPseudo<"clrrdi $rA, $rS, $n", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; +def CLRRDIo : PPCAsmPseudo<"clrrdi. $rA, $rS, $n", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; +def CLRLSLDI : PPCAsmPseudo<"clrlsldi $rA, $rS, $b, $n", + (ins g8rc:$rA, g8rc:$rS, u6imm:$b, u6imm:$n)>; +def CLRLSLDIo : PPCAsmPseudo<"clrlsldi. 
$rA, $rS, $b, $n", + (ins g8rc:$rA, g8rc:$rS, u6imm:$b, u6imm:$n)>; +def SUBPCIS : PPCAsmPseudo<"subpcis $RT, $D", (ins g8rc:$RT, s16imm:$D)>; + +def : InstAlias<"rotldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, u6imm:$n, 0)>; +def : InstAlias<"rotldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, u6imm:$n, 0)>; +def : InstAlias<"rotld $rA, $rS, $rB", (RLDCL g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>; +def : InstAlias<"rotld. $rA, $rS, $rB", (RLDCLo g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>; +def : InstAlias<"clrldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>; +def : InstAlias<"clrldi $rA, $rS, $n", + (RLDICL_32_64 g8rc:$rA, gprc:$rS, 0, u6imm:$n)>; +def : InstAlias<"clrldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>; +def : InstAlias<"lnia $RT", (ADDPCIS g8rc:$RT, 0)>; + +def RLWINMbm : PPCAsmPseudo<"rlwinm $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; +def RLWINMobm : PPCAsmPseudo<"rlwinm. $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; +def RLWIMIbm : PPCAsmPseudo<"rlwimi $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; +def RLWIMIobm : PPCAsmPseudo<"rlwimi. $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; +def RLWNMbm : PPCAsmPseudo<"rlwnm $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; +def RLWNMobm : PPCAsmPseudo<"rlwnm. $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; + +// These generic branch instruction forms are used for the assembler parser only. +// Defs and Uses are conservative, since we don't know the BO value. +let PPC970_Unit = 7, isBranch = 1 in { + let Defs = [CTR], Uses = [CTR, RM] in { + def gBC : BForm_3<16, 0, 0, (outs), + (ins u5imm:$bo, crbitrc:$bi, condbrtarget:$dst), + "bc $bo, $bi, $dst">; + def gBCA : BForm_3<16, 1, 0, (outs), + (ins u5imm:$bo, crbitrc:$bi, abscondbrtarget:$dst), + "bca $bo, $bi, $dst">; + let isAsmParserOnly = 1 in { + def gBCat : BForm_3_at<16, 0, 0, (outs), + (ins u5imm:$bo, atimm:$at, crbitrc:$bi, + condbrtarget:$dst), + "bc$at $bo, $bi, $dst">; + def gBCAat : BForm_3_at<16, 1, 0, (outs), + (ins u5imm:$bo, atimm:$at, crbitrc:$bi, + abscondbrtarget:$dst), + "bca$at $bo, $bi, $dst">; + } // isAsmParserOnly = 1 + } + let Defs = [LR, CTR], Uses = [CTR, RM] in { + def gBCL : BForm_3<16, 0, 1, (outs), + (ins u5imm:$bo, crbitrc:$bi, condbrtarget:$dst), + "bcl $bo, $bi, $dst">; + def gBCLA : BForm_3<16, 1, 1, (outs), + (ins u5imm:$bo, crbitrc:$bi, abscondbrtarget:$dst), + "bcla $bo, $bi, $dst">; + let isAsmParserOnly = 1 in { + def gBCLat : BForm_3_at<16, 0, 1, (outs), + (ins u5imm:$bo, atimm:$at, crbitrc:$bi, + condbrtarget:$dst), + "bcl$at $bo, $bi, $dst">; + def gBCLAat : BForm_3_at<16, 1, 1, (outs), + (ins u5imm:$bo, atimm:$at, crbitrc:$bi, + abscondbrtarget:$dst), + "bcla$at $bo, $bi, $dst">; + } // // isAsmParserOnly = 1 + } + let Defs = [CTR], Uses = [CTR, LR, RM] in + def gBCLR : XLForm_2<19, 16, 0, (outs), + (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh), + "bclr $bo, $bi, $bh", IIC_BrB, []>; + let Defs = [LR, CTR], Uses = [CTR, LR, RM] in + def gBCLRL : XLForm_2<19, 16, 1, (outs), + (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh), + "bclrl $bo, $bi, $bh", IIC_BrB, []>; + let Defs = [CTR], Uses = [CTR, LR, RM] in + def gBCCTR : XLForm_2<19, 528, 0, (outs), + (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh), + "bcctr $bo, $bi, $bh", IIC_BrB, []>; + let Defs = [LR, CTR], Uses = [CTR, LR, RM] in + def gBCCTRL : XLForm_2<19, 528, 1, (outs), + (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh), + "bcctrl $bo, $bi, $bh", IIC_BrB, []>; +} + 
+multiclass BranchSimpleMnemonicAT<string pm, int at> { + def : InstAlias<"bc"#pm#" $bo, $bi, $dst", (gBCat u5imm:$bo, at, crbitrc:$bi, + condbrtarget:$dst)>; + def : InstAlias<"bca"#pm#" $bo, $bi, $dst", (gBCAat u5imm:$bo, at, crbitrc:$bi, + condbrtarget:$dst)>; + def : InstAlias<"bcl"#pm#" $bo, $bi, $dst", (gBCLat u5imm:$bo, at, crbitrc:$bi, + condbrtarget:$dst)>; + def : InstAlias<"bcla"#pm#" $bo, $bi, $dst", (gBCLAat u5imm:$bo, at, crbitrc:$bi, + condbrtarget:$dst)>; +} +defm : BranchSimpleMnemonicAT<"+", 3>; +defm : BranchSimpleMnemonicAT<"-", 2>; + +def : InstAlias<"bclr $bo, $bi", (gBCLR u5imm:$bo, crbitrc:$bi, 0)>; +def : InstAlias<"bclrl $bo, $bi", (gBCLRL u5imm:$bo, crbitrc:$bi, 0)>; +def : InstAlias<"bcctr $bo, $bi", (gBCCTR u5imm:$bo, crbitrc:$bi, 0)>; +def : InstAlias<"bcctrl $bo, $bi", (gBCCTRL u5imm:$bo, crbitrc:$bi, 0)>; + +multiclass BranchSimpleMnemonic1<string name, string pm, int bo> { + def : InstAlias<"b"#name#pm#" $bi, $dst", (gBC bo, crbitrc:$bi, condbrtarget:$dst)>; + def : InstAlias<"b"#name#"a"#pm#" $bi, $dst", (gBCA bo, crbitrc:$bi, abscondbrtarget:$dst)>; + def : InstAlias<"b"#name#"lr"#pm#" $bi", (gBCLR bo, crbitrc:$bi, 0)>; + def : InstAlias<"b"#name#"l"#pm#" $bi, $dst", (gBCL bo, crbitrc:$bi, condbrtarget:$dst)>; + def : InstAlias<"b"#name#"la"#pm#" $bi, $dst", (gBCLA bo, crbitrc:$bi, abscondbrtarget:$dst)>; + def : InstAlias<"b"#name#"lrl"#pm#" $bi", (gBCLRL bo, crbitrc:$bi, 0)>; +} +multiclass BranchSimpleMnemonic2<string name, string pm, int bo> + : BranchSimpleMnemonic1<name, pm, bo> { + def : InstAlias<"b"#name#"ctr"#pm#" $bi", (gBCCTR bo, crbitrc:$bi, 0)>; + def : InstAlias<"b"#name#"ctrl"#pm#" $bi", (gBCCTRL bo, crbitrc:$bi, 0)>; +} +defm : BranchSimpleMnemonic2<"t", "", 12>; +defm : BranchSimpleMnemonic2<"f", "", 4>; +defm : BranchSimpleMnemonic2<"t", "-", 14>; +defm : BranchSimpleMnemonic2<"f", "-", 6>; +defm : BranchSimpleMnemonic2<"t", "+", 15>; +defm : BranchSimpleMnemonic2<"f", "+", 7>; +defm : BranchSimpleMnemonic1<"dnzt", "", 8>; +defm : BranchSimpleMnemonic1<"dnzf", "", 0>; +defm : BranchSimpleMnemonic1<"dzt", "", 10>; +defm : BranchSimpleMnemonic1<"dzf", "", 2>; + +multiclass BranchExtendedMnemonicPM<string name, string pm, int bibo> { + def : InstAlias<"b"#name#pm#" $cc, $dst", + (BCC bibo, crrc:$cc, condbrtarget:$dst)>; + def : InstAlias<"b"#name#pm#" $dst", + (BCC bibo, CR0, condbrtarget:$dst)>; + + def : InstAlias<"b"#name#"a"#pm#" $cc, $dst", + (BCCA bibo, crrc:$cc, abscondbrtarget:$dst)>; + def : InstAlias<"b"#name#"a"#pm#" $dst", + (BCCA bibo, CR0, abscondbrtarget:$dst)>; + + def : InstAlias<"b"#name#"lr"#pm#" $cc", + (BCCLR bibo, crrc:$cc)>; + def : InstAlias<"b"#name#"lr"#pm, + (BCCLR bibo, CR0)>; + + def : InstAlias<"b"#name#"ctr"#pm#" $cc", + (BCCCTR bibo, crrc:$cc)>; + def : InstAlias<"b"#name#"ctr"#pm, + (BCCCTR bibo, CR0)>; + + def : InstAlias<"b"#name#"l"#pm#" $cc, $dst", + (BCCL bibo, crrc:$cc, condbrtarget:$dst)>; + def : InstAlias<"b"#name#"l"#pm#" $dst", + (BCCL bibo, CR0, condbrtarget:$dst)>; + + def : InstAlias<"b"#name#"la"#pm#" $cc, $dst", + (BCCLA bibo, crrc:$cc, abscondbrtarget:$dst)>; + def : InstAlias<"b"#name#"la"#pm#" $dst", + (BCCLA bibo, CR0, abscondbrtarget:$dst)>; + + def : InstAlias<"b"#name#"lrl"#pm#" $cc", + (BCCLRL bibo, crrc:$cc)>; + def : InstAlias<"b"#name#"lrl"#pm, + (BCCLRL bibo, CR0)>; + + def : InstAlias<"b"#name#"ctrl"#pm#" $cc", + (BCCCTRL bibo, crrc:$cc)>; + def : InstAlias<"b"#name#"ctrl"#pm, + (BCCCTRL bibo, CR0)>; +} +multiclass BranchExtendedMnemonic<string name, int bibo> { + defm : 
BranchExtendedMnemonicPM<name, "", bibo>; + defm : BranchExtendedMnemonicPM<name, "-", !add(bibo, 2)>; + defm : BranchExtendedMnemonicPM<name, "+", !add(bibo, 3)>; +} +defm : BranchExtendedMnemonic<"lt", 12>; +defm : BranchExtendedMnemonic<"gt", 44>; +defm : BranchExtendedMnemonic<"eq", 76>; +defm : BranchExtendedMnemonic<"un", 108>; +defm : BranchExtendedMnemonic<"so", 108>; +defm : BranchExtendedMnemonic<"ge", 4>; +defm : BranchExtendedMnemonic<"nl", 4>; +defm : BranchExtendedMnemonic<"le", 36>; +defm : BranchExtendedMnemonic<"ng", 36>; +defm : BranchExtendedMnemonic<"ne", 68>; +defm : BranchExtendedMnemonic<"nu", 100>; +defm : BranchExtendedMnemonic<"ns", 100>; + +def : InstAlias<"cmpwi $rA, $imm", (CMPWI CR0, gprc:$rA, s16imm:$imm)>; +def : InstAlias<"cmpw $rA, $rB", (CMPW CR0, gprc:$rA, gprc:$rB)>; +def : InstAlias<"cmplwi $rA, $imm", (CMPLWI CR0, gprc:$rA, u16imm:$imm)>; +def : InstAlias<"cmplw $rA, $rB", (CMPLW CR0, gprc:$rA, gprc:$rB)>; +def : InstAlias<"cmpdi $rA, $imm", (CMPDI CR0, g8rc:$rA, s16imm64:$imm)>; +def : InstAlias<"cmpd $rA, $rB", (CMPD CR0, g8rc:$rA, g8rc:$rB)>; +def : InstAlias<"cmpldi $rA, $imm", (CMPLDI CR0, g8rc:$rA, u16imm64:$imm)>; +def : InstAlias<"cmpld $rA, $rB", (CMPLD CR0, g8rc:$rA, g8rc:$rB)>; + +def : InstAlias<"cmpi $bf, 0, $rA, $imm", (CMPWI crrc:$bf, gprc:$rA, s16imm:$imm)>; +def : InstAlias<"cmp $bf, 0, $rA, $rB", (CMPW crrc:$bf, gprc:$rA, gprc:$rB)>; +def : InstAlias<"cmpli $bf, 0, $rA, $imm", (CMPLWI crrc:$bf, gprc:$rA, u16imm:$imm)>; +def : InstAlias<"cmpl $bf, 0, $rA, $rB", (CMPLW crrc:$bf, gprc:$rA, gprc:$rB)>; +def : InstAlias<"cmpi $bf, 1, $rA, $imm", (CMPDI crrc:$bf, g8rc:$rA, s16imm64:$imm)>; +def : InstAlias<"cmp $bf, 1, $rA, $rB", (CMPD crrc:$bf, g8rc:$rA, g8rc:$rB)>; +def : InstAlias<"cmpli $bf, 1, $rA, $imm", (CMPLDI crrc:$bf, g8rc:$rA, u16imm64:$imm)>; +def : InstAlias<"cmpl $bf, 1, $rA, $rB", (CMPLD crrc:$bf, g8rc:$rA, g8rc:$rB)>; + +multiclass TrapExtendedMnemonic<string name, int to> { + def : InstAlias<"td"#name#"i $rA, $imm", (TDI to, g8rc:$rA, s16imm:$imm)>; + def : InstAlias<"td"#name#" $rA, $rB", (TD to, g8rc:$rA, g8rc:$rB)>; + def : InstAlias<"tw"#name#"i $rA, $imm", (TWI to, gprc:$rA, s16imm:$imm)>; + def : InstAlias<"tw"#name#" $rA, $rB", (TW to, gprc:$rA, gprc:$rB)>; +} +defm : TrapExtendedMnemonic<"lt", 16>; +defm : TrapExtendedMnemonic<"le", 20>; +defm : TrapExtendedMnemonic<"eq", 4>; +defm : TrapExtendedMnemonic<"ge", 12>; +defm : TrapExtendedMnemonic<"gt", 8>; +defm : TrapExtendedMnemonic<"nl", 12>; +defm : TrapExtendedMnemonic<"ne", 24>; +defm : TrapExtendedMnemonic<"ng", 20>; +defm : TrapExtendedMnemonic<"llt", 2>; +defm : TrapExtendedMnemonic<"lle", 6>; +defm : TrapExtendedMnemonic<"lge", 5>; +defm : TrapExtendedMnemonic<"lgt", 1>; +defm : TrapExtendedMnemonic<"lnl", 5>; +defm : TrapExtendedMnemonic<"lng", 6>; +defm : TrapExtendedMnemonic<"u", 31>; + +// Atomic loads +def : Pat<(atomic_load_8 iaddr:$src), (LBZ memri:$src)>; +def : Pat<(atomic_load_16 iaddr:$src), (LHZ memri:$src)>; +def : Pat<(atomic_load_32 iaddr:$src), (LWZ memri:$src)>; +def : Pat<(atomic_load_8 xaddr:$src), (LBZX memrr:$src)>; +def : Pat<(atomic_load_16 xaddr:$src), (LHZX memrr:$src)>; +def : Pat<(atomic_load_32 xaddr:$src), (LWZX memrr:$src)>; + +// Atomic stores +def : Pat<(atomic_store_8 iaddr:$ptr, i32:$val), (STB gprc:$val, memri:$ptr)>; +def : Pat<(atomic_store_16 iaddr:$ptr, i32:$val), (STH gprc:$val, memri:$ptr)>; +def : Pat<(atomic_store_32 iaddr:$ptr, i32:$val), (STW gprc:$val, memri:$ptr)>; +def : Pat<(atomic_store_8 xaddr:$ptr, 
i32:$val), (STBX gprc:$val, memrr:$ptr)>; +def : Pat<(atomic_store_16 xaddr:$ptr, i32:$val), (STHX gprc:$val, memrr:$ptr)>; +def : Pat<(atomic_store_32 xaddr:$ptr, i32:$val), (STWX gprc:$val, memrr:$ptr)>; + +let Predicates = [IsISA3_0] in { + +// Copy-Paste Facility +// We prefix 'CP' to COPY due to name conflict in Target.td. We also prefix to +// PASTE for naming consistency. +let mayLoad = 1 in +def CP_COPY : X_L1_RA5_RB5<31, 774, "copy" , gprc, IIC_LdStCOPY, []>; + +let mayStore = 1 in +def CP_PASTE : X_L1_RA5_RB5<31, 902, "paste" , gprc, IIC_LdStPASTE, []>; + +let mayStore = 1, Defs = [CR0] in +def CP_PASTEo : X_L1_RA5_RB5<31, 902, "paste.", gprc, IIC_LdStPASTE, []>, isDOT; + +def CP_COPYx : PPCAsmPseudo<"copy $rA, $rB" , (ins gprc:$rA, gprc:$rB)>; +def CP_PASTEx : PPCAsmPseudo<"paste $rA, $rB", (ins gprc:$rA, gprc:$rB)>; +def CP_COPY_FIRST : PPCAsmPseudo<"copy_first $rA, $rB", + (ins gprc:$rA, gprc:$rB)>; +def CP_PASTE_LAST : PPCAsmPseudo<"paste_last $rA, $rB", + (ins gprc:$rA, gprc:$rB)>; +def CP_ABORT : XForm_0<31, 838, (outs), (ins), "cp_abort", IIC_SprABORT, []>; + +// Message Synchronize +def MSGSYNC : XForm_0<31, 886, (outs), (ins), "msgsync", IIC_SprMSGSYNC, []>; + +// Power-Saving Mode Instruction: +def STOP : XForm_0<19, 370, (outs), (ins), "stop", IIC_SprSTOP, []>; + +} // IsISA3_0 + +// Fast 32-bit reverse bits algorithm: +// Step 1: 1-bit swap (swap odd 1-bit and even 1-bit): +// n = ((n >> 1) & 0x55555555) | ((n << 1) & 0xAAAAAAAA); +// Step 2: 2-bit swap (swap odd 2-bit and even 2-bit): +// n = ((n >> 2) & 0x33333333) | ((n << 2) & 0xCCCCCCCC); +// Step 3: 4-bit swap (swap odd 4-bit and even 4-bit): +// n = ((n >> 4) & 0x0F0F0F0F) | ((n << 4) & 0xF0F0F0F0); +// Step 4: byte reverse (Suppose n = [B1,B2,B3,B4]): +// Step 4.1: Put B4,B2 in the right position (rotate left 3 bytes): +// n' = (n rotl 24); After which n' = [B4, B1, B2, B3] +// Step 4.2: Insert B3 to the right position: +// n' = rlwimi n', n, 8, 8, 15; After which n' = [B4, B3, B2, B3] +// Step 4.3: Insert B1 to the right position: +// n' = rlwimi n', n, 8, 24, 31; After which n' = [B4, B3, B2, B1] +def MaskValues { + dag Lo1 = (ORI (LIS 0x5555), 0x5555); + dag Hi1 = (ORI (LIS 0xAAAA), 0xAAAA); + dag Lo2 = (ORI (LIS 0x3333), 0x3333); + dag Hi2 = (ORI (LIS 0xCCCC), 0xCCCC); + dag Lo4 = (ORI (LIS 0x0F0F), 0x0F0F); + dag Hi4 = (ORI (LIS 0xF0F0), 0xF0F0); +} + +def Shift1 { + dag Right = (RLWINM $A, 31, 1, 31); + dag Left = (RLWINM $A, 1, 0, 30); +} + +def Swap1 { + dag Bit = (OR (AND Shift1.Right, MaskValues.Lo1), + (AND Shift1.Left, MaskValues.Hi1)); +} + +def Shift2 { + dag Right = (RLWINM Swap1.Bit, 30, 2, 31); + dag Left = (RLWINM Swap1.Bit, 2, 0, 29); +} + +def Swap2 { + dag Bits = (OR (AND Shift2.Right, MaskValues.Lo2), + (AND Shift2.Left, MaskValues.Hi2)); +} + +def Shift4 { + dag Right = (RLWINM Swap2.Bits, 28, 4, 31); + dag Left = (RLWINM Swap2.Bits, 4, 0, 27); +} + +def Swap4 { + dag Bits = (OR (AND Shift4.Right, MaskValues.Lo4), + (AND Shift4.Left, MaskValues.Hi4)); +} + +def Rotate { + dag Left3Bytes = (RLWINM Swap4.Bits, 24, 0, 31); +} + +def RotateInsertByte3 { + dag Left = (RLWIMI Rotate.Left3Bytes, Swap4.Bits, 8, 8, 15); +} + +def RotateInsertByte1 { + dag Left = (RLWIMI RotateInsertByte3.Left, Swap4.Bits, 8, 24, 31); +} + +def : Pat<(i32 (bitreverse i32:$A)), + (RLDICL_32 RotateInsertByte1.Left, 0, 32)>; + +// Fast 64-bit reverse bits algorithm: +// Step 1: 1-bit swap (swap odd 1-bit and even 1-bit): +// n = ((n >> 1) & 0x5555555555555555) | ((n << 1) & 0xAAAAAAAAAAAAAAAA); +// Step 2: 2-bit swap 
(swap odd 2-bit and even 2-bit): +// n = ((n >> 2) & 0x3333333333333333) | ((n << 2) & 0xCCCCCCCCCCCCCCCC); +// Step 3: 4-bit swap (swap odd 4-bit and even 4-bit): +// n = ((n >> 4) & 0x0F0F0F0F0F0F0F0F) | ((n << 4) & 0xF0F0F0F0F0F0F0F0); +// Step 4: byte reverse (Suppose n = [B0,B1,B2,B3,B4,B5,B6,B7]): +// Apply the same byte reverse algorithm mentioned above for the fast 32-bit +// reverse to both the high 32 bit and low 32 bit of the 64 bit value. And +// then OR them together to get the final result. +def MaskValues64 { + dag Lo1 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Lo1, sub_32)); + dag Hi1 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Hi1, sub_32)); + dag Lo2 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Lo2, sub_32)); + dag Hi2 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Hi2, sub_32)); + dag Lo4 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Lo4, sub_32)); + dag Hi4 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Hi4, sub_32)); +} + +def DWMaskValues { + dag Lo1 = (ORI8 (ORIS8 (RLDICR MaskValues64.Lo1, 32, 31), 0x5555), 0x5555); + dag Hi1 = (ORI8 (ORIS8 (RLDICR MaskValues64.Hi1, 32, 31), 0xAAAA), 0xAAAA); + dag Lo2 = (ORI8 (ORIS8 (RLDICR MaskValues64.Lo2, 32, 31), 0x3333), 0x3333); + dag Hi2 = (ORI8 (ORIS8 (RLDICR MaskValues64.Hi2, 32, 31), 0xCCCC), 0xCCCC); + dag Lo4 = (ORI8 (ORIS8 (RLDICR MaskValues64.Lo4, 32, 31), 0x0F0F), 0x0F0F); + dag Hi4 = (ORI8 (ORIS8 (RLDICR MaskValues64.Hi4, 32, 31), 0xF0F0), 0xF0F0); +} + +def DWSwapInByte { + dag Swap1 = (OR8 (AND8 (RLDICL $A, 63, 1), DWMaskValues.Lo1), + (AND8 (RLDICR $A, 1, 62), DWMaskValues.Hi1)); + dag Swap2 = (OR8 (AND8 (RLDICL Swap1, 62, 2), DWMaskValues.Lo2), + (AND8 (RLDICR Swap1, 2, 61), DWMaskValues.Hi2)); + dag Swap4 = (OR8 (AND8 (RLDICL Swap2, 60, 4), DWMaskValues.Lo4), + (AND8 (RLDICR Swap2, 4, 59), DWMaskValues.Hi4)); +} + +// Intra-byte swap is done, now start inter-byte swap. +def DWBytes4567 { + dag Word = (i32 (EXTRACT_SUBREG DWSwapInByte.Swap4, sub_32)); +} + +def DWBytes7456 { + dag Word = (RLWINM DWBytes4567.Word, 24, 0, 31); +} + +def DWBytes7656 { + dag Word = (RLWIMI DWBytes7456.Word, DWBytes4567.Word, 8, 8, 15); +} + +// B7 B6 B5 B4 in the right order +def DWBytes7654 { + dag Word = (RLWIMI DWBytes7656.Word, DWBytes4567.Word, 8, 24, 31); + dag DWord = + (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), Word, sub_32)); +} + +def DWBytes0123 { + dag Word = (i32 (EXTRACT_SUBREG (RLDICL DWSwapInByte.Swap4, 32, 32), sub_32)); +} + +def DWBytes3012 { + dag Word = (RLWINM DWBytes0123.Word, 24, 0, 31); +} + +def DWBytes3212 { + dag Word = (RLWIMI DWBytes3012.Word, DWBytes0123.Word, 8, 8, 15); +} + +// B3 B2 B1 B0 in the right order +def DWBytes3210 { + dag Word = (RLWIMI DWBytes3212.Word, DWBytes0123.Word, 8, 24, 31); + dag DWord = + (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), Word, sub_32)); +} + +// Now both high word and low word are reversed, next +// swap the high word and low word. +def : Pat<(i64 (bitreverse i64:$A)), + (OR8 (RLDICR DWBytes7654.DWord, 32, 31), DWBytes3210.DWord)>; diff --git a/capstone/suite/synctools/tablegen/PPC/PPCInstrQPX.td b/capstone/suite/synctools/tablegen/PPC/PPCInstrQPX.td new file mode 100644 index 000000000..c4bb02695 --- /dev/null +++ b/capstone/suite/synctools/tablegen/PPC/PPCInstrQPX.td @@ -0,0 +1,1216 @@ +//===- PPCInstrQPX.td - The PowerPC QPX Extension --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the QPX extension to the PowerPC instruction set. +// Reference: +// Book Q: QPX Architecture Definition. IBM (as updated in) 2011. +// +//===----------------------------------------------------------------------===// + +def PPCRegQFRCAsmOperand : AsmOperandClass { + let Name = "RegQFRC"; let PredicateMethod = "isRegNumber"; +} +def qfrc : RegisterOperand<QFRC> { + let ParserMatchClass = PPCRegQFRCAsmOperand; +} +def PPCRegQSRCAsmOperand : AsmOperandClass { + let Name = "RegQSRC"; let PredicateMethod = "isRegNumber"; +} +def qsrc : RegisterOperand<QSRC> { + let ParserMatchClass = PPCRegQSRCAsmOperand; +} +def PPCRegQBRCAsmOperand : AsmOperandClass { + let Name = "RegQBRC"; let PredicateMethod = "isRegNumber"; +} +def qbrc : RegisterOperand<QBRC> { + let ParserMatchClass = PPCRegQBRCAsmOperand; +} + +//===----------------------------------------------------------------------===// +// Helpers for defining instructions that directly correspond to intrinsics. + +// QPXA1_Int - A AForm_1 intrinsic definition. +class QPXA1_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID> + : AForm_1<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC), + !strconcat(opc, " $FRT, $FRA, $FRC, $FRB"), IIC_FPFused, + [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB, v4f64:$FRC))]>; +// QPXA1s_Int - A AForm_1 intrinsic definition (simple instructions). +class QPXA1s_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID> + : AForm_1<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC), + !strconcat(opc, " $FRT, $FRA, $FRC, $FRB"), IIC_VecPerm, + [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB, v4f64:$FRC))]>; +// QPXA2_Int - A AForm_2 intrinsic definition. +class QPXA2_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID> + : AForm_2<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB), + !strconcat(opc, " $FRT, $FRA, $FRB"), IIC_FPGeneral, + [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB))]>; +// QPXA3_Int - A AForm_3 intrinsic definition. +class QPXA3_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID> + : AForm_3<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC), + !strconcat(opc, " $FRT, $FRA, $FRC"), IIC_FPGeneral, + [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRC))]>; +// QPXA4_Int - A AForm_4a intrinsic definition. +class QPXA4_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID> + : AForm_4a<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRB), + !strconcat(opc, " $FRT, $FRB"), IIC_FPGeneral, + [(set v4f64:$FRT, (IntID v4f64:$FRB))]>; +// QPXX18_Int - A XForm_18 intrinsic definition. +class QPXX18_Int<bits<6> opcode, bits<10> xo, string opc, Intrinsic IntID> + : XForm_18<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB), + !strconcat(opc, " $FRT, $FRA, $FRB"), IIC_FPCompare, + [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB))]>; +// QPXX19_Int - A XForm_19 intrinsic definition. +class QPXX19_Int<bits<6> opcode, bits<10> xo, string opc, Intrinsic IntID> + : XForm_19<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRB), + !strconcat(opc, " $FRT, $FRB"), IIC_FPGeneral, + [(set v4f64:$FRT, (IntID v4f64:$FRB))]>; + +//===----------------------------------------------------------------------===// +// Pattern Frags. 
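+// These fragments refine generic SDNodes by type: extloadv4f32 and
+// truncstorev4f32 match only memory accesses whose memory VT is v4f32 (a
+// single-precision QPX vector in storage), and fround_inexact/fround_exact
+// split FP_ROUND on its trunc flag (operand 1), which is 1 only when the
+// narrowing is known to be exact.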
+ +def extloadv4f32 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{ + return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v4f32; +}]>; + +def truncstorev4f32 : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4f32; +}]>; +def pre_truncstv4f32 : PatFrag<(ops node:$val, node:$base, node:$offset), + (pre_truncst node:$val, + node:$base, node:$offset), [{ + return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4f32; +}]>; + +def fround_inexact : PatFrag<(ops node:$val), (fpround node:$val), [{ + return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() == 0; +}]>; + +def fround_exact : PatFrag<(ops node:$val), (fpround node:$val), [{ + return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() == 1; +}]>; + +let FastIselShouldIgnore = 1 in // FastIsel should ignore all u12 instrs. + def u12 : ImmLeaf<i32, [{ return (Imm & 0xFFF) == Imm; }]>; + +//===----------------------------------------------------------------------===// +// Instruction Definitions. + +def HasQPX : Predicate<"PPCSubTarget->hasQPX()">; +let Predicates = [HasQPX] in { +let DecoderNamespace = "QPX" in { +let hasSideEffects = 0 in { // QPX instructions don't have side effects. +let Uses = [RM] in { + // Add Instructions + let isCommutable = 1 in { + def QVFADD : AForm_2<4, 21, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB), + "qvfadd $FRT, $FRA, $FRB", IIC_FPGeneral, + [(set v4f64:$FRT, (fadd v4f64:$FRA, v4f64:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFADDS : QPXA2_Int<0, 21, "qvfadds", int_ppc_qpx_qvfadds>; + def QVFADDSs : AForm_2<0, 21, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB), + "qvfadds $FRT, $FRA, $FRB", IIC_FPGeneral, + [(set v4f32:$FRT, (fadd v4f32:$FRA, v4f32:$FRB))]>; + } + def QVFSUB : AForm_2<4, 20, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB), + "qvfsub $FRT, $FRA, $FRB", IIC_FPGeneral, + [(set v4f64:$FRT, (fsub v4f64:$FRA, v4f64:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFSUBS : QPXA2_Int<0, 20, "qvfsubs", int_ppc_qpx_qvfsubs>; + def QVFSUBSs : AForm_2<0, 20, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB), + "qvfsubs $FRT, $FRA, $FRB", IIC_FPGeneral, + [(set v4f32:$FRT, (fsub v4f32:$FRA, v4f32:$FRB))]>; + + // Estimate Instructions + def QVFRE : AForm_4a<4, 24, (outs qfrc:$FRT), (ins qfrc:$FRB), + "qvfre $FRT, $FRB", IIC_FPGeneral, + [(set v4f64:$FRT, (PPCfre v4f64:$FRB))]>; + def QVFRES : QPXA4_Int<0, 24, "qvfres", int_ppc_qpx_qvfres>; + let isCodeGenOnly = 1 in + def QVFRESs : AForm_4a<0, 24, (outs qsrc:$FRT), (ins qsrc:$FRB), + "qvfres $FRT, $FRB", IIC_FPGeneral, + [(set v4f32:$FRT, (PPCfre v4f32:$FRB))]>; + + def QVFRSQRTE : AForm_4a<4, 26, (outs qfrc:$FRT), (ins qfrc:$FRB), + "qvfrsqrte $FRT, $FRB", IIC_FPGeneral, + [(set v4f64:$FRT, (PPCfrsqrte v4f64:$FRB))]>; + def QVFRSQRTES : QPXA4_Int<0, 26, "qvfrsqrtes", int_ppc_qpx_qvfrsqrtes>; + let isCodeGenOnly = 1 in + def QVFRSQRTESs : AForm_4a<0, 26, (outs qsrc:$FRT), (ins qsrc:$FRB), + "qvfrsqrtes $FRT, $FRB", IIC_FPGeneral, + [(set v4f32:$FRT, (PPCfrsqrte v4f32:$FRB))]>; + + // Multiply Instructions + let isCommutable = 1 in { + def QVFMUL : AForm_3<4, 25, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC), + "qvfmul $FRT, $FRA, $FRC", IIC_FPGeneral, + [(set v4f64:$FRT, (fmul v4f64:$FRA, v4f64:$FRC))]>; + let isCodeGenOnly = 1 in + def QVFMULS : QPXA3_Int<0, 25, "qvfmuls", int_ppc_qpx_qvfmuls>; + def QVFMULSs : AForm_3<0, 25, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC), + "qvfmuls $FRT, $FRA, $FRC", IIC_FPGeneral, + [(set v4f32:$FRT, (fmul v4f32:$FRA, 
v4f32:$FRC))]>; + } + def QVFXMUL : QPXA3_Int<4, 17, "qvfxmul", int_ppc_qpx_qvfxmul>; + def QVFXMULS : QPXA3_Int<0, 17, "qvfxmuls", int_ppc_qpx_qvfxmuls>; + + // Multiply-add instructions + def QVFMADD : AForm_1<4, 29, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC), + "qvfmadd $FRT, $FRA, $FRC, $FRB", IIC_FPFused, + [(set v4f64:$FRT, (fma v4f64:$FRA, v4f64:$FRC, v4f64:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFMADDS : QPXA1_Int<0, 29, "qvfmadds", int_ppc_qpx_qvfmadds>; + def QVFMADDSs : AForm_1<0, 29, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC), + "qvfmadds $FRT, $FRA, $FRC, $FRB", IIC_FPFused, + [(set v4f32:$FRT, (fma v4f32:$FRA, v4f32:$FRC, v4f32:$FRB))]>; + def QVFNMADD : AForm_1<4, 31, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC), + "qvfnmadd $FRT, $FRA, $FRC, $FRB", IIC_FPFused, + [(set v4f64:$FRT, (fneg (fma v4f64:$FRA, v4f64:$FRC, + v4f64:$FRB)))]>; + let isCodeGenOnly = 1 in + def QVFNMADDS : QPXA1_Int<0, 31, "qvfnmadds", int_ppc_qpx_qvfnmadds>; + def QVFNMADDSs : AForm_1<0, 31, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC), + "qvfnmadds $FRT, $FRA, $FRC, $FRB", IIC_FPFused, + [(set v4f32:$FRT, (fneg (fma v4f32:$FRA, v4f32:$FRC, + v4f32:$FRB)))]>; + def QVFMSUB : AForm_1<4, 28, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC), + "qvfmsub $FRT, $FRA, $FRC, $FRB", IIC_FPFused, + [(set v4f64:$FRT, (fma v4f64:$FRA, v4f64:$FRC, + (fneg v4f64:$FRB)))]>; + let isCodeGenOnly = 1 in + def QVFMSUBS : QPXA1_Int<0, 28, "qvfmsubs", int_ppc_qpx_qvfmsubs>; + def QVFMSUBSs : AForm_1<0, 28, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC), + "qvfmsubs $FRT, $FRA, $FRC, $FRB", IIC_FPFused, + [(set v4f32:$FRT, (fma v4f32:$FRA, v4f32:$FRC, + (fneg v4f32:$FRB)))]>; + def QVFNMSUB : AForm_1<4, 30, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC), + "qvfnmsub $FRT, $FRA, $FRC, $FRB", IIC_FPFused, + [(set v4f64:$FRT, (fneg (fma v4f64:$FRA, v4f64:$FRC, + (fneg v4f64:$FRB))))]>; + let isCodeGenOnly = 1 in + def QVFNMSUBS : QPXA1_Int<0, 30, "qvfnmsubs", int_ppc_qpx_qvfnmsubs>; + def QVFNMSUBSs : AForm_1<0, 30, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC), + "qvfnmsubs $FRT, $FRA, $FRC, $FRB", IIC_FPFused, + [(set v4f32:$FRT, (fneg (fma v4f32:$FRA, v4f32:$FRC, + (fneg v4f32:$FRB))))]>; + def QVFXMADD : QPXA1_Int<4, 9, "qvfxmadd", int_ppc_qpx_qvfxmadd>; + def QVFXMADDS : QPXA1_Int<0, 9, "qvfxmadds", int_ppc_qpx_qvfxmadds>; + def QVFXXNPMADD : QPXA1_Int<4, 11, "qvfxxnpmadd", int_ppc_qpx_qvfxxnpmadd>; + def QVFXXNPMADDS : QPXA1_Int<0, 11, "qvfxxnpmadds", int_ppc_qpx_qvfxxnpmadds>; + def QVFXXCPNMADD : QPXA1_Int<4, 3, "qvfxxcpnmadd", int_ppc_qpx_qvfxxcpnmadd>; + def QVFXXCPNMADDS : QPXA1_Int<0, 3, "qvfxxcpnmadds", int_ppc_qpx_qvfxxcpnmadds>; + def QVFXXMADD : QPXA1_Int<4, 1, "qvfxxmadd", int_ppc_qpx_qvfxxmadd>; + def QVFXXMADDS : QPXA1_Int<0, 1, "qvfxxmadds", int_ppc_qpx_qvfxxmadds>; + + // Select Instruction + let isCodeGenOnly = 1 in + def QVFSEL : QPXA1s_Int<4, 23, "qvfsel", int_ppc_qpx_qvfsel>; + def QVFSELb : AForm_1<4, 23, (outs qfrc:$FRT), + (ins qbrc:$FRA, qfrc:$FRB, qfrc:$FRC), + "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm, + [(set v4f64:$FRT, (vselect v4i1:$FRA, + v4f64:$FRC, v4f64:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFSELbs : AForm_1<4, 23, (outs qsrc:$FRT), + (ins qbrc:$FRA, qsrc:$FRB, qsrc:$FRC), + "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm, + [(set v4f32:$FRT, (vselect v4i1:$FRA, + v4f32:$FRC, v4f32:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFSELbb: AForm_1<4, 23, (outs qbrc:$FRT), + (ins 
qbrc:$FRA, qbrc:$FRB, qbrc:$FRC), + "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm, + [(set v4i1:$FRT, (vselect v4i1:$FRA, + v4i1:$FRC, v4i1:$FRB))]>; + + // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after + // instruction selection into a branch sequence. + let usesCustomInserter = 1 in { + def SELECT_CC_QFRC: Pseudo<(outs qfrc:$dst), (ins crrc:$cond, qfrc:$T, qfrc:$F, + i32imm:$BROPC), "#SELECT_CC_QFRC", + []>; + def SELECT_CC_QSRC: Pseudo<(outs qsrc:$dst), (ins crrc:$cond, qsrc:$T, qsrc:$F, + i32imm:$BROPC), "#SELECT_CC_QSRC", + []>; + def SELECT_CC_QBRC: Pseudo<(outs qbrc:$dst), (ins crrc:$cond, qbrc:$T, qbrc:$F, + i32imm:$BROPC), "#SELECT_CC_QBRC", + []>; + + // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition + // register bit directly. + def SELECT_QFRC: Pseudo<(outs qfrc:$dst), (ins crbitrc:$cond, + qfrc:$T, qfrc:$F), "#SELECT_QFRC", + [(set v4f64:$dst, + (select i1:$cond, v4f64:$T, v4f64:$F))]>; + def SELECT_QSRC: Pseudo<(outs qsrc:$dst), (ins crbitrc:$cond, + qsrc:$T, qsrc:$F), "#SELECT_QSRC", + [(set v4f32:$dst, + (select i1:$cond, v4f32:$T, v4f32:$F))]>; + def SELECT_QBRC: Pseudo<(outs qbrc:$dst), (ins crbitrc:$cond, + qbrc:$T, qbrc:$F), "#SELECT_QBRC", + [(set v4i1:$dst, + (select i1:$cond, v4i1:$T, v4i1:$F))]>; + } + + // Convert and Round Instructions + def QVFCTID : QPXX19_Int<4, 814, "qvfctid", int_ppc_qpx_qvfctid>; + let isCodeGenOnly = 1 in + def QVFCTIDb : XForm_19<4, 814, (outs qbrc:$FRT), (ins qbrc:$FRB), + "qvfctid $FRT, $FRB", IIC_FPGeneral, []>; + + def QVFCTIDU : QPXX19_Int<4, 942, "qvfctidu", int_ppc_qpx_qvfctidu>; + def QVFCTIDZ : QPXX19_Int<4, 815, "qvfctidz", int_ppc_qpx_qvfctidz>; + def QVFCTIDUZ : QPXX19_Int<4, 943, "qvfctiduz", int_ppc_qpx_qvfctiduz>; + def QVFCTIW : QPXX19_Int<4, 14, "qvfctiw", int_ppc_qpx_qvfctiw>; + def QVFCTIWU : QPXX19_Int<4, 142, "qvfctiwu", int_ppc_qpx_qvfctiwu>; + def QVFCTIWZ : QPXX19_Int<4, 15, "qvfctiwz", int_ppc_qpx_qvfctiwz>; + def QVFCTIWUZ : QPXX19_Int<4, 143, "qvfctiwuz", int_ppc_qpx_qvfctiwuz>; + def QVFCFID : QPXX19_Int<4, 846, "qvfcfid", int_ppc_qpx_qvfcfid>; + let isCodeGenOnly = 1 in + def QVFCFIDb : XForm_19<4, 846, (outs qbrc:$FRT), (ins qbrc:$FRB), + "qvfcfid $FRT, $FRB", IIC_FPGeneral, []>; + + def QVFCFIDU : QPXX19_Int<4, 974, "qvfcfidu", int_ppc_qpx_qvfcfidu>; + def QVFCFIDS : QPXX19_Int<0, 846, "qvfcfids", int_ppc_qpx_qvfcfids>; + def QVFCFIDUS : QPXX19_Int<0, 974, "qvfcfidus", int_ppc_qpx_qvfcfidus>; + + let isCodeGenOnly = 1 in + def QVFRSP : QPXX19_Int<4, 12, "qvfrsp", int_ppc_qpx_qvfrsp>; + def QVFRSPs : XForm_19<4, 12, + (outs qsrc:$FRT), (ins qfrc:$FRB), + "qvfrsp $FRT, $FRB", IIC_FPGeneral, + [(set v4f32:$FRT, (fround_inexact v4f64:$FRB))]>; + + def QVFRIZ : XForm_19<4, 424, (outs qfrc:$FRT), (ins qfrc:$FRB), + "qvfriz $FRT, $FRB", IIC_FPGeneral, + [(set v4f64:$FRT, (ftrunc v4f64:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFRIZs : XForm_19<4, 424, (outs qsrc:$FRT), (ins qsrc:$FRB), + "qvfriz $FRT, $FRB", IIC_FPGeneral, + [(set v4f32:$FRT, (ftrunc v4f32:$FRB))]>; + + def QVFRIN : XForm_19<4, 392, (outs qfrc:$FRT), (ins qfrc:$FRB), + "qvfrin $FRT, $FRB", IIC_FPGeneral, + [(set v4f64:$FRT, (fround v4f64:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFRINs : XForm_19<4, 392, (outs qsrc:$FRT), (ins qsrc:$FRB), + "qvfrin $FRT, $FRB", IIC_FPGeneral, + [(set v4f32:$FRT, (fround v4f32:$FRB))]>; + + def QVFRIP : XForm_19<4, 456, (outs qfrc:$FRT), (ins qfrc:$FRB), + "qvfrip $FRT, $FRB", IIC_FPGeneral, + [(set v4f64:$FRT, (fceil v4f64:$FRB))]>; + let isCodeGenOnly = 1 in + 
def QVFRIPs : XForm_19<4, 456, (outs qsrc:$FRT), (ins qsrc:$FRB), + "qvfrip $FRT, $FRB", IIC_FPGeneral, + [(set v4f32:$FRT, (fceil v4f32:$FRB))]>; + + def QVFRIM : XForm_19<4, 488, (outs qfrc:$FRT), (ins qfrc:$FRB), + "qvfrim $FRT, $FRB", IIC_FPGeneral, + [(set v4f64:$FRT, (ffloor v4f64:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFRIMs : XForm_19<4, 488, (outs qsrc:$FRT), (ins qsrc:$FRB), + "qvfrim $FRT, $FRB", IIC_FPGeneral, + [(set v4f32:$FRT, (ffloor v4f32:$FRB))]>; + + // Move Instructions + def QVFMR : XForm_19<4, 72, + (outs qfrc:$FRT), (ins qfrc:$FRB), + "qvfmr $FRT, $FRB", IIC_VecPerm, + [/* (set v4f64:$FRT, v4f64:$FRB) */]>; + let isCodeGenOnly = 1 in { + def QVFMRs : XForm_19<4, 72, + (outs qsrc:$FRT), (ins qsrc:$FRB), + "qvfmr $FRT, $FRB", IIC_VecPerm, + [/* (set v4f32:$FRT, v4f32:$FRB) */]>; + def QVFMRb : XForm_19<4, 72, + (outs qbrc:$FRT), (ins qbrc:$FRB), + "qvfmr $FRT, $FRB", IIC_VecPerm, + [/* (set v4i1:$FRT, v4i1:$FRB) */]>; + } + def QVFNEG : XForm_19<4, 40, + (outs qfrc:$FRT), (ins qfrc:$FRB), + "qvfneg $FRT, $FRB", IIC_VecPerm, + [(set v4f64:$FRT, (fneg v4f64:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFNEGs : XForm_19<4, 40, + (outs qsrc:$FRT), (ins qsrc:$FRB), + "qvfneg $FRT, $FRB", IIC_VecPerm, + [(set v4f32:$FRT, (fneg v4f32:$FRB))]>; + def QVFABS : XForm_19<4, 264, + (outs qfrc:$FRT), (ins qfrc:$FRB), + "qvfabs $FRT, $FRB", IIC_VecPerm, + [(set v4f64:$FRT, (fabs v4f64:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFABSs : XForm_19<4, 264, + (outs qsrc:$FRT), (ins qsrc:$FRB), + "qvfabs $FRT, $FRB", IIC_VecPerm, + [(set v4f32:$FRT, (fabs v4f32:$FRB))]>; + def QVFNABS : XForm_19<4, 136, + (outs qfrc:$FRT), (ins qfrc:$FRB), + "qvfnabs $FRT, $FRB", IIC_VecPerm, + [(set v4f64:$FRT, (fneg (fabs v4f64:$FRB)))]>; + let isCodeGenOnly = 1 in + def QVFNABSs : XForm_19<4, 136, + (outs qsrc:$FRT), (ins qsrc:$FRB), + "qvfnabs $FRT, $FRB", IIC_VecPerm, + [(set v4f32:$FRT, (fneg (fabs v4f32:$FRB)))]>; + def QVFCPSGN : XForm_18<4, 8, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB), + "qvfcpsgn $FRT, $FRA, $FRB", IIC_VecPerm, + [(set v4f64:$FRT, (fcopysign v4f64:$FRB, v4f64:$FRA))]>; + let isCodeGenOnly = 1 in + def QVFCPSGNs : XForm_18<4, 8, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB), + "qvfcpsgn $FRT, $FRA, $FRB", IIC_VecPerm, + [(set v4f32:$FRT, (fcopysign v4f32:$FRB, v4f32:$FRA))]>; + + def QVALIGNI : Z23Form_1<4, 5, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, u2imm:$idx), + "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm, + [(set v4f64:$FRT, + (PPCqvaligni v4f64:$FRA, v4f64:$FRB, + (i32 imm:$idx)))]>; + let isCodeGenOnly = 1 in + def QVALIGNIs : Z23Form_1<4, 5, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, u2imm:$idx), + "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm, + [(set v4f32:$FRT, + (PPCqvaligni v4f32:$FRA, v4f32:$FRB, + (i32 imm:$idx)))]>; + let isCodeGenOnly = 1 in + def QVALIGNIb : Z23Form_1<4, 5, + (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u2imm:$idx), + "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm, + [(set v4i1:$FRT, + (PPCqvaligni v4i1:$FRA, v4i1:$FRB, + (i32 imm:$idx)))]>; + + def QVESPLATI : Z23Form_2<4, 37, + (outs qfrc:$FRT), (ins qfrc:$FRA, u2imm:$idx), + "qvesplati $FRT, $FRA, $idx", IIC_VecPerm, + [(set v4f64:$FRT, + (PPCqvesplati v4f64:$FRA, (i32 imm:$idx)))]>; + let isCodeGenOnly = 1 in + def QVESPLATIs : Z23Form_2<4, 37, + (outs qsrc:$FRT), (ins qsrc:$FRA, u2imm:$idx), + "qvesplati $FRT, $FRA, $idx", IIC_VecPerm, + [(set v4f32:$FRT, + (PPCqvesplati v4f32:$FRA, (i32 imm:$idx)))]>; + let isCodeGenOnly = 1 in + def QVESPLATIb : Z23Form_2<4, 37, + 
(outs qbrc:$FRT), (ins qbrc:$FRA, u2imm:$idx), + "qvesplati $FRT, $FRA, $idx", IIC_VecPerm, + [(set v4i1:$FRT, + (PPCqvesplati v4i1:$FRA, (i32 imm:$idx)))]>; + + def QVFPERM : AForm_1<4, 6, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC), + "qvfperm $FRT, $FRA, $FRB, $FRC", IIC_VecPerm, + [(set v4f64:$FRT, + (PPCqvfperm v4f64:$FRA, v4f64:$FRB, v4f64:$FRC))]>; + let isCodeGenOnly = 1 in + def QVFPERMs : AForm_1<4, 6, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qfrc:$FRC), + "qvfperm $FRT, $FRA, $FRB, $FRC", IIC_VecPerm, + [(set v4f32:$FRT, + (PPCqvfperm v4f32:$FRA, v4f32:$FRB, v4f64:$FRC))]>; + + let isReMaterializable = 1, isAsCheapAsAMove = 1 in + def QVGPCI : Z23Form_3<4, 133, + (outs qfrc:$FRT), (ins u12imm:$idx), + "qvgpci $FRT, $idx", IIC_VecPerm, + [(set v4f64:$FRT, (PPCqvgpci (u12:$idx)))]>; + + // Compare Instruction + let isCodeGenOnly = 1 in + def QVFTSTNAN : QPXX18_Int<4, 64, "qvftstnan", int_ppc_qpx_qvftstnan>; + def QVFTSTNANb : XForm_18<4, 64, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB), + "qvftstnan $FRT, $FRA, $FRB", IIC_FPCompare, + [(set v4i1:$FRT, + (setcc v4f64:$FRA, v4f64:$FRB, SETUO))]>; + let isCodeGenOnly = 1 in + def QVFTSTNANbs : XForm_18<4, 64, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB), + "qvftstnan $FRT, $FRA, $FRB", IIC_FPCompare, + [(set v4i1:$FRT, + (setcc v4f32:$FRA, v4f32:$FRB, SETUO))]>; + let isCodeGenOnly = 1 in + def QVFCMPLT : QPXX18_Int<4, 96, "qvfcmplt", int_ppc_qpx_qvfcmplt>; + def QVFCMPLTb : XForm_18<4, 96, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB), + "qvfcmplt $FRT, $FRA, $FRB", IIC_FPCompare, + [(set v4i1:$FRT, + (setcc v4f64:$FRA, v4f64:$FRB, SETOLT))]>; + let isCodeGenOnly = 1 in + def QVFCMPLTbs : XForm_18<4, 96, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB), + "qvfcmplt $FRT, $FRA, $FRB", IIC_FPCompare, + [(set v4i1:$FRT, + (setcc v4f32:$FRA, v4f32:$FRB, SETOLT))]>; + let isCodeGenOnly = 1 in + def QVFCMPGT : QPXX18_Int<4, 32, "qvfcmpgt", int_ppc_qpx_qvfcmpgt>; + def QVFCMPGTb : XForm_18<4, 32, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB), + "qvfcmpgt $FRT, $FRA, $FRB", IIC_FPCompare, + [(set v4i1:$FRT, + (setcc v4f64:$FRA, v4f64:$FRB, SETOGT))]>; + let isCodeGenOnly = 1 in + def QVFCMPGTbs : XForm_18<4, 32, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB), + "qvfcmpgt $FRT, $FRA, $FRB", IIC_FPCompare, + [(set v4i1:$FRT, + (setcc v4f32:$FRA, v4f32:$FRB, SETOGT))]>; + let isCodeGenOnly = 1 in + def QVFCMPEQ : QPXX18_Int<4, 0, "qvfcmpeq", int_ppc_qpx_qvfcmpeq>; + def QVFCMPEQb : XForm_18<4, 0, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB), + "qvfcmpeq $FRT, $FRA, $FRB", IIC_FPCompare, + [(set v4i1:$FRT, + (setcc v4f64:$FRA, v4f64:$FRB, SETOEQ))]>; + let isCodeGenOnly = 1 in + def QVFCMPEQbs : XForm_18<4, 0, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB), + "qvfcmpeq $FRT, $FRA, $FRB", IIC_FPCompare, + [(set v4i1:$FRT, + (setcc v4f32:$FRA, v4f32:$FRB, SETOEQ))]>; + + let isCodeGenOnly = 1 in + def QVFLOGICAL : XForm_20<4, 4, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, u12imm:$tttt), + "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>; + def QVFLOGICALb : XForm_20<4, 4, + (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u12imm:$tttt), + "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>; + let isCodeGenOnly = 1 in + def QVFLOGICALs : XForm_20<4, 4, + (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u12imm:$tttt), + "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>; + + // Load indexed instructions + let mayLoad = 1 in { + def QVLFDX : XForm_1_memOp<31, 583, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfdx $FRT, $src", 
IIC_LdStLFD, + [(set v4f64:$FRT, (load xoaddr:$src))]>; + let isCodeGenOnly = 1 in + def QVLFDXb : XForm_1_memOp<31, 583, + (outs qbrc:$FRT), (ins memrr:$src), + "qvlfdx $FRT, $src", IIC_LdStLFD, []>; + + let RC = 1 in + def QVLFDXA : XForm_1<31, 583, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfdxa $FRT, $src", IIC_LdStLFD, []>; + + def QVLFDUX : XForm_1<31, 615, + (outs qfrc:$FRT, ptr_rc_nor0:$ea_result), + (ins memrr:$src), + "qvlfdux $FRT, $src", IIC_LdStLFDU, []>, + RegConstraint<"$src.ptrreg = $ea_result">, + NoEncode<"$ea_result">; + let RC = 1 in + def QVLFDUXA : XForm_1<31, 615, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfduxa $FRT, $src", IIC_LdStLFD, []>; + + def QVLFSX : XForm_1_memOp<31, 519, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfsx $FRT, $src", IIC_LdStLFD, + [(set v4f64:$FRT, (extloadv4f32 xoaddr:$src))]>; + + let isCodeGenOnly = 1 in + def QVLFSXb : XForm_1<31, 519, + (outs qbrc:$FRT), (ins memrr:$src), + "qvlfsx $FRT, $src", IIC_LdStLFD, + [(set v4i1:$FRT, (PPCqvlfsb xoaddr:$src))]>; + let isCodeGenOnly = 1 in + def QVLFSXs : XForm_1_memOp<31, 519, + (outs qsrc:$FRT), (ins memrr:$src), + "qvlfsx $FRT, $src", IIC_LdStLFD, + [(set v4f32:$FRT, (load xoaddr:$src))]>; + + let RC = 1 in + def QVLFSXA : XForm_1<31, 519, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfsxa $FRT, $src", IIC_LdStLFD, []>; + + def QVLFSUX : XForm_1<31, 551, + (outs qsrc:$FRT, ptr_rc_nor0:$ea_result), + (ins memrr:$src), + "qvlfsux $FRT, $src", IIC_LdStLFDU, []>, + RegConstraint<"$src.ptrreg = $ea_result">, + NoEncode<"$ea_result">; + + let RC = 1 in + def QVLFSUXA : XForm_1<31, 551, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfsuxa $FRT, $src", IIC_LdStLFD, []>; + + def QVLFCDX : XForm_1<31, 71, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfcdx $FRT, $src", IIC_LdStLFD, []>; + let RC = 1 in + def QVLFCDXA : XForm_1<31, 71, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfcdxa $FRT, $src", IIC_LdStLFD, []>; + + def QVLFCDUX : XForm_1<31, 103, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfcdux $FRT, $src", IIC_LdStLFD, []>; + let RC = 1 in + def QVLFCDUXA : XForm_1<31, 103, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfcduxa $FRT, $src", IIC_LdStLFD, []>; + + def QVLFCSX : XForm_1<31, 7, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfcsx $FRT, $src", IIC_LdStLFD, []>; + let isCodeGenOnly = 1 in + def QVLFCSXs : XForm_1<31, 7, + (outs qsrc:$FRT), (ins memrr:$src), + "qvlfcsx $FRT, $src", IIC_LdStLFD, []>; + + let RC = 1 in + def QVLFCSXA : XForm_1<31, 7, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfcsxa $FRT, $src", IIC_LdStLFD, []>; + + def QVLFCSUX : XForm_1<31, 39, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfcsux $FRT, $src", IIC_LdStLFD, []>; + let RC = 1 in + def QVLFCSUXA : XForm_1<31, 39, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfcsuxa $FRT, $src", IIC_LdStLFD, []>; + + def QVLFIWAX : XForm_1<31, 871, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfiwax $FRT, $src", IIC_LdStLFD, []>; + let RC = 1 in + def QVLFIWAXA : XForm_1<31, 871, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfiwaxa $FRT, $src", IIC_LdStLFD, []>; + + def QVLFIWZX : XForm_1<31, 839, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfiwzx $FRT, $src", IIC_LdStLFD, []>; + let RC = 1 in + def QVLFIWZXA : XForm_1<31, 839, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfiwzxa $FRT, $src", IIC_LdStLFD, []>; + } + + + def QVLPCLDX : XForm_1<31, 582, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlpcldx $FRT, $src", IIC_LdStLFD, []>; + def QVLPCLSX : XForm_1<31, 518, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlpclsx $FRT, $src", IIC_LdStLFD, []>; + let 
isCodeGenOnly = 1 in + def QVLPCLSXint : XForm_11<31, 518, + (outs qfrc:$FRT), (ins G8RC:$src), + "qvlpclsx $FRT, 0, $src", IIC_LdStLFD, []>; + def QVLPCRDX : XForm_1<31, 70, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlpcrdx $FRT, $src", IIC_LdStLFD, []>; + def QVLPCRSX : XForm_1<31, 6, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlpcrsx $FRT, $src", IIC_LdStLFD, []>; + + // Store indexed instructions + let mayStore = 1 in { + def QVSTFDX : XForm_8_memOp<31, 711, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfdx $FRT, $dst", IIC_LdStSTFD, + [(store qfrc:$FRT, xoaddr:$dst)]>; + let isCodeGenOnly = 1 in + def QVSTFDXb : XForm_8_memOp<31, 711, + (outs), (ins qbrc:$FRT, memrr:$dst), + "qvstfdx $FRT, $dst", IIC_LdStSTFD, []>; + + let RC = 1 in + def QVSTFDXA : XForm_8<31, 711, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfdxa $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFDUX : XForm_8<31, 743, (outs ptr_rc_nor0:$ea_res), + (ins qfrc:$FRT, memrr:$dst), + "qvstfdux $FRT, $dst", IIC_LdStSTFDU, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, + NoEncode<"$ea_res">; + + let RC = 1 in + def QVSTFDUXA : XForm_8<31, 743, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfduxa $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFDXI : XForm_8<31, 709, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfdxi $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFDXIA : XForm_8<31, 709, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfdxia $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFDUXI : XForm_8<31, 741, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfduxi $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFDUXIA : XForm_8<31, 741, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfduxia $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFSX : XForm_8_memOp<31, 647, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfsx $FRT, $dst", IIC_LdStSTFD, + [(truncstorev4f32 qfrc:$FRT, xoaddr:$dst)]>; + let isCodeGenOnly = 1 in + def QVSTFSXs : XForm_8_memOp<31, 647, + (outs), (ins qsrc:$FRT, memrr:$dst), + "qvstfsx $FRT, $dst", IIC_LdStSTFD, + [(store qsrc:$FRT, xoaddr:$dst)]>; + + let RC = 1 in + def QVSTFSXA : XForm_8<31, 647, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfsxa $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFSUX : XForm_8<31, 679, (outs ptr_rc_nor0:$ea_res), + (ins qsrc:$FRT, memrr:$dst), + "qvstfsux $FRT, $dst", IIC_LdStSTFDU, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, + NoEncode<"$ea_res">; + let isCodeGenOnly = 1 in + def QVSTFSUXs: XForm_8<31, 679, (outs ptr_rc_nor0:$ea_res), + (ins qfrc:$FRT, memrr:$dst), + "qvstfsux $FRT, $dst", IIC_LdStSTFDU, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, + NoEncode<"$ea_res">; + + let RC = 1 in + def QVSTFSUXA : XForm_8<31, 679, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfsuxa $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFSXI : XForm_8<31, 645, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfsxi $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFSXIA : XForm_8<31, 645, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfsxia $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFSUXI : XForm_8<31, 677, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfsuxi $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFSUXIA : XForm_8<31, 677, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfsuxia $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFCDX : XForm_8<31, 199, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcdx $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFCDXA : XForm_8<31, 199, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcdxa $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFCSX 
: XForm_8<31, 135, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcsx $FRT, $dst", IIC_LdStSTFD, []>; + let isCodeGenOnly = 1 in + def QVSTFCSXs : XForm_8<31, 135, + (outs), (ins qsrc:$FRT, memrr:$dst), + "qvstfcsx $FRT, $dst", IIC_LdStSTFD, []>; + + let RC = 1 in + def QVSTFCSXA : XForm_8<31, 135, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcsxa $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFCDUX : XForm_8<31, 231, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcdux $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFCDUXA : XForm_8<31, 231, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcduxa $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFCSUX : XForm_8<31, 167, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcsux $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFCSUXA : XForm_8<31, 167, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcsuxa $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFCDXI : XForm_8<31, 197, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcdxi $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFCDXIA : XForm_8<31, 197, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcdxia $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFCSXI : XForm_8<31, 133, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcsxi $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFCSXIA : XForm_8<31, 133, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcsxia $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFCDUXI : XForm_8<31, 229, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcduxi $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFCDUXIA : XForm_8<31, 229, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcduxia $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFCSUXI : XForm_8<31, 165, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcsuxi $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFCSUXIA : XForm_8<31, 165, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcsuxia $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFIWX : XForm_8<31, 967, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfiwx $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFIWXA : XForm_8<31, 967, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfiwxa $FRT, $dst", IIC_LdStSTFD, []>; + } +} + +} // neverHasSideEffects +} + +def : InstAlias<"qvfclr $FRT", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRT, qbrc:$FRT, 0)>; +def : InstAlias<"qvfand $FRT, $FRA, $FRB", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 1)>; +def : InstAlias<"qvfandc $FRT, $FRA, $FRB", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 4)>; +def : InstAlias<"qvfctfb $FRT, $FRA", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRA, 5)>; +def : InstAlias<"qvfxor $FRT, $FRA, $FRB", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 6)>; +def : InstAlias<"qvfor $FRT, $FRA, $FRB", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 7)>; +def : InstAlias<"qvfnor $FRT, $FRA, $FRB", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 8)>; +def : InstAlias<"qvfequ $FRT, $FRA, $FRB", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 9)>; +def : InstAlias<"qvfnot $FRT, $FRA", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRA, 10)>; +def : InstAlias<"qvforc $FRT, $FRA, $FRB", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 13)>; +def : InstAlias<"qvfnand $FRT, $FRA, $FRB", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 14)>; +def : InstAlias<"qvfset $FRT", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRT, qbrc:$FRT, 15)>; + +//===----------------------------------------------------------------------===// +// Additional QPX Patterns +// + +def : Pat<(v4f64 (scalar_to_vector f64:$A)), + (INSERT_SUBREG 
(v4f64 (IMPLICIT_DEF)), $A, sub_64)>; +def : Pat<(v4f32 (scalar_to_vector f32:$A)), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), $A, sub_64)>; + +def : Pat<(f64 (extractelt v4f64:$S, 0)), + (EXTRACT_SUBREG $S, sub_64)>; +def : Pat<(f32 (extractelt v4f32:$S, 0)), + (EXTRACT_SUBREG $S, sub_64)>; + +def : Pat<(f64 (extractelt v4f64:$S, 1)), + (EXTRACT_SUBREG (QVESPLATI $S, 1), sub_64)>; +def : Pat<(f64 (extractelt v4f64:$S, 2)), + (EXTRACT_SUBREG (QVESPLATI $S, 2), sub_64)>; +def : Pat<(f64 (extractelt v4f64:$S, 3)), + (EXTRACT_SUBREG (QVESPLATI $S, 3), sub_64)>; + +def : Pat<(f32 (extractelt v4f32:$S, 1)), + (EXTRACT_SUBREG (QVESPLATIs $S, 1), sub_64)>; +def : Pat<(f32 (extractelt v4f32:$S, 2)), + (EXTRACT_SUBREG (QVESPLATIs $S, 2), sub_64)>; +def : Pat<(f32 (extractelt v4f32:$S, 3)), + (EXTRACT_SUBREG (QVESPLATIs $S, 3), sub_64)>; + +def : Pat<(f64 (extractelt v4f64:$S, i64:$F)), + (EXTRACT_SUBREG (QVFPERM $S, $S, + (QVLPCLSXint (RLDICR $F, 2, + /* 63-2 = */ 61))), + sub_64)>; +def : Pat<(f32 (extractelt v4f32:$S, i64:$F)), + (EXTRACT_SUBREG (QVFPERMs $S, $S, + (QVLPCLSXint (RLDICR $F, 2, + /* 63-2 = */ 61))), + sub_64)>; + +def : Pat<(int_ppc_qpx_qvfperm v4f64:$A, v4f64:$B, v4f64:$C), + (QVFPERM $A, $B, $C)>; + +def : Pat<(int_ppc_qpx_qvfcpsgn v4f64:$A, v4f64:$B), + (QVFCPSGN $A, $B)>; + +// FCOPYSIGN's operand types need not agree. +def : Pat<(fcopysign v4f64:$frB, v4f32:$frA), + (QVFCPSGN (COPY_TO_REGCLASS $frA, QFRC), $frB)>; +def : Pat<(fcopysign QSRC:$frB, QFRC:$frA), + (QVFCPSGNs (COPY_TO_REGCLASS $frA, QSRC), $frB)>; + +def : Pat<(int_ppc_qpx_qvfneg v4f64:$A), (QVFNEG $A)>; +def : Pat<(int_ppc_qpx_qvfabs v4f64:$A), (QVFABS $A)>; +def : Pat<(int_ppc_qpx_qvfnabs v4f64:$A), (QVFNABS $A)>; + +def : Pat<(int_ppc_qpx_qvfriz v4f64:$A), (QVFRIZ $A)>; +def : Pat<(int_ppc_qpx_qvfrin v4f64:$A), (QVFRIN $A)>; +def : Pat<(int_ppc_qpx_qvfrip v4f64:$A), (QVFRIP $A)>; +def : Pat<(int_ppc_qpx_qvfrim v4f64:$A), (QVFRIM $A)>; + +def : Pat<(int_ppc_qpx_qvfre v4f64:$A), (QVFRE $A)>; +def : Pat<(int_ppc_qpx_qvfrsqrte v4f64:$A), (QVFRSQRTE $A)>; + +def : Pat<(int_ppc_qpx_qvfadd v4f64:$A, v4f64:$B), + (QVFADD $A, $B)>; +def : Pat<(int_ppc_qpx_qvfsub v4f64:$A, v4f64:$B), + (QVFSUB $A, $B)>; +def : Pat<(int_ppc_qpx_qvfmul v4f64:$A, v4f64:$B), + (QVFMUL $A, $B)>; + +// Additional QVFNMSUB patterns: -a*c + b == -(a*c - b) +def : Pat<(fma (fneg v4f64:$A), v4f64:$C, v4f64:$B), + (QVFNMSUB $A, $B, $C)>; +def : Pat<(fma v4f64:$A, (fneg v4f64:$C), v4f64:$B), + (QVFNMSUB $A, $B, $C)>; +def : Pat<(fma (fneg v4f32:$A), v4f32:$C, v4f32:$B), + (QVFNMSUBSs $A, $B, $C)>; +def : Pat<(fma v4f32:$A, (fneg v4f32:$C), v4f32:$B), + (QVFNMSUBSs $A, $B, $C)>; + +def : Pat<(int_ppc_qpx_qvfmadd v4f64:$A, v4f64:$B, v4f64:$C), + (QVFMADD $A, $B, $C)>; +def : Pat<(int_ppc_qpx_qvfnmadd v4f64:$A, v4f64:$B, v4f64:$C), + (QVFNMADD $A, $B, $C)>; +def : Pat<(int_ppc_qpx_qvfmsub v4f64:$A, v4f64:$B, v4f64:$C), + (QVFMSUB $A, $B, $C)>; +def : Pat<(int_ppc_qpx_qvfnmsub v4f64:$A, v4f64:$B, v4f64:$C), + (QVFNMSUB $A, $B, $C)>; + +def : Pat<(int_ppc_qpx_qvlfd xoaddr:$src), + (QVLFDX xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfda xoaddr:$src), + (QVLFDXA xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfs xoaddr:$src), + (QVLFSX xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfsa xoaddr:$src), + (QVLFSXA xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfcda xoaddr:$src), + (QVLFCDXA xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfcd xoaddr:$src), + (QVLFCDX xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfcsa xoaddr:$src), + (QVLFCSXA xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfcs 
xoaddr:$src), + (QVLFCSX xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfda xoaddr:$src), + (QVLFDXA xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfiwaa xoaddr:$src), + (QVLFIWAXA xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfiwa xoaddr:$src), + (QVLFIWAX xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfiwza xoaddr:$src), + (QVLFIWZXA xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfiwz xoaddr:$src), + (QVLFIWZX xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfsa xoaddr:$src), + (QVLFSXA xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlpcld xoaddr:$src), + (QVLPCLDX xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlpcls xoaddr:$src), + (QVLPCLSX xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlpcrd xoaddr:$src), + (QVLPCRDX xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlpcrs xoaddr:$src), + (QVLPCRSX xoaddr:$src)>; + +def : Pat<(int_ppc_qpx_qvstfd v4f64:$T, xoaddr:$dst), + (QVSTFDX $T, xoaddr:$dst)>; +def : Pat<(int_ppc_qpx_qvstfs v4f64:$T, xoaddr:$dst), + (QVSTFSX $T, xoaddr:$dst)>; +def : Pat<(int_ppc_qpx_qvstfcda v4f64:$T, xoaddr:$dst), + (QVSTFCDXA $T, xoaddr:$dst)>; +def : Pat<(int_ppc_qpx_qvstfcd v4f64:$T, xoaddr:$dst), + (QVSTFCDX $T, xoaddr:$dst)>; +def : Pat<(int_ppc_qpx_qvstfcsa v4f64:$T, xoaddr:$dst), + (QVSTFCSXA $T, xoaddr:$dst)>; +def : Pat<(int_ppc_qpx_qvstfcs v4f64:$T, xoaddr:$dst), + (QVSTFCSX $T, xoaddr:$dst)>; +def : Pat<(int_ppc_qpx_qvstfda v4f64:$T, xoaddr:$dst), + (QVSTFDXA $T, xoaddr:$dst)>; +def : Pat<(int_ppc_qpx_qvstfiwa v4f64:$T, xoaddr:$dst), + (QVSTFIWXA $T, xoaddr:$dst)>; +def : Pat<(int_ppc_qpx_qvstfiw v4f64:$T, xoaddr:$dst), + (QVSTFIWX $T, xoaddr:$dst)>; +def : Pat<(int_ppc_qpx_qvstfsa v4f64:$T, xoaddr:$dst), + (QVSTFSXA $T, xoaddr:$dst)>; + +def : Pat<(pre_store v4f64:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (QVSTFDUX $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_store v4f32:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (QVSTFSUX $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_truncstv4f32 v4f64:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (QVSTFSUXs $rS, $ptrreg, $ptroff)>; + +def : Pat<(int_ppc_qpx_qvflogical v4f64:$A, v4f64:$B, (i32 imm:$idx)), + (QVFLOGICAL $A, $B, imm:$idx)>; +def : Pat<(int_ppc_qpx_qvgpci (u12:$idx)), + (QVGPCI imm:$idx)>; + +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETOGE), + (QVFLOGICALb (QVFCMPLTb $FRA, $FRB), + (QVFTSTNANb $FRA, $FRB), (i32 8))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETOLE), + (QVFLOGICALb (QVFCMPGTb $FRA, $FRB), + (QVFTSTNANb $FRA, $FRB), (i32 8))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETONE), + (QVFLOGICALb (QVFCMPEQb $FRA, $FRB), + (QVFTSTNANb $FRA, $FRB), (i32 8))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETO), + (QVFLOGICALb (QVFTSTNANb $FRA, $FRB), + (QVFTSTNANb $FRA, $FRB), (i32 10))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUEQ), + (QVFLOGICALb (QVFCMPEQb $FRA, $FRB), + (QVFTSTNANb $FRA, $FRB), (i32 7))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUGT), + (QVFLOGICALb (QVFCMPGTb $FRA, $FRB), + (QVFTSTNANb $FRA, $FRB), (i32 7))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUGE), + (QVFLOGICALb (QVFTSTNANb $FRA, $FRB), + (QVFCMPLTb $FRA, $FRB), (i32 13))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETULT), + (QVFLOGICALb (QVFCMPLTb $FRA, $FRB), + (QVFTSTNANb $FRA, $FRB), (i32 7))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETULE), + (QVFLOGICALb (QVFTSTNANb $FRA, $FRB), + (QVFCMPGTb $FRA, $FRB), (i32 13))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUNE), + (QVFLOGICALb (QVFTSTNANb $FRA, $FRB), + (QVFCMPEQb $FRA, $FRB), (i32 13))>; + +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETEQ), + (QVFCMPEQb $FRA, $FRB)>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETGT), + (QVFCMPGTb 
$FRA, $FRB)>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETGE), + (QVFLOGICALb (QVFCMPLTb $FRA, $FRB), + (QVFCMPLTb $FRA, $FRB), (i32 10))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETLT), + (QVFCMPLTb $FRA, $FRB)>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETLE), + (QVFLOGICALb (QVFCMPGTb $FRA, $FRB), + (QVFCMPGTb $FRA, $FRB), (i32 10))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETNE), + (QVFLOGICALb (QVFCMPEQb $FRA, $FRB), + (QVFCMPEQb $FRA, $FRB), (i32 10))>; + +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETOGE), + (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB), + (QVFTSTNANbs $FRA, $FRB), (i32 8))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETOLE), + (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB), + (QVFTSTNANbs $FRA, $FRB), (i32 8))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETONE), + (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB), + (QVFTSTNANbs $FRA, $FRB), (i32 8))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETO), + (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB), + (QVFTSTNANbs $FRA, $FRB), (i32 10))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUEQ), + (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB), + (QVFTSTNANbs $FRA, $FRB), (i32 7))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUGT), + (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB), + (QVFTSTNANbs $FRA, $FRB), (i32 7))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUGE), + (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB), + (QVFCMPLTbs $FRA, $FRB), (i32 13))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETULT), + (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB), + (QVFTSTNANbs $FRA, $FRB), (i32 7))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETULE), + (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB), + (QVFCMPGTbs $FRA, $FRB), (i32 13))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUNE), + (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB), + (QVFCMPEQbs $FRA, $FRB), (i32 13))>; + +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETEQ), + (QVFCMPEQbs $FRA, $FRB)>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETGT), + (QVFCMPGTbs $FRA, $FRB)>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETGE), + (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB), + (QVFCMPLTbs $FRA, $FRB), (i32 10))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETLT), + (QVFCMPLTbs $FRA, $FRB)>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETLE), + (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB), + (QVFCMPGTbs $FRA, $FRB), (i32 10))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETNE), + (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB), + (QVFCMPEQbs $FRA, $FRB), (i32 10))>; + +def : Pat<(and v4i1:$FRA, (not v4i1:$FRB)), + (QVFLOGICALb $FRA, $FRB, (i32 4))>; +def : Pat<(not (or v4i1:$FRA, v4i1:$FRB)), + (QVFLOGICALb $FRA, $FRB, (i32 8))>; +def : Pat<(not (xor v4i1:$FRA, v4i1:$FRB)), + (QVFLOGICALb $FRA, $FRB, (i32 9))>; +def : Pat<(or v4i1:$FRA, (not v4i1:$FRB)), + (QVFLOGICALb $FRA, $FRB, (i32 13))>; +def : Pat<(not (and v4i1:$FRA, v4i1:$FRB)), + (QVFLOGICALb $FRA, $FRB, (i32 14))>; + +def : Pat<(and v4i1:$FRA, v4i1:$FRB), + (QVFLOGICALb $FRA, $FRB, (i32 1))>; +def : Pat<(or v4i1:$FRA, v4i1:$FRB), + (QVFLOGICALb $FRA, $FRB, (i32 7))>; +def : Pat<(xor v4i1:$FRA, v4i1:$FRB), + (QVFLOGICALb $FRA, $FRB, (i32 6))>; +def : Pat<(not v4i1:$FRA), + (QVFLOGICALb $FRA, $FRA, (i32 10))>; + +def : Pat<(v4f64 (fpextend v4f32:$src)), + (COPY_TO_REGCLASS $src, QFRC)>; + +def : Pat<(v4f32 (fround_exact v4f64:$src)), + (COPY_TO_REGCLASS $src, QSRC)>; + +// Extract the underlying floating-point values from the +// QPX (-1.0, 1.0) boolean representation. 
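+// (A sketch of why no instruction is needed here: each v4i1 lane is already +// held in a QPX register as one of those two floating-point values, so the +// pattern below lowers to a plain register-class copy.)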
+def : Pat<(v4f64 (PPCqbflt v4i1:$src)), + (COPY_TO_REGCLASS $src, QFRC)>; + +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLT)), + (SELECT_QFRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETULT)), + (SELECT_QFRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLE)), + (SELECT_QFRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETULE)), + (SELECT_QFRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETEQ)), + (SELECT_QFRC (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGE)), + (SELECT_QFRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETUGE)), + (SELECT_QFRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGT)), + (SELECT_QFRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETUGT)), + (SELECT_QFRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETNE)), + (SELECT_QFRC (CRXOR $lhs, $rhs), $tval, $fval)>; + +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLT)), + (SELECT_QSRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETULT)), + (SELECT_QSRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLE)), + (SELECT_QSRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETULE)), + (SELECT_QSRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETEQ)), + (SELECT_QSRC (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGE)), + (SELECT_QSRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETUGE)), + (SELECT_QSRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGT)), + (SELECT_QSRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETUGT)), + (SELECT_QSRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETNE)), + (SELECT_QSRC (CRXOR $lhs, $rhs), $tval, $fval)>; + +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLT)), + (SELECT_QBRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETULT)), + (SELECT_QBRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLE)), + (SELECT_QBRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETULE)), + (SELECT_QBRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETEQ)), + (SELECT_QBRC (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGE)), + (SELECT_QBRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i1 (selectcc i1:$lhs, 
i1:$rhs, v4i1:$tval, v4i1:$fval, SETUGE)), + (SELECT_QBRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGT)), + (SELECT_QBRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETUGT)), + (SELECT_QBRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETNE)), + (SELECT_QBRC (CRXOR $lhs, $rhs), $tval, $fval)>; + +} // end HasQPX + +let Predicates = [HasQPX, NoNaNsFPMath] in { +def : Pat<(fminnum v4f64:$FRA, v4f64:$FRB), + (QVFSELb (QVFCMPLTb $FRA, $FRB), $FRB, $FRA)>; +def : Pat<(fmaxnum v4f64:$FRA, v4f64:$FRB), + (QVFSELb (QVFCMPGTb $FRA, $FRB), $FRB, $FRA)>; + +def : Pat<(fminnum v4f32:$FRA, v4f32:$FRB), + (QVFSELbs (QVFCMPLTbs $FRA, $FRB), $FRB, $FRA)>; +def : Pat<(fmaxnum v4f32:$FRA, v4f32:$FRB), + (QVFSELbs (QVFCMPGTbs $FRA, $FRB), $FRB, $FRA)>; +} + +let Predicates = [HasQPX, NaNsFPMath] in { +// When either of these operands is NaN, we should return the other operand. +// QVFCMPLT/QVFCMPGT return false if either operand is NaN, which means we need +// to explicitly OR with a NaN test on the second operand. +def : Pat<(fminnum v4f64:$FRA, v4f64:$FRB), + (QVFSELb (QVFLOGICALb (QVFCMPLTb $FRA, $FRB), + (QVFTSTNANb $FRB, $FRB), (i32 7)), + $FRB, $FRA)>; +def : Pat<(fmaxnum v4f64:$FRA, v4f64:$FRB), + (QVFSELb (QVFLOGICALb (QVFCMPGTb $FRA, $FRB), + (QVFTSTNANb $FRB, $FRB), (i32 7)), + $FRB, $FRA)>; + +def : Pat<(fminnum v4f32:$FRA, v4f32:$FRB), + (QVFSELbs (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB), + (QVFTSTNANbs $FRB, $FRB), (i32 7)), + $FRB, $FRA)>; +def : Pat<(fmaxnum v4f32:$FRA, v4f32:$FRB), + (QVFSELbs (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB), + (QVFTSTNANbs $FRB, $FRB), (i32 7)), + $FRB, $FRA)>; +} + diff --git a/capstone/suite/synctools/tablegen/PPC/PPCInstrSPE.td b/capstone/suite/synctools/tablegen/PPC/PPCInstrSPE.td new file mode 100644 index 000000000..96649efdc --- /dev/null +++ b/capstone/suite/synctools/tablegen/PPC/PPCInstrSPE.td @@ -0,0 +1,892 @@ +//=======-- PPCInstrSPE.td - The PowerPC SPE Extension -*- tablegen -*-=======// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Signal Processing Engine extension to +// the PowerPC instruction set.
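+// Note that the ev* vector definitions below are largely encoding-only (their +// ISel pattern lists are empty); only the scalar efs*/efd* floating-point +// operations and a handful of loads and stores carry selection patterns.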
+// +//===----------------------------------------------------------------------===// + +class EFXForm_1<bits<11> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> : + I<4, OOL, IOL, asmstr, itin> { + bits<5> RT; + bits<5> RA; + bits<5> RB; + + let Pattern = pattern; + + let Inst{6-10} = RT; + let Inst{11-15} = RA; + let Inst{16-20} = RB; + let Inst{21-31} = xo; +} + +class EFXForm_2<bits<11> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> : + EFXForm_1<xo, OOL, IOL, asmstr, itin, pattern> { + let RB = 0; +} + +class EFXForm_2a<bits<11> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> : + EFXForm_1<xo, OOL, IOL, asmstr, itin, pattern> { + let RA = 0; +} + +class EFXForm_3<bits<11> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> : + I<4, OOL, IOL, asmstr, itin> { + bits<3> crD; + bits<5> RA; + bits<5> RB; + + let Inst{6-8} = crD; + let Inst{9-10} = 0; + let Inst{11-15} = RA; + let Inst{16-20} = RB; + let Inst{21-31} = xo; +} + +class EVXForm_1<bits<11> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> : + I<4, OOL, IOL, asmstr, itin> { + bits<5> RT; + bits<5> RA; + bits<5> RB; + + let Pattern = pattern; + + let Inst{6-10} = RT; + let Inst{11-15} = RA; + let Inst{16-20} = RB; + let Inst{21-31} = xo; +} + +class EVXForm_2<bits<11> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> : + EVXForm_1<xo, OOL, IOL, asmstr, itin, pattern> { + let RB = 0; +} + +class EVXForm_2a<bits<11> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> : + EVXForm_1<xo, OOL, IOL, asmstr, itin, pattern> { + let RA = 0; +} + +class EVXForm_3<bits<11> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> : + I<4, OOL, IOL, asmstr, itin> { + bits<3> crD; + bits<5> RA; + bits<5> RB; + + let Pattern = pattern; + + let Inst{6-8} = crD; + let Inst{9-10} = 0; + let Inst{11-15} = RA; + let Inst{16-20} = RB; + let Inst{21-31} = xo; +} + +class EVXForm_4<bits<8> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> : + I<4, OOL, IOL, asmstr, itin> { + bits<3> crD; + bits<5> RA; + bits<5> RB; + bits<5> RT; + + let Pattern = pattern; + + let Inst{6-10} = RT; + let Inst{11-15} = RA; + let Inst{16-20} = RB; + let Inst{21-28} = xo; + let Inst{29-31} = crD; +} + +class EVXForm_D<bits<11> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> : + I<4, OOL, IOL, asmstr, itin> { + bits<5> RT; + bits<21> D; + + let Pattern = pattern; + + let Inst{6-10} = RT; + let Inst{20} = D{0}; + let Inst{19} = D{1}; + let Inst{18} = D{2}; + let Inst{17} = D{3}; + let Inst{16} = D{4}; + let Inst{15} = D{5}; + let Inst{14} = D{6}; + let Inst{13} = D{7}; + let Inst{12} = D{8}; + let Inst{11} = D{9}; + let Inst{11-20} = D{0-9}; + let Inst{21-31} = xo; +} + +let DecoderNamespace = "SPE", Predicates = [HasSPE] in { + +def BRINC : EVXForm_1<527, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB), + "brinc $RT, $RA, $RB", IIC_IntSimple, []>; + +// Double-precision floating point +def EFDABS : EFXForm_2<740, (outs sperc:$RT), (ins sperc:$RA), + "efdabs $RT, $RA", IIC_FPDGeneral, + [(set f64:$RT, (fabs f64:$RA))]>; + +def EFDADD : EFXForm_1<736, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "efdadd $RT, $RA, $RB", IIC_FPAddSub, + [(set f64:$RT, (fadd f64:$RA, f64:$RB))]>; + +def EFDCFS : EFXForm_2a<751, (outs sperc:$RT), (ins spe4rc:$RB), + "efdcfs $RT, $RB", IIC_FPDGeneral, + [(set f64:$RT, (fpextend 
f32:$RB))]>; + +def EFDCFSF : EFXForm_2a<755, (outs sperc:$RT), (ins spe4rc:$RB), + "efdcfsf $RT, $RB", IIC_FPDGeneral, []>; + +def EFDCFSI : EFXForm_2a<753, (outs sperc:$RT), (ins gprc:$RB), + "efdcfsi $RT, $RB", IIC_FPDGeneral, + [(set f64:$RT, (sint_to_fp i32:$RB))]>; + +def EFDCFSID : EFXForm_2a<739, (outs sperc:$RT), (ins gprc:$RB), + "efdcfsid $RT, $RB", IIC_FPDGeneral, + []>; + +def EFDCFUF : EFXForm_2a<754, (outs sperc:$RT), (ins spe4rc:$RB), + "efdcfuf $RT, $RB", IIC_FPDGeneral, []>; + +def EFDCFUI : EFXForm_2a<752, (outs sperc:$RT), (ins gprc:$RB), + "efdcfui $RT, $RB", IIC_FPDGeneral, + [(set f64:$RT, (uint_to_fp i32:$RB))]>; + +def EFDCFUID : EFXForm_2a<738, (outs sperc:$RT), (ins gprc:$RB), + "efdcfuid $RT, $RB", IIC_FPDGeneral, + []>; + +let isCompare = 1 in { +def EFDCMPEQ : EFXForm_3<750, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "efdcmpeq $crD, $RA, $RB", IIC_FPDGeneral>; +def EFDCMPGT : EFXForm_3<748, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "efdcmpgt $crD, $RA, $RB", IIC_FPDGeneral>; +def EFDCMPLT : EFXForm_3<749, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "efdcmplt $crD, $RA, $RB", IIC_FPDGeneral>; +} + +def EFDCTSF : EFXForm_2a<759, (outs sperc:$RT), (ins spe4rc:$RB), + "efdctsf $RT, $RB", IIC_FPDGeneral, []>; + +def EFDCTSI : EFXForm_2a<757, (outs gprc:$RT), (ins sperc:$RB), + "efdctsi $RT, $RB", IIC_FPDGeneral, + []>; + +def EFDCTSIDZ : EFXForm_2a<747, (outs gprc:$RT), (ins sperc:$RB), + "efdctsidz $RT, $RB", IIC_FPDGeneral, + []>; + +def EFDCTSIZ : EFXForm_2a<762, (outs gprc:$RT), (ins sperc:$RB), + "efdctsiz $RT, $RB", IIC_FPDGeneral, + [(set i32:$RT, (fp_to_sint f64:$RB))]>; + +def EFDCTUF : EFXForm_2a<758, (outs sperc:$RT), (ins spe4rc:$RB), + "efdctuf $RT, $RB", IIC_FPDGeneral, []>; + +def EFDCTUI : EFXForm_2a<756, (outs gprc:$RT), (ins sperc:$RB), + "efdctui $RT, $RB", IIC_FPDGeneral, + []>; + +def EFDCTUIDZ : EFXForm_2a<746, (outs gprc:$RT), (ins sperc:$RB), + "efdctuidz $RT, $RB", IIC_FPDGeneral, + []>; + +def EFDCTUIZ : EFXForm_2a<760, (outs gprc:$RT), (ins sperc:$RB), + "efdctuiz $RT, $RB", IIC_FPDGeneral, + [(set i32:$RT, (fp_to_uint f64:$RB))]>; + +def EFDDIV : EFXForm_1<745, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "efddiv $RT, $RA, $RB", IIC_FPDivD, + [(set f64:$RT, (fdiv f64:$RA, f64:$RB))]>; + +def EFDMUL : EFXForm_1<744, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "efdmul $RT, $RA, $RB", IIC_FPDGeneral, + [(set f64:$RT, (fmul f64:$RA, f64:$RB))]>; + +def EFDNABS : EFXForm_2<741, (outs sperc:$RT), (ins sperc:$RA), + "efdnabs $RT, $RA", IIC_FPDGeneral, + [(set f64:$RT, (fneg (fabs f64:$RA)))]>; + +def EFDNEG : EFXForm_2<742, (outs sperc:$RT), (ins sperc:$RA), + "efdneg $RT, $RA", IIC_FPDGeneral, + [(set f64:$RT, (fneg f64:$RA))]>; + +def EFDSUB : EFXForm_1<737, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "efdsub $RT, $RA, $RB", IIC_FPDGeneral, + [(set f64:$RT, (fsub f64:$RA, f64:$RB))]>; + +let isCompare = 1 in { +def EFDTSTEQ : EFXForm_3<766, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "efdtsteq $crD, $RA, $RB", IIC_FPDGeneral>; +def EFDTSTGT : EFXForm_3<764, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "efdtstgt $crD, $RA, $RB", IIC_FPDGeneral>; +def EFDTSTLT : EFXForm_3<765, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "efdtstlt $crD, $RA, $RB", IIC_FPDGeneral>; +} + +// Single-precision floating point +def EFSABS : EFXForm_2<708, (outs spe4rc:$RT), (ins spe4rc:$RA), + "efsabs $RT, $RA", IIC_FPSGeneral, + [(set f32:$RT, (fabs f32:$RA))]>; + +def EFSADD : EFXForm_1<704, (outs spe4rc:$RT), (ins spe4rc:$RA, 
spe4rc:$RB), + "efsadd $RT, $RA, $RB", IIC_FPAddSub, + [(set f32:$RT, (fadd f32:$RA, f32:$RB))]>; + +def EFSCFD : EFXForm_2a<719, (outs spe4rc:$RT), (ins sperc:$RB), + "efscfd $RT, $RB", IIC_FPSGeneral, + [(set f32:$RT, (fpround f64:$RB))]>; + +def EFSCFSF : EFXForm_2a<723, (outs spe4rc:$RT), (ins spe4rc:$RB), + "efscfsf $RT, $RB", IIC_FPSGeneral, []>; + +def EFSCFSI : EFXForm_2a<721, (outs spe4rc:$RT), (ins gprc:$RB), + "efscfsi $RT, $RB", IIC_FPSGeneral, + [(set f32:$RT, (sint_to_fp i32:$RB))]>; + +def EFSCFUF : EFXForm_2a<722, (outs spe4rc:$RT), (ins spe4rc:$RB), + "efscfuf $RT, $RB", IIC_FPSGeneral, []>; + +def EFSCFUI : EFXForm_2a<720, (outs spe4rc:$RT), (ins gprc:$RB), + "efscfui $RT, $RB", IIC_FPSGeneral, + [(set f32:$RT, (uint_to_fp i32:$RB))]>; + +let isCompare = 1 in { +def EFSCMPEQ : EFXForm_3<718, (outs crrc:$crD), (ins spe4rc:$RA, spe4rc:$RB), + "efscmpeq $crD, $RA, $RB", IIC_FPCompare>; +def EFSCMPGT : EFXForm_3<716, (outs crrc:$crD), (ins spe4rc:$RA, spe4rc:$RB), + "efscmpgt $crD, $RA, $RB", IIC_FPCompare>; +def EFSCMPLT : EFXForm_3<717, (outs crrc:$crD), (ins spe4rc:$RA, spe4rc:$RB), + "efscmplt $crD, $RA, $RB", IIC_FPCompare>; +} + +def EFSCTSF : EFXForm_2a<727, (outs spe4rc:$RT), (ins spe4rc:$RB), + "efsctsf $RT, $RB", IIC_FPSGeneral, []>; + +def EFSCTSI : EFXForm_2a<725, (outs gprc:$RT), (ins spe4rc:$RB), + "efsctsi $RT, $RB", IIC_FPSGeneral, + []>; + +def EFSCTSIZ : EFXForm_2a<730, (outs gprc:$RT), (ins spe4rc:$RB), + "efsctsiz $RT, $RB", IIC_FPSGeneral, + [(set i32:$RT, (fp_to_sint f32:$RB))]>; + +def EFSCTUF : EFXForm_2a<726, (outs sperc:$RT), (ins spe4rc:$RB), + "efsctuf $RT, $RB", IIC_FPSGeneral, []>; + +def EFSCTUI : EFXForm_2a<724, (outs gprc:$RT), (ins spe4rc:$RB), + "efsctui $RT, $RB", IIC_FPSGeneral, + []>; + +def EFSCTUIZ : EFXForm_2a<728, (outs gprc:$RT), (ins spe4rc:$RB), + "efsctuiz $RT, $RB", IIC_FPSGeneral, + [(set i32:$RT, (fp_to_uint f32:$RB))]>; + +def EFSDIV : EFXForm_1<713, (outs spe4rc:$RT), (ins spe4rc:$RA, spe4rc:$RB), + "efsdiv $RT, $RA, $RB", IIC_FPDivD, + [(set f32:$RT, (fdiv f32:$RA, f32:$RB))]>; + +def EFSMUL : EFXForm_1<712, (outs spe4rc:$RT), (ins spe4rc:$RA, spe4rc:$RB), + "efsmul $RT, $RA, $RB", IIC_FPGeneral, + [(set f32:$RT, (fmul f32:$RA, f32:$RB))]>; + +def EFSNABS : EFXForm_2<709, (outs spe4rc:$RT), (ins spe4rc:$RA), + "efsnabs $RT, $RA", IIC_FPGeneral, + [(set f32:$RT, (fneg (fabs f32:$RA)))]>; + +def EFSNEG : EFXForm_2<710, (outs spe4rc:$RT), (ins spe4rc:$RA), + "efsneg $RT, $RA", IIC_FPGeneral, + [(set f32:$RT, (fneg f32:$RA))]>; + +def EFSSUB : EFXForm_1<705, (outs spe4rc:$RT), (ins spe4rc:$RA, spe4rc:$RB), + "efssub $RT, $RA, $RB", IIC_FPSGeneral, + [(set f32:$RT, (fsub f32:$RA, f32:$RB))]>; + +let isCompare = 1 in { +def EFSTSTEQ : EFXForm_3<734, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "efststeq $crD, $RA, $RB", IIC_FPCompare>; +def EFSTSTGT : EFXForm_3<732, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "efststgt $crD, $RA, $RB", IIC_FPCompare>; +def EFSTSTLT : EFXForm_3<733, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "efststlt $crD, $RA, $RB", IIC_FPCompare>; +} + +// SPE Vector operations + +def EVABS : EVXForm_2<520, (outs sperc:$RT), (ins sperc:$RA), + "evabs $RT, $RA", IIC_VecGeneral, + []>; + +def EVADDIW : EVXForm_1<514, (outs sperc:$RT), (ins sperc:$RA, u5imm:$RB), + "evaddiw $RT, $RB, $RA", IIC_VecGeneral, []>; +def EVADDSMIAAW : EVXForm_2<1225, (outs sperc:$RT), (ins sperc:$RA), + "evaddsmiaaw $RT, $RA", IIC_VecComplex, []>; +def EVADDSSIAAW : EVXForm_2<1217, (outs sperc:$RT), (ins sperc:$RA), + 
"evaddssiaaw $RT, $RA", IIC_VecComplex, []>; +def EVADDUSIAAW : EVXForm_2<1216, (outs sperc:$RT), (ins sperc:$RA), + "evaddusiaaw $RT, $RA", IIC_VecComplex, []>; +def EVADDUMIAAW : EVXForm_2<1224, (outs sperc:$RT), (ins sperc:$RA), + "evaddumiaaw $RT, $RA", IIC_VecComplex, []>; +def EVADDW : EVXForm_1<512, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evaddw $RT, $RA, $RB", IIC_VecGeneral, + []>; + +def EVAND : EVXForm_1<529, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evand $RT, $RA, $RB", IIC_VecGeneral, + []>; +def EVANDC : EVXForm_1<530, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evandc $RT, $RA, $RB", IIC_VecGeneral, + []>; + +let isCompare = 1 in { +def EVCMPEQ : EVXForm_3<564, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "evcmpeq $crD, $RA, $RB", IIC_VecGeneral, []>; +def EVCMPGTS : EVXForm_3<561, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "evcmpgts $crD, $RA, $RB", IIC_VecGeneral, []>; +def EVCMPGTU : EVXForm_3<560, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "evcmpgtu $crD, $RA, $RB", IIC_VecGeneral, []>; +def EVCMPLTS : EVXForm_3<563, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "evcmplts $crD, $RA, $RB", IIC_VecGeneral, []>; +def EVCMPLTU : EVXForm_3<562, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "evcmpltu $crD, $RA, $RB", IIC_VecGeneral, []>; +} + +def EVCNTLSW : EVXForm_2<526, (outs sperc:$RT), (ins sperc:$RA), + "evcntlsw $RT, $RA", IIC_VecGeneral, []>; +def EVCNTLZW : EVXForm_2<525, (outs sperc:$RT), (ins sperc:$RA), + "evcntlzw $RT, $RA", IIC_VecGeneral, + []>; + +def EVDIVWS : EVXForm_1<1222, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evdivws $RT, $RA, $RB", IIC_VecComplex, + []>; +def EVDIVWU : EVXForm_1<1223, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evdivwu $RT, $RA, $RB", IIC_VecComplex, + []>; + +def EVEQV : EVXForm_1<537, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "eveqv $RT, $RA, $RB", IIC_VecGeneral, + []>; + +def EVEXTSB : EVXForm_2<522, (outs sperc:$RT), (ins sperc:$RA), + "evextsb $RT, $RA", IIC_VecGeneral, + []>; +def EVEXTSH : EVXForm_2<523, (outs sperc:$RT), (ins sperc:$RA), + "evextsh $RT, $RA", IIC_VecGeneral, + []>; + +def EVFSABS : EVXForm_2<644, (outs sperc:$RT), (ins sperc:$RA), + "evfsabs $RT, $RA", IIC_VecGeneral, + []>; +def EVFSADD : EVXForm_1<640, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evfsadd $RT, $RA, $RB", IIC_VecComplex, + []>; +def EVFSCFSF : EVXForm_2a<659, (outs sperc:$RT), (ins sperc:$RB), + "evfscfsf $RT, $RB", IIC_VecComplex, []>; +def EVFSCFSI : EVXForm_2a<657, (outs sperc:$RT), (ins sperc:$RB), + "evfscfsi $RT, $RB", IIC_VecComplex, + []>; +def EVFSCFUF : EVXForm_2a<658, (outs sperc:$RT), (ins sperc:$RB), + "evfscfuf $RT, $RB", IIC_VecComplex, []>; +def EVFSCFUI : EVXForm_2a<650, (outs sperc:$RT), (ins sperc:$RB), + "evfscfui $RT, $RB", IIC_VecComplex, + []>; +let isCompare = 1 in { +def EVFSCMPEQ : EVXForm_3<654, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "evfscmpeq $crD, $RA, $RB", IIC_FPSGeneral, []>; +def EVFSCMPGT : EVXForm_3<652, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "evfscmpgt $crD, $RA, $RB", IIC_FPSGeneral, []>; +def EVFSCMPLT : EVXForm_3<653, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "evfscmplt $crD, $RA, $RB", IIC_FPSGeneral, []>; +} + +def EVFSCTSF : EVXForm_2a<663, (outs sperc:$RT), (ins sperc:$RB), + "evfsctsf $RT, $RB", IIC_VecComplex, []>; +def EVFSCTSI : EVXForm_2a<661, (outs sperc:$RT), (ins sperc:$RB), + "evfsctsi $RT, $RB", IIC_VecComplex, + []>; +def EVFSCTSIZ : EVXForm_2a<666, (outs sperc:$RT), (ins sperc:$RB), + "evfsctsiz $RT, 
$RB", IIC_VecComplex, + []>; +def EVFSCTUF : EVXForm_2a<662, (outs sperc:$RT), (ins sperc:$RB), + "evfsctsf $RT, $RB", IIC_VecComplex, []>; +def EVFSCTUI : EVXForm_2a<660, (outs sperc:$RT), (ins sperc:$RB), + "evfsctui $RT, $RB", IIC_VecComplex, + []>; +def EVFSCTUIZ : EVXForm_2a<664, (outs sperc:$RT), (ins sperc:$RB), + "evfsctsiz $RT, $RB", IIC_VecComplex, + []>; +def EVFSDIV : EVXForm_1<649, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evfsdiv $RT, $RA, $RB", IIC_FPDivD, + []>; +def EVFSMUL : EVXForm_1<648, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evfsmul $RT, $RA, $RB", IIC_VecComplex, + []>; +def EVFSNABS : EVXForm_2<645, (outs sperc:$RT), (ins sperc:$RA), + "evfsnabs $RT, $RA", IIC_VecGeneral, + []>; +def EVFSNEG : EVXForm_2<646, (outs sperc:$RT), (ins sperc:$RA), + "evfsneg $RT, $RA", IIC_VecGeneral, + []>; +def EVFSSUB : EVXForm_1<641, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evfssub $RT, $RA, $RB", IIC_VecComplex, + []>; + +let isCompare = 1 in { +def EVFSTSTEQ : EVXForm_3<670, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "evfststeq $crD, $RA, $RB", IIC_VecGeneral, []>; +def EVFSTSTGT : EVXForm_3<668, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "evfststgt $crD, $RA, $RB", IIC_VecGeneral, []>; +def EVFSTSTLT : EVXForm_3<669, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB), + "evfststlt $crD, $RA, $RB", IIC_VecGeneral, []>; +} + +def EVLDD : EVXForm_D<769, (outs sperc:$RT), (ins spe8dis:$dst), + "evldd $RT, $dst", IIC_LdStLoad, + [(set f64:$RT, (load iaddr:$dst))]>; +def EVLDDX : EVXForm_1<768, (outs sperc:$RT), (ins memrr:$src), + "evlddx $RT, $src", IIC_LdStLoad, + [(set f64:$RT, (load xaddr:$src))]>; +def EVLDH : EVXForm_D<773, (outs sperc:$RT), (ins spe8dis:$dst), + "evldh $RT, $dst", IIC_LdStLoad, []>; +def EVLDHX : EVXForm_1<772, (outs sperc:$RT), (ins memrr:$src), + "evldhx $RT, $src", IIC_LdStLoad, []>; +def EVLDW : EVXForm_D<771, (outs sperc:$RT), (ins spe8dis:$dst), + "evldw $RT, $dst", IIC_LdStLoad, + []>; +def EVLDWX : EVXForm_1<770, (outs sperc:$RT), (ins memrr:$src), + "evldwx $RT, $src", IIC_LdStLoad, + []>; +def EVLHHESPLAT : EVXForm_D<777, (outs sperc:$RT), (ins spe2dis:$dst), + "evlhhesplat $RT, $dst", IIC_LdStLoad, []>; +def EVLHHESPLATX : EVXForm_1<776, (outs sperc:$RT), (ins memrr:$src), + "evlhhesplatx $RT, $src", IIC_LdStLoad, []>; +def EVLHHOUSPLAT : EVXForm_D<781, (outs sperc:$RT), (ins spe2dis:$dst), + "evlhhousplat $RT, $dst", IIC_LdStLoad, []>; +def EVLHHOUSPLATX : EVXForm_1<780, (outs sperc:$RT), (ins memrr:$src), + "evlhhousplatx $RT, $src", IIC_LdStLoad, []>; +def EVLHHOSSPLAT : EVXForm_D<783, (outs sperc:$RT), (ins spe2dis:$dst), + "evlhhossplat $RT, $dst", IIC_LdStLoad, []>; +def EVLHHOSSPLATX : EVXForm_1<782, (outs sperc:$RT), (ins memrr:$src), + "evlhhossplatx $RT, $src", IIC_LdStLoad, []>; +def EVLWHE : EVXForm_D<785, (outs sperc:$RT), (ins spe4dis:$dst), + "evlwhe $RT, $dst", IIC_LdStLoad, []>; +def EVLWHEX : EVXForm_1<784, (outs sperc:$RT), (ins memrr:$src), + "evlwhex $RT, $src", IIC_LdStLoad, []>; +def EVLWHOS : EVXForm_D<791, (outs sperc:$RT), (ins spe4dis:$dst), + "evlwhos $RT, $dst", IIC_LdStLoad, []>; +def EVLWHOSX : EVXForm_1<790, (outs sperc:$RT), (ins memrr:$src), + "evlwhosx $RT, $src", IIC_LdStLoad, []>; +def EVLWHOU : EVXForm_D<789, (outs sperc:$RT), (ins spe4dis:$dst), + "evlwhou $RT, $dst", IIC_LdStLoad, []>; +def EVLWHOUX : EVXForm_1<788, (outs sperc:$RT), (ins memrr:$src), + "evlwhoux $RT, $src", IIC_LdStLoad, []>; +def EVLWHSPLAT : EVXForm_D<797, (outs sperc:$RT), (ins spe4dis:$dst), + "evlwhsplat $RT, 
$dst", IIC_LdStLoad, []>; +def EVLWHSPLATX : EVXForm_1<796, (outs sperc:$RT), (ins memrr:$src), + "evlwhsplatx $RT, $src", IIC_LdStLoad, []>; +def EVLWWSPLAT : EVXForm_D<793, (outs sperc:$RT), (ins spe4dis:$dst), + "evlwwsplat $RT, $dst", IIC_LdStLoad, []>; +def EVLWWSPLATX : EVXForm_1<792, (outs sperc:$RT), (ins memrr:$src), + "evlwwsplatx $RT, $src", IIC_LdStLoad, []>; + +def EVMERGEHI : EVXForm_1<556, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmergehi $RT, $RA, $RB", IIC_VecGeneral, []>; +def EVMERGELO : EVXForm_1<557, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmergelo $RT, $RA, $RB", IIC_VecGeneral, []>; +def EVMERGEHILO : EVXForm_1<558, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmergehilo $RT, $RA, $RB", IIC_VecGeneral, []>; +def EVMERGELOHI : EVXForm_1<559, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmergelohi $RT, $RA, $RB", IIC_VecGeneral, []>; + +def EVMHEGSMFAA : EVXForm_1<1323, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhegsmfaa $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHEGSMFAN : EVXForm_1<1451, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhegsmfan $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHEGSMIAA : EVXForm_1<1321, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhegsmiaa $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHEGSMIAN : EVXForm_1<1449, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhegsmian $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHEGUMIAA : EVXForm_1<1320, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhegumiaa $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHEGUMIAN : EVXForm_1<1448, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhegumian $RT, $RA, $RB", IIC_VecComplex, []>; + +def EVMHESMF : EVXForm_1<1035, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhesmf $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHESMFA : EVXForm_1<1067, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhesmfa $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHESMFAAW : EVXForm_1<1291, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhesmfaaw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHESMFANW : EVXForm_1<1419, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhesmfanw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHESMI : EVXForm_1<1033, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhesmi $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHESMIA : EVXForm_1<1065, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhesmia $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHESMIAAW : EVXForm_1<1289, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhesmiaaw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHESMIANW : EVXForm_1<1417, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhesmianw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHESSF : EVXForm_1<1027, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhessf $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHESSFA : EVXForm_1<1059, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhessfa $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHESSFAAW : EVXForm_1<1283, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhessfaaw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHESSFANW : EVXForm_1<1411, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhessfanw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHESSIAAW : EVXForm_1<1281, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhessiaaw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHESSIANW : EVXForm_1<1409, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhessianw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHEUMI : 
EVXForm_1<1032, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmheumi $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHEUMIA : EVXForm_1<1064, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmheumia $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHEUMIAAW : EVXForm_1<1288, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmheumiaaw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHEUMIANW : EVXForm_1<1416, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmheumianw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHEUSIAAW : EVXForm_1<1280, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmheusiaaw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHEUSIANW : EVXForm_1<1408, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmheusianw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHOGSMFAA : EVXForm_1<1327, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhogsmfaa $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHOGSMFAN : EVXForm_1<1455, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhogsmfan $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHOGSMIAA : EVXForm_1<1325, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhogsmiaa $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHOGSMIAN : EVXForm_1<1453, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhogsmian $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHOGUMIAA : EVXForm_1<1324, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhogumiaa $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHOGUMIAN : EVXForm_1<1452, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhogumian $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHOSMF : EVXForm_1<1039, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhosmf $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHOSMFA : EVXForm_1<1071, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhosmfa $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHOSMFAAW : EVXForm_1<1295, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhosmfaaw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHOSMFANW : EVXForm_1<1423, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhosmfanw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHOSMI : EVXForm_1<1037, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhosmi $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHOSMIA : EVXForm_1<1069, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhosmia $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHOSMIAAW : EVXForm_1<1293, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhosmiaaw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHOSMIANW : EVXForm_1<1421, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhosmianw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHOSSF : EVXForm_1<1031, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhossf $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHOSSFA : EVXForm_1<1063, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhossfa $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHOSSFAAW : EVXForm_1<1287, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhossfaaw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHOSSFANW : EVXForm_1<1415, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhossfanw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHOSSIAAW : EVXForm_1<1285, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhossiaaw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHOSSIANW : EVXForm_1<1413, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhossianw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHOUMI : EVXForm_1<1036, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhoumi $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHOUMIA : 
EVXForm_1<1068, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhoumia $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHOUMIAAW : EVXForm_1<1292, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhoumiaaw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHOUMIANW : EVXForm_1<1420, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhoumianw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHOUSIAAW : EVXForm_1<1284, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhousiaaw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMHOUSIANW : EVXForm_1<1412, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmhousianw $RT, $RA, $RB", IIC_VecComplex, []>; + +def EVMRA : EVXForm_2<1220, (outs sperc:$RT), (ins sperc:$RA), + "evmra $RT, $RA", IIC_VecComplex, []>; + +def EVMWHSMF : EVXForm_1<1103, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwhsmf $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWHSMFA : EVXForm_1<1135, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwhsmfa $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWHSMI : EVXForm_1<1101, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwhsmi $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWHSMIA : EVXForm_1<1133, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwhsmia $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWHSSF : EVXForm_1<1095, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwhssf $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWHSSFA : EVXForm_1<1127, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwhssfa $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWHUMI : EVXForm_1<1100, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwhumi $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWHUMIA : EVXForm_1<1132, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwhumia $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWLSMIAAW : EVXForm_1<1353, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwlsmiaaw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWLSMIANW : EVXForm_1<1481, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwlsmianw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWLSSIAAW : EVXForm_1<1345, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwlssiaaw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWLSSIANW : EVXForm_1<1473, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwlssianw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWLUMI : EVXForm_1<1096, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwlumi $RT, $RA, $RB", IIC_VecComplex, + []>; +def EVMWLUMIA : EVXForm_1<1128, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwlumia $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWLUMIAAW : EVXForm_1<1352, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwlumiaaw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWLUMIANW : EVXForm_1<1480, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwlumianw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWLUSIAAW : EVXForm_1<1344, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwlusiaaw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWLUSIANW : EVXForm_1<1472, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwlusianw $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWSMF : EVXForm_1<1115, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwsmf $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWSMFA : EVXForm_1<1147, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwsmfa $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWSMFAA : EVXForm_1<1371, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwsmfaa $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWSMFAN : EVXForm_1<1499, (outs sperc:$RT), (ins sperc:$RA, 
sperc:$RB), + "evmwsmfan $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWSMI : EVXForm_1<1113, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwsmi $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWSMIA : EVXForm_1<1145, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwsmia $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWSMIAA : EVXForm_1<1369, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwsmiaa $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWSMIAN : EVXForm_1<1497, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwsmian $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWSSF : EVXForm_1<1107, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwssf $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWSSFA : EVXForm_1<1139, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwssfa $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWSSFAA : EVXForm_1<1363, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwssfaa $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWSSFAN : EVXForm_1<1491, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwssfan $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWUMI : EVXForm_1<1112, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwumi $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWUMIA : EVXForm_1<1144, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwumia $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWUMIAA : EVXForm_1<1368, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwumiaa $RT, $RA, $RB", IIC_VecComplex, []>; +def EVMWUMIAN : EVXForm_1<1496, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmwumian $RT, $RA, $RB", IIC_VecComplex, []>; + + +def EVNAND : EVXForm_1<542, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evnand $RT, $RA, $RB", IIC_VecGeneral, + []>; + +def EVNEG : EVXForm_2<521, (outs sperc:$RT), (ins sperc:$RA), + "evneg $RT, $RA", IIC_VecGeneral, + []>; + +def EVNOR : EVXForm_1<536, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evnor $RT, $RA, $RB", IIC_VecGeneral, + []>; +def EVOR : EVXForm_1<535, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evor $RT, $RA, $RB", IIC_VecGeneral, + []>; +def EVORC : EVXForm_1<539, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evorc $RT, $RA, $RB", IIC_VecGeneral, + []>; + +def EVRLWI : EVXForm_1<554, (outs sperc:$RT), (ins sperc:$RA, u5imm:$RB), + "evrlwi $RT, $RA, $RB", IIC_VecGeneral, []>; +def EVRLW : EVXForm_1<552, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evrlw $RT, $RA, $RB", IIC_VecGeneral, + []>; + +def EVRNDW : EVXForm_2<524, (outs sperc:$RT), (ins sperc:$RA), + "evrndw $RT, $RA", IIC_VecGeneral, []>; + +def EVSEL : EVXForm_4<79, (outs sperc:$RT), + (ins sperc:$RA, sperc:$RB, crrc:$crD), + "evsel $RT, $RA, $RB, $crD", IIC_VecGeneral, []>; + +def EVSLWI : EVXForm_1<550, (outs sperc:$RT), (ins sperc:$RA, u5imm:$RB), + "evslwi $RT, $RA, $RB", IIC_VecGeneral, []>; +def EVSLW : EVXForm_1<548, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evslw $RT, $RA, $RB", IIC_VecGeneral, + []>; + +def EVSPLATFI : EVXForm_2<555, (outs sperc:$RT), (ins s5imm:$RA), + "evsplatfi $RT, $RA", IIC_VecGeneral, []>; +def EVSPLATI : EVXForm_2<553, (outs sperc:$RT), (ins s5imm:$RA), + "evsplati $RT, $RA", IIC_VecGeneral, []>; + +def EVSRWIS : EVXForm_1<547, (outs sperc:$RT), (ins sperc:$RA, u5imm:$RB), + "evsrwis $RT, $RA, $RB", IIC_VecGeneral, []>; +def EVSRWIU : EVXForm_1<546, (outs sperc:$RT), (ins sperc:$RA, u5imm:$RB), + "evsrwiu $RT, $RA, $RB", IIC_VecGeneral, []>; +def EVSRWS : EVXForm_1<545, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evsrws $RT, $RA, $RB", IIC_VecGeneral, + []>; +def EVSRWU : 
EVXForm_1<544, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evsrwu $RT, $RA, $RB", IIC_VecGeneral, + []>; + +def EVSTDD : EVXForm_D<801, (outs), (ins sperc:$RT, spe8dis:$dst), + "evstdd $RT, $dst", IIC_LdStStore, + [(store f64:$RT, iaddr:$dst)]>; +def EVSTDDX : EVXForm_1<800, (outs), (ins sperc:$RT, memrr:$dst), + "evstddx $RT, $dst", IIC_LdStStore, + [(store f64:$RT, xaddr:$dst)]>; +def EVSTDH : EVXForm_D<805, (outs), (ins sperc:$RT, spe8dis:$dst), + "evstdh $RT, $dst", IIC_LdStStore, []>; +def EVSTDHX : EVXForm_1<804, (outs), (ins sperc:$RT, memrr:$dst), + "evstdhx $RT, $dst", IIC_LdStStore, []>; +def EVSTDW : EVXForm_D<803, (outs), (ins sperc:$RT, spe8dis:$dst), + "evstdw $RT, $dst", IIC_LdStStore, + []>; +def EVSTDWX : EVXForm_1<802, (outs), (ins sperc:$RT, memrr:$dst), + "evstdwx $RT, $dst", IIC_LdStStore, + []>; +def EVSTWHE : EVXForm_D<817, (outs), (ins sperc:$RT, spe4dis:$dst), + "evstwhe $RT, $dst", IIC_LdStStore, []>; +def EVSTWHEX : EVXForm_1<816, (outs), (ins sperc:$RT, memrr:$dst), + "evstwhex $RT, $dst", IIC_LdStStore, []>; +def EVSTWHO : EVXForm_D<821, (outs), (ins sperc:$RT, spe4dis:$dst), + "evstwho $RT, $dst", IIC_LdStStore, []>; +def EVSTWHOX : EVXForm_1<820, (outs), (ins sperc:$RT, memrr:$dst), + "evstwhox $RT, $dst", IIC_LdStStore, []>; +def EVSTWWE : EVXForm_D<825, (outs), (ins sperc:$RT, spe4dis:$dst), + "evstwwe $RT, $dst", IIC_LdStStore, []>; +def EVSTWWEX : EVXForm_1<824, (outs), (ins sperc:$RT, memrr:$dst), + "evstwwex $RT, $dst", IIC_LdStStore, []>; +def EVSTWWO : EVXForm_D<829, (outs), (ins sperc:$RT, spe4dis:$dst), + "evstwwo $RT, $dst", IIC_LdStStore, []>; +def EVSTWWOX : EVXForm_1<828, (outs), (ins sperc:$RT, memrr:$dst), + "evstwwox $RT, $dst", IIC_LdStStore, []>; + +def EVSUBFSSIAAW : EVXForm_2<1219, (outs sperc:$RT), (ins sperc:$RA), + "evsubfssiaaw $RT, $RA", IIC_VecComplex, []>; +def EVSUBFSMIAAW : EVXForm_2<1227, (outs sperc:$RT), (ins sperc:$RA), + "evsubfsmiaaw $RT, $RA", IIC_VecComplex, []>; +def EVSUBFUMIAAW : EVXForm_2<1226, (outs sperc:$RT), (ins sperc:$RA), + "evsubfumiaaw $RT, $RA", IIC_VecComplex, []>; +def EVSUBFUSIAAW : EVXForm_2<1218, (outs sperc:$RT), (ins sperc:$RA), + "evsubfusiaaw $RT, $RA", IIC_VecComplex, []>; +def EVSUBFW : EVXForm_1<516, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evsubfw $RT, $RA, $RB", IIC_VecGeneral, + []>; +def EVSUBIFW : EVXForm_1<518, (outs sperc:$RT), (ins u5imm:$RA, sperc:$RB), + "evsubifw $RT, $RA, $RB", IIC_VecGeneral, []>; +def EVXOR : EVXForm_1<534, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evxor $RT, $RA, $RB", IIC_VecGeneral, + []>; + +let isAsmParserOnly = 1 in { +// Identical to the integer Load/Stores, but to handle floats +def SPELWZ : DForm_1<32, (outs spe4rc:$rD), (ins memri:$src), + "lwz $rD, $src", IIC_LdStLoad, + [(set f32:$rD, (load iaddr:$src))]>; +def SPELWZX : XForm_1<31, 23, (outs spe4rc:$rD), (ins memrr:$src), + "lwzx $rD, $src", IIC_LdStLoad, + [(set f32:$rD, (load xaddr:$src))]>; +def SPESTW : DForm_1<36, (outs), (ins spe4rc:$rS, memri:$src), + "stw $rS, $src", IIC_LdStStore, + [(store f32:$rS, iaddr:$src)]>; +def SPESTWX : XForm_8<31, 151, (outs), (ins spe4rc:$rS, memrr:$dst), + "stwx $rS, $dst", IIC_LdStStore, + [(store f32:$rS, xaddr:$dst)]>; +} + +} // HasSPE + +let Predicates = [HasSPE] in { +def : Pat<(f64 (extloadf32 iaddr:$src)), + (COPY_TO_REGCLASS (SPELWZ iaddr:$src), SPERC)>; +def : Pat<(f64 (extloadf32 xaddr:$src)), + (COPY_TO_REGCLASS (SPELWZX xaddr:$src), SPERC)>; + +def : Pat<(f64 (fpextend f32:$src)), + (COPY_TO_REGCLASS $src, SPERC)>; +} + +let 
Predicates = [HasSPE] in { + let usesCustomInserter = 1 in { +def SELECT_CC_SPE4 : Pseudo<(outs spe4rc:$dst), + (ins crrc:$cond, spe4rc:$T, spe4rc:$F, + i32imm:$BROPC), "#SELECT_CC_SPE4", + []>; +def SELECT_CC_SPE : Pseudo<(outs sperc:$dst), + (ins crrc:$cond, sperc:$T, sperc:$F, i32imm:$BROPC), + "#SELECT_CC_SPE", + []>; +def SELECT_SPE4 : Pseudo<(outs spe4rc:$dst), (ins crbitrc:$cond, + spe4rc:$T, spe4rc:$F), "#SELECT_SPE4", + [(set f32:$dst, (select i1:$cond, f32:$T, f32:$F))]>; +def SELECT_SPE : Pseudo<(outs sperc:$dst), (ins crbitrc:$cond, + sperc:$T, sperc:$F), "#SELECT_SPE", + [(set f64:$dst, (select i1:$cond, f64:$T, f64:$F))]>; + } + +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)), + (SELECT_SPE4 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)), + (SELECT_SPE4 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)), + (SELECT_SPE4 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULE)), + (SELECT_SPE4 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)), + (SELECT_SPE4 (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)), + (SELECT_SPE4 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGE)), + (SELECT_SPE4 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)), + (SELECT_SPE4 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGT)), + (SELECT_SPE4 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)), + (SELECT_SPE4 (CRXOR $lhs, $rhs), $tval, $fval)>; + +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)), + (SELECT_SPE (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULT)), + (SELECT_SPE (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)), + (SELECT_SPE (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULE)), + (SELECT_SPE (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)), + (SELECT_SPE (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)), + (SELECT_SPE (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGE)), + (SELECT_SPE (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)), + (SELECT_SPE (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)), + (SELECT_SPE (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)), + (SELECT_SPE (CRXOR $lhs, $rhs), $tval, $fval)>; +} diff --git a/capstone/suite/synctools/tablegen/PPC/PPCInstrVSX.td b/capstone/suite/synctools/tablegen/PPC/PPCInstrVSX.td new file mode 100644 index 000000000..781a32774 --- /dev/null +++ b/capstone/suite/synctools/tablegen/PPC/PPCInstrVSX.td @@ -0,0 +1,4007 @@ +//===- PPCInstrVSX.td - The PowerPC VSX Extension --*- tablegen -*-===// +// +// The LLVM Compiler 
Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the VSX extension to the PowerPC instruction set. +// +//===----------------------------------------------------------------------===// + +// *********************************** NOTE *********************************** +// ** For POWER8 Little Endian, the VSX swap optimization relies on knowing ** +// ** which VMX and VSX instructions are lane-sensitive and which are not. ** +// ** A lane-sensitive instruction relies, implicitly or explicitly, on ** +// ** whether lanes are numbered from left to right. An instruction like ** +// ** VADDFP is not lane-sensitive, because each lane of the result vector ** +// ** relies only on the corresponding lane of the source vectors. However, ** +// ** an instruction like VMULESB is lane-sensitive, because "even" and ** +// ** "odd" lanes are different for big-endian and little-endian numbering. ** +// ** ** +// ** When adding new VMX and VSX instructions, please consider whether they ** +// ** are lane-sensitive. If so, they must be added to a switch statement ** +// ** in PPCVSXSwapRemoval::gatherVectorInstructions(). ** +// **************************************************************************** + +def PPCRegVSRCAsmOperand : AsmOperandClass { + let Name = "RegVSRC"; let PredicateMethod = "isVSRegNumber"; +} +def vsrc : RegisterOperand<VSRC> { + let ParserMatchClass = PPCRegVSRCAsmOperand; +} + +def PPCRegVSFRCAsmOperand : AsmOperandClass { + let Name = "RegVSFRC"; let PredicateMethod = "isVSRegNumber"; +} +def vsfrc : RegisterOperand<VSFRC> { + let ParserMatchClass = PPCRegVSFRCAsmOperand; +} + +def PPCRegVSSRCAsmOperand : AsmOperandClass { + let Name = "RegVSSRC"; let PredicateMethod = "isVSRegNumber"; +} +def vssrc : RegisterOperand<VSSRC> { + let ParserMatchClass = PPCRegVSSRCAsmOperand; +} + +def PPCRegSPILLTOVSRRCAsmOperand : AsmOperandClass { + let Name = "RegSPILLTOVSRRC"; let PredicateMethod = "isVSRegNumber"; +} + +def spilltovsrrc : RegisterOperand<SPILLTOVSRRC> { + let ParserMatchClass = PPCRegSPILLTOVSRRCAsmOperand; +} +// Little-endian-specific nodes. 
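+// On little-endian targets lxvd2x/stxvd2x access the two doublewords in
+// big-endian element order, so the swap-aware nodes below let each such
+// memory access be paired with an xxswapd; the swap optimization described
+// in the note above then removes the swaps it can prove unnecessary.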
+def SDT_PPClxvd2x : SDTypeProfile<1, 1, [ + SDTCisVT<0, v2f64>, SDTCisPtrTy<1> +]>; +def SDT_PPCstxvd2x : SDTypeProfile<0, 2, [ + SDTCisVT<0, v2f64>, SDTCisPtrTy<1> +]>; +def SDT_PPCxxswapd : SDTypeProfile<1, 1, [ + SDTCisSameAs<0, 1> +]>; +def SDTVecConv : SDTypeProfile<1, 2, [ + SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2> +]>; + +def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x, + [SDNPHasChain, SDNPMayStore]>; +def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>; +def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>; +def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>; +def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>; +def PPCsvec2fp : SDNode<"PPCISD::SINT_VEC_TO_FP", SDTVecConv, []>; +def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>; +def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>; + +multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase, + string asmstr, InstrItinClass itin, Intrinsic Int, + ValueType OutTy, ValueType InTy> { + let BaseName = asmbase in { + def NAME : XX3Form_Rc<opcode, xo, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + [(set OutTy:$XT, (Int InTy:$XA, InTy:$XB))]>; + let Defs = [CR6] in + def o : XX3Form_Rc<opcode, xo, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + [(set InTy:$XT, + (InTy (PPCvcmp_o InTy:$XA, InTy:$XB, xo)))]>, + isDOT; + } +} + +// Instruction form with a single input register for instructions such as +// XXPERMDI. The reason for defining this is that specifying multiple chained +// operands (such as loads) to an instruction will perform both chained +// operations rather than coalescing them into a single register - even though +// the source memory location is the same. This simply forces the instruction +// to use the same register for both inputs. +// For example, an output DAG such as this: +// (XXPERMDI (LXSIBZX xoaddr:$src), (LXSIBZX xoaddr:$src ), 0)) +// would result in two load instructions emitted and used as separate inputs +// to the XXPERMDI instruction. +class XX3Form_2s<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XX3Form_2<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let XB = XA; +} + +def HasVSX : Predicate<"PPCSubTarget->hasVSX()">; +def IsLittleEndian : Predicate<"PPCSubTarget->isLittleEndian()">; +def IsBigEndian : Predicate<"!PPCSubTarget->isLittleEndian()">; +def HasOnlySwappingMemOps : Predicate<"!PPCSubTarget->hasP9Vector()">; + +let Predicates = [HasVSX] in { +let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. +let UseVSXReg = 1 in { +let hasSideEffects = 0 in { // VSX instructions don't have side effects. 
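+// To make the XX3Form_2s comment above concrete, a sketch of the difference
+// for a doubleword splat of a loaded scalar (register numbers and the splat
+// scenario are illustrative only). Without the tied input, both chained
+// loads survive selection:
+//   lxsdx 0, 0, 3
+//   lxsdx 1, 0, 3
+//   xxpermdi 34, 0, 1, 0
+// With "let XB = XA", a single load feeds both inputs:
+//   lxsdx 0, 0, 3
+//   xxpermdi 34, 0, 0, 0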
+let Uses = [RM] in { + + // Load indexed instructions + let mayLoad = 1, mayStore = 0 in { + let CodeSize = 3 in + def LXSDX : XX1Form_memOp<31, 588, + (outs vsfrc:$XT), (ins memrr:$src), + "lxsdx $XT, $src", IIC_LdStLFD, + []>; + + // Pseudo instruction XFLOADf64 will be expanded to LXSDX or LFDX later + let isPseudo = 1, CodeSize = 3 in + def XFLOADf64 : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src), + "#XFLOADf64", + [(set f64:$XT, (load xoaddr:$src))]>; + + let Predicates = [HasVSX, HasOnlySwappingMemOps] in + def LXVD2X : XX1Form_memOp<31, 844, + (outs vsrc:$XT), (ins memrr:$src), + "lxvd2x $XT, $src", IIC_LdStLFD, + [(set v2f64:$XT, (int_ppc_vsx_lxvd2x xoaddr:$src))]>; + + def LXVDSX : XX1Form_memOp<31, 332, + (outs vsrc:$XT), (ins memrr:$src), + "lxvdsx $XT, $src", IIC_LdStLFD, []>; + + let Predicates = [HasVSX, HasOnlySwappingMemOps] in + def LXVW4X : XX1Form_memOp<31, 780, + (outs vsrc:$XT), (ins memrr:$src), + "lxvw4x $XT, $src", IIC_LdStLFD, + []>; + } // mayLoad + + // Store indexed instructions + let mayStore = 1, mayLoad = 0 in { + let CodeSize = 3 in + def STXSDX : XX1Form_memOp<31, 716, + (outs), (ins vsfrc:$XT, memrr:$dst), + "stxsdx $XT, $dst", IIC_LdStSTFD, + []>; + + // Pseudo instruction XFSTOREf64 will be expanded to STXSDX or STFDX later + let isPseudo = 1, CodeSize = 3 in + def XFSTOREf64 : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst), + "#XFSTOREf64", + [(store f64:$XT, xoaddr:$dst)]>; + + let Predicates = [HasVSX, HasOnlySwappingMemOps] in { + // The behaviour of this instruction is endianness-specific so we provide no + // pattern to match it without considering endianness. + def STXVD2X : XX1Form_memOp<31, 972, + (outs), (ins vsrc:$XT, memrr:$dst), + "stxvd2x $XT, $dst", IIC_LdStSTFD, + []>; + + def STXVW4X : XX1Form_memOp<31, 908, + (outs), (ins vsrc:$XT, memrr:$dst), + "stxvw4x $XT, $dst", IIC_LdStSTFD, + []>; + } + } // mayStore + + // Add/Mul Instructions + let isCommutable = 1 in { + def XSADDDP : XX3Form<60, 32, + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), + "xsadddp $XT, $XA, $XB", IIC_VecFP, + [(set f64:$XT, (fadd f64:$XA, f64:$XB))]>; + def XSMULDP : XX3Form<60, 48, + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), + "xsmuldp $XT, $XA, $XB", IIC_VecFP, + [(set f64:$XT, (fmul f64:$XA, f64:$XB))]>; + + def XVADDDP : XX3Form<60, 96, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvadddp $XT, $XA, $XB", IIC_VecFP, + [(set v2f64:$XT, (fadd v2f64:$XA, v2f64:$XB))]>; + + def XVADDSP : XX3Form<60, 64, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvaddsp $XT, $XA, $XB", IIC_VecFP, + [(set v4f32:$XT, (fadd v4f32:$XA, v4f32:$XB))]>; + + def XVMULDP : XX3Form<60, 112, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvmuldp $XT, $XA, $XB", IIC_VecFP, + [(set v2f64:$XT, (fmul v2f64:$XA, v2f64:$XB))]>; + + def XVMULSP : XX3Form<60, 80, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvmulsp $XT, $XA, $XB", IIC_VecFP, + [(set v4f32:$XT, (fmul v4f32:$XA, v4f32:$XB))]>; + } + + // Subtract Instructions + def XSSUBDP : XX3Form<60, 40, + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), + "xssubdp $XT, $XA, $XB", IIC_VecFP, + [(set f64:$XT, (fsub f64:$XA, f64:$XB))]>; + + def XVSUBDP : XX3Form<60, 104, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvsubdp $XT, $XA, $XB", IIC_VecFP, + [(set v2f64:$XT, (fsub v2f64:$XA, v2f64:$XB))]>; + def XVSUBSP : XX3Form<60, 72, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvsubsp $XT, $XA, $XB", IIC_VecFP, + [(set v4f32:$XT, (fsub v4f32:$XA, v4f32:$XB))]>; + + // FMA Instructions + let BaseName = "XSMADDADP" in { + 
let isCommutable = 1 in + def XSMADDADP : XX3Form<60, 33, + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), + "xsmaddadp $XT, $XA, $XB", IIC_VecFP, + [(set f64:$XT, (fma f64:$XA, f64:$XB, f64:$XTi))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XSMADDMDP : XX3Form<60, 41, + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), + "xsmaddmdp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XSMSUBADP" in { + let isCommutable = 1 in + def XSMSUBADP : XX3Form<60, 49, + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), + "xsmsubadp $XT, $XA, $XB", IIC_VecFP, + [(set f64:$XT, (fma f64:$XA, f64:$XB, (fneg f64:$XTi)))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XSMSUBMDP : XX3Form<60, 57, + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), + "xsmsubmdp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XSNMADDADP" in { + let isCommutable = 1 in + def XSNMADDADP : XX3Form<60, 161, + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), + "xsnmaddadp $XT, $XA, $XB", IIC_VecFP, + [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, f64:$XTi)))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XSNMADDMDP : XX3Form<60, 169, + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), + "xsnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XSNMSUBADP" in { + let isCommutable = 1 in + def XSNMSUBADP : XX3Form<60, 177, + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), + "xsnmsubadp $XT, $XA, $XB", IIC_VecFP, + [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, (fneg f64:$XTi))))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XSNMSUBMDP : XX3Form<60, 185, + (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), + "xsnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XVMADDADP" in { + let isCommutable = 1 in + def XVMADDADP : XX3Form<60, 97, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvmaddadp $XT, $XA, $XB", IIC_VecFP, + [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XVMADDMDP : XX3Form<60, 105, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvmaddmdp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XVMADDASP" in { + let isCommutable = 1 in + def XVMADDASP : XX3Form<60, 65, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvmaddasp $XT, $XA, $XB", IIC_VecFP, + [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XVMADDMSP : XX3Form<60, 73, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvmaddmsp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XVMSUBADP" in { + let isCommutable = 1 in + def XVMSUBADP : XX3Form<60, 113, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvmsubadp $XT, $XA, $XB", IIC_VecFP, + [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi)))]>, 
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XVMSUBMDP : XX3Form<60, 121, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvmsubmdp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XVMSUBASP" in { + let isCommutable = 1 in + def XVMSUBASP : XX3Form<60, 81, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvmsubasp $XT, $XA, $XB", IIC_VecFP, + [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi)))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XVMSUBMSP : XX3Form<60, 89, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvmsubmsp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XVNMADDADP" in { + let isCommutable = 1 in + def XVNMADDADP : XX3Form<60, 225, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvnmaddadp $XT, $XA, $XB", IIC_VecFP, + [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi)))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XVNMADDMDP : XX3Form<60, 233, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XVNMADDASP" in { + let isCommutable = 1 in + def XVNMADDASP : XX3Form<60, 193, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvnmaddasp $XT, $XA, $XB", IIC_VecFP, + [(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi)))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XVNMADDMSP : XX3Form<60, 201, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvnmaddmsp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XVNMSUBADP" in { + let isCommutable = 1 in + def XVNMSUBADP : XX3Form<60, 241, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvnmsubadp $XT, $XA, $XB", IIC_VecFP, + [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi))))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XVNMSUBMDP : XX3Form<60, 249, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XVNMSUBASP" in { + let isCommutable = 1 in + def XVNMSUBASP : XX3Form<60, 209, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvnmsubasp $XT, $XA, $XB", IIC_VecFP, + [(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi))))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XVNMSUBMSP : XX3Form<60, 217, + (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), + "xvnmsubmsp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + // Division Instructions + def XSDIVDP : XX3Form<60, 56, + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), + "xsdivdp $XT, $XA, $XB", IIC_FPDivD, + [(set f64:$XT, (fdiv f64:$XA, f64:$XB))]>; + def XSSQRTDP : XX2Form<60, 75, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xssqrtdp $XT, $XB", IIC_FPSqrtD, + [(set f64:$XT, (fsqrt f64:$XB))]>; + + def XSREDP : XX2Form<60, 90, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsredp $XT, $XB", IIC_VecFP, + 
[(set f64:$XT, (PPCfre f64:$XB))]>; + def XSRSQRTEDP : XX2Form<60, 74, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsrsqrtedp $XT, $XB", IIC_VecFP, + [(set f64:$XT, (PPCfrsqrte f64:$XB))]>; + + def XSTDIVDP : XX3Form_1<60, 61, + (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), + "xstdivdp $crD, $XA, $XB", IIC_FPCompare, []>; + def XSTSQRTDP : XX2Form_1<60, 106, + (outs crrc:$crD), (ins vsfrc:$XB), + "xstsqrtdp $crD, $XB", IIC_FPCompare, []>; + + def XVDIVDP : XX3Form<60, 120, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvdivdp $XT, $XA, $XB", IIC_FPDivD, + [(set v2f64:$XT, (fdiv v2f64:$XA, v2f64:$XB))]>; + def XVDIVSP : XX3Form<60, 88, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvdivsp $XT, $XA, $XB", IIC_FPDivS, + [(set v4f32:$XT, (fdiv v4f32:$XA, v4f32:$XB))]>; + + def XVSQRTDP : XX2Form<60, 203, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvsqrtdp $XT, $XB", IIC_FPSqrtD, + [(set v2f64:$XT, (fsqrt v2f64:$XB))]>; + def XVSQRTSP : XX2Form<60, 139, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvsqrtsp $XT, $XB", IIC_FPSqrtS, + [(set v4f32:$XT, (fsqrt v4f32:$XB))]>; + + def XVTDIVDP : XX3Form_1<60, 125, + (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB), + "xvtdivdp $crD, $XA, $XB", IIC_FPCompare, []>; + def XVTDIVSP : XX3Form_1<60, 93, + (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB), + "xvtdivsp $crD, $XA, $XB", IIC_FPCompare, []>; + + def XVTSQRTDP : XX2Form_1<60, 234, + (outs crrc:$crD), (ins vsrc:$XB), + "xvtsqrtdp $crD, $XB", IIC_FPCompare, []>; + def XVTSQRTSP : XX2Form_1<60, 170, + (outs crrc:$crD), (ins vsrc:$XB), + "xvtsqrtsp $crD, $XB", IIC_FPCompare, []>; + + def XVREDP : XX2Form<60, 218, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvredp $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (PPCfre v2f64:$XB))]>; + def XVRESP : XX2Form<60, 154, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvresp $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (PPCfre v4f32:$XB))]>; + + def XVRSQRTEDP : XX2Form<60, 202, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrsqrtedp $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (PPCfrsqrte v2f64:$XB))]>; + def XVRSQRTESP : XX2Form<60, 138, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrsqrtesp $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (PPCfrsqrte v4f32:$XB))]>; + + // Compare Instructions + def XSCMPODP : XX3Form_1<60, 43, + (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), + "xscmpodp $crD, $XA, $XB", IIC_FPCompare, []>; + def XSCMPUDP : XX3Form_1<60, 35, + (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), + "xscmpudp $crD, $XA, $XB", IIC_FPCompare, []>; + + defm XVCMPEQDP : XX3Form_Rcr<60, 99, + "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare, + int_ppc_vsx_xvcmpeqdp, v2i64, v2f64>; + defm XVCMPEQSP : XX3Form_Rcr<60, 67, + "xvcmpeqsp", "$XT, $XA, $XB", IIC_VecFPCompare, + int_ppc_vsx_xvcmpeqsp, v4i32, v4f32>; + defm XVCMPGEDP : XX3Form_Rcr<60, 115, + "xvcmpgedp", "$XT, $XA, $XB", IIC_VecFPCompare, + int_ppc_vsx_xvcmpgedp, v2i64, v2f64>; + defm XVCMPGESP : XX3Form_Rcr<60, 83, + "xvcmpgesp", "$XT, $XA, $XB", IIC_VecFPCompare, + int_ppc_vsx_xvcmpgesp, v4i32, v4f32>; + defm XVCMPGTDP : XX3Form_Rcr<60, 107, + "xvcmpgtdp", "$XT, $XA, $XB", IIC_VecFPCompare, + int_ppc_vsx_xvcmpgtdp, v2i64, v2f64>; + defm XVCMPGTSP : XX3Form_Rcr<60, 75, + "xvcmpgtsp", "$XT, $XA, $XB", IIC_VecFPCompare, + int_ppc_vsx_xvcmpgtsp, v4i32, v4f32>; + + // Move Instructions + def XSABSDP : XX2Form<60, 345, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsabsdp $XT, $XB", IIC_VecFP, + [(set f64:$XT, (fabs f64:$XB))]>; + def XSNABSDP : XX2Form<60, 361, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsnabsdp $XT, $XB", IIC_VecFP, + [(set f64:$XT, (fneg (fabs f64:$XB)))]>; + def 
XSNEGDP : XX2Form<60, 377, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsnegdp $XT, $XB", IIC_VecFP, + [(set f64:$XT, (fneg f64:$XB))]>; + def XSCPSGNDP : XX3Form<60, 176, + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), + "xscpsgndp $XT, $XA, $XB", IIC_VecFP, + [(set f64:$XT, (fcopysign f64:$XB, f64:$XA))]>; + + def XVABSDP : XX2Form<60, 473, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvabsdp $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (fabs v2f64:$XB))]>; + + def XVABSSP : XX2Form<60, 409, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvabssp $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (fabs v4f32:$XB))]>; + + def XVCPSGNDP : XX3Form<60, 240, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvcpsgndp $XT, $XA, $XB", IIC_VecFP, + [(set v2f64:$XT, (fcopysign v2f64:$XB, v2f64:$XA))]>; + def XVCPSGNSP : XX3Form<60, 208, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvcpsgnsp $XT, $XA, $XB", IIC_VecFP, + [(set v4f32:$XT, (fcopysign v4f32:$XB, v4f32:$XA))]>; + + def XVNABSDP : XX2Form<60, 489, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvnabsdp $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (fneg (fabs v2f64:$XB)))]>; + def XVNABSSP : XX2Form<60, 425, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvnabssp $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (fneg (fabs v4f32:$XB)))]>; + + def XVNEGDP : XX2Form<60, 505, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvnegdp $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (fneg v2f64:$XB))]>; + def XVNEGSP : XX2Form<60, 441, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvnegsp $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (fneg v4f32:$XB))]>; + + // Conversion Instructions + def XSCVDPSP : XX2Form<60, 265, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xscvdpsp $XT, $XB", IIC_VecFP, []>; + def XSCVDPSXDS : XX2Form<60, 344, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xscvdpsxds $XT, $XB", IIC_VecFP, + [(set f64:$XT, (PPCfctidz f64:$XB))]>; + let isCodeGenOnly = 1 in + def XSCVDPSXDSs : XX2Form<60, 344, + (outs vssrc:$XT), (ins vssrc:$XB), + "xscvdpsxds $XT, $XB", IIC_VecFP, + [(set f32:$XT, (PPCfctidz f32:$XB))]>; + def XSCVDPSXWS : XX2Form<60, 88, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xscvdpsxws $XT, $XB", IIC_VecFP, + [(set f64:$XT, (PPCfctiwz f64:$XB))]>; + let isCodeGenOnly = 1 in + def XSCVDPSXWSs : XX2Form<60, 88, + (outs vssrc:$XT), (ins vssrc:$XB), + "xscvdpsxws $XT, $XB", IIC_VecFP, + [(set f32:$XT, (PPCfctiwz f32:$XB))]>; + def XSCVDPUXDS : XX2Form<60, 328, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xscvdpuxds $XT, $XB", IIC_VecFP, + [(set f64:$XT, (PPCfctiduz f64:$XB))]>; + let isCodeGenOnly = 1 in + def XSCVDPUXDSs : XX2Form<60, 328, + (outs vssrc:$XT), (ins vssrc:$XB), + "xscvdpuxds $XT, $XB", IIC_VecFP, + [(set f32:$XT, (PPCfctiduz f32:$XB))]>; + def XSCVDPUXWS : XX2Form<60, 72, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xscvdpuxws $XT, $XB", IIC_VecFP, + [(set f64:$XT, (PPCfctiwuz f64:$XB))]>; + let isCodeGenOnly = 1 in + def XSCVDPUXWSs : XX2Form<60, 72, + (outs vssrc:$XT), (ins vssrc:$XB), + "xscvdpuxws $XT, $XB", IIC_VecFP, + [(set f32:$XT, (PPCfctiwuz f32:$XB))]>; + def XSCVSPDP : XX2Form<60, 329, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xscvspdp $XT, $XB", IIC_VecFP, []>; + def XSCVSXDDP : XX2Form<60, 376, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xscvsxddp $XT, $XB", IIC_VecFP, + [(set f64:$XT, (PPCfcfid f64:$XB))]>; + def XSCVUXDDP : XX2Form<60, 360, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xscvuxddp $XT, $XB", IIC_VecFP, + [(set f64:$XT, (PPCfcfidu f64:$XB))]>; + + def XVCVDPSP : XX2Form<60, 393, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvdpsp $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (int_ppc_vsx_xvcvdpsp v2f64:$XB))]>; + 
def XVCVDPSXDS : XX2Form<60, 472, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvdpsxds $XT, $XB", IIC_VecFP, + [(set v2i64:$XT, (fp_to_sint v2f64:$XB))]>; + def XVCVDPSXWS : XX2Form<60, 216, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvdpsxws $XT, $XB", IIC_VecFP, + [(set v4i32:$XT, (int_ppc_vsx_xvcvdpsxws v2f64:$XB))]>; + def XVCVDPUXDS : XX2Form<60, 456, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvdpuxds $XT, $XB", IIC_VecFP, + [(set v2i64:$XT, (fp_to_uint v2f64:$XB))]>; + def XVCVDPUXWS : XX2Form<60, 200, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvdpuxws $XT, $XB", IIC_VecFP, + [(set v4i32:$XT, (int_ppc_vsx_xvcvdpuxws v2f64:$XB))]>; + + def XVCVSPDP : XX2Form<60, 457, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvspdp $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (int_ppc_vsx_xvcvspdp v4f32:$XB))]>; + def XVCVSPSXDS : XX2Form<60, 408, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvspsxds $XT, $XB", IIC_VecFP, []>; + def XVCVSPSXWS : XX2Form<60, 152, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvspsxws $XT, $XB", IIC_VecFP, + [(set v4i32:$XT, (fp_to_sint v4f32:$XB))]>; + def XVCVSPUXDS : XX2Form<60, 392, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvspuxds $XT, $XB", IIC_VecFP, []>; + def XVCVSPUXWS : XX2Form<60, 136, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvspuxws $XT, $XB", IIC_VecFP, + [(set v4i32:$XT, (fp_to_uint v4f32:$XB))]>; + def XVCVSXDDP : XX2Form<60, 504, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvsxddp $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (sint_to_fp v2i64:$XB))]>; + def XVCVSXDSP : XX2Form<60, 440, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvsxdsp $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (int_ppc_vsx_xvcvsxdsp v2i64:$XB))]>; + def XVCVSXWDP : XX2Form<60, 248, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvsxwdp $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (int_ppc_vsx_xvcvsxwdp v4i32:$XB))]>; + def XVCVSXWSP : XX2Form<60, 184, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvsxwsp $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (sint_to_fp v4i32:$XB))]>; + def XVCVUXDDP : XX2Form<60, 488, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvuxddp $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (uint_to_fp v2i64:$XB))]>; + def XVCVUXDSP : XX2Form<60, 424, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvuxdsp $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (int_ppc_vsx_xvcvuxdsp v2i64:$XB))]>; + def XVCVUXWDP : XX2Form<60, 232, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvuxwdp $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (int_ppc_vsx_xvcvuxwdp v4i32:$XB))]>; + def XVCVUXWSP : XX2Form<60, 168, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvuxwsp $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (uint_to_fp v4i32:$XB))]>; + + // Rounding Instructions + def XSRDPI : XX2Form<60, 73, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsrdpi $XT, $XB", IIC_VecFP, + [(set f64:$XT, (fround f64:$XB))]>; + def XSRDPIC : XX2Form<60, 107, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsrdpic $XT, $XB", IIC_VecFP, + [(set f64:$XT, (fnearbyint f64:$XB))]>; + def XSRDPIM : XX2Form<60, 121, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsrdpim $XT, $XB", IIC_VecFP, + [(set f64:$XT, (ffloor f64:$XB))]>; + def XSRDPIP : XX2Form<60, 105, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsrdpip $XT, $XB", IIC_VecFP, + [(set f64:$XT, (fceil f64:$XB))]>; + def XSRDPIZ : XX2Form<60, 89, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xsrdpiz $XT, $XB", IIC_VecFP, + [(set f64:$XT, (ftrunc f64:$XB))]>; + + def XVRDPI : XX2Form<60, 201, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrdpi $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (fround v2f64:$XB))]>; + def XVRDPIC : XX2Form<60, 235, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrdpic $XT, $XB", IIC_VecFP, 
+ [(set v2f64:$XT, (fnearbyint v2f64:$XB))]>; + def XVRDPIM : XX2Form<60, 249, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrdpim $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (ffloor v2f64:$XB))]>; + def XVRDPIP : XX2Form<60, 233, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrdpip $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (fceil v2f64:$XB))]>; + def XVRDPIZ : XX2Form<60, 217, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrdpiz $XT, $XB", IIC_VecFP, + [(set v2f64:$XT, (ftrunc v2f64:$XB))]>; + + def XVRSPI : XX2Form<60, 137, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrspi $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (fround v4f32:$XB))]>; + def XVRSPIC : XX2Form<60, 171, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrspic $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (fnearbyint v4f32:$XB))]>; + def XVRSPIM : XX2Form<60, 185, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrspim $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (ffloor v4f32:$XB))]>; + def XVRSPIP : XX2Form<60, 169, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrspip $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (fceil v4f32:$XB))]>; + def XVRSPIZ : XX2Form<60, 153, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvrspiz $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (ftrunc v4f32:$XB))]>; + + // Max/Min Instructions + let isCommutable = 1 in { + def XSMAXDP : XX3Form<60, 160, + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), + "xsmaxdp $XT, $XA, $XB", IIC_VecFP, + [(set vsfrc:$XT, + (int_ppc_vsx_xsmaxdp vsfrc:$XA, vsfrc:$XB))]>; + def XSMINDP : XX3Form<60, 168, + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), + "xsmindp $XT, $XA, $XB", IIC_VecFP, + [(set vsfrc:$XT, + (int_ppc_vsx_xsmindp vsfrc:$XA, vsfrc:$XB))]>; + + def XVMAXDP : XX3Form<60, 224, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvmaxdp $XT, $XA, $XB", IIC_VecFP, + [(set vsrc:$XT, + (int_ppc_vsx_xvmaxdp vsrc:$XA, vsrc:$XB))]>; + def XVMINDP : XX3Form<60, 232, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvmindp $XT, $XA, $XB", IIC_VecFP, + [(set vsrc:$XT, + (int_ppc_vsx_xvmindp vsrc:$XA, vsrc:$XB))]>; + + def XVMAXSP : XX3Form<60, 192, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvmaxsp $XT, $XA, $XB", IIC_VecFP, + [(set vsrc:$XT, + (int_ppc_vsx_xvmaxsp vsrc:$XA, vsrc:$XB))]>; + def XVMINSP : XX3Form<60, 200, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvminsp $XT, $XA, $XB", IIC_VecFP, + [(set vsrc:$XT, + (int_ppc_vsx_xvminsp vsrc:$XA, vsrc:$XB))]>; + } // isCommutable +} // Uses = [RM] + + // Logical Instructions + let isCommutable = 1 in + def XXLAND : XX3Form<60, 130, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xxland $XT, $XA, $XB", IIC_VecGeneral, + [(set v4i32:$XT, (and v4i32:$XA, v4i32:$XB))]>; + def XXLANDC : XX3Form<60, 138, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xxlandc $XT, $XA, $XB", IIC_VecGeneral, + [(set v4i32:$XT, (and v4i32:$XA, + (vnot_ppc v4i32:$XB)))]>; + let isCommutable = 1 in { + def XXLNOR : XX3Form<60, 162, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xxlnor $XT, $XA, $XB", IIC_VecGeneral, + [(set v4i32:$XT, (vnot_ppc (or v4i32:$XA, + v4i32:$XB)))]>; + def XXLOR : XX3Form<60, 146, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xxlor $XT, $XA, $XB", IIC_VecGeneral, + [(set v4i32:$XT, (or v4i32:$XA, v4i32:$XB))]>; + let isCodeGenOnly = 1 in + def XXLORf: XX3Form<60, 146, + (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), + "xxlor $XT, $XA, $XB", IIC_VecGeneral, []>; + def XXLXOR : XX3Form<60, 154, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xxlxor $XT, $XA, $XB", IIC_VecGeneral, + [(set v4i32:$XT, (xor v4i32:$XA, v4i32:$XB))]>; + } // isCommutable + let isCodeGenOnly = 1 in + def XXLXORz : 
XX3Form_Zero<60, 154, (outs vsrc:$XT), (ins), + "xxlxor $XT, $XT, $XT", IIC_VecGeneral, + [(set v4i32:$XT, (v4i32 immAllZerosV))]>; + + let isCodeGenOnly = 1 in { + def XXLXORdpz : XX3Form_SetZero<60, 154, + (outs vsfrc:$XT), (ins), + "xxlxor $XT, $XT, $XT", IIC_VecGeneral, + [(set f64:$XT, (fpimm0))]>; + def XXLXORspz : XX3Form_SetZero<60, 154, + (outs vssrc:$XT), (ins), + "xxlxor $XT, $XT, $XT", IIC_VecGeneral, + [(set f32:$XT, (fpimm0))]>; + } + + // Permutation Instructions + def XXMRGHW : XX3Form<60, 18, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xxmrghw $XT, $XA, $XB", IIC_VecPerm, []>; + def XXMRGLW : XX3Form<60, 50, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xxmrglw $XT, $XA, $XB", IIC_VecPerm, []>; + + def XXPERMDI : XX3Form_2<60, 10, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$DM), + "xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm, + [(set v2i64:$XT, (PPCxxpermdi v2i64:$XA, v2i64:$XB, + imm32SExt16:$DM))]>; + let isCodeGenOnly = 1 in + def XXPERMDIs : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$DM), + "xxpermdi $XT, $XA, $XA, $DM", IIC_VecPerm, []>; + def XXSEL : XX4Form<60, 3, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC), + "xxsel $XT, $XA, $XB, $XC", IIC_VecPerm, []>; + + def XXSLDWI : XX3Form_2<60, 2, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$SHW), + "xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm, + [(set v4i32:$XT, (PPCvecshl v4i32:$XA, v4i32:$XB, + imm32SExt16:$SHW))]>; + + let isCodeGenOnly = 1 in + def XXSLDWIs : XX3Form_2s<60, 2, + (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$SHW), + "xxsldwi $XT, $XA, $XA, $SHW", IIC_VecPerm, []>; + + def XXSPLTW : XX2Form_2<60, 164, + (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM), + "xxspltw $XT, $XB, $UIM", IIC_VecPerm, + [(set v4i32:$XT, + (PPCxxsplt v4i32:$XB, imm32SExt16:$UIM))]>; + let isCodeGenOnly = 1 in + def XXSPLTWs : XX2Form_2<60, 164, + (outs vsrc:$XT), (ins vfrc:$XB, u2imm:$UIM), + "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>; + +} // hasSideEffects +} // UseVSXReg = 1 + +// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after +// instruction selection into a branch sequence. +let usesCustomInserter = 1, // Expanded after instruction selection. 
+ PPC970_Single = 1 in { + + def SELECT_CC_VSRC: Pseudo<(outs vsrc:$dst), + (ins crrc:$cond, vsrc:$T, vsrc:$F, i32imm:$BROPC), + "#SELECT_CC_VSRC", + []>; + def SELECT_VSRC: Pseudo<(outs vsrc:$dst), + (ins crbitrc:$cond, vsrc:$T, vsrc:$F), + "#SELECT_VSRC", + [(set v2f64:$dst, + (select i1:$cond, v2f64:$T, v2f64:$F))]>; + def SELECT_CC_VSFRC: Pseudo<(outs f8rc:$dst), + (ins crrc:$cond, f8rc:$T, f8rc:$F, + i32imm:$BROPC), "#SELECT_CC_VSFRC", + []>; + def SELECT_VSFRC: Pseudo<(outs f8rc:$dst), + (ins crbitrc:$cond, f8rc:$T, f8rc:$F), + "#SELECT_VSFRC", + [(set f64:$dst, + (select i1:$cond, f64:$T, f64:$F))]>; + def SELECT_CC_VSSRC: Pseudo<(outs f4rc:$dst), + (ins crrc:$cond, f4rc:$T, f4rc:$F, + i32imm:$BROPC), "#SELECT_CC_VSSRC", + []>; + def SELECT_VSSRC: Pseudo<(outs f4rc:$dst), + (ins crbitrc:$cond, f4rc:$T, f4rc:$F), + "#SELECT_VSSRC", + [(set f32:$dst, + (select i1:$cond, f32:$T, f32:$F))]>; +} // usesCustomInserter +} // AddedComplexity + +def : InstAlias<"xvmovdp $XT, $XB", + (XVCPSGNDP vsrc:$XT, vsrc:$XB, vsrc:$XB)>; +def : InstAlias<"xvmovsp $XT, $XB", + (XVCPSGNSP vsrc:$XT, vsrc:$XB, vsrc:$XB)>; + +def : InstAlias<"xxspltd $XT, $XB, 0", + (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 0)>; +def : InstAlias<"xxspltd $XT, $XB, 1", + (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 3)>; +def : InstAlias<"xxmrghd $XT, $XA, $XB", + (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 0)>; +def : InstAlias<"xxmrgld $XT, $XA, $XB", + (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 3)>; +def : InstAlias<"xxswapd $XT, $XB", + (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>; +def : InstAlias<"xxspltd $XT, $XB, 0", + (XXPERMDIs vsrc:$XT, vsfrc:$XB, 0)>; +def : InstAlias<"xxspltd $XT, $XB, 1", + (XXPERMDIs vsrc:$XT, vsfrc:$XB, 3)>; +def : InstAlias<"xxswapd $XT, $XB", + (XXPERMDIs vsrc:$XT, vsfrc:$XB, 2)>; + +let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. 
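+// For reference, the DM immediates in the aliases above follow the xxpermdi
+// selection rule: the high bit of DM picks the doubleword of $XA that becomes
+// doubleword 0 of the result, and the low bit picks the doubleword of $XB
+// that becomes doubleword 1. With $XA == $XB, DM=0 therefore splats dw0,
+// DM=3 splats dw1, and DM=2 swaps the two doublewords.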
+ +def : Pat<(v4i32 (vnot_ppc v4i32:$A)), + (v4i32 (XXLNOR $A, $A))>; +let Predicates = [IsBigEndian] in { +def : Pat<(v2f64 (scalar_to_vector f64:$A)), + (v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>; + +def : Pat<(f64 (extractelt v2f64:$S, 0)), + (f64 (EXTRACT_SUBREG $S, sub_64))>; +def : Pat<(f64 (extractelt v2f64:$S, 1)), + (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>; +} + +let Predicates = [IsLittleEndian] in { +def : Pat<(v2f64 (scalar_to_vector f64:$A)), + (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), $A, sub_64), + (SUBREG_TO_REG (i64 1), $A, sub_64), 0))>; + +def : Pat<(f64 (extractelt v2f64:$S, 0)), + (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>; +def : Pat<(f64 (extractelt v2f64:$S, 1)), + (f64 (EXTRACT_SUBREG $S, sub_64))>; +} + +// Additional fnmsub patterns: -a*c + b == -(a*c - b) +def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B), + (XSNMSUBADP $B, $C, $A)>; +def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B), + (XSNMSUBADP $B, $C, $A)>; + +def : Pat<(fma (fneg v2f64:$A), v2f64:$C, v2f64:$B), + (XVNMSUBADP $B, $C, $A)>; +def : Pat<(fma v2f64:$A, (fneg v2f64:$C), v2f64:$B), + (XVNMSUBADP $B, $C, $A)>; + +def : Pat<(fma (fneg v4f32:$A), v4f32:$C, v4f32:$B), + (XVNMSUBASP $B, $C, $A)>; +def : Pat<(fma v4f32:$A, (fneg v4f32:$C), v4f32:$B), + (XVNMSUBASP $B, $C, $A)>; + +def : Pat<(v2f64 (bitconvert v4f32:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; +def : Pat<(v2f64 (bitconvert v4i32:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; +def : Pat<(v2f64 (bitconvert v8i16:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; +def : Pat<(v2f64 (bitconvert v16i8:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; + +def : Pat<(v4f32 (bitconvert v2f64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v4i32 (bitconvert v2f64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v8i16 (bitconvert v2f64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v16i8 (bitconvert v2f64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; + +def : Pat<(v2i64 (bitconvert v4f32:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; +def : Pat<(v2i64 (bitconvert v4i32:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; +def : Pat<(v2i64 (bitconvert v8i16:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; +def : Pat<(v2i64 (bitconvert v16i8:$A)), + (COPY_TO_REGCLASS $A, VSRC)>; + +def : Pat<(v4f32 (bitconvert v2i64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v4i32 (bitconvert v2i64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v8i16 (bitconvert v2i64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v16i8 (bitconvert v2i64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; + +def : Pat<(v2f64 (bitconvert v2i64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v2i64 (bitconvert v2f64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; + +def : Pat<(v2f64 (bitconvert v1i128:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v1i128 (bitconvert v2f64:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; + +// sign extension patterns +// To extend "in place" from v2i32 to v2i64, we have input data like: +// | undef | i32 | undef | i32 | +// but xvcvsxwdp expects the input in big-Endian format: +// | i32 | undef | i32 | undef | +// so we need to shift everything to the left by one i32 (word) before +// the conversion. 
+def : Pat<(sext_inreg v2i64:$C, v2i32), + (XVCVDPSXDS (XVCVSXWDP (XXSLDWI $C, $C, 1)))>; +def : Pat<(v2f64 (sint_to_fp (sext_inreg v2i64:$C, v2i32))), + (XVCVSXWDP (XXSLDWI $C, $C, 1))>; + +def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 0)), + (v2f64 (XVCVSXWDP (v2i64 (XXMRGHW $C, $C))))>; +def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 1)), + (v2f64 (XVCVSXWDP (v2i64 (XXMRGLW $C, $C))))>; + +def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 0)), + (v2f64 (XVCVUXWDP (v2i64 (XXMRGHW $C, $C))))>; +def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 1)), + (v2f64 (XVCVUXWDP (v2i64 (XXMRGLW $C, $C))))>; + +// Loads. +let Predicates = [HasVSX, HasOnlySwappingMemOps] in { + def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>; + + // Stores. + def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst), + (STXVD2X $rS, xoaddr:$dst)>; + def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst), + (STXVD2X $rS, xoaddr:$dst)>; + def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst), + (STXVW4X $rS, xoaddr:$dst)>; + def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; +} +let Predicates = [IsBigEndian, HasVSX, HasOnlySwappingMemOps] in { + def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; + def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; + def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>; + def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVW4X xoaddr:$src)>; + def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; + def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; + def : Pat<(store v4i32:$XT, xoaddr:$dst), (STXVW4X $XT, xoaddr:$dst)>; + def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst), + (STXVW4X $rS, xoaddr:$dst)>; +} + +// Permutes. +def : Pat<(v2f64 (PPCxxswapd v2f64:$src)), (XXPERMDI $src, $src, 2)>; +def : Pat<(v2i64 (PPCxxswapd v2i64:$src)), (XXPERMDI $src, $src, 2)>; +def : Pat<(v4f32 (PPCxxswapd v4f32:$src)), (XXPERMDI $src, $src, 2)>; +def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>; +def : Pat<(v2f64 (PPCswapNoChain v2f64:$src)), (XXPERMDI $src, $src, 2)>; + +// PPCvecshl XT, XA, XA, 2 can be selected as either XXSLDWI XT,XA,XA,2 or +// XXSWAPD XT,XA (i.e. XXPERMDI XT,XA,XA,2); the latter is more profitable. +def : Pat<(v4i32 (PPCvecshl v4i32:$src, v4i32:$src, 2)), (XXPERMDI $src, $src, 2)>; + +// Selects. 
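+// The CR-bit encoding used by the select patterns below, spelled out: the i1
+// operands live in CR bits, and under the signed interpretation true == -1,
+// so SETLT holds only for ($lhs, $rhs) == (true, false), i.e. $lhs & ~$rhs,
+// which is exactly CRANDC $lhs, $rhs; unsigned SETULT swaps the operands.
+// SETLE and SETGE reduce to CRORC the same way, and SETEQ/SETNE map directly
+// onto CREQV/CRXOR.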
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)), + (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULT)), + (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLE)), + (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULE)), + (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETEQ)), + (SELECT_VSRC (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGE)), + (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGE)), + (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGT)), + (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGT)), + (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETNE)), + (SELECT_VSRC (CRXOR $lhs, $rhs), $tval, $fval)>; + +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)), + (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULT)), + (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)), + (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULE)), + (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)), + (SELECT_VSFRC (CREQV $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)), + (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGE)), + (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)), + (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)), + (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)), + (SELECT_VSFRC (CRXOR $lhs, $rhs), $tval, $fval)>; + +// Divides. +def : Pat<(int_ppc_vsx_xvdivsp v4f32:$A, v4f32:$B), + (XVDIVSP $A, $B)>; +def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B), + (XVDIVDP $A, $B)>; + +// Reciprocal estimate +def : Pat<(int_ppc_vsx_xvresp v4f32:$A), + (XVRESP $A)>; +def : Pat<(int_ppc_vsx_xvredp v2f64:$A), + (XVREDP $A)>; + +// Recip. 
square root estimate +def : Pat<(int_ppc_vsx_xvrsqrtesp v4f32:$A), + (XVRSQRTESP $A)>; +def : Pat<(int_ppc_vsx_xvrsqrtedp v2f64:$A), + (XVRSQRTEDP $A)>; + +let Predicates = [IsLittleEndian] in { +def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))), + (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; +def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))), + (f64 (XSCVSXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>; +def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))), + (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; +def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))), + (f64 (XSCVUXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>; +} // IsLittleEndian + +let Predicates = [IsBigEndian] in { +def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))), + (f64 (XSCVSXDDP (COPY_TO_REGCLASS $S, VSFRC)))>; +def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))), + (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; +def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))), + (f64 (XSCVUXDDP (COPY_TO_REGCLASS $S, VSFRC)))>; +def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))), + (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; +} // IsBigEndian + +} // AddedComplexity +} // HasVSX + +def ScalarLoads { + dag Li8 = (i32 (extloadi8 xoaddr:$src)); + dag ZELi8 = (i32 (zextloadi8 xoaddr:$src)); + dag ZELi8i64 = (i64 (zextloadi8 xoaddr:$src)); + dag SELi8 = (i32 (sext_inreg (extloadi8 xoaddr:$src), i8)); + dag SELi8i64 = (i64 (sext_inreg (extloadi8 xoaddr:$src), i8)); + + dag Li16 = (i32 (extloadi16 xoaddr:$src)); + dag ZELi16 = (i32 (zextloadi16 xoaddr:$src)); + dag ZELi16i64 = (i64 (zextloadi16 xoaddr:$src)); + dag SELi16 = (i32 (sextloadi16 xoaddr:$src)); + dag SELi16i64 = (i64 (sextloadi16 xoaddr:$src)); + + dag Li32 = (i32 (load xoaddr:$src)); +} + +// The following VSX instructions were introduced in Power ISA 2.07 +/* FIXME: if the operands are v2i64, these patterns will not match. + we should define new patterns or otherwise match the same patterns + when the elements are larger than i32. +*/ +def HasP8Vector : Predicate<"PPCSubTarget->hasP8Vector()">; +def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">; +def NoP9Vector : Predicate<"!PPCSubTarget->hasP9Vector()">; +let Predicates = [HasP8Vector] in { +let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. 
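+// ScalarLoads above only names dag fragments; nothing is emitted until a
+// pattern references one of its fields. A sketch of the intended use,
+// borrowing the Power9 load-and-splat LXVWSX (defined elsewhere in this
+// file) purely for illustration:
+//   def : Pat<(v4i32 (scalar_to_vector ScalarLoads.Li32)),
+//             (v4i32 (LXVWSX xoaddr:$src))>;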
+  let isCommutable = 1, UseVSXReg = 1 in {
+  def XXLEQV : XX3Form<60, 186,
+                       (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+                       "xxleqv $XT, $XA, $XB", IIC_VecGeneral,
+                       [(set v4i32:$XT, (vnot_ppc (xor v4i32:$XA, v4i32:$XB)))]>;
+  def XXLNAND : XX3Form<60, 178,
+                        (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+                        "xxlnand $XT, $XA, $XB", IIC_VecGeneral,
+                        [(set v4i32:$XT, (vnot_ppc (and v4i32:$XA,
+                                                    v4i32:$XB)))]>;
+  } // isCommutable, UseVSXReg
+
+  def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B),
+            (XXLEQV $A, $B)>;
+
+  let UseVSXReg = 1 in {
+  def XXLORC : XX3Form<60, 170,
+                       (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+                       "xxlorc $XT, $XA, $XB", IIC_VecGeneral,
+                       [(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>;
+
+  // VSX scalar loads introduced in ISA 2.07
+  let mayLoad = 1, mayStore = 0 in {
+    let CodeSize = 3 in
+    def LXSSPX : XX1Form_memOp<31, 524, (outs vssrc:$XT), (ins memrr:$src),
+                               "lxsspx $XT, $src", IIC_LdStLFD, []>;
+    def LXSIWAX : XX1Form_memOp<31, 76, (outs vsfrc:$XT), (ins memrr:$src),
+                                "lxsiwax $XT, $src", IIC_LdStLFD, []>;
+    def LXSIWZX : XX1Form_memOp<31, 12, (outs vsfrc:$XT), (ins memrr:$src),
+                                "lxsiwzx $XT, $src", IIC_LdStLFD, []>;
+
+    // Please note that let isPseudo = 1 is not part of class Pseudo<>.
+    // Omitting it would cause these pseudo instructions not to be expanded
+    // in expandPostRAPseudos().
+    let isPseudo = 1 in {
+      // Pseudo instruction XFLOADf32 will be expanded to LXSSPX or LFSX later
+      let CodeSize = 3 in
+      def XFLOADf32 : PseudoXFormMemOp<(outs vssrc:$XT), (ins memrr:$src),
+                                       "#XFLOADf32",
+                                       [(set f32:$XT, (load xoaddr:$src))]>;
+      // Pseudo instruction LIWAX will be expanded to LXSIWAX or LFIWAX later
+      def LIWAX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
+                                   "#LIWAX",
+                                   [(set f64:$XT, (PPClfiwax xoaddr:$src))]>;
+      // Pseudo instruction LIWZX will be expanded to LXSIWZX or LFIWZX later
+      def LIWZX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
+                                   "#LIWZX",
+                                   [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>;
+    }
+  } // mayLoad
+
+  // VSX scalar stores introduced in ISA 2.07
+  let mayStore = 1, mayLoad = 0 in {
+    let CodeSize = 3 in
+    def STXSSPX : XX1Form_memOp<31, 652, (outs), (ins vssrc:$XT, memrr:$dst),
+                                "stxsspx $XT, $dst", IIC_LdStSTFD, []>;
+    def STXSIWX : XX1Form_memOp<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst),
+                                "stxsiwx $XT, $dst", IIC_LdStSTFD, []>;
+
+    // Please note that let isPseudo = 1 is not part of class Pseudo<>.
+    // Omitting it would cause these pseudo instructions not to be expanded
+    // in expandPostRAPseudos().
+    let isPseudo = 1 in {
+      // Pseudo instruction XFSTOREf32 will be expanded to STXSSPX or STFSX later
+      let CodeSize = 3 in
+      def XFSTOREf32 : PseudoXFormMemOp<(outs), (ins vssrc:$XT, memrr:$dst),
+                                        "#XFSTOREf32",
+                                        [(store f32:$XT, xoaddr:$dst)]>;
+      // Pseudo instruction STIWX will be expanded to STXSIWX or STFIWX later
+      def STIWX : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst),
+                                   "#STIWX",
+                                   [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
+    }
+  } // mayStore
+  } // UseVSXReg = 1
+
+  def : Pat<(f64 (extloadf32 xoaddr:$src)),
+            (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$src), VSFRC)>;
+  def : Pat<(f32 (fpround (f64 (extloadf32 xoaddr:$src)))),
+            (f32 (XFLOADf32 xoaddr:$src))>;
+  def : Pat<(f64 (fpextend f32:$src)),
+            (COPY_TO_REGCLASS $src, VSFRC)>;
+
+  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)),
+            (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)),
+            (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)),
+            (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULE)),
+            (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)),
+            (SELECT_VSSRC (CREQV $lhs, $rhs), $tval, $fval)>;
+  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)),
+            (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGE)),
+            (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)),
+            (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGT)),
+            (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
+            (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+  let UseVSXReg = 1 in {
+  // VSX Elementary Scalar FP arithmetic (SP)
+  let isCommutable = 1 in {
+    def XSADDSP : XX3Form<60, 0,
+                          (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
+                          "xsaddsp $XT, $XA, $XB", IIC_VecFP,
+                          [(set f32:$XT, (fadd f32:$XA, f32:$XB))]>;
+    def XSMULSP : XX3Form<60, 16,
+                          (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
+                          "xsmulsp $XT, $XA, $XB", IIC_VecFP,
+                          [(set f32:$XT, (fmul f32:$XA, f32:$XB))]>;
+  } // isCommutable
+
+  def XSDIVSP : XX3Form<60, 24,
+                        (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
+                        "xsdivsp $XT, $XA, $XB", IIC_FPDivS,
+                        [(set f32:$XT, (fdiv f32:$XA, f32:$XB))]>;
+  def XSRESP : XX2Form<60, 26,
+                       (outs vssrc:$XT), (ins vssrc:$XB),
+                       "xsresp $XT, $XB", IIC_VecFP,
+                       [(set f32:$XT, (PPCfre f32:$XB))]>;
+  def XSRSP : XX2Form<60, 281,
+                      (outs vssrc:$XT), (ins vsfrc:$XB),
+                      "xsrsp $XT, $XB", IIC_VecFP, []>;
+  def XSSQRTSP : XX2Form<60, 11,
+                         (outs vssrc:$XT), (ins vssrc:$XB),
+                         "xssqrtsp $XT, $XB", IIC_FPSqrtS,
+                         [(set f32:$XT, (fsqrt f32:$XB))]>;
+  def XSRSQRTESP : XX2Form<60, 10,
+                           (outs vssrc:$XT), (ins vssrc:$XB),
+                           "xsrsqrtesp $XT, $XB", IIC_VecFP,
+                           [(set f32:$XT, (PPCfrsqrte f32:$XB))]>;
+  def XSSUBSP : XX3Form<60, 8,
+                        (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
+                        "xssubsp $XT, $XA, $XB", IIC_VecFP,
+                        [(set f32:$XT, (fsub f32:$XA, f32:$XB))]>;
+
+  // FMA Instructions
+  let BaseName = "XSMADDASP" in {
+  let isCommutable = 1 in
+  def XSMADDASP : XX3Form<60, 1,
(outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsmaddasp $XT, $XA, $XB", IIC_VecFP, + [(set f32:$XT, (fma f32:$XA, f32:$XB, f32:$XTi))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XSMADDMSP : XX3Form<60, 9, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsmaddmsp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XSMSUBASP" in { + let isCommutable = 1 in + def XSMSUBASP : XX3Form<60, 17, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsmsubasp $XT, $XA, $XB", IIC_VecFP, + [(set f32:$XT, (fma f32:$XA, f32:$XB, + (fneg f32:$XTi)))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XSMSUBMSP : XX3Form<60, 25, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsmsubmsp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XSNMADDASP" in { + let isCommutable = 1 in + def XSNMADDASP : XX3Form<60, 129, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsnmaddasp $XT, $XA, $XB", IIC_VecFP, + [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB, + f32:$XTi)))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XSNMADDMSP : XX3Form<60, 137, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsnmaddmsp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + let BaseName = "XSNMSUBASP" in { + let isCommutable = 1 in + def XSNMSUBASP : XX3Form<60, 145, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsnmsubasp $XT, $XA, $XB", IIC_VecFP, + [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB, + (fneg f32:$XTi))))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + let IsVSXFMAAlt = 1 in + def XSNMSUBMSP : XX3Form<60, 153, + (outs vssrc:$XT), + (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), + "xsnmsubmsp $XT, $XA, $XB", IIC_VecFP, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + AltVSXFMARel; + } + + // Single Precision Conversions (FP <-> INT) + def XSCVSXDSP : XX2Form<60, 312, + (outs vssrc:$XT), (ins vsfrc:$XB), + "xscvsxdsp $XT, $XB", IIC_VecFP, + [(set f32:$XT, (PPCfcfids f64:$XB))]>; + def XSCVUXDSP : XX2Form<60, 296, + (outs vssrc:$XT), (ins vsfrc:$XB), + "xscvuxdsp $XT, $XB", IIC_VecFP, + [(set f32:$XT, (PPCfcfidus f64:$XB))]>; + + // Conversions between vector and scalar single precision + def XSCVDPSPN : XX2Form<60, 267, (outs vsrc:$XT), (ins vssrc:$XB), + "xscvdpspn $XT, $XB", IIC_VecFP, []>; + def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB), + "xscvspdpn $XT, $XB", IIC_VecFP, []>; + } // UseVSXReg = 1 + + let Predicates = [IsLittleEndian] in { + def : Pat<(f32 (PPCfcfids + (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))), + (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; + def : Pat<(f32 (PPCfcfids + (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))), + (f32 (XSCVSXDSP (COPY_TO_REGCLASS + (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>; + def : Pat<(f32 (PPCfcfidus + (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))), + (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; + def : Pat<(f32 (PPCfcfidus + (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))), + (f32 (XSCVUXDSP (COPY_TO_REGCLASS + (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>; + } + + let Predicates = [IsBigEndian] in { + def : 
Pat<(f32 (PPCfcfids + (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))), + (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S, VSFRC)))>; + def : Pat<(f32 (PPCfcfids + (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))), + (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; + def : Pat<(f32 (PPCfcfidus + (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))), + (f32 (XSCVUXDSP (COPY_TO_REGCLASS $S, VSFRC)))>; + def : Pat<(f32 (PPCfcfidus + (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))), + (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; + } + + // Instructions for converting float to i64 feeding a store. + let Predicates = [NoP9Vector] in { + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 8), + (STXSDX (XSCVDPSXDS f64:$src), xoaddr:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 8), + (STXSDX (XSCVDPUXDS f64:$src), xoaddr:$dst)>; + } + + // Instructions for converting float to i32 feeding a store. + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 4), + (STIWX (XSCVDPSXWS f64:$src), xoaddr:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 4), + (STIWX (XSCVDPUXWS f64:$src), xoaddr:$dst)>; + +} // AddedComplexity = 400 +} // HasP8Vector + +let UseVSXReg = 1, AddedComplexity = 400 in { +let Predicates = [HasDirectMove] in { + // VSX direct move instructions + def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT), + "mfvsrd $rA, $XT", IIC_VecGeneral, + [(set i64:$rA, (PPCmfvsr f64:$XT))]>, + Requires<[In64BitMode]>; + let isCodeGenOnly = 1 in + def MFVRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vrrc:$XT), + "mfvsrd $rA, $XT", IIC_VecGeneral, + []>, + Requires<[In64BitMode]>; + def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT), + "mfvsrwz $rA, $XT", IIC_VecGeneral, + [(set i32:$rA, (PPCmfvsr f64:$XT))]>; + def MTVSRD : XX1_RS6_RD5_XO<31, 179, (outs vsfrc:$XT), (ins g8rc:$rA), + "mtvsrd $XT, $rA", IIC_VecGeneral, + [(set f64:$XT, (PPCmtvsra i64:$rA))]>, + Requires<[In64BitMode]>; + def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$rA), + "mtvsrwa $XT, $rA", IIC_VecGeneral, + [(set f64:$XT, (PPCmtvsra i32:$rA))]>; + def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$rA), + "mtvsrwz $XT, $rA", IIC_VecGeneral, + [(set f64:$XT, (PPCmtvsrz i32:$rA))]>; +} // HasDirectMove + +let Predicates = [IsISA3_0, HasDirectMove] in { + def MTVSRWS: XX1_RS6_RD5_XO<31, 403, (outs vsrc:$XT), (ins gprc:$rA), + "mtvsrws $XT, $rA", IIC_VecGeneral, []>; + + def MTVSRDD: XX1Form<31, 435, (outs vsrc:$XT), (ins g8rc_nox0:$rA, g8rc:$rB), + "mtvsrdd $XT, $rA, $rB", IIC_VecGeneral, + []>, Requires<[In64BitMode]>; + + def MFVSRLD: XX1_RS6_RD5_XO<31, 307, (outs g8rc:$rA), (ins vsrc:$XT), + "mfvsrld $rA, $XT", IIC_VecGeneral, + []>, Requires<[In64BitMode]>; + +} // IsISA3_0, HasDirectMove +} // UseVSXReg = 1 + +// We want to parse this from asm, but we don't want to emit this as it would +// be emitted with a VSX reg. So leave Emit = 0 here. +def : InstAlias<"mfvrd $rA, $XT", + (MFVRD g8rc:$rA, vrrc:$XT), 0>; +def : InstAlias<"mffprd $rA, $src", + (MFVSRD g8rc:$rA, f8rc:$src)>; + +/* Direct moves of various widths from GPR's into VSR's. Each move lines + the value up into element 0 (both BE and LE). Namely, entities smaller than + a doubleword are shifted left and moved for BE. 
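+   As an illustration of the BE case: BE_BYTE_0 below computes
+   (RLDICR x, 56, 7), i.e. the doubleword is rotated left 56 bits and all but
+   the top byte is cleared, so an input whose low byte is 0xAB becomes
+   0xAB00_0000_0000_0000 before the MTVSRD; halfwords and words use the same
+   idea with rotations of 48 and 32.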
For LE, they're moved, then + swapped to go into the least significant element of the VSR. +*/ +def MovesToVSR { + dag BE_BYTE_0 = + (MTVSRD + (RLDICR + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 56, 7)); + dag BE_HALF_0 = + (MTVSRD + (RLDICR + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 48, 15)); + dag BE_WORD_0 = + (MTVSRD + (RLDICR + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 32, 31)); + dag BE_DWORD_0 = (MTVSRD $A); + + dag LE_MTVSRW = (MTVSRD (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32)); + dag LE_WORD_1 = (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), + LE_MTVSRW, sub_64)); + dag LE_WORD_0 = (XXPERMDI LE_WORD_1, LE_WORD_1, 2); + dag LE_DWORD_1 = (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), + BE_DWORD_0, sub_64)); + dag LE_DWORD_0 = (XXPERMDI LE_DWORD_1, LE_DWORD_1, 2); +} + +/* Patterns for extracting elements out of vectors. Integer elements are + extracted using direct move operations. Patterns for extracting elements + whose indices are not available at compile time are also provided with + various _VARIABLE_ patterns. + The numbering for the DAG's is for LE, but when used on BE, the correct + LE element can just be used (i.e. LE_BYTE_2 == BE_BYTE_13). +*/ +def VectorExtractions { + // Doubleword extraction + dag LE_DWORD_0 = + (MFVSRD + (EXTRACT_SUBREG + (XXPERMDI (COPY_TO_REGCLASS $S, VSRC), + (COPY_TO_REGCLASS $S, VSRC), 2), sub_64)); + dag LE_DWORD_1 = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64)); + + // Word extraction + dag LE_WORD_0 = (MFVSRWZ (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64)); + dag LE_WORD_1 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 1), sub_64)); + dag LE_WORD_2 = (MFVSRWZ (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64)); + dag LE_WORD_3 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 3), sub_64)); + + // Halfword extraction + dag LE_HALF_0 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 0, 48), sub_32)); + dag LE_HALF_1 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 48, 48), sub_32)); + dag LE_HALF_2 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 32, 48), sub_32)); + dag LE_HALF_3 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 16, 48), sub_32)); + dag LE_HALF_4 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 0, 48), sub_32)); + dag LE_HALF_5 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 48, 48), sub_32)); + dag LE_HALF_6 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 32, 48), sub_32)); + dag LE_HALF_7 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 16, 48), sub_32)); + + // Byte extraction + dag LE_BYTE_0 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 0, 56), sub_32)); + dag LE_BYTE_1 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 56, 56), sub_32)); + dag LE_BYTE_2 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 48, 56), sub_32)); + dag LE_BYTE_3 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 40, 56), sub_32)); + dag LE_BYTE_4 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 32, 56), sub_32)); + dag LE_BYTE_5 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 24, 56), sub_32)); + dag LE_BYTE_6 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 16, 56), sub_32)); + dag LE_BYTE_7 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 8, 56), sub_32)); + dag LE_BYTE_8 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 0, 56), sub_32)); + dag LE_BYTE_9 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 56, 56), sub_32)); + dag LE_BYTE_10 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 48, 56), sub_32)); + dag LE_BYTE_11 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 40, 56), sub_32)); + dag LE_BYTE_12 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 32, 56), sub_32)); + dag LE_BYTE_13 = (i32 (EXTRACT_SUBREG 
(RLDICL LE_DWORD_1, 24, 56), sub_32)); + dag LE_BYTE_14 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 16, 56), sub_32)); + dag LE_BYTE_15 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 8, 56), sub_32)); + + /* Variable element number (BE and LE patterns must be specified separately) + This is a rather involved process. + + Conceptually, this is how the move is accomplished: + 1. Identify which doubleword contains the element + 2. Shift in the VMX register so that the correct doubleword is correctly + lined up for the MFVSRD + 3. Perform the move so that the element (along with some extra stuff) + is in the GPR + 4. Right shift within the GPR so that the element is right-justified + + Of course, the index is an element number which has a different meaning + on LE/BE so the patterns have to be specified separately. + + Note: The final result will be the element right-justified with high + order bits being arbitrarily defined (namely, whatever was in the + vector register to the left of the value originally). + */ + + /* LE variable byte + Number 1. above: + - For elements 0-7, we shift left by 8 bytes since they're on the right + - For elements 8-15, we need not shift (shift left by zero bytes) + This is accomplished by inverting the bits of the index and AND-ing + with 0x8 (i.e. clearing all bits of the index and inverting bit 60). + */ + dag LE_VBYTE_PERM_VEC = (v16i8 (LVSL ZERO8, (ANDC8 (LI8 8), $Idx))); + + // Number 2. above: + // - Now that we set up the shift amount, we shift in the VMX register + dag LE_VBYTE_PERMUTE = (v16i8 (VPERM $S, $S, LE_VBYTE_PERM_VEC)); + + // Number 3. above: + // - The doubleword containing our element is moved to a GPR + dag LE_MV_VBYTE = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS LE_VBYTE_PERMUTE, VSRC)), + sub_64)); + + /* Number 4. above: + - Truncate the element number to the range 0-7 (8-15 are symmetrical + and out of range values are truncated accordingly) + - Multiply by 8 as we need to shift right by the number of bits, not bytes + - Shift right in the GPR by the calculated value + */ + dag LE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 7), $Idx), 3, 60), + sub_32); + dag LE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD LE_MV_VBYTE, LE_VBYTE_SHIFT), + sub_32); + + /* LE variable halfword + Number 1. above: + - For elements 0-3, we shift left by 8 since they're on the right + - For elements 4-7, we need not shift (shift left by zero bytes) + Similarly to the byte pattern, we invert the bits of the index, but we + AND with 0x4 (i.e. clear all bits of the index and invert bit 61). + Of course, the shift is still by 8 bytes, so we must multiply by 2. + */ + dag LE_VHALF_PERM_VEC = + (v16i8 (LVSL ZERO8, (RLDICR (ANDC8 (LI8 4), $Idx), 1, 62))); + + // Number 2. above: + // - Now that we set up the shift amount, we shift in the VMX register + dag LE_VHALF_PERMUTE = (v16i8 (VPERM $S, $S, LE_VHALF_PERM_VEC)); + + // Number 3. above: + // - The doubleword containing our element is moved to a GPR + dag LE_MV_VHALF = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS LE_VHALF_PERMUTE, VSRC)), + sub_64)); + + /* Number 4. 
above: + - Truncate the element number to the range 0-3 (4-7 are symmetrical + and out of range values are truncated accordingly) + - Multiply by 16 as we need to shift right by the number of bits + - Shift right in the GPR by the calculated value + */ + dag LE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 3), $Idx), 4, 59), + sub_32); + dag LE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD LE_MV_VHALF, LE_VHALF_SHIFT), + sub_32); + + /* LE variable word + Number 1. above: + - For elements 0-1, we shift left by 8 since they're on the right + - For elements 2-3, we need not shift + */ + dag LE_VWORD_PERM_VEC = (v16i8 (LVSL ZERO8, + (RLDICR (ANDC8 (LI8 2), $Idx), 2, 61))); + + // Number 2. above: + // - Now that we set up the shift amount, we shift in the VMX register + dag LE_VWORD_PERMUTE = (v16i8 (VPERM $S, $S, LE_VWORD_PERM_VEC)); + + // Number 3. above: + // - The doubleword containing our element is moved to a GPR + dag LE_MV_VWORD = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS LE_VWORD_PERMUTE, VSRC)), + sub_64)); + + /* Number 4. above: + - Truncate the element number to the range 0-1 (2-3 are symmetrical + and out of range values are truncated accordingly) + - Multiply by 32 as we need to shift right by the number of bits + - Shift right in the GPR by the calculated value + */ + dag LE_VWORD_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 1), $Idx), 5, 58), + sub_32); + dag LE_VARIABLE_WORD = (EXTRACT_SUBREG (SRD LE_MV_VWORD, LE_VWORD_SHIFT), + sub_32); + + /* LE variable doubleword + Number 1. above: + - For element 0, we shift left by 8 since it's on the right + - For element 1, we need not shift + */ + dag LE_VDWORD_PERM_VEC = (v16i8 (LVSL ZERO8, + (RLDICR (ANDC8 (LI8 1), $Idx), 3, 60))); + + // Number 2. above: + // - Now that we set up the shift amount, we shift in the VMX register + dag LE_VDWORD_PERMUTE = (v16i8 (VPERM $S, $S, LE_VDWORD_PERM_VEC)); + + // Number 3. above: + // - The doubleword containing our element is moved to a GPR + // - Number 4. is not needed for the doubleword as the value is 64-bits + dag LE_VARIABLE_DWORD = + (MFVSRD (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS LE_VDWORD_PERMUTE, VSRC)), + sub_64)); + + /* LE variable float + - Shift the vector to line up the desired element to BE Word 0 + - Convert 32-bit float to a 64-bit single precision float + */ + dag LE_VFLOAT_PERM_VEC = (v16i8 (LVSL ZERO8, + (RLDICR (XOR8 (LI8 3), $Idx), 2, 61))); + dag LE_VFLOAT_PERMUTE = (VPERM $S, $S, LE_VFLOAT_PERM_VEC); + dag LE_VARIABLE_FLOAT = (XSCVSPDPN LE_VFLOAT_PERMUTE); + + /* LE variable double + Same as the LE doubleword except there is no move. 
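+     As a worked example: for $Idx = 0 the shift-amount dag above,
+     (RLDICR (ANDC8 (LI8 1), $Idx), 3, 60), evaluates to (1 & ~0) << 3 = 8,
+     so LVSL/VPERM rotate the vector left by 8 bytes and doubleword 0 lands
+     where it can be copied out directly; for $Idx = 1 the amount is 0 and
+     no rotation is needed.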
+ */ + dag LE_VDOUBLE_PERMUTE = (v16i8 (VPERM (v16i8 (COPY_TO_REGCLASS $S, VRRC)), + (v16i8 (COPY_TO_REGCLASS $S, VRRC)), + LE_VDWORD_PERM_VEC)); + dag LE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS LE_VDOUBLE_PERMUTE, VSRC); + + /* BE variable byte + The algorithm here is the same as the LE variable byte except: + - The shift in the VMX register is by 0/8 for opposite element numbers so + we simply AND the element number with 0x8 + - The order of elements after the move to GPR is reversed, so we invert + the bits of the index prior to truncating to the range 0-7 + */ + dag BE_VBYTE_PERM_VEC = (v16i8 (LVSL ZERO8, (ANDIo8 $Idx, 8))); + dag BE_VBYTE_PERMUTE = (v16i8 (VPERM $S, $S, BE_VBYTE_PERM_VEC)); + dag BE_MV_VBYTE = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS BE_VBYTE_PERMUTE, VSRC)), + sub_64)); + dag BE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 7), $Idx), 3, 60), + sub_32); + dag BE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD BE_MV_VBYTE, BE_VBYTE_SHIFT), + sub_32); + + /* BE variable halfword + The algorithm here is the same as the LE variable halfword except: + - The shift in the VMX register is by 0/8 for opposite element numbers so + we simply AND the element number with 0x4 and multiply by 2 + - The order of elements after the move to GPR is reversed, so we invert + the bits of the index prior to truncating to the range 0-3 + */ + dag BE_VHALF_PERM_VEC = (v16i8 (LVSL ZERO8, + (RLDICR (ANDIo8 $Idx, 4), 1, 62))); + dag BE_VHALF_PERMUTE = (v16i8 (VPERM $S, $S, BE_VHALF_PERM_VEC)); + dag BE_MV_VHALF = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS BE_VHALF_PERMUTE, VSRC)), + sub_64)); + dag BE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 3), $Idx), 4, 59), + sub_32); + dag BE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD BE_MV_VHALF, BE_VHALF_SHIFT), + sub_32); + + /* BE variable word + The algorithm is the same as the LE variable word except: + - The shift in the VMX register happens for opposite element numbers + - The order of elements after the move to GPR is reversed, so we invert + the bits of the index prior to truncating to the range 0-1 + */ + dag BE_VWORD_PERM_VEC = (v16i8 (LVSL ZERO8, + (RLDICR (ANDIo8 $Idx, 2), 2, 61))); + dag BE_VWORD_PERMUTE = (v16i8 (VPERM $S, $S, BE_VWORD_PERM_VEC)); + dag BE_MV_VWORD = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS BE_VWORD_PERMUTE, VSRC)), + sub_64)); + dag BE_VWORD_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 1), $Idx), 5, 58), + sub_32); + dag BE_VARIABLE_WORD = (EXTRACT_SUBREG (SRD BE_MV_VWORD, BE_VWORD_SHIFT), + sub_32); + + /* BE variable doubleword + Same as the LE doubleword except we shift in the VMX register for opposite + element indices. + */ + dag BE_VDWORD_PERM_VEC = (v16i8 (LVSL ZERO8, + (RLDICR (ANDIo8 $Idx, 1), 3, 60))); + dag BE_VDWORD_PERMUTE = (v16i8 (VPERM $S, $S, BE_VDWORD_PERM_VEC)); + dag BE_VARIABLE_DWORD = + (MFVSRD (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS BE_VDWORD_PERMUTE, VSRC)), + sub_64)); + + /* BE variable float + - Shift the vector to line up the desired element to BE Word 0 + - Convert 32-bit float to a 64-bit single precision float + */ + dag BE_VFLOAT_PERM_VEC = (v16i8 (LVSL ZERO8, (RLDICR $Idx, 2, 61))); + dag BE_VFLOAT_PERMUTE = (VPERM $S, $S, BE_VFLOAT_PERM_VEC); + dag BE_VARIABLE_FLOAT = (XSCVSPDPN BE_VFLOAT_PERMUTE); + + /* BE variable double + Same as the BE doubleword except there is no move. 
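+     (Sketch: here the shift amount is (RLDICR (ANDIo8 $Idx, 1), 3, 60),
+     i.e. ($Idx & 1) * 8, so on BE it is element 1 rather than element 0
+     that needs the 8-byte rotation before the copy.)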
+ */ + dag BE_VDOUBLE_PERMUTE = (v16i8 (VPERM (v16i8 (COPY_TO_REGCLASS $S, VRRC)), + (v16i8 (COPY_TO_REGCLASS $S, VRRC)), + BE_VDWORD_PERM_VEC)); + dag BE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS BE_VDOUBLE_PERMUTE, VSRC); +} + +def NoP9Altivec : Predicate<"!PPCSubTarget->hasP9Altivec()">; +let AddedComplexity = 400 in { +// v4f32 scalar <-> vector conversions (BE) +let Predicates = [IsBigEndian, HasP8Vector] in { + def : Pat<(v4f32 (scalar_to_vector f32:$A)), + (v4f32 (XSCVDPSPN $A))>; + def : Pat<(f32 (vector_extract v4f32:$S, 0)), + (f32 (XSCVSPDPN $S))>; + def : Pat<(f32 (vector_extract v4f32:$S, 1)), + (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>; + def : Pat<(f32 (vector_extract v4f32:$S, 2)), + (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>; + def : Pat<(f32 (vector_extract v4f32:$S, 3)), + (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>; + def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)), + (f32 VectorExtractions.BE_VARIABLE_FLOAT)>; +} // IsBigEndian, HasP8Vector + +// Variable index vector_extract for v2f64 does not require P8Vector +let Predicates = [IsBigEndian, HasVSX] in + def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)), + (f64 VectorExtractions.BE_VARIABLE_DOUBLE)>; + +let Predicates = [IsBigEndian, HasDirectMove] in { + // v16i8 scalar <-> vector conversions (BE) + def : Pat<(v16i8 (scalar_to_vector i32:$A)), + (v16i8 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_BYTE_0, sub_64))>; + def : Pat<(v8i16 (scalar_to_vector i32:$A)), + (v8i16 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_HALF_0, sub_64))>; + def : Pat<(v4i32 (scalar_to_vector i32:$A)), + (v4i32 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_WORD_0, sub_64))>; + def : Pat<(v2i64 (scalar_to_vector i64:$A)), + (v2i64 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_DWORD_0, sub_64))>; + + // v2i64 scalar <-> vector conversions (BE) + def : Pat<(i64 (vector_extract v2i64:$S, 0)), + (i64 VectorExtractions.LE_DWORD_1)>; + def : Pat<(i64 (vector_extract v2i64:$S, 1)), + (i64 VectorExtractions.LE_DWORD_0)>; + def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)), + (i64 VectorExtractions.BE_VARIABLE_DWORD)>; +} // IsBigEndian, HasDirectMove + +let Predicates = [IsBigEndian, HasDirectMove, NoP9Altivec] in { + def : Pat<(i32 (vector_extract v16i8:$S, 0)), + (i32 VectorExtractions.LE_BYTE_15)>; + def : Pat<(i32 (vector_extract v16i8:$S, 1)), + (i32 VectorExtractions.LE_BYTE_14)>; + def : Pat<(i32 (vector_extract v16i8:$S, 2)), + (i32 VectorExtractions.LE_BYTE_13)>; + def : Pat<(i32 (vector_extract v16i8:$S, 3)), + (i32 VectorExtractions.LE_BYTE_12)>; + def : Pat<(i32 (vector_extract v16i8:$S, 4)), + (i32 VectorExtractions.LE_BYTE_11)>; + def : Pat<(i32 (vector_extract v16i8:$S, 5)), + (i32 VectorExtractions.LE_BYTE_10)>; + def : Pat<(i32 (vector_extract v16i8:$S, 6)), + (i32 VectorExtractions.LE_BYTE_9)>; + def : Pat<(i32 (vector_extract v16i8:$S, 7)), + (i32 VectorExtractions.LE_BYTE_8)>; + def : Pat<(i32 (vector_extract v16i8:$S, 8)), + (i32 VectorExtractions.LE_BYTE_7)>; + def : Pat<(i32 (vector_extract v16i8:$S, 9)), + (i32 VectorExtractions.LE_BYTE_6)>; + def : Pat<(i32 (vector_extract v16i8:$S, 10)), + (i32 VectorExtractions.LE_BYTE_5)>; + def : Pat<(i32 (vector_extract v16i8:$S, 11)), + (i32 VectorExtractions.LE_BYTE_4)>; + def : Pat<(i32 (vector_extract v16i8:$S, 12)), + (i32 VectorExtractions.LE_BYTE_3)>; + def : Pat<(i32 (vector_extract v16i8:$S, 13)), + (i32 VectorExtractions.LE_BYTE_2)>; + def : Pat<(i32 (vector_extract v16i8:$S, 14)), + (i32 VectorExtractions.LE_BYTE_1)>; + def : Pat<(i32 (vector_extract v16i8:$S, 15)), + (i32 VectorExtractions.LE_BYTE_0)>; + def : 
Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), + (i32 VectorExtractions.BE_VARIABLE_BYTE)>; + + // v8i16 scalar <-> vector conversions (BE) + def : Pat<(i32 (vector_extract v8i16:$S, 0)), + (i32 VectorExtractions.LE_HALF_7)>; + def : Pat<(i32 (vector_extract v8i16:$S, 1)), + (i32 VectorExtractions.LE_HALF_6)>; + def : Pat<(i32 (vector_extract v8i16:$S, 2)), + (i32 VectorExtractions.LE_HALF_5)>; + def : Pat<(i32 (vector_extract v8i16:$S, 3)), + (i32 VectorExtractions.LE_HALF_4)>; + def : Pat<(i32 (vector_extract v8i16:$S, 4)), + (i32 VectorExtractions.LE_HALF_3)>; + def : Pat<(i32 (vector_extract v8i16:$S, 5)), + (i32 VectorExtractions.LE_HALF_2)>; + def : Pat<(i32 (vector_extract v8i16:$S, 6)), + (i32 VectorExtractions.LE_HALF_1)>; + def : Pat<(i32 (vector_extract v8i16:$S, 7)), + (i32 VectorExtractions.LE_HALF_0)>; + def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), + (i32 VectorExtractions.BE_VARIABLE_HALF)>; + + // v4i32 scalar <-> vector conversions (BE) + def : Pat<(i32 (vector_extract v4i32:$S, 0)), + (i32 VectorExtractions.LE_WORD_3)>; + def : Pat<(i32 (vector_extract v4i32:$S, 1)), + (i32 VectorExtractions.LE_WORD_2)>; + def : Pat<(i32 (vector_extract v4i32:$S, 2)), + (i32 VectorExtractions.LE_WORD_1)>; + def : Pat<(i32 (vector_extract v4i32:$S, 3)), + (i32 VectorExtractions.LE_WORD_0)>; + def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), + (i32 VectorExtractions.BE_VARIABLE_WORD)>; +} // IsBigEndian, HasDirectMove, NoP9Altivec + +// v4f32 scalar <-> vector conversions (LE) +let Predicates = [IsLittleEndian, HasP8Vector] in { + def : Pat<(v4f32 (scalar_to_vector f32:$A)), + (v4f32 (XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 1))>; + def : Pat<(f32 (vector_extract v4f32:$S, 0)), + (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>; + def : Pat<(f32 (vector_extract v4f32:$S, 1)), + (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>; + def : Pat<(f32 (vector_extract v4f32:$S, 2)), + (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>; + def : Pat<(f32 (vector_extract v4f32:$S, 3)), + (f32 (XSCVSPDPN $S))>; + def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)), + (f32 VectorExtractions.LE_VARIABLE_FLOAT)>; +} // IsLittleEndian, HasP8Vector + +// Variable index vector_extract for v2f64 does not require P8Vector +let Predicates = [IsLittleEndian, HasVSX] in + def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)), + (f64 VectorExtractions.LE_VARIABLE_DOUBLE)>; + +def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; +def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; + +// Variable index unsigned vector_extract on Power9 +let Predicates = [HasP9Altivec, IsLittleEndian] in { + def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))), + (VEXTUBRX $Idx, $S)>; + + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))), + (VEXTUHRX (RLWINM8 $Idx, 1, 28, 30), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))), + (VEXTUHRX (LI8 0), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))), + (VEXTUHRX (LI8 2), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))), + (VEXTUHRX (LI8 4), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))), + (VEXTUHRX (LI8 6), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))), + (VEXTUHRX (LI8 8), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))), + (VEXTUHRX (LI8 10), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))), + (VEXTUHRX (LI8 12), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))), + (VEXTUHRX 
(LI8 14), $S)>; + + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))), + (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S)>; + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))), + (VEXTUWRX (LI8 0), $S)>; + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))), + (VEXTUWRX (LI8 4), $S)>; + // For extracting LE word 2, MFVSRWZ is better than VEXTUWRX + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (i32 VectorExtractions.LE_WORD_2), sub_32)>; + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))), + (VEXTUWRX (LI8 12), $S)>; + + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))), + (EXTSW (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S))>; + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))), + (EXTSW (VEXTUWRX (LI8 0), $S))>; + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))), + (EXTSW (VEXTUWRX (LI8 4), $S))>; + // For extracting LE word 2, MFVSRWZ is better than VEXTUWRX + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))), + (EXTSW (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (i32 VectorExtractions.LE_WORD_2), sub_32))>; + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))), + (EXTSW (VEXTUWRX (LI8 12), $S))>; + + def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), + (i32 (EXTRACT_SUBREG (VEXTUBRX $Idx, $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 0)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 0), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 1)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 1), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 2)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 2), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 3)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 3), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 4)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 4), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 5)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 5), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 6)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 6), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 7)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 7), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 8)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 8), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 9)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 9), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 10)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 10), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 11)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 11), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 12)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 12), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 13)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 13), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 14)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 14), $S), sub_32))>; + def : Pat<(i32 (vector_extract v16i8:$S, 15)), + (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 15), $S), sub_32))>; + + def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), + (i32 (EXTRACT_SUBREG (VEXTUHRX + (RLWINM8 $Idx, 1, 28, 30), $S), sub_32))>; + def : Pat<(i32 (vector_extract v8i16:$S, 0)), + (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 0), $S), sub_32))>; + def : Pat<(i32 (vector_extract v8i16:$S, 1)), + (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 2), $S), sub_32))>; + def : Pat<(i32 (vector_extract v8i16:$S, 2)), + (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 4), $S), sub_32))>; + def : Pat<(i32 (vector_extract v8i16:$S, 
3)),
+            (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 6), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v8i16:$S, 4)),
+            (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 8), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v8i16:$S, 5)),
+            (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 10), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v8i16:$S, 6)),
+            (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 12), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v8i16:$S, 7)),
+            (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 14), $S), sub_32))>;
+
+  def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
+            (i32 (EXTRACT_SUBREG (VEXTUWRX
+                                 (RLWINM8 $Idx, 2, 28, 29), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v4i32:$S, 0)),
+            (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 0), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v4i32:$S, 1)),
+            (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 4), $S), sub_32))>;
+  // For extracting LE word 2, MFVSRWZ is better than VEXTUWRX
+  def : Pat<(i32 (vector_extract v4i32:$S, 2)),
+            (i32 VectorExtractions.LE_WORD_2)>;
+  def : Pat<(i32 (vector_extract v4i32:$S, 3)),
+            (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 12), $S), sub_32))>;
+}
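+
+// (Illustrative note on the index scaling above and below: the VEXTUB/H/W[LR]X
+// instructions take a byte offset in the low-order bits of RA, so the element
+// index is scaled first. RLWINM8 $Idx, 1, 28, 30 computes ($Idx * 2) & 0xE and
+// RLWINM8 $Idx, 2, 28, 29 computes ($Idx * 4) & 0xC; e.g. halfword element 5
+// becomes byte offset 10.)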
+
+let Predicates = [HasP9Altivec, IsBigEndian] in {
+  def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))),
+            (VEXTUBLX $Idx, $S)>;
+
+  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))),
+            (VEXTUHLX (RLWINM8 $Idx, 1, 28, 30), $S)>;
+  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))),
+            (VEXTUHLX (LI8 0), $S)>;
+  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))),
+            (VEXTUHLX (LI8 2), $S)>;
+  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))),
+            (VEXTUHLX (LI8 4), $S)>;
+  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))),
+            (VEXTUHLX (LI8 6), $S)>;
+  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))),
+            (VEXTUHLX (LI8 8), $S)>;
+  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))),
+            (VEXTUHLX (LI8 10), $S)>;
+  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))),
+            (VEXTUHLX (LI8 12), $S)>;
+  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))),
+            (VEXTUHLX (LI8 14), $S)>;
+
+  def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
+            (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S)>;
+  def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))),
+            (VEXTUWLX (LI8 0), $S)>;
+
+  // For extracting BE word 1, MFVSRWZ is better than VEXTUWLX
+  def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))),
+            (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+            (i32 VectorExtractions.LE_WORD_2), sub_32)>;
+  def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))),
+            (VEXTUWLX (LI8 8), $S)>;
+  def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))),
+            (VEXTUWLX (LI8 12), $S)>;
+
+  def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
+            (EXTSW (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S))>;
+  def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))),
+            (EXTSW (VEXTUWLX (LI8 0), $S))>;
+  // For extracting BE word 1, MFVSRWZ is better than VEXTUWLX
+  def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))),
+            (EXTSW (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+            (i32 VectorExtractions.LE_WORD_2), sub_32))>;
+  def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))),
+            (EXTSW (VEXTUWLX (LI8 8), $S))>;
+  def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))),
+            (EXTSW (VEXTUWLX (LI8 12), $S))>;
+
+  def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
+            (i32 (EXTRACT_SUBREG (VEXTUBLX $Idx, $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v16i8:$S, 0)),
+            (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 0), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v16i8:$S, 1)),
+            (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 1), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v16i8:$S, 2)),
+            (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 2), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v16i8:$S, 3)),
+            (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 3), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v16i8:$S, 4)),
+            (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 4), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v16i8:$S, 5)),
+            (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 5), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v16i8:$S, 6)),
+            (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 6), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v16i8:$S, 7)),
+            (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 7), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v16i8:$S, 8)),
+            (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 8), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v16i8:$S, 9)),
+            (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 9), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v16i8:$S, 10)),
+            (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 10), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v16i8:$S, 11)),
+            (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 11), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v16i8:$S, 12)),
+            (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 12), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v16i8:$S, 13)),
+            (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 13), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v16i8:$S, 14)),
+            (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 14), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v16i8:$S, 15)),
+            (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 15), $S), sub_32))>;
+
+  def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
+            (i32 (EXTRACT_SUBREG (VEXTUHLX
+                                 (RLWINM8 $Idx, 1, 28, 30), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v8i16:$S, 0)),
+            (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 0), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v8i16:$S, 1)),
+            (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 2), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v8i16:$S, 2)),
+            (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 4), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v8i16:$S, 3)),
+            (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 6), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v8i16:$S, 4)),
+            (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 8), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v8i16:$S, 5)),
+            (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 10), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v8i16:$S, 6)),
+            (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 12), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v8i16:$S, 7)),
+            (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 14), $S), sub_32))>;
+
+  def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
+            (i32 (EXTRACT_SUBREG (VEXTUWLX
+                                 (RLWINM8 $Idx, 2, 28, 29), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v4i32:$S, 0)),
+            (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 0), $S), sub_32))>;
+  // For extracting BE word 1, MFVSRWZ is better than VEXTUWLX
+  def : Pat<(i32 (vector_extract v4i32:$S, 1)),
+            (i32 VectorExtractions.LE_WORD_2)>;
+  def : Pat<(i32 (vector_extract v4i32:$S, 2)),
+            (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 8), $S), sub_32))>;
+  def : Pat<(i32 (vector_extract v4i32:$S, 3)),
+            (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 12), $S), sub_32))>;
+}
+
+let Predicates = [IsLittleEndian, HasDirectMove] in {
+  // v16i8 scalar <-> vector conversions (LE)
+  def : Pat<(v16i8 (scalar_to_vector i32:$A)),
+            (v16i8 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>;
+  def : Pat<(v8i16 (scalar_to_vector i32:$A)),
+            (v8i16 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>;
+  def : Pat<(v4i32 (scalar_to_vector i32:$A)),
+            (v4i32
MovesToVSR.LE_WORD_0)>; + def : Pat<(v2i64 (scalar_to_vector i64:$A)), + (v2i64 MovesToVSR.LE_DWORD_0)>; + // v2i64 scalar <-> vector conversions (LE) + def : Pat<(i64 (vector_extract v2i64:$S, 0)), + (i64 VectorExtractions.LE_DWORD_0)>; + def : Pat<(i64 (vector_extract v2i64:$S, 1)), + (i64 VectorExtractions.LE_DWORD_1)>; + def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)), + (i64 VectorExtractions.LE_VARIABLE_DWORD)>; +} // IsLittleEndian, HasDirectMove + +let Predicates = [IsLittleEndian, HasDirectMove, NoP9Altivec] in { + def : Pat<(i32 (vector_extract v16i8:$S, 0)), + (i32 VectorExtractions.LE_BYTE_0)>; + def : Pat<(i32 (vector_extract v16i8:$S, 1)), + (i32 VectorExtractions.LE_BYTE_1)>; + def : Pat<(i32 (vector_extract v16i8:$S, 2)), + (i32 VectorExtractions.LE_BYTE_2)>; + def : Pat<(i32 (vector_extract v16i8:$S, 3)), + (i32 VectorExtractions.LE_BYTE_3)>; + def : Pat<(i32 (vector_extract v16i8:$S, 4)), + (i32 VectorExtractions.LE_BYTE_4)>; + def : Pat<(i32 (vector_extract v16i8:$S, 5)), + (i32 VectorExtractions.LE_BYTE_5)>; + def : Pat<(i32 (vector_extract v16i8:$S, 6)), + (i32 VectorExtractions.LE_BYTE_6)>; + def : Pat<(i32 (vector_extract v16i8:$S, 7)), + (i32 VectorExtractions.LE_BYTE_7)>; + def : Pat<(i32 (vector_extract v16i8:$S, 8)), + (i32 VectorExtractions.LE_BYTE_8)>; + def : Pat<(i32 (vector_extract v16i8:$S, 9)), + (i32 VectorExtractions.LE_BYTE_9)>; + def : Pat<(i32 (vector_extract v16i8:$S, 10)), + (i32 VectorExtractions.LE_BYTE_10)>; + def : Pat<(i32 (vector_extract v16i8:$S, 11)), + (i32 VectorExtractions.LE_BYTE_11)>; + def : Pat<(i32 (vector_extract v16i8:$S, 12)), + (i32 VectorExtractions.LE_BYTE_12)>; + def : Pat<(i32 (vector_extract v16i8:$S, 13)), + (i32 VectorExtractions.LE_BYTE_13)>; + def : Pat<(i32 (vector_extract v16i8:$S, 14)), + (i32 VectorExtractions.LE_BYTE_14)>; + def : Pat<(i32 (vector_extract v16i8:$S, 15)), + (i32 VectorExtractions.LE_BYTE_15)>; + def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), + (i32 VectorExtractions.LE_VARIABLE_BYTE)>; + + // v8i16 scalar <-> vector conversions (LE) + def : Pat<(i32 (vector_extract v8i16:$S, 0)), + (i32 VectorExtractions.LE_HALF_0)>; + def : Pat<(i32 (vector_extract v8i16:$S, 1)), + (i32 VectorExtractions.LE_HALF_1)>; + def : Pat<(i32 (vector_extract v8i16:$S, 2)), + (i32 VectorExtractions.LE_HALF_2)>; + def : Pat<(i32 (vector_extract v8i16:$S, 3)), + (i32 VectorExtractions.LE_HALF_3)>; + def : Pat<(i32 (vector_extract v8i16:$S, 4)), + (i32 VectorExtractions.LE_HALF_4)>; + def : Pat<(i32 (vector_extract v8i16:$S, 5)), + (i32 VectorExtractions.LE_HALF_5)>; + def : Pat<(i32 (vector_extract v8i16:$S, 6)), + (i32 VectorExtractions.LE_HALF_6)>; + def : Pat<(i32 (vector_extract v8i16:$S, 7)), + (i32 VectorExtractions.LE_HALF_7)>; + def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), + (i32 VectorExtractions.LE_VARIABLE_HALF)>; + + // v4i32 scalar <-> vector conversions (LE) + def : Pat<(i32 (vector_extract v4i32:$S, 0)), + (i32 VectorExtractions.LE_WORD_0)>; + def : Pat<(i32 (vector_extract v4i32:$S, 1)), + (i32 VectorExtractions.LE_WORD_1)>; + def : Pat<(i32 (vector_extract v4i32:$S, 2)), + (i32 VectorExtractions.LE_WORD_2)>; + def : Pat<(i32 (vector_extract v4i32:$S, 3)), + (i32 VectorExtractions.LE_WORD_3)>; + def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), + (i32 VectorExtractions.LE_VARIABLE_WORD)>; +} // IsLittleEndian, HasDirectMove, NoP9Altivec + +let Predicates = [HasDirectMove, HasVSX] in { +// bitconvert f32 -> i32 +// (convert to 32-bit fp single, shift right 1 word, move to GPR) +def : Pat<(i32 
(bitconvert f32:$S)),
+          (i32 (MFVSRWZ (EXTRACT_SUBREG
+                          (XXSLDWI (XSCVDPSPN $S), (XSCVDPSPN $S), 3),
+                          sub_64)))>;
+// bitconvert i32 -> f32
+// (move to FPR, shift left 1 word, convert to 64-bit fp single)
+def : Pat<(f32 (bitconvert i32:$A)),
+          (f32 (XSCVSPDPN
+          (XXSLDWI MovesToVSR.LE_WORD_1, MovesToVSR.LE_WORD_1, 1)))>;
+
+// bitconvert f64 -> i64
+// (move to GPR, nothing else needed)
+def : Pat<(i64 (bitconvert f64:$S)),
+          (i64 (MFVSRD $S))>;
+
+// bitconvert i64 -> f64
+// (move to FPR, nothing else needed)
+def : Pat<(f64 (bitconvert i64:$S)),
+          (f64 (MTVSRD $S))>;
+}
+
+// Materialize a zero-vector of long long
+def : Pat<(v2i64 immAllZerosV),
+          (v2i64 (XXLXORz))>;
+}
+
+def AlignValues {
+  dag F32_TO_BE_WORD1 = (v4f32 (XXSLDWI (XSCVDPSPN $B), (XSCVDPSPN $B), 3));
+  dag I32_TO_BE_WORD1 = (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC);
+}
+
+// The following VSX instructions were introduced in Power ISA 3.0
+def HasP9Vector : Predicate<"PPCSubTarget->hasP9Vector()">;
+let AddedComplexity = 400, Predicates = [HasP9Vector] in {
+
+  // [PO VRT XO VRB XO /]
+  class X_VT5_XO5_VB5<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
+                      list<dag> pattern>
+    : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$vT), (ins vrrc:$vB),
+                    !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
+
+  // [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /]
+  class X_VT5_XO5_VB5_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
+                         list<dag> pattern>
+    : X_VT5_XO5_VB5<opcode, xo2, xo, opc, pattern>, isDOT;
+
+  // [PO VRT XO VRB XO /], but only the left 64 bits (or fewer) of VRB are
+  // used, so we use a different operand class for VRB.
+  class X_VT5_XO5_VB5_TyVB<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
+                           RegisterOperand vbtype, list<dag> pattern>
+    : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$vT), (ins vbtype:$vB),
+                    !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
+
+  // [PO VRT XO VRB XO /]
+  class X_VT5_XO5_VB5_VSFR<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
+                           list<dag> pattern>
+    : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vfrc:$vT), (ins vrrc:$vB),
+                    !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
+
+  // [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /]
+  class X_VT5_XO5_VB5_VSFR_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
+                              list<dag> pattern>
+    : X_VT5_XO5_VB5_VSFR<opcode, xo2, xo, opc, pattern>, isDOT;
+
+  let UseVSXReg = 1 in {
+  // [PO T XO B XO BX /]
+  class XX2_RT5_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
+                        list<dag> pattern>
+    : XX2_RD5_XO5_RS6<opcode, xo2, xo, (outs g8rc:$rT), (ins vsfrc:$XB),
+                      !strconcat(opc, " $rT, $XB"), IIC_VecFP, pattern>;
+
+  // [PO T XO B XO BX TX]
+  class XX2_XT6_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
+                        RegisterOperand vtype, list<dag> pattern>
+    : XX2_RD6_XO5_RS6<opcode, xo2, xo, (outs vtype:$XT), (ins vtype:$XB),
+                      !strconcat(opc, " $XT, $XB"), IIC_VecFP, pattern>;
+
+  // [PO T A B XO AX BX TX], src and dest registers use different operand classes
+  class XX3_XT5_XA5_XB5<bits<6> opcode, bits<8> xo, string opc,
+                        RegisterOperand xty, RegisterOperand aty, RegisterOperand bty,
+                        InstrItinClass itin, list<dag> pattern>
+    : XX3Form<opcode, xo, (outs xty:$XT), (ins aty:$XA, bty:$XB),
+              !strconcat(opc, " $XT, $XA, $XB"), itin, pattern>;
+  } // UseVSXReg = 1
+
+  // [PO VRT VRA VRB XO /]
+  class X_VT5_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
+                      list<dag> pattern>
+    : XForm_1<opcode, xo, (outs vrrc:$vT), (ins vrrc:$vA, vrrc:$vB),
+              !strconcat(opc, " $vT, $vA, $vB"), IIC_VecFP,
pattern>; + + // [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /] + class X_VT5_VA5_VB5_Ro<bits<6> opcode, bits<10> xo, string opc, + list<dag> pattern> + : X_VT5_VA5_VB5<opcode, xo, opc, pattern>, isDOT; + + // [PO VRT VRA VRB XO /] + class X_VT5_VA5_VB5_FMA<bits<6> opcode, bits<10> xo, string opc, + list<dag> pattern> + : XForm_1<opcode, xo, (outs vrrc:$vT), (ins vrrc:$vTi, vrrc:$vA, vrrc:$vB), + !strconcat(opc, " $vT, $vA, $vB"), IIC_VecFP, pattern>, + RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">; + + // [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /] + class X_VT5_VA5_VB5_FMA_Ro<bits<6> opcode, bits<10> xo, string opc, + list<dag> pattern> + : X_VT5_VA5_VB5_FMA<opcode, xo, opc, pattern>, isDOT; + + //===--------------------------------------------------------------------===// + // Quad-Precision Scalar Move Instructions: + + // Copy Sign + def XSCPSGNQP : X_VT5_VA5_VB5<63, 100, "xscpsgnqp", + [(set f128:$vT, + (fcopysign f128:$vB, f128:$vA))]>; + + // Absolute/Negative-Absolute/Negate + def XSABSQP : X_VT5_XO5_VB5<63, 0, 804, "xsabsqp", + [(set f128:$vT, (fabs f128:$vB))]>; + def XSNABSQP : X_VT5_XO5_VB5<63, 8, 804, "xsnabsqp", + [(set f128:$vT, (fneg (fabs f128:$vB)))]>; + def XSNEGQP : X_VT5_XO5_VB5<63, 16, 804, "xsnegqp", + [(set f128:$vT, (fneg f128:$vB))]>; + + //===--------------------------------------------------------------------===// + // Quad-Precision Scalar Floating-Point Arithmetic Instructions: + + // Add/Divide/Multiply/Subtract + let isCommutable = 1 in { + def XSADDQP : X_VT5_VA5_VB5 <63, 4, "xsaddqp", + [(set f128:$vT, (fadd f128:$vA, f128:$vB))]>; + def XSADDQPO : X_VT5_VA5_VB5_Ro<63, 4, "xsaddqpo", + [(set f128:$vT, + (int_ppc_addf128_round_to_odd + f128:$vA, f128:$vB))]>; + def XSMULQP : X_VT5_VA5_VB5 <63, 36, "xsmulqp", + [(set f128:$vT, (fmul f128:$vA, f128:$vB))]>; + def XSMULQPO : X_VT5_VA5_VB5_Ro<63, 36, "xsmulqpo", + [(set f128:$vT, + (int_ppc_mulf128_round_to_odd + f128:$vA, f128:$vB))]>; + } + + def XSSUBQP : X_VT5_VA5_VB5 <63, 516, "xssubqp" , + [(set f128:$vT, (fsub f128:$vA, f128:$vB))]>; + def XSSUBQPO : X_VT5_VA5_VB5_Ro<63, 516, "xssubqpo", + [(set f128:$vT, + (int_ppc_subf128_round_to_odd + f128:$vA, f128:$vB))]>; + def XSDIVQP : X_VT5_VA5_VB5 <63, 548, "xsdivqp", + [(set f128:$vT, (fdiv f128:$vA, f128:$vB))]>; + def XSDIVQPO : X_VT5_VA5_VB5_Ro<63, 548, "xsdivqpo", + [(set f128:$vT, + (int_ppc_divf128_round_to_odd + f128:$vA, f128:$vB))]>; + + // Square-Root + def XSSQRTQP : X_VT5_XO5_VB5 <63, 27, 804, "xssqrtqp", + [(set f128:$vT, (fsqrt f128:$vB))]>; + def XSSQRTQPO : X_VT5_XO5_VB5_Ro<63, 27, 804, "xssqrtqpo", + [(set f128:$vT, + (int_ppc_sqrtf128_round_to_odd f128:$vB))]>; + + // (Negative) Multiply-{Add/Subtract} + def XSMADDQP : X_VT5_VA5_VB5_FMA <63, 388, "xsmaddqp", + [(set f128:$vT, + (fma f128:$vA, f128:$vB, + f128:$vTi))]>; + + def XSMADDQPO : X_VT5_VA5_VB5_FMA_Ro<63, 388, "xsmaddqpo", + [(set f128:$vT, + (int_ppc_fmaf128_round_to_odd + f128:$vA,f128:$vB,f128:$vTi))]>; + + def XSMSUBQP : X_VT5_VA5_VB5_FMA <63, 420, "xsmsubqp" , + [(set f128:$vT, + (fma f128:$vA, f128:$vB, + (fneg f128:$vTi)))]>; + def XSMSUBQPO : X_VT5_VA5_VB5_FMA_Ro<63, 420, "xsmsubqpo" , + [(set f128:$vT, + (int_ppc_fmaf128_round_to_odd + f128:$vA, f128:$vB, (fneg f128:$vTi)))]>; + def XSNMADDQP : X_VT5_VA5_VB5_FMA <63, 452, "xsnmaddqp", + [(set f128:$vT, + (fneg (fma f128:$vA, f128:$vB, + f128:$vTi)))]>; + def XSNMADDQPO: X_VT5_VA5_VB5_FMA_Ro<63, 452, "xsnmaddqpo", + [(set f128:$vT, + (fneg (int_ppc_fmaf128_round_to_odd + 
f128:$vA, f128:$vB, f128:$vTi)))]>; + def XSNMSUBQP : X_VT5_VA5_VB5_FMA <63, 484, "xsnmsubqp", + [(set f128:$vT, + (fneg (fma f128:$vA, f128:$vB, + (fneg f128:$vTi))))]>; + def XSNMSUBQPO: X_VT5_VA5_VB5_FMA_Ro<63, 484, "xsnmsubqpo", + [(set f128:$vT, + (fneg (int_ppc_fmaf128_round_to_odd + f128:$vA, f128:$vB, (fneg f128:$vTi))))]>; + + // Additional fnmsub patterns: -a*c + b == -(a*c - b) + def : Pat<(fma (fneg f128:$A), f128:$C, f128:$B), (XSNMSUBQP $B, $C, $A)>; + def : Pat<(fma f128:$A, (fneg f128:$C), f128:$B), (XSNMSUBQP $B, $C, $A)>; + + //===--------------------------------------------------------------------===// + // Quad/Double-Precision Compare Instructions: + + // [PO BF // VRA VRB XO /] + class X_BF3_VA5_VB5<bits<6> opcode, bits<10> xo, string opc, + list<dag> pattern> + : XForm_17<opcode, xo, (outs crrc:$crD), (ins vrrc:$VA, vrrc:$VB), + !strconcat(opc, " $crD, $VA, $VB"), IIC_FPCompare> { + let Pattern = pattern; + } + + // QP Compare Ordered/Unordered + def XSCMPOQP : X_BF3_VA5_VB5<63, 132, "xscmpoqp", []>; + def XSCMPUQP : X_BF3_VA5_VB5<63, 644, "xscmpuqp", []>; + + // DP/QP Compare Exponents + def XSCMPEXPDP : XX3Form_1<60, 59, + (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), + "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>, + UseVSXReg; + def XSCMPEXPQP : X_BF3_VA5_VB5<63, 164, "xscmpexpqp", []>; + + // DP Compare ==, >=, >, != + // Use vsrc for XT, because the entire register of XT is set. + // XT.dword[1] = 0x0000_0000_0000_0000 + def XSCMPEQDP : XX3_XT5_XA5_XB5<60, 3, "xscmpeqdp", vsrc, vsfrc, vsfrc, + IIC_FPCompare, []>; + def XSCMPGEDP : XX3_XT5_XA5_XB5<60, 19, "xscmpgedp", vsrc, vsfrc, vsfrc, + IIC_FPCompare, []>; + def XSCMPGTDP : XX3_XT5_XA5_XB5<60, 11, "xscmpgtdp", vsrc, vsfrc, vsfrc, + IIC_FPCompare, []>; + + //===--------------------------------------------------------------------===// + // Quad-Precision Floating-Point Conversion Instructions: + + // Convert DP -> QP + def XSCVDPQP : X_VT5_XO5_VB5_TyVB<63, 22, 836, "xscvdpqp", vfrc, + [(set f128:$vT, (fpextend f64:$vB))]>; + + // Round & Convert QP -> DP (dword[1] is set to zero) + def XSCVQPDP : X_VT5_XO5_VB5_VSFR<63, 20, 836, "xscvqpdp" , []>; + def XSCVQPDPO : X_VT5_XO5_VB5_VSFR_Ro<63, 20, 836, "xscvqpdpo", + [(set f64:$vT, + (int_ppc_truncf128_round_to_odd + f128:$vB))]>; + + // Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero) + def XSCVQPSDZ : X_VT5_XO5_VB5<63, 25, 836, "xscvqpsdz", []>; + def XSCVQPSWZ : X_VT5_XO5_VB5<63, 9, 836, "xscvqpswz", []>; + def XSCVQPUDZ : X_VT5_XO5_VB5<63, 17, 836, "xscvqpudz", []>; + def XSCVQPUWZ : X_VT5_XO5_VB5<63, 1, 836, "xscvqpuwz", []>; + + // Convert (Un)Signed DWord -> QP. + def XSCVSDQP : X_VT5_XO5_VB5_TyVB<63, 10, 836, "xscvsdqp", vfrc, []>; + def : Pat<(f128 (sint_to_fp i64:$src)), + (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>; + def : Pat<(f128 (sint_to_fp (i64 (PPCmfvsr f64:$src)))), + (f128 (XSCVSDQP $src))>; + def : Pat<(f128 (sint_to_fp (i32 (PPCmfvsr f64:$src)))), + (f128 (XSCVSDQP (VEXTSW2Ds $src)))>; + + def XSCVUDQP : X_VT5_XO5_VB5_TyVB<63, 2, 836, "xscvudqp", vfrc, []>; + def : Pat<(f128 (uint_to_fp i64:$src)), + (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>; + def : Pat<(f128 (uint_to_fp (i64 (PPCmfvsr f64:$src)))), + (f128 (XSCVUDQP $src))>; + + // Convert (Un)Signed Word -> QP. 
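+  // There is no direct word -> QP convert instruction, so the patterns below
+  // (a sketch of the scheme) first extend the word into a 64-bit VSR --
+  // MTVSRWA for signed, MTVSRWZ for unsigned, or LIWAX/LIWZX when the word is
+  // loaded straight from memory -- and then reuse the doubleword converts
+  // XSCVSDQP/XSCVUDQP above.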
+ def : Pat<(f128 (sint_to_fp i32:$src)),
+ (f128 (XSCVSDQP (MTVSRWA $src)))>;
+ def : Pat<(f128 (sint_to_fp (i32 (load xoaddr:$src)))),
+ (f128 (XSCVSDQP (LIWAX xoaddr:$src)))>;
+ def : Pat<(f128 (uint_to_fp i32:$src)),
+ (f128 (XSCVUDQP (MTVSRWZ $src)))>;
+ def : Pat<(f128 (uint_to_fp (i32 (load xoaddr:$src)))),
+ (f128 (XSCVUDQP (LIWZX xoaddr:$src)))>;
+
+ let UseVSXReg = 1 in {
+ //===--------------------------------------------------------------------===//
+ // Round to Floating-Point Integer Instructions
+
+ // (Round &) Convert DP <-> HP
+ // Note! xscvdphp's src and dest registers both use the left 64 bits, so we
+ // use vsfrc for both. xscvhpdp's src only uses the left 16 bits, but we
+ // still use vsfrc for it.
+ def XSCVDPHP : XX2_XT6_XO5_XB6<60, 17, 347, "xscvdphp", vsfrc, []>;
+ def XSCVHPDP : XX2_XT6_XO5_XB6<60, 16, 347, "xscvhpdp", vsfrc, []>;
+
+ // Vector HP -> SP
+ def XVCVHPSP : XX2_XT6_XO5_XB6<60, 24, 475, "xvcvhpsp", vsrc, []>;
+ def XVCVSPHP : XX2_XT6_XO5_XB6<60, 25, 475, "xvcvsphp", vsrc,
+ [(set v4f32:$XT,
+ (int_ppc_vsx_xvcvsphp v4f32:$XB))]>;
+
+ } // UseVSXReg = 1
+
+ // Pattern for matching Vector HP -> Vector SP intrinsic. Defined as a
+ // separate pattern so that it can convert the input register class from
+ // VRRC(v8i16) to VSRC.
+ def : Pat<(v4f32 (int_ppc_vsx_xvcvhpsp v8i16:$A)),
+ (v4f32 (XVCVHPSP (COPY_TO_REGCLASS $A, VSRC)))>;
+
+ class Z23_VT5_R1_VB5_RMC2_EX1<bits<6> opcode, bits<8> xo, bit ex, string opc,
+ list<dag> pattern>
+ : Z23Form_8<opcode, xo,
+ (outs vrrc:$vT), (ins u1imm:$r, vrrc:$vB, u2imm:$rmc),
+ !strconcat(opc, " $r, $vT, $vB, $rmc"), IIC_VecFP, pattern> {
+ let RC = ex;
+ }
+
+ // Round to Quad-Precision Integer [with Inexact]
+ def XSRQPI : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 0, "xsrqpi" , []>;
+ def XSRQPIX : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 1, "xsrqpix", []>;
+
+ // Use current rounding mode
+ def : Pat<(f128 (fnearbyint f128:$vB)), (f128 (XSRQPI 0, $vB, 3))>;
+ // Round to nearest, ties away from zero
+ def : Pat<(f128 (fround f128:$vB)), (f128 (XSRQPI 0, $vB, 0))>;
+ // Round towards Zero
+ def : Pat<(f128 (ftrunc f128:$vB)), (f128 (XSRQPI 1, $vB, 1))>;
+ // Round towards +Inf
+ def : Pat<(f128 (fceil f128:$vB)), (f128 (XSRQPI 1, $vB, 2))>;
+ // Round towards -Inf
+ def : Pat<(f128 (ffloor f128:$vB)), (f128 (XSRQPI 1, $vB, 3))>;
+
+ // Use current rounding mode, [with Inexact]
+ def : Pat<(f128 (frint f128:$vB)), (f128 (XSRQPIX 0, $vB, 3))>;
+
+ // Round Quad-Precision to Double-Extended Precision (fp80)
+ def XSRQPXP : Z23_VT5_R1_VB5_RMC2_EX1<63, 37, 0, "xsrqpxp", []>;
+
+ //===--------------------------------------------------------------------===//
+ // Insert/Extract Instructions
+
+ // Insert Exponent DP/QP
+ // XT NOTE: XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU
+ def XSIEXPDP : XX1Form <60, 918, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB),
+ "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>, UseVSXReg;
+ // vB NOTE: only vB.dword[0] is used, that's why we don't use
+ // X_VT5_VA5_VB5 form
+ def XSIEXPQP : XForm_18<63, 868, (outs vrrc:$vT), (ins vrrc:$vA, vsfrc:$vB),
+ "xsiexpqp $vT, $vA, $vB", IIC_VecFP, []>;
+
+ // Extract Exponent/Significand DP/QP
+ def XSXEXPDP : XX2_RT5_XO5_XB6<60, 0, 347, "xsxexpdp", []>;
+ def XSXSIGDP : XX2_RT5_XO5_XB6<60, 1, 347, "xsxsigdp", []>;
+
+ def XSXEXPQP : X_VT5_XO5_VB5 <63, 2, 804, "xsxexpqp", []>;
+ def XSXSIGQP : X_VT5_XO5_VB5 <63, 18, 804, "xsxsigqp", []>;
+
+ // Vector Insert Word
+ let UseVSXReg = 1 in {
+ // XB NOTE: Only XB.dword[1] is used, but we use vsrc on XB.
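+ // The UIM operand below is the byte offset of the inserted word within
+ // XT (0, 4, 8 or 12 for word elements); the endian-specific insertelt
+ // patterns later in this file pick the offset accordingly.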
+ def XXINSERTW : + XX2_RD6_UIM5_RS6<60, 181, (outs vsrc:$XT), + (ins vsrc:$XTi, vsrc:$XB, u4imm:$UIM), + "xxinsertw $XT, $XB, $UIM", IIC_VecFP, + [(set v4i32:$XT, (PPCvecinsert v4i32:$XTi, v4i32:$XB, + imm32SExt16:$UIM))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">; + + // Vector Extract Unsigned Word + def XXEXTRACTUW : XX2_RD6_UIM5_RS6<60, 165, + (outs vsfrc:$XT), (ins vsrc:$XB, u4imm:$UIMM), + "xxextractuw $XT, $XB, $UIMM", IIC_VecFP, []>; + } // UseVSXReg = 1 + + // Vector Insert Exponent DP/SP + def XVIEXPDP : XX3_XT5_XA5_XB5<60, 248, "xviexpdp", vsrc, vsrc, vsrc, + IIC_VecFP, [(set v2f64: $XT,(int_ppc_vsx_xviexpdp v2i64:$XA, v2i64:$XB))]>; + def XVIEXPSP : XX3_XT5_XA5_XB5<60, 216, "xviexpsp", vsrc, vsrc, vsrc, + IIC_VecFP, [(set v4f32: $XT,(int_ppc_vsx_xviexpsp v4i32:$XA, v4i32:$XB))]>; + + // Vector Extract Exponent/Significand DP/SP + def XVXEXPDP : XX2_XT6_XO5_XB6<60, 0, 475, "xvxexpdp", vsrc, + [(set v2i64: $XT, + (int_ppc_vsx_xvxexpdp v2f64:$XB))]>; + def XVXEXPSP : XX2_XT6_XO5_XB6<60, 8, 475, "xvxexpsp", vsrc, + [(set v4i32: $XT, + (int_ppc_vsx_xvxexpsp v4f32:$XB))]>; + def XVXSIGDP : XX2_XT6_XO5_XB6<60, 1, 475, "xvxsigdp", vsrc, + [(set v2i64: $XT, + (int_ppc_vsx_xvxsigdp v2f64:$XB))]>; + def XVXSIGSP : XX2_XT6_XO5_XB6<60, 9, 475, "xvxsigsp", vsrc, + [(set v4i32: $XT, + (int_ppc_vsx_xvxsigsp v4f32:$XB))]>; + + let AddedComplexity = 400, Predicates = [HasP9Vector] in { + // Extra patterns expanding to vector Extract Word/Insert Word + def : Pat<(v4i32 (int_ppc_vsx_xxinsertw v4i32:$A, v2i64:$B, imm:$IMM)), + (v4i32 (XXINSERTW $A, $B, imm:$IMM))>; + def : Pat<(v2i64 (int_ppc_vsx_xxextractuw v2i64:$A, imm:$IMM)), + (v2i64 (COPY_TO_REGCLASS (XXEXTRACTUW $A, imm:$IMM), VSRC))>; + } // AddedComplexity = 400, HasP9Vector + + //===--------------------------------------------------------------------===// + + // Test Data Class SP/DP/QP + let UseVSXReg = 1 in { + def XSTSTDCSP : XX2_BF3_DCMX7_RS6<60, 298, + (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB), + "xststdcsp $BF, $XB, $DCMX", IIC_VecFP, []>; + def XSTSTDCDP : XX2_BF3_DCMX7_RS6<60, 362, + (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB), + "xststdcdp $BF, $XB, $DCMX", IIC_VecFP, []>; + } // UseVSXReg = 1 + def XSTSTDCQP : X_BF3_DCMX7_RS5 <63, 708, + (outs crrc:$BF), (ins u7imm:$DCMX, vrrc:$vB), + "xststdcqp $BF, $vB, $DCMX", IIC_VecFP, []>; + + // Vector Test Data Class SP/DP + let UseVSXReg = 1 in { + def XVTSTDCSP : XX2_RD6_DCMX7_RS6<60, 13, 5, + (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB), + "xvtstdcsp $XT, $XB, $DCMX", IIC_VecFP, + [(set v4i32: $XT, + (int_ppc_vsx_xvtstdcsp v4f32:$XB, imm:$DCMX))]>; + def XVTSTDCDP : XX2_RD6_DCMX7_RS6<60, 15, 5, + (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB), + "xvtstdcdp $XT, $XB, $DCMX", IIC_VecFP, + [(set v2i64: $XT, + (int_ppc_vsx_xvtstdcdp v2f64:$XB, imm:$DCMX))]>; + } // UseVSXReg = 1 + + //===--------------------------------------------------------------------===// + + // Maximum/Minimum Type-C/Type-J DP + // XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU, so we use vsrc for XT + def XSMAXCDP : XX3_XT5_XA5_XB5<60, 128, "xsmaxcdp", vsrc, vsfrc, vsfrc, + IIC_VecFP, []>; + def XSMAXJDP : XX3_XT5_XA5_XB5<60, 144, "xsmaxjdp", vsrc, vsfrc, vsfrc, + IIC_VecFP, []>; + def XSMINCDP : XX3_XT5_XA5_XB5<60, 136, "xsmincdp", vsrc, vsfrc, vsfrc, + IIC_VecFP, []>; + def XSMINJDP : XX3_XT5_XA5_XB5<60, 152, "xsminjdp", vsrc, vsfrc, vsfrc, + IIC_VecFP, []>; + + //===--------------------------------------------------------------------===// + + // Vector Byte-Reverse H/W/D/Q Word + def XXBRH : XX2_XT6_XO5_XB6<60, 
7, 475, "xxbrh", vsrc, []>; + def XXBRW : XX2_XT6_XO5_XB6<60, 15, 475, "xxbrw", vsrc, []>; + def XXBRD : XX2_XT6_XO5_XB6<60, 23, 475, "xxbrd", vsrc, []>; + def XXBRQ : XX2_XT6_XO5_XB6<60, 31, 475, "xxbrq", vsrc, []>; + + // Vector Reverse + def : Pat<(v8i16 (PPCxxreverse v8i16 :$A)), + (v8i16 (COPY_TO_REGCLASS (XXBRH (COPY_TO_REGCLASS $A, VSRC)), VRRC))>; + def : Pat<(v4i32 (PPCxxreverse v4i32 :$A)), + (v4i32 (XXBRW $A))>; + def : Pat<(v2i64 (PPCxxreverse v2i64 :$A)), + (v2i64 (XXBRD $A))>; + def : Pat<(v1i128 (PPCxxreverse v1i128 :$A)), + (v1i128 (COPY_TO_REGCLASS (XXBRQ (COPY_TO_REGCLASS $A, VSRC)), VRRC))>; + + // Vector Permute + def XXPERM : XX3_XT5_XA5_XB5<60, 26, "xxperm" , vsrc, vsrc, vsrc, + IIC_VecPerm, []>; + def XXPERMR : XX3_XT5_XA5_XB5<60, 58, "xxpermr", vsrc, vsrc, vsrc, + IIC_VecPerm, []>; + + // Vector Splat Immediate Byte + def XXSPLTIB : X_RD6_IMM8<60, 360, (outs vsrc:$XT), (ins u8imm:$IMM8), + "xxspltib $XT, $IMM8", IIC_VecPerm, []>, UseVSXReg; + + //===--------------------------------------------------------------------===// + // Vector/Scalar Load/Store Instructions + + // When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in + // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging. + let mayLoad = 1, mayStore = 0 in { + // Load Vector + def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src), + "lxv $XT, $src", IIC_LdStLFD, []>, UseVSXReg; + // Load DWord + def LXSD : DSForm_1<57, 2, (outs vfrc:$vD), (ins memrix:$src), + "lxsd $vD, $src", IIC_LdStLFD, []>; + // Load SP from src, convert it to DP, and place in dword[0] + def LXSSP : DSForm_1<57, 3, (outs vfrc:$vD), (ins memrix:$src), + "lxssp $vD, $src", IIC_LdStLFD, []>; + + // [PO T RA RB XO TX] almost equal to [PO S RA RB XO SX], but has different + // "out" and "in" dag + class X_XT6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc, + RegisterOperand vtype, list<dag> pattern> + : XX1Form_memOp<opcode, xo, (outs vtype:$XT), (ins memrr:$src), + !strconcat(opc, " $XT, $src"), IIC_LdStLFD, pattern>, UseVSXReg; + + // Load as Integer Byte/Halfword & Zero Indexed + def LXSIBZX : X_XT6_RA5_RB5<31, 781, "lxsibzx", vsfrc, + [(set f64:$XT, (PPClxsizx xoaddr:$src, 1))]>; + def LXSIHZX : X_XT6_RA5_RB5<31, 813, "lxsihzx", vsfrc, + [(set f64:$XT, (PPClxsizx xoaddr:$src, 2))]>; + + // Load Vector Halfword*8/Byte*16 Indexed + def LXVH8X : X_XT6_RA5_RB5<31, 812, "lxvh8x" , vsrc, []>; + def LXVB16X : X_XT6_RA5_RB5<31, 876, "lxvb16x", vsrc, []>; + + // Load Vector Indexed + def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc, + [(set v2f64:$XT, (load xaddr:$src))]>; + // Load Vector (Left-justified) with Length + def LXVL : XX1Form_memOp<31, 269, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB), + "lxvl $XT, $src, $rB", IIC_LdStLoad, + [(set v4i32:$XT, (int_ppc_vsx_lxvl addr:$src, i64:$rB))]>, + UseVSXReg; + def LXVLL : XX1Form_memOp<31,301, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB), + "lxvll $XT, $src, $rB", IIC_LdStLoad, + [(set v4i32:$XT, (int_ppc_vsx_lxvll addr:$src, i64:$rB))]>, + UseVSXReg; + + // Load Vector Word & Splat Indexed + def LXVWSX : X_XT6_RA5_RB5<31, 364, "lxvwsx" , vsrc, []>; + } // mayLoad + + // When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in + // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging. 
+ let mayStore = 1, mayLoad = 0 in { + // Store Vector + def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst), + "stxv $XT, $dst", IIC_LdStSTFD, []>, UseVSXReg; + // Store DWord + def STXSD : DSForm_1<61, 2, (outs), (ins vfrc:$vS, memrix:$dst), + "stxsd $vS, $dst", IIC_LdStSTFD, []>; + // Convert DP of dword[0] to SP, and Store to dst + def STXSSP : DSForm_1<61, 3, (outs), (ins vfrc:$vS, memrix:$dst), + "stxssp $vS, $dst", IIC_LdStSTFD, []>; + + // [PO S RA RB XO SX] + class X_XS6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc, + RegisterOperand vtype, list<dag> pattern> + : XX1Form_memOp<opcode, xo, (outs), (ins vtype:$XT, memrr:$dst), + !strconcat(opc, " $XT, $dst"), IIC_LdStSTFD, pattern>, UseVSXReg; + + // Store as Integer Byte/Halfword Indexed + def STXSIBX : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsfrc, + [(PPCstxsix f64:$XT, xoaddr:$dst, 1)]>; + def STXSIHX : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsfrc, + [(PPCstxsix f64:$XT, xoaddr:$dst, 2)]>; + let isCodeGenOnly = 1 in { + def STXSIBXv : X_XS6_RA5_RB5<31, 909, "stxsibx" , vrrc, []>; + def STXSIHXv : X_XS6_RA5_RB5<31, 941, "stxsihx" , vrrc, []>; + } + + // Store Vector Halfword*8/Byte*16 Indexed + def STXVH8X : X_XS6_RA5_RB5<31, 940, "stxvh8x" , vsrc, []>; + def STXVB16X : X_XS6_RA5_RB5<31, 1004, "stxvb16x", vsrc, []>; + + // Store Vector Indexed + def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc, + [(store v2f64:$XT, xaddr:$dst)]>; + + // Store Vector (Left-justified) with Length + def STXVL : XX1Form_memOp<31, 397, (outs), + (ins vsrc:$XT, memr:$dst, g8rc:$rB), + "stxvl $XT, $dst, $rB", IIC_LdStLoad, + [(int_ppc_vsx_stxvl v4i32:$XT, addr:$dst, + i64:$rB)]>, + UseVSXReg; + def STXVLL : XX1Form_memOp<31, 429, (outs), + (ins vsrc:$XT, memr:$dst, g8rc:$rB), + "stxvll $XT, $dst, $rB", IIC_LdStLoad, + [(int_ppc_vsx_stxvll v4i32:$XT, addr:$dst, + i64:$rB)]>, + UseVSXReg; + } // mayStore + + let Predicates = [IsLittleEndian] in { + def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))), + (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>; + def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))), + (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>; + def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))), + (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>; + def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))), + (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>; + def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))), + (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>; + def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))), + (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>; + def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))), + (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>; + def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))), + (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>; + } + + let Predicates = [IsBigEndian] in { + def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))), + (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>; + def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))), + (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>; + def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))), + (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>; + def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))), + (f32 (XSCVSPDPN (XVCVSXWSP 
(XXSPLTW $A, 3))))>; + def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))), + (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>; + def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))), + (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>; + def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))), + (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>; + def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))), + (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>; + } + + // Alternate patterns for PPCmtvsrz where the output is v8i16 or v16i8 instead + // of f64 + def : Pat<(v8i16 (PPCmtvsrz i32:$A)), + (v8i16 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>; + def : Pat<(v16i8 (PPCmtvsrz i32:$A)), + (v16i8 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>; + + // Patterns for which instructions from ISA 3.0 are a better match + let Predicates = [IsLittleEndian, HasP9Vector] in { + def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))), + (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>; + def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))), + (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>; + def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))), + (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>; + def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))), + (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>; + def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))), + (f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>; + def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))), + (f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>; + def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))), + (f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>; + def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))), + (f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>; + def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)), + (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>; + def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)), + (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>; + def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)), + (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>; + def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)), + (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>; + def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)), + (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>; + def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)), + (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>; + def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)), + (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>; + def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)), + (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>; + } // IsLittleEndian, HasP9Vector + + let Predicates = [IsBigEndian, HasP9Vector] in { + def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))), + (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>; + def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))), + (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>; + def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))), + (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>; + def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))), + (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>; + def : Pat<(f64 (PPCfcfidu (f64 
(PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>;
+ def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>;
+ def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>;
+ def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>;
+ def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
+ def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>;
+ def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>;
+ def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
+ def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
+ def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>;
+ def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>;
+ def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
+ } // IsBigEndian, HasP9Vector
+
+ // D-Form Load/Store
+ def : Pat<(v4i32 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v4f32 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v2i64 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v2f64 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
+ def : Pat<(f128 (quadwOffsetLoad iqaddr:$src)),
+ (COPY_TO_REGCLASS (LXV memrix16:$src), VRRC)>;
+ def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iqaddr:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iqaddr:$src)), (LXV memrix16:$src)>;
+
+ def : Pat<(quadwOffsetStore v4f32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
+ def : Pat<(quadwOffsetStore v4i32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
+ def : Pat<(quadwOffsetStore v2f64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
+ def : Pat<(quadwOffsetStore f128:$rS, iqaddr:$dst),
+ (STXV (COPY_TO_REGCLASS $rS, VSRC), memrix16:$dst)>;
+ def : Pat<(quadwOffsetStore v2i64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iqaddr:$dst),
+ (STXV $rS, memrix16:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iqaddr:$dst),
+ (STXV $rS, memrix16:$dst)>;
+
+
+ def : Pat<(v2f64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
+ def : Pat<(v2i64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
+ def : Pat<(v4f32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
+ def : Pat<(v4i32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
+ def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVX xoaddr:$src)>;
+ def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>;
+ def : Pat<(f128 (nonQuadwOffsetLoad xoaddr:$src)),
+ (COPY_TO_REGCLASS (LXVX xoaddr:$src), VRRC)>;
+ def : Pat<(nonQuadwOffsetStore f128:$rS, xoaddr:$dst),
+ (STXVX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;
+ def : Pat<(nonQuadwOffsetStore v2f64:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+ def : Pat<(nonQuadwOffsetStore v2i64:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+ def : Pat<(nonQuadwOffsetStore v4f32:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+
def : Pat<(nonQuadwOffsetStore v4i32:$rS, xoaddr:$dst), + (STXVX $rS, xoaddr:$dst)>; + def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst), + (STXVX $rS, xoaddr:$dst)>; + def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst), + (STXVX $rS, xoaddr:$dst)>; + + let AddedComplexity = 400 in { + // LIWAX - This instruction is used for sign extending i32 -> i64. + // LIWZX - This instruction will be emitted for i32, f32, and when + // zero-extending i32 to i64 (zext i32 -> i64). + let Predicates = [IsLittleEndian] in { + + def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))), + (v2i64 (XXPERMDIs + (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC), 2))>; + + def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))), + (v2i64 (XXPERMDIs + (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>; + + def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))), + (v4i32 (XXPERMDIs + (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>; + + def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))), + (v4f32 (XXPERMDIs + (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>; + } + + let Predicates = [IsBigEndian] in { + def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))), + (v2i64 (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC))>; + + def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))), + (v2i64 (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC))>; + + def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))), + (v4i32 (XXSLDWIs + (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>; + + def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))), + (v4f32 (XXSLDWIs + (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>; + } + + } + + // Build vectors from i8 loads + def : Pat<(v16i8 (scalar_to_vector ScalarLoads.Li8)), + (v16i8 (VSPLTBs 7, (LXSIBZX xoaddr:$src)))>; + def : Pat<(v8i16 (scalar_to_vector ScalarLoads.ZELi8)), + (v8i16 (VSPLTHs 3, (LXSIBZX xoaddr:$src)))>; + def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi8)), + (v4i32 (XXSPLTWs (LXSIBZX xoaddr:$src), 1))>; + def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi8i64)), + (v2i64 (XXPERMDIs (LXSIBZX xoaddr:$src), 0))>; + def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi8)), + (v4i32 (XXSPLTWs (VEXTSB2Ws (LXSIBZX xoaddr:$src)), 1))>; + def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi8i64)), + (v2i64 (XXPERMDIs (VEXTSB2Ds (LXSIBZX xoaddr:$src)), 0))>; + + // Build vectors from i16 loads + def : Pat<(v8i16 (scalar_to_vector ScalarLoads.Li16)), + (v8i16 (VSPLTHs 3, (LXSIHZX xoaddr:$src)))>; + def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi16)), + (v4i32 (XXSPLTWs (LXSIHZX xoaddr:$src), 1))>; + def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi16i64)), + (v2i64 (XXPERMDIs (LXSIHZX xoaddr:$src), 0))>; + def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi16)), + (v4i32 (XXSPLTWs (VEXTSH2Ws (LXSIHZX xoaddr:$src)), 1))>; + def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)), + (v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>; + + let Predicates = [IsBigEndian, HasP9Vector] in { + // Scalar stores of i8 + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst), + (STXSIBXv (v16i8 (VSLDOI $S, $S, 9)), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst), + (STXSIBXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst), + (STXSIBXv (v16i8 (VSLDOI $S, $S, 11)), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst), + (STXSIBXv 
(v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst), + (STXSIBXv (v16i8 (VSLDOI $S, $S, 13)), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst), + (STXSIBXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst), + (STXSIBXv (v16i8 (VSLDOI $S, $S, 15)), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst), + (STXSIBXv $S, xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst), + (STXSIBXv (v16i8 (VSLDOI $S, $S, 1)), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst), + (STXSIBXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst), + (STXSIBXv (v16i8 (VSLDOI $S, $S, 3)), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst), + (STXSIBXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst), + (STXSIBXv (v16i8 (VSLDOI $S, $S, 5)), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst), + (STXSIBXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst), + (STXSIBXv (v16i8 (VSLDOI $S, $S, 7)), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst), + (STXSIBXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>; + + // Scalar stores of i16 + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst), + (STXSIHXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst), + (STXSIHXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst), + (STXSIHXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst), + (STXSIHXv $S, xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst), + (STXSIHXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst), + (STXSIHXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst), + (STXSIHXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst), + (STXSIHXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>; + } // IsBigEndian, HasP9Vector + + let Predicates = [IsLittleEndian, HasP9Vector] in { + // Scalar stores of i8 + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst), + (STXSIBXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst), + (STXSIBXv (v16i8 (VSLDOI $S, $S, 7)), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst), + (STXSIBXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst), + (STXSIBXv (v16i8 (VSLDOI $S, $S, 5)), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst), + (STXSIBXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst), + (STXSIBXv (v16i8 (VSLDOI $S, $S, 3)), 
xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst), + (STXSIBXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst), + (STXSIBXv (v16i8 (VSLDOI $S, $S, 1)), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst), + (STXSIBXv $S, xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst), + (STXSIBXv (v16i8 (VSLDOI $S, $S, 15)), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst), + (STXSIBXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst), + (STXSIBXv (v16i8 (VSLDOI $S, $S, 13)), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst), + (STXSIBXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst), + (STXSIBXv (v16i8 (VSLDOI $S, $S, 11)), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst), + (STXSIBXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>; + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst), + (STXSIBXv (v16i8 (VSLDOI $S, $S, 9)), xoaddr:$dst)>; + + // Scalar stores of i16 + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst), + (STXSIHXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst), + (STXSIHXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst), + (STXSIHXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst), + (STXSIHXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst), + (STXSIHXv $S, xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst), + (STXSIHXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst), + (STXSIHXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst), + (STXSIHXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>; + } // IsLittleEndian, HasP9Vector + + + // Vector sign extensions + def : Pat<(f64 (PPCVexts f64:$A, 1)), + (f64 (COPY_TO_REGCLASS (VEXTSB2Ds $A), VSFRC))>; + def : Pat<(f64 (PPCVexts f64:$A, 2)), + (f64 (COPY_TO_REGCLASS (VEXTSH2Ds $A), VSFRC))>; + + let isPseudo = 1 in { + def DFLOADf32 : Pseudo<(outs vssrc:$XT), (ins memrix:$src), + "#DFLOADf32", + [(set f32:$XT, (load ixaddr:$src))]>; + def DFLOADf64 : Pseudo<(outs vsfrc:$XT), (ins memrix:$src), + "#DFLOADf64", + [(set f64:$XT, (load ixaddr:$src))]>; + def DFSTOREf32 : Pseudo<(outs), (ins vssrc:$XT, memrix:$dst), + "#DFSTOREf32", + [(store f32:$XT, ixaddr:$dst)]>; + def DFSTOREf64 : Pseudo<(outs), (ins vsfrc:$XT, memrix:$dst), + "#DFSTOREf64", + [(store f64:$XT, ixaddr:$dst)]>; + } + def : Pat<(f64 (extloadf32 ixaddr:$src)), + (COPY_TO_REGCLASS (DFLOADf32 ixaddr:$src), VSFRC)>; + def : Pat<(f32 (fpround (f64 (extloadf32 ixaddr:$src)))), + (f32 (DFLOADf32 ixaddr:$src))>; + + + let AddedComplexity = 400 in { + // The following pseudoinstructions are used to ensure the utilization + // of all 64 VSX registers. 
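+ // Scalar FP loads place the value in doubleword 0 of the VSX register,
+ // which on little endian is vector element 1, so the LE patterns below
+ // add an XXPERMDIs swap; big endian only needs a COPY_TO_REGCLASS.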
+ let Predicates = [IsLittleEndian, HasP9Vector] in { + def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))), + (v2i64 (XXPERMDIs + (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC), 2))>; + def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddr:$src)))), + (v2i64 (XXPERMDIs + (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>; + + def : Pat<(v2f64 (scalar_to_vector (f64 (load ixaddr:$src)))), + (v2f64 (XXPERMDIs + (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC), 2))>; + def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))), + (v2f64 (XXPERMDIs + (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>; + } + + let Predicates = [IsBigEndian, HasP9Vector] in { + def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))), + (v2i64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>; + def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddr:$src)))), + (v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>; + + def : Pat<(v2f64 (scalar_to_vector (f64 (load ixaddr:$src)))), + (v2f64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>; + def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))), + (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>; + } + } + + let Predicates = [IsBigEndian, HasP9Vector] in { + + // (Un)Signed DWord vector extract -> QP + def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))), + (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>; + def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 1)))), + (f128 (XSCVSDQP + (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>; + def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 0)))), + (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>; + def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 1)))), + (f128 (XSCVUDQP + (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>; + + // (Un)Signed Word vector extract -> QP + def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, 1)))), + (f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSW2D $src), sub_64)))>; + foreach Idx = [0,2,3] in { + def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, Idx)))), + (f128 (XSCVSDQP (EXTRACT_SUBREG + (VEXTSW2D (VSPLTW Idx, $src)), sub_64)))>; + } + foreach Idx = 0-3 in { + def : Pat<(f128 (uint_to_fp (i32 (extractelt v4i32:$src, Idx)))), + (f128 (XSCVUDQP (XXEXTRACTUW $src, !shl(Idx, 2))))>; + } + + // (Un)Signed HWord vector extract -> QP + foreach Idx = 0-7 in { + def : Pat<(f128 (sint_to_fp + (i32 (sext_inreg + (vector_extract v8i16:$src, Idx), i16)))), + (f128 (XSCVSDQP (EXTRACT_SUBREG + (VEXTSH2D (VEXTRACTUH !add(Idx, Idx), $src)), + sub_64)))>; + // The SDAG adds the `and` since an `i16` is being extracted as an `i32`. 
+ def : Pat<(f128 (uint_to_fp
+ (and (i32 (vector_extract v8i16:$src, Idx)), 65535))),
+ (f128 (XSCVUDQP (EXTRACT_SUBREG
+ (VEXTRACTUH !add(Idx, Idx), $src), sub_64)))>;
+ }
+
+ // (Un)Signed Byte vector extract -> QP
+ foreach Idx = 0-15 in {
+ def : Pat<(f128 (sint_to_fp
+ (i32 (sext_inreg (vector_extract v16i8:$src, Idx),
+ i8)))),
+ (f128 (XSCVSDQP (EXTRACT_SUBREG
+ (VEXTSB2D (VEXTRACTUB Idx, $src)), sub_64)))>;
+ def : Pat<(f128 (uint_to_fp
+ (and (i32 (vector_extract v16i8:$src, Idx)), 255))),
+ (f128 (XSCVUDQP
+ (EXTRACT_SUBREG (VEXTRACTUB Idx, $src), sub_64)))>;
+ }
+
+ // Unsigned int in vsx register -> QP
+ def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))),
+ (f128 (XSCVUDQP
+ (XXEXTRACTUW (SUBREG_TO_REG (i64 1), $src, sub_64), 4)))>;
+ } // IsBigEndian, HasP9Vector
+
+ let Predicates = [IsLittleEndian, HasP9Vector] in {
+
+ // (Un)Signed DWord vector extract -> QP
+ def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))),
+ (f128 (XSCVSDQP
+ (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
+ def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 1)))),
+ (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
+ def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 0)))),
+ (f128 (XSCVUDQP
+ (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
+ def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 1)))),
+ (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
+
+ // (Un)Signed Word vector extract -> QP
+ foreach Idx = [[0,3],[1,2],[3,0]] in {
+ def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, !head(Idx))))),
+ (f128 (XSCVSDQP (EXTRACT_SUBREG
+ (VEXTSW2D (VSPLTW !head(!tail(Idx)), $src)),
+ sub_64)))>;
+ }
+ def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, 2)))),
+ (f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSW2D $src), sub_64)))>;
+
+ foreach Idx = [[0,12],[1,8],[2,4],[3,0]] in {
+ def : Pat<(f128 (uint_to_fp (i32 (extractelt v4i32:$src, !head(Idx))))),
+ (f128 (XSCVUDQP (XXEXTRACTUW $src, !head(!tail(Idx)))))>;
+ }
+
+ // (Un)Signed HWord vector extract -> QP
+ // The nested foreach lists identify the vector element and the
+ // corresponding register byte location.
+ foreach Idx = [[0,14],[1,12],[2,10],[3,8],[4,6],[5,4],[6,2],[7,0]] in {
+ def : Pat<(f128 (sint_to_fp
+ (i32 (sext_inreg
+ (vector_extract v8i16:$src, !head(Idx)), i16)))),
+ (f128 (XSCVSDQP
+ (EXTRACT_SUBREG (VEXTSH2D
+ (VEXTRACTUH !head(!tail(Idx)), $src)),
+ sub_64)))>;
+ def : Pat<(f128 (uint_to_fp
+ (and (i32 (vector_extract v8i16:$src, !head(Idx))),
+ 65535))),
+ (f128 (XSCVUDQP (EXTRACT_SUBREG
+ (VEXTRACTUH !head(!tail(Idx)), $src), sub_64)))>;
+ }
+
+ // (Un)Signed Byte vector extract -> QP
+ foreach Idx = [[0,15],[1,14],[2,13],[3,12],[4,11],[5,10],[6,9],[7,8],[8,7],
+ [9,6],[10,5],[11,4],[12,3],[13,2],[14,1],[15,0]] in {
+ def : Pat<(f128 (sint_to_fp
+ (i32 (sext_inreg
+ (vector_extract v16i8:$src, !head(Idx)), i8)))),
+ (f128 (XSCVSDQP
+ (EXTRACT_SUBREG
+ (VEXTSB2D (VEXTRACTUB !head(!tail(Idx)), $src)),
+ sub_64)))>;
+ def : Pat<(f128 (uint_to_fp
+ (and (i32 (vector_extract v16i8:$src, !head(Idx))),
+ 255))),
+ (f128 (XSCVUDQP
+ (EXTRACT_SUBREG
+ (VEXTRACTUB !head(!tail(Idx)), $src), sub_64)))>;
+ }
+
+ // Unsigned int in vsx register -> QP
+ def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))),
+ (f128 (XSCVUDQP
+ (XXEXTRACTUW (SUBREG_TO_REG (i64 1), $src, sub_64), 8)))>;
+ } // IsLittleEndian, HasP9Vector
+
+ // Convert (Un)Signed DWord in memory -> QP
+ def : Pat<(f128 (sint_to_fp (i64 (load xaddr:$src)))),
+ (f128 (XSCVSDQP (LXSDX xaddr:$src)))>;
+ def : Pat<(f128 (sint_to_fp (i64 (load ixaddr:$src)))),
+ (f128 (XSCVSDQP (LXSD ixaddr:$src)))>;
+ def : Pat<(f128 (uint_to_fp (i64 (load xaddr:$src)))),
+ (f128 (XSCVUDQP (LXSDX xaddr:$src)))>;
+ def : Pat<(f128 (uint_to_fp (i64 (load ixaddr:$src)))),
+ (f128 (XSCVUDQP (LXSD ixaddr:$src)))>;
+
+ // Convert Unsigned HWord in memory -> QP
+ def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi16)),
+ (f128 (XSCVUDQP (LXSIHZX xaddr:$src)))>;
+
+ // Convert Unsigned Byte in memory -> QP
+ def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi8)),
+ (f128 (XSCVUDQP (LXSIBZX xoaddr:$src)))>;
+
+ // Truncate & Convert QP -> (Un)Signed (D)Word.
+ def : Pat<(i64 (fp_to_sint f128:$src)), (i64 (MFVRD (XSCVQPSDZ $src)))>;
+ def : Pat<(i64 (fp_to_uint f128:$src)), (i64 (MFVRD (XSCVQPUDZ $src)))>;
+ def : Pat<(i32 (fp_to_sint f128:$src)),
+ (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC)))>;
+ def : Pat<(i32 (fp_to_uint f128:$src)),
+ (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC)))>;
+
+ // Instructions for store(fptosi).
+ // The 8-byte version is repeated here due to availability of D-Form STXSD.
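+ // All of these share one convert-then-store shape: the value is first
+ // converted in a VSX register (XSCVQPSDZ/XSCVQPSWZ for f128 sources,
+ // XSCVDPSXDS/XSCVDPSXWS for f64) and then stored with the matching
+ // 8/4/2/1-byte store (STXSDX/STXSD, STXSIWX, STXSIHX, STXSIBX).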
+ def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xaddr:$dst, 8), + (STXSDX (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC), + xaddr:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), ixaddr:$dst, 8), + (STXSD (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC), + ixaddr:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 4), + (STXSIWX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 2), + (STXSIHX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 1), + (STXSIBX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xaddr:$dst, 8), + (STXSDX (XSCVDPSXDS f64:$src), xaddr:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), ixaddr:$dst, 8), + (STXSD (XSCVDPSXDS f64:$src), ixaddr:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 2), + (STXSIHX (XSCVDPSXWS f64:$src), xoaddr:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 1), + (STXSIBX (XSCVDPSXWS f64:$src), xoaddr:$dst)>; + + // Instructions for store(fptoui). + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xaddr:$dst, 8), + (STXSDX (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC), + xaddr:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), ixaddr:$dst, 8), + (STXSD (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC), + ixaddr:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 4), + (STXSIWX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 2), + (STXSIHX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 1), + (STXSIBX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xaddr:$dst, 8), + (STXSDX (XSCVDPUXDS f64:$src), xaddr:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), ixaddr:$dst, 8), + (STXSD (XSCVDPUXDS f64:$src), ixaddr:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 2), + (STXSIHX (XSCVDPUXWS f64:$src), xoaddr:$dst)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 1), + (STXSIBX (XSCVDPUXWS f64:$src), xoaddr:$dst)>; + + // Round & Convert QP -> DP/SP + def : Pat<(f64 (fpround f128:$src)), (f64 (XSCVQPDP $src))>; + def : Pat<(f32 (fpround f128:$src)), (f32 (XSRSP (XSCVQPDPO $src)))>; + + // Convert SP -> QP + def : Pat<(f128 (fpextend f32:$src)), + (f128 (XSCVDPQP (COPY_TO_REGCLASS $src, VFRC)))>; + +} // end HasP9Vector, AddedComplexity + +let AddedComplexity = 400 in { + let Predicates = [IsISA3_0, HasP9Vector, HasDirectMove, IsBigEndian] in { + def : Pat<(f128 (PPCbuild_fp128 i64:$rB, i64:$rA)), + (f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>; + } + let Predicates = [IsISA3_0, HasP9Vector, 
HasDirectMove, IsLittleEndian] in { + def : Pat<(f128 (PPCbuild_fp128 i64:$rA, i64:$rB)), + (f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>; + } +} + +let Predicates = [HasP9Vector] in { + let isPseudo = 1 in { + let mayStore = 1 in { + def SPILLTOVSR_STX : PseudoXFormMemOp<(outs), + (ins spilltovsrrc:$XT, memrr:$dst), + "#SPILLTOVSR_STX", []>; + def SPILLTOVSR_ST : Pseudo<(outs), (ins spilltovsrrc:$XT, memrix:$dst), + "#SPILLTOVSR_ST", []>; + } + let mayLoad = 1 in { + def SPILLTOVSR_LDX : PseudoXFormMemOp<(outs spilltovsrrc:$XT), + (ins memrr:$src), + "#SPILLTOVSR_LDX", []>; + def SPILLTOVSR_LD : Pseudo<(outs spilltovsrrc:$XT), (ins memrix:$src), + "#SPILLTOVSR_LD", []>; + + } + } +} +// Integer extend helper dags 32 -> 64 +def AnyExts { + dag A = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32); + dag B = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $B, sub_32); + dag C = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $C, sub_32); + dag D = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $D, sub_32); +} + +def DblToFlt { + dag A0 = (f32 (fpround (f64 (extractelt v2f64:$A, 0)))); + dag A1 = (f32 (fpround (f64 (extractelt v2f64:$A, 1)))); + dag B0 = (f32 (fpround (f64 (extractelt v2f64:$B, 0)))); + dag B1 = (f32 (fpround (f64 (extractelt v2f64:$B, 1)))); +} + +def ExtDbl { + dag A0S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$A, 0)))))); + dag A1S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$A, 1)))))); + dag B0S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$B, 0)))))); + dag B1S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$B, 1)))))); + dag A0U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$A, 0)))))); + dag A1U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$A, 1)))))); + dag B0U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$B, 0)))))); + dag B1U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$B, 1)))))); +} + +def ByteToWord { + dag LE_A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8)); + dag LE_A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8)); + dag LE_A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 8)), i8)); + dag LE_A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 12)), i8)); + dag BE_A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 3)), i8)); + dag BE_A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 7)), i8)); + dag BE_A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 11)), i8)); + dag BE_A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 15)), i8)); +} + +def ByteToDWord { + dag LE_A0 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v16i8:$A, 0)))), i8)); + dag LE_A1 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v16i8:$A, 8)))), i8)); + dag BE_A0 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v16i8:$A, 7)))), i8)); + dag BE_A1 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v16i8:$A, 15)))), i8)); +} + +def HWordToWord { + dag LE_A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 0)), i16)); + dag LE_A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 2)), i16)); + dag LE_A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 4)), i16)); + dag LE_A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 6)), i16)); + dag BE_A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 1)), i16)); + dag BE_A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 3)), i16)); + dag BE_A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 5)), i16)); + dag BE_A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 7)), i16)); +} + +def HWordToDWord { + dag LE_A0 
= (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v8i16:$A, 0)))), i16)); + dag LE_A1 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v8i16:$A, 4)))), i16)); + dag BE_A0 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v8i16:$A, 3)))), i16)); + dag BE_A1 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v8i16:$A, 7)))), i16)); +} + +def WordToDWord { + dag LE_A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 0)))); + dag LE_A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 2)))); + dag BE_A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 1)))); + dag BE_A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 3)))); +} + +def FltToIntLoad { + dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (extloadf32 xoaddr:$A))))); +} +def FltToUIntLoad { + dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (extloadf32 xoaddr:$A))))); +} +def FltToLongLoad { + dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 xoaddr:$A))))); +} +def FltToLongLoadP9 { + dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 ixaddr:$A))))); +} +def FltToULongLoad { + dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 xoaddr:$A))))); +} +def FltToULongLoadP9 { + dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 ixaddr:$A))))); +} +def FltToLong { + dag A = (i64 (PPCmfvsr (f64 (PPCfctidz (fpextend f32:$A))))); +} +def FltToULong { + dag A = (i64 (PPCmfvsr (f64 (PPCfctiduz (fpextend f32:$A))))); +} +def DblToInt { + dag A = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$A)))); + dag B = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$B)))); + dag C = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$C)))); + dag D = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$D)))); +} +def DblToUInt { + dag A = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$A)))); + dag B = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$B)))); + dag C = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$C)))); + dag D = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$D)))); +} +def DblToLong { + dag A = (i64 (PPCmfvsr (f64 (PPCfctidz f64:$A)))); +} +def DblToULong { + dag A = (i64 (PPCmfvsr (f64 (PPCfctiduz f64:$A)))); +} +def DblToIntLoad { + dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load xoaddr:$A))))); +} +def DblToIntLoadP9 { + dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load ixaddr:$A))))); +} +def DblToUIntLoad { + dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load xoaddr:$A))))); +} +def DblToUIntLoadP9 { + dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load ixaddr:$A))))); +} +def DblToLongLoad { + dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (load xoaddr:$A))))); +} +def DblToULongLoad { + dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (load xoaddr:$A))))); +} + +// FP merge dags (for f32 -> v4f32) +def MrgFP { + dag AC = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $A, VSRC), + (COPY_TO_REGCLASS $C, VSRC), 0)); + dag BD = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $B, VSRC), + (COPY_TO_REGCLASS $D, VSRC), 0)); + dag ABhToFlt = (XVCVDPSP (XXPERMDI $A, $B, 0)); + dag ABlToFlt = (XVCVDPSP (XXPERMDI $A, $B, 3)); + dag BAhToFlt = (XVCVDPSP (XXPERMDI $B, $A, 0)); + dag BAlToFlt = (XVCVDPSP (XXPERMDI $B, $A, 3)); +} + +// Word-element merge dags - conversions from f64 to i32 merged into vectors. +def MrgWords { + // For big endian, we merge low and hi doublewords (A, B). + dag A0B0 = (v2f64 (XXPERMDI v2f64:$A, v2f64:$B, 0)); + dag A1B1 = (v2f64 (XXPERMDI v2f64:$A, v2f64:$B, 3)); + dag CVA1B1S = (v4i32 (XVCVDPSXWS A1B1)); + dag CVA0B0S = (v4i32 (XVCVDPSXWS A0B0)); + dag CVA1B1U = (v4i32 (XVCVDPUXWS A1B1)); + dag CVA0B0U = (v4i32 (XVCVDPUXWS A0B0)); + + // For little endian, we merge low and hi doublewords (B, A). 
+ dag B1A1 = (v2f64 (XXPERMDI v2f64:$B, v2f64:$A, 0)); + dag B0A0 = (v2f64 (XXPERMDI v2f64:$B, v2f64:$A, 3)); + dag CVB1A1S = (v4i32 (XVCVDPSXWS B1A1)); + dag CVB0A0S = (v4i32 (XVCVDPSXWS B0A0)); + dag CVB1A1U = (v4i32 (XVCVDPUXWS B1A1)); + dag CVB0A0U = (v4i32 (XVCVDPUXWS B0A0)); + + // For big endian, we merge hi doublewords of (A, C) and (B, D), convert + // then merge. + dag AC = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$A, VSRC), + (COPY_TO_REGCLASS f64:$C, VSRC), 0)); + dag BD = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$B, VSRC), + (COPY_TO_REGCLASS f64:$D, VSRC), 0)); + dag CVACS = (v4i32 (XVCVDPSXWS AC)); + dag CVBDS = (v4i32 (XVCVDPSXWS BD)); + dag CVACU = (v4i32 (XVCVDPUXWS AC)); + dag CVBDU = (v4i32 (XVCVDPUXWS BD)); + + // For little endian, we merge hi doublewords of (D, B) and (C, A), convert + // then merge. + dag DB = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$D, VSRC), + (COPY_TO_REGCLASS f64:$B, VSRC), 0)); + dag CA = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$C, VSRC), + (COPY_TO_REGCLASS f64:$A, VSRC), 0)); + dag CVDBS = (v4i32 (XVCVDPSXWS DB)); + dag CVCAS = (v4i32 (XVCVDPSXWS CA)); + dag CVDBU = (v4i32 (XVCVDPUXWS DB)); + dag CVCAU = (v4i32 (XVCVDPUXWS CA)); +} + +// Patterns for BUILD_VECTOR nodes. +let AddedComplexity = 400 in { + + let Predicates = [HasVSX] in { + // Build vectors of floating point converted to i32. + def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.A, + DblToInt.A, DblToInt.A)), + (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS $A), VSRC), 1))>; + def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.A, + DblToUInt.A, DblToUInt.A)), + (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS $A), VSRC), 1))>; + def : Pat<(v2i64 (build_vector DblToLong.A, DblToLong.A)), + (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC), + (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC), 0))>; + def : Pat<(v2i64 (build_vector DblToULong.A, DblToULong.A)), + (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), + (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), 0))>; + def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)), + (v4i32 (XXSPLTW (COPY_TO_REGCLASS + (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>; + def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)), + (v4i32 (XXSPLTW (COPY_TO_REGCLASS + (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>; + def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)), + (v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>; + + // Build vectors of floating point converted to i64. + def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)), + (v2i64 (XXPERMDIs + (COPY_TO_REGCLASS (XSCVDPSXDSs $A), VSFRC), 0))>; + def : Pat<(v2i64 (build_vector FltToULong.A, FltToULong.A)), + (v2i64 (XXPERMDIs + (COPY_TO_REGCLASS (XSCVDPUXDSs $A), VSFRC), 0))>; + def : Pat<(v2i64 (scalar_to_vector DblToLongLoad.A)), + (v2i64 (XVCVDPSXDS (LXVDSX xoaddr:$A)))>; + def : Pat<(v2i64 (scalar_to_vector DblToULongLoad.A)), + (v2i64 (XVCVDPUXDS (LXVDSX xoaddr:$A)))>; + } + + let Predicates = [HasVSX, NoP9Vector] in { + // Load-and-splat with fp-to-int conversion (using X-Form VSX/FP loads). 
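+ // These are the pre-Power9 variants; the HasP9Vector block further down
+ // provides equivalents based on the D-Form DFLOADf32/DFLOADf64 pseudos
+ // and LXVWSX.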
+ def : Pat<(v4i32 (scalar_to_vector DblToIntLoad.A)), + (v4i32 (XXSPLTW (COPY_TO_REGCLASS + (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), VSRC), 1))>; + def : Pat<(v4i32 (scalar_to_vector DblToUIntLoad.A)), + (v4i32 (XXSPLTW (COPY_TO_REGCLASS + (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), VSRC), 1))>; + def : Pat<(v2i64 (scalar_to_vector FltToLongLoad.A)), + (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS + (XFLOADf32 xoaddr:$A), VSFRC)), 0))>; + def : Pat<(v2i64 (scalar_to_vector FltToULongLoad.A)), + (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS + (XFLOADf32 xoaddr:$A), VSFRC)), 0))>; + } + + // Big endian, available on all targets with VSX + let Predicates = [IsBigEndian, HasVSX] in { + def : Pat<(v2f64 (build_vector f64:$A, f64:$B)), + (v2f64 (XXPERMDI + (COPY_TO_REGCLASS $A, VSRC), + (COPY_TO_REGCLASS $B, VSRC), 0))>; + + def : Pat<(v4f32 (build_vector f32:$A, f32:$B, f32:$C, f32:$D)), + (VMRGEW MrgFP.AC, MrgFP.BD)>; + def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1, + DblToFlt.B0, DblToFlt.B1)), + (v4f32 (VMRGEW MrgFP.ABhToFlt, MrgFP.ABlToFlt))>; + + // Convert 4 doubles to a vector of ints. + def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B, + DblToInt.C, DblToInt.D)), + (v4i32 (VMRGEW MrgWords.CVACS, MrgWords.CVBDS))>; + def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B, + DblToUInt.C, DblToUInt.D)), + (v4i32 (VMRGEW MrgWords.CVACU, MrgWords.CVBDU))>; + def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S, + ExtDbl.B0S, ExtDbl.B1S)), + (v4i32 (VMRGEW MrgWords.CVA0B0S, MrgWords.CVA1B1S))>; + def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U, + ExtDbl.B0U, ExtDbl.B1U)), + (v4i32 (VMRGEW MrgWords.CVA0B0U, MrgWords.CVA1B1U))>; + } + + let Predicates = [IsLittleEndian, HasVSX] in { + // Little endian, available on all targets with VSX + def : Pat<(v2f64 (build_vector f64:$A, f64:$B)), + (v2f64 (XXPERMDI + (COPY_TO_REGCLASS $B, VSRC), + (COPY_TO_REGCLASS $A, VSRC), 0))>; + + def : Pat<(v4f32 (build_vector f32:$D, f32:$C, f32:$B, f32:$A)), + (VMRGEW MrgFP.AC, MrgFP.BD)>; + def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1, + DblToFlt.B0, DblToFlt.B1)), + (v4f32 (VMRGEW MrgFP.BAhToFlt, MrgFP.BAlToFlt))>; + + // Convert 4 doubles to a vector of ints. + def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B, + DblToInt.C, DblToInt.D)), + (v4i32 (VMRGEW MrgWords.CVDBS, MrgWords.CVCAS))>; + def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B, + DblToUInt.C, DblToUInt.D)), + (v4i32 (VMRGEW MrgWords.CVDBU, MrgWords.CVCAU))>; + def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S, + ExtDbl.B0S, ExtDbl.B1S)), + (v4i32 (VMRGEW MrgWords.CVB1A1S, MrgWords.CVB0A0S))>; + def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U, + ExtDbl.B0U, ExtDbl.B1U)), + (v4i32 (VMRGEW MrgWords.CVB1A1U, MrgWords.CVB0A0U))>; + } + + let Predicates = [HasDirectMove] in { + // Endianness-neutral constant splat on P8 and newer targets. The reason + // for this pattern is that on targets with direct moves, we don't expand + // BUILD_VECTOR nodes for v4i32. + def : Pat<(v4i32 (build_vector immSExt5NonZero:$A, immSExt5NonZero:$A, + immSExt5NonZero:$A, immSExt5NonZero:$A)), + (v4i32 (VSPLTISW imm:$A))>; + } + + let Predicates = [IsBigEndian, HasDirectMove, NoP9Vector] in { + // Big endian integer vectors using direct moves. 
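+ // Sketch of the expansions below: i64 elements are moved with MTVSRD and
+ // merged with XXPERMDI; i32 elements are moved with MTVSRWZ, paired with
+ // XXPERMDI, and interleaved into the final v4i32 with VMRGOW.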
+ def : Pat<(v2i64 (build_vector i64:$A, i64:$B)), + (v2i64 (XXPERMDI + (COPY_TO_REGCLASS (MTVSRD $A), VSRC), + (COPY_TO_REGCLASS (MTVSRD $B), VSRC), 0))>; + def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)), + (VMRGOW (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), + (COPY_TO_REGCLASS (MTVSRWZ $C), VSRC), 0), + (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC), + (COPY_TO_REGCLASS (MTVSRWZ $D), VSRC), 0))>; + def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)), + (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>; + } + + let Predicates = [IsLittleEndian, HasDirectMove, NoP9Vector] in { + // Little endian integer vectors using direct moves. + def : Pat<(v2i64 (build_vector i64:$A, i64:$B)), + (v2i64 (XXPERMDI + (COPY_TO_REGCLASS (MTVSRD $B), VSRC), + (COPY_TO_REGCLASS (MTVSRD $A), VSRC), 0))>; + def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)), + (VMRGOW (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $D), VSRC), + (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC), 0), + (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $C), VSRC), + (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 0))>; + def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)), + (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>; + } + + let Predicates = [HasP9Vector] in { + // Endianness-neutral patterns for const splats with ISA 3.0 instructions. + def : Pat<(v4i32 (scalar_to_vector i32:$A)), + (v4i32 (MTVSRWS $A))>; + def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)), + (v4i32 (MTVSRWS $A))>; + def : Pat<(v16i8 (build_vector immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, + immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, + immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, + immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, + immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, + immAnyExt8:$A)), + (v16i8 (COPY_TO_REGCLASS (XXSPLTIB imm:$A), VSRC))>; + def : Pat<(v16i8 immAllOnesV), + (v16i8 (COPY_TO_REGCLASS (XXSPLTIB 255), VSRC))>; + def : Pat<(v8i16 immAllOnesV), + (v8i16 (COPY_TO_REGCLASS (XXSPLTIB 255), VSRC))>; + def : Pat<(v4i32 immAllOnesV), + (v4i32 (XXSPLTIB 255))>; + def : Pat<(v2i64 immAllOnesV), + (v2i64 (XXSPLTIB 255))>; + def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)), + (v4i32 (XVCVSPSXWS (LXVWSX xoaddr:$A)))>; + def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)), + (v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>; + def : Pat<(v4i32 (scalar_to_vector DblToIntLoadP9.A)), + (v4i32 (XXSPLTW (COPY_TO_REGCLASS + (XSCVDPSXWS (DFLOADf64 ixaddr:$A)), VSRC), 1))>; + def : Pat<(v4i32 (scalar_to_vector DblToUIntLoadP9.A)), + (v4i32 (XXSPLTW (COPY_TO_REGCLASS + (XSCVDPUXWS (DFLOADf64 ixaddr:$A)), VSRC), 1))>; + def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)), + (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS + (DFLOADf32 ixaddr:$A), + VSFRC)), 0))>; + def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)), + (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS + (DFLOADf32 ixaddr:$A), + VSFRC)), 0))>; + } + + let Predicates = [IsISA3_0, HasDirectMove, IsBigEndian] in { + def : Pat<(i64 (extractelt v2i64:$A, 1)), + (i64 (MFVSRLD $A))>; + // Better way to build integer vectors if we have MTVSRDD. Big endian. 
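+ // MTVSRDD (ISA 3.0) moves two GPRs into one VSX register in a single
+ // instruction, replacing the MTVSRD + XXPERMDI sequence used in the
+ // NoP9Vector blocks above.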
+  def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)),
+            (v2i64 (MTVSRDD $rB, $rA))>;
+  def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
+            (VMRGOW
+              (v4i32 (COPY_TO_REGCLASS (MTVSRDD AnyExts.A, AnyExts.C), VSRC)),
+              (v4i32
+                (COPY_TO_REGCLASS (MTVSRDD AnyExts.B, AnyExts.D), VSRC)))>;
+  }
+
+  let Predicates = [IsISA3_0, HasDirectMove, IsLittleEndian] in {
+  def : Pat<(i64 (extractelt v2i64:$A, 0)),
+            (i64 (MFVSRLD $A))>;
+  // Better way to build integer vectors if we have MTVSRDD. Little endian.
+  def : Pat<(v2i64 (build_vector i64:$rA, i64:$rB)),
+            (v2i64 (MTVSRDD $rB, $rA))>;
+  def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
+            (VMRGOW
+              (v4i32 (COPY_TO_REGCLASS (MTVSRDD AnyExts.D, AnyExts.B), VSRC)),
+              (v4i32
+                (COPY_TO_REGCLASS (MTVSRDD AnyExts.C, AnyExts.A), VSRC)))>;
+  }
+  // P9 Altivec instructions that can be used to build vectors.
+  // These are added to PPCInstrVSX.td rather than PPCAltivecVSX.td so that
+  // their pattern complexity can compete with that of the existing build
+  // vector patterns in this file.
+  let Predicates = [HasP9Altivec, IsLittleEndian] in {
+  def : Pat<(v2i64 (build_vector WordToDWord.LE_A0, WordToDWord.LE_A1)),
+            (v2i64 (VEXTSW2D $A))>;
+  def : Pat<(v2i64 (build_vector HWordToDWord.LE_A0, HWordToDWord.LE_A1)),
+            (v2i64 (VEXTSH2D $A))>;
+  def : Pat<(v4i32 (build_vector HWordToWord.LE_A0, HWordToWord.LE_A1,
+                    HWordToWord.LE_A2, HWordToWord.LE_A3)),
+            (v4i32 (VEXTSH2W $A))>;
+  def : Pat<(v4i32 (build_vector ByteToWord.LE_A0, ByteToWord.LE_A1,
+                    ByteToWord.LE_A2, ByteToWord.LE_A3)),
+            (v4i32 (VEXTSB2W $A))>;
+  def : Pat<(v2i64 (build_vector ByteToDWord.LE_A0, ByteToDWord.LE_A1)),
+            (v2i64 (VEXTSB2D $A))>;
+  }
+
+  let Predicates = [HasP9Altivec, IsBigEndian] in {
+  def : Pat<(v2i64 (build_vector WordToDWord.BE_A0, WordToDWord.BE_A1)),
+            (v2i64 (VEXTSW2D $A))>;
+  def : Pat<(v2i64 (build_vector HWordToDWord.BE_A0, HWordToDWord.BE_A1)),
+            (v2i64 (VEXTSH2D $A))>;
+  def : Pat<(v4i32 (build_vector HWordToWord.BE_A0, HWordToWord.BE_A1,
+                    HWordToWord.BE_A2, HWordToWord.BE_A3)),
+            (v4i32 (VEXTSH2W $A))>;
+  def : Pat<(v4i32 (build_vector ByteToWord.BE_A0, ByteToWord.BE_A1,
+                    ByteToWord.BE_A2, ByteToWord.BE_A3)),
+            (v4i32 (VEXTSB2W $A))>;
+  def : Pat<(v2i64 (build_vector ByteToDWord.BE_A0, ByteToDWord.BE_A1)),
+            (v2i64 (VEXTSB2D $A))>;
+  }
+
+  let Predicates = [HasP9Altivec] in {
+  def : Pat<(v2i64 (PPCSExtVElems v16i8:$A)),
+            (v2i64 (VEXTSB2D $A))>;
+  def : Pat<(v2i64 (PPCSExtVElems v8i16:$A)),
+            (v2i64 (VEXTSH2D $A))>;
+  def : Pat<(v2i64 (PPCSExtVElems v4i32:$A)),
+            (v2i64 (VEXTSW2D $A))>;
+  def : Pat<(v4i32 (PPCSExtVElems v16i8:$A)),
+            (v4i32 (VEXTSB2W $A))>;
+  def : Pat<(v4i32 (PPCSExtVElems v8i16:$A)),
+            (v4i32 (VEXTSH2W $A))>;
+  }
+}
+
diff --git a/capstone/suite/synctools/tablegen/PPC/PPCRegisterInfo.td b/capstone/suite/synctools/tablegen/PPC/PPCRegisterInfo.td
new file mode 100644
index 000000000..e88590f6b
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/PPC/PPCRegisterInfo.td
@@ -0,0 +1,386 @@
+//===-- PPCRegisterInfo.td - The PowerPC Register File -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +let Namespace = "PPC" in { +def sub_lt : SubRegIndex<1>; +def sub_gt : SubRegIndex<1, 1>; +def sub_eq : SubRegIndex<1, 2>; +def sub_un : SubRegIndex<1, 3>; +def sub_32 : SubRegIndex<32>; +def sub_64 : SubRegIndex<64>; +} + + +class PPCReg<string n> : Register<n> { + let Namespace = "PPC"; +} + +// We identify all our registers with a 5-bit ID, for consistency's sake. + +// GPR - One of the 32 32-bit general-purpose registers +class GPR<bits<5> num, string n> : PPCReg<n> { + let HWEncoding{4-0} = num; +} + +// GP8 - One of the 32 64-bit general-purpose registers +class GP8<GPR SubReg, string n> : PPCReg<n> { + let HWEncoding = SubReg.HWEncoding; + let SubRegs = [SubReg]; + let SubRegIndices = [sub_32]; +} + +// SPE - One of the 32 64-bit general-purpose registers (SPE) +class SPE<GPR SubReg, string n> : PPCReg<n> { + let HWEncoding = SubReg.HWEncoding; + let SubRegs = [SubReg]; + let SubRegIndices = [sub_32]; +} + +// SPR - One of the 32-bit special-purpose registers +class SPR<bits<10> num, string n> : PPCReg<n> { + let HWEncoding{9-0} = num; +} + +// FPR - One of the 32 64-bit floating-point registers +class FPR<bits<5> num, string n> : PPCReg<n> { + let HWEncoding{4-0} = num; +} + +// QFPR - One of the 32 256-bit floating-point vector registers (used for QPX) +class QFPR<FPR SubReg, string n> : PPCReg<n> { + let HWEncoding = SubReg.HWEncoding; + let SubRegs = [SubReg]; + let SubRegIndices = [sub_64]; +} + +// VF - One of the 32 64-bit floating-point subregisters of the vector +// registers (used by VSX). +class VF<bits<5> num, string n> : PPCReg<n> { + let HWEncoding{4-0} = num; + let HWEncoding{5} = 1; +} + +// VR - One of the 32 128-bit vector registers +class VR<VF SubReg, string n> : PPCReg<n> { + let HWEncoding{4-0} = SubReg.HWEncoding{4-0}; + let HWEncoding{5} = 0; + let SubRegs = [SubReg]; + let SubRegIndices = [sub_64]; +} + +// VSRL - One of the 32 128-bit VSX registers that overlap with the scalar +// floating-point registers. +class VSRL<FPR SubReg, string n> : PPCReg<n> { + let HWEncoding = SubReg.HWEncoding; + let SubRegs = [SubReg]; + let SubRegIndices = [sub_64]; +} + +// CR - One of the 8 4-bit condition registers +class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> { + let HWEncoding{2-0} = num; + let SubRegs = subregs; +} + +// CRBIT - One of the 32 1-bit condition register fields +class CRBIT<bits<5> num, string n> : PPCReg<n> { + let HWEncoding{4-0} = num; +} + +// General-purpose registers +foreach Index = 0-31 in { + def R#Index : GPR<Index, "r"#Index>, DwarfRegNum<[-2, Index]>; +} + +// 64-bit General-purpose registers +foreach Index = 0-31 in { + def X#Index : GP8<!cast<GPR>("R"#Index), "r"#Index>, + DwarfRegNum<[Index, -2]>; +} + +// SPE registers +foreach Index = 0-31 in { + def S#Index : SPE<!cast<GPR>("R"#Index), "r"#Index>, + DwarfRegNum<[!add(Index, 1200), !add(Index, 1200)]>; +} + +// Floating-point registers +foreach Index = 0-31 in { + def F#Index : FPR<Index, "f"#Index>, + DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>; +} + +// 64-bit Floating-point subregisters of Altivec registers +// Note: the register names are v0-v31 or vs32-vs63 depending on the use. +// Custom C++ code is used to produce the correct name and encoding. 
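+// For illustration, iteration 3 of the loop below expands to:
+//   def VF3 : VF<3, "v3">, DwarfRegNum<[80, 80]>;
+// (since !add(3, 77) = 80); later loops refer back to these defs by name
+// via !cast.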
+foreach Index = 0-31 in {
+  def VF#Index : VF<Index, "v" #Index>,
+                 DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
+}
+
+// QPX Floating-point registers
+foreach Index = 0-31 in {
+  def QF#Index : QFPR<!cast<FPR>("F"#Index), "q"#Index>,
+                 DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
+}
+
+// Vector registers
+foreach Index = 0-31 in {
+  def V#Index : VR<!cast<VF>("VF"#Index), "v"#Index>,
+                DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
+}
+
+// VSX registers
+foreach Index = 0-31 in {
+  def VSL#Index : VSRL<!cast<FPR>("F"#Index), "vs"#Index>,
+                  DwarfRegAlias<!cast<FPR>("F"#Index)>;
+}
+
+// Dummy VSX registers; these define the strings "vs32"-"vs63" and are used
+// only for asm printing.
+foreach Index = 32-63 in {
+  def VSX#Index : PPCReg<"vs"#Index>;
+}
+
+// The representation of r0 when treated as the constant 0.
+def ZERO  : GPR<0, "0">, DwarfRegAlias<R0>;
+def ZERO8 : GP8<ZERO, "0">, DwarfRegAlias<X0>;
+
+// Representations of the frame pointer used by ISD::FRAMEADDR.
+def FP   : GPR<0 /* arbitrary */, "**FRAME POINTER**">;
+def FP8  : GP8<FP, "**FRAME POINTER**">;
+
+// Representations of the base pointer used by setjmp.
+def BP   : GPR<0 /* arbitrary */, "**BASE POINTER**">;
+def BP8  : GP8<BP, "**BASE POINTER**">;
+
+// Condition register bits
+def CR0LT : CRBIT< 0, "cr0lt">;
+def CR0GT : CRBIT< 1, "cr0gt">;
+def CR0EQ : CRBIT< 2, "cr0eq">;
+def CR0UN : CRBIT< 3, "cr0un">;
+def CR1LT : CRBIT< 4, "cr1lt">;
+def CR1GT : CRBIT< 5, "cr1gt">;
+def CR1EQ : CRBIT< 6, "cr1eq">;
+def CR1UN : CRBIT< 7, "cr1un">;
+def CR2LT : CRBIT< 8, "cr2lt">;
+def CR2GT : CRBIT< 9, "cr2gt">;
+def CR2EQ : CRBIT<10, "cr2eq">;
+def CR2UN : CRBIT<11, "cr2un">;
+def CR3LT : CRBIT<12, "cr3lt">;
+def CR3GT : CRBIT<13, "cr3gt">;
+def CR3EQ : CRBIT<14, "cr3eq">;
+def CR3UN : CRBIT<15, "cr3un">;
+def CR4LT : CRBIT<16, "cr4lt">;
+def CR4GT : CRBIT<17, "cr4gt">;
+def CR4EQ : CRBIT<18, "cr4eq">;
+def CR4UN : CRBIT<19, "cr4un">;
+def CR5LT : CRBIT<20, "cr5lt">;
+def CR5GT : CRBIT<21, "cr5gt">;
+def CR5EQ : CRBIT<22, "cr5eq">;
+def CR5UN : CRBIT<23, "cr5un">;
+def CR6LT : CRBIT<24, "cr6lt">;
+def CR6GT : CRBIT<25, "cr6gt">;
+def CR6EQ : CRBIT<26, "cr6eq">;
+def CR6UN : CRBIT<27, "cr6un">;
+def CR7LT : CRBIT<28, "cr7lt">;
+def CR7GT : CRBIT<29, "cr7gt">;
+def CR7EQ : CRBIT<30, "cr7eq">;
+def CR7UN : CRBIT<31, "cr7un">;
+
+// Condition registers
+let SubRegIndices = [sub_lt, sub_gt, sub_eq, sub_un] in {
+def CR0 : CR<0, "cr0", [CR0LT, CR0GT, CR0EQ, CR0UN]>, DwarfRegNum<[68, 68]>;
+def CR1 : CR<1, "cr1", [CR1LT, CR1GT, CR1EQ, CR1UN]>, DwarfRegNum<[69, 69]>;
+def CR2 : CR<2, "cr2", [CR2LT, CR2GT, CR2EQ, CR2UN]>, DwarfRegNum<[70, 70]>;
+def CR3 : CR<3, "cr3", [CR3LT, CR3GT, CR3EQ, CR3UN]>, DwarfRegNum<[71, 71]>;
+def CR4 : CR<4, "cr4", [CR4LT, CR4GT, CR4EQ, CR4UN]>, DwarfRegNum<[72, 72]>;
+def CR5 : CR<5, "cr5", [CR5LT, CR5GT, CR5EQ, CR5UN]>, DwarfRegNum<[73, 73]>;
+def CR6 : CR<6, "cr6", [CR6LT, CR6GT, CR6EQ, CR6UN]>, DwarfRegNum<[74, 74]>;
+def CR7 : CR<7, "cr7", [CR7LT, CR7GT, CR7EQ, CR7UN]>, DwarfRegNum<[75, 75]>;
+}
+
+// Link register
+def LR  : SPR<8, "lr">, DwarfRegNum<[-2, 65]>;
+//let Aliases = [LR] in
+def LR8 : SPR<8, "lr">, DwarfRegNum<[65, -2]>;
+
+// Count register
+def CTR  : SPR<9, "ctr">, DwarfRegNum<[-2, 66]>;
+def CTR8 : SPR<9, "ctr">, DwarfRegNum<[66, -2]>;
+
+// VRsave register
+def VRSAVE: SPR<256, "vrsave">, DwarfRegNum<[109]>;
+
+// SPE extra registers
+// SPE Accumulator for multiply-accumulate SPE operations. Never directly
+// accessed, so there's no real encoding for it.
+def SPEACC: DwarfRegNum<[99, 111]>;
+def SPEFSCR: SPR<512, "spefscr">, DwarfRegNum<[612, 112]>;
+
+def XER: SPR<1, "xer">, DwarfRegNum<[76]>;
+
+// Carry bit. In the architecture this is really bit 0 of the XER register
+// (which is SPR register 1); this is the only bit of that register that is
+// interesting to the compiler.
+def CARRY: SPR<1, "xer">, DwarfRegNum<[76]> {
+  let Aliases = [XER];
+}
+
+// FP rounding mode: bits 30 and 31 of the FP status and control register.
+// This is not allocated as a normal register; it appears only in Uses and
+// Defs. The ABI says it needs to be preserved by a function, but this is not
+// achieved by saving and restoring it as with most registers; it has to be
+// done in code. To make this work, all the return and call instructions are
+// described as Uses of RM, so instructions that do nothing but change RM will
+// not get deleted.
+def RM: PPCReg<"**ROUNDING MODE**">;
+
+/// Register classes
+// Allocate volatiles first,
+// then non-volatiles in reverse order, since stmw/lmw save from rN to r31.
+def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12),
+                                                (sequence "R%u", 30, 13),
+                                                R31, R0, R1, FP, BP)> {
+  // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so
+  // put it at the end of the list.
+  let AltOrders = [(add (sub GPRC, R2), R2)];
+  let AltOrderSelect = [{
+    const PPCSubtarget &S = MF.getSubtarget<PPCSubtarget>();
+    return S.isPPC64() && S.isSVR4ABI();
+  }];
+}
+
+def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12),
+                                                (sequence "X%u", 30, 14),
+                                                X31, X13, X0, X1, FP8, BP8)> {
+  // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so
+  // put it at the end of the list.
+  let AltOrders = [(add (sub G8RC, X2), X2)];
+  let AltOrderSelect = [{
+    const PPCSubtarget &S = MF.getSubtarget<PPCSubtarget>();
+    return S.isPPC64() && S.isSVR4ABI();
+  }];
+}
+
+// For some instructions r0 is special (representing the value 0 instead of
+// the value in the r0 register), and we use these register subclasses to
+// prevent r0 from being allocated for use by those instructions.
+def GPRC_NOR0 : RegisterClass<"PPC", [i32], 32, (add (sub GPRC, R0), ZERO)> {
+  // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so
+  // put it at the end of the list.
+  let AltOrders = [(add (sub GPRC_NOR0, R2), R2)];
+  let AltOrderSelect = [{
+    const PPCSubtarget &S = MF.getSubtarget<PPCSubtarget>();
+    return S.isPPC64() && S.isSVR4ABI();
+  }];
+}
+
+def G8RC_NOX0 : RegisterClass<"PPC", [i64], 64, (add (sub G8RC, X0), ZERO8)> {
+  // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so
+  // put it at the end of the list.
+  let AltOrders = [(add (sub G8RC_NOX0, X2), X2)];
+  let AltOrderSelect = [{
+    const PPCSubtarget &S = MF.getSubtarget<PPCSubtarget>();
+    return S.isPPC64() && S.isSVR4ABI();
+  }];
+}
+
+def SPERC : RegisterClass<"PPC", [f64], 64, (add (sequence "S%u", 2, 12),
+                                                 (sequence "S%u", 30, 13),
+                                                 S31, S0, S1)>;
+
+def SPE4RC : RegisterClass<"PPC", [f32], 32, (add GPRC)>;
+
+// Allocate volatiles first, then non-volatiles in reverse order. With the SVR4
+// ABI the size of the Floating-point register save area is determined by the
+// allocated non-volatile register with the lowest register number, as FP
+// register N is spilled to offset 8 * (32 - N) below the back chain word of the
+// previous stack frame.
+// By allocating non-volatiles in reverse order we make sure that the
+// Floating-point register save area is always as small as possible, because
+// there aren't any unused spill slots. For example, if F27 is the
+// lowest-numbered non-volatile register that ends up being allocated, the
+// save area spans 8 * (32 - 27) = 40 bytes.
+def F8RC : RegisterClass<"PPC", [f64], 64, (add (sequence "F%u", 0, 13),
+                                                (sequence "F%u", 31, 14))>;
+def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>;
+
+def VRRC : RegisterClass<"PPC",
+                         [v16i8,v8i16,v4i32,v2i64,v1i128,v4f32,v2f64, f128],
+                         128,
+                         (add V2, V3, V4, V5, V0, V1, V6, V7, V8, V9, V10, V11,
+                             V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
+                             V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>;
+
+// VSX register classes (the allocation order mirrors that of the corresponding
+// subregister classes).
+def VSLRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
+                          (add (sequence "VSL%u", 0, 13),
+                               (sequence "VSL%u", 31, 14))>;
+def VSRC  : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
+                          (add VSLRC, VRRC)>;
+
+// Register classes for the 64-bit "scalar" VSX subregisters.
+def VFRC : RegisterClass<"PPC", [f64], 64,
+                         (add VF2, VF3, VF4, VF5, VF0, VF1, VF6, VF7,
+                              VF8, VF9, VF10, VF11, VF12, VF13, VF14,
+                              VF15, VF16, VF17, VF18, VF19, VF31, VF30,
+                              VF29, VF28, VF27, VF26, VF25, VF24, VF23,
+                              VF22, VF21, VF20)>;
+def VSFRC : RegisterClass<"PPC", [f64], 64, (add F8RC, VFRC)>;
+
+// Allow spilling GPRs into caller-saved VSRs.
+def SPILLTOVSRRC : RegisterClass<"PPC", [i64, f64], 64, (add G8RC, (sub VSFRC,
+                                 (sequence "VF%u", 31, 20),
+                                 (sequence "F%u", 31, 14)))>;
+
+// Register class for single-precision scalars in VSX registers
+def VSSRC : RegisterClass<"PPC", [f32], 32, (add VSFRC)>;
+
+// For QPX
+def QFRC : RegisterClass<"PPC", [v4f64], 256, (add (sequence "QF%u", 0, 13),
+                                                   (sequence "QF%u", 31, 14))>;
+def QSRC : RegisterClass<"PPC", [v4f32], 128, (add QFRC)>;
+def QBRC : RegisterClass<"PPC", [v4i1], 256, (add QFRC)> {
+  // These are actually stored as floating-point values where a positive
+  // number is true and anything else (including NaN) is false.
+  let Size = 256;
+}
+
+def CRBITRC : RegisterClass<"PPC", [i1], 32,
+  (add CR2LT, CR2GT, CR2EQ, CR2UN,
+       CR3LT, CR3GT, CR3EQ, CR3UN,
+       CR4LT, CR4GT, CR4EQ, CR4UN,
+       CR5LT, CR5GT, CR5EQ, CR5UN,
+       CR6LT, CR6GT, CR6EQ, CR6UN,
+       CR7LT, CR7GT, CR7EQ, CR7UN,
+       CR1LT, CR1GT, CR1EQ, CR1UN,
+       CR0LT, CR0GT, CR0EQ, CR0UN)> {
+  let Size = 32;
+}
+
+def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6,
+                                                CR7, CR2, CR3, CR4)>;
+
+// The CTR registers are not allocatable because they're used by the
+// decrement-and-branch instructions, and thus need to stay live across
+// multiple basic blocks.
+def CTRRC : RegisterClass<"PPC", [i32], 32, (add CTR)> {
+  let isAllocatable = 0;
+}
+def CTRRC8 : RegisterClass<"PPC", [i64], 64, (add CTR8)> {
+  let isAllocatable = 0;
+}
+
+def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>;
+def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY, XER)> {
+  let CopyCost = -1;
+}
+
diff --git a/capstone/suite/synctools/tablegen/PPC/PPCSchedule.td b/capstone/suite/synctools/tablegen/PPC/PPCSchedule.td
new file mode 100644
index 000000000..5ad0a517c
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/PPC/PPCSchedule.td
@@ -0,0 +1,140 @@
+//===-- PPCSchedule.td - PowerPC Scheduling Definitions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Instruction Itinerary classes used for PowerPC +// +def IIC_IntSimple : InstrItinClass; +def IIC_IntGeneral : InstrItinClass; +def IIC_IntCompare : InstrItinClass; +def IIC_IntISEL : InstrItinClass; +def IIC_IntDivD : InstrItinClass; +def IIC_IntDivW : InstrItinClass; +def IIC_IntMFFS : InstrItinClass; +def IIC_IntMFVSCR : InstrItinClass; +def IIC_IntMTFSB0 : InstrItinClass; +def IIC_IntMTSRD : InstrItinClass; +def IIC_IntMulHD : InstrItinClass; +def IIC_IntMulHW : InstrItinClass; +def IIC_IntMulHWU : InstrItinClass; +def IIC_IntMulLI : InstrItinClass; +def IIC_IntRFID : InstrItinClass; +def IIC_IntRotateD : InstrItinClass; +def IIC_IntRotateDI : InstrItinClass; +def IIC_IntRotate : InstrItinClass; +def IIC_IntShift : InstrItinClass; +def IIC_IntTrapD : InstrItinClass; +def IIC_IntTrapW : InstrItinClass; +def IIC_BrB : InstrItinClass; +def IIC_BrCR : InstrItinClass; +def IIC_BrMCR : InstrItinClass; +def IIC_BrMCRX : InstrItinClass; +def IIC_LdStDCBA : InstrItinClass; +def IIC_LdStDCBF : InstrItinClass; +def IIC_LdStDCBI : InstrItinClass; +def IIC_LdStLoad : InstrItinClass; +def IIC_LdStLoadUpd : InstrItinClass; +def IIC_LdStLoadUpdX : InstrItinClass; +def IIC_LdStStore : InstrItinClass; +def IIC_LdStStoreUpd : InstrItinClass; +def IIC_LdStDSS : InstrItinClass; +def IIC_LdStICBI : InstrItinClass; +def IIC_LdStLD : InstrItinClass; +def IIC_LdStLDU : InstrItinClass; +def IIC_LdStLDUX : InstrItinClass; +def IIC_LdStLDARX : InstrItinClass; +def IIC_LdStLFD : InstrItinClass; +def IIC_LdStLFDU : InstrItinClass; +def IIC_LdStLFDUX : InstrItinClass; +def IIC_LdStLHA : InstrItinClass; +def IIC_LdStLHAU : InstrItinClass; +def IIC_LdStLHAUX : InstrItinClass; +def IIC_LdStLMW : InstrItinClass; +def IIC_LdStLVecX : InstrItinClass; +def IIC_LdStLWA : InstrItinClass; +def IIC_LdStLWARX : InstrItinClass; +def IIC_LdStSLBIA : InstrItinClass; +def IIC_LdStSLBIE : InstrItinClass; +def IIC_LdStSTD : InstrItinClass; +def IIC_LdStSTDCX : InstrItinClass; +def IIC_LdStSTDU : InstrItinClass; +def IIC_LdStSTDUX : InstrItinClass; +def IIC_LdStSTFD : InstrItinClass; +def IIC_LdStSTFDU : InstrItinClass; +def IIC_LdStSTVEBX : InstrItinClass; +def IIC_LdStSTWCX : InstrItinClass; +def IIC_LdStSync : InstrItinClass; +def IIC_LdStCOPY : InstrItinClass; +def IIC_LdStPASTE : InstrItinClass; +def IIC_SprISYNC : InstrItinClass; +def IIC_SprMFSR : InstrItinClass; +def IIC_SprMTMSR : InstrItinClass; +def IIC_SprMTSR : InstrItinClass; +def IIC_SprTLBSYNC : InstrItinClass; +def IIC_SprMFCR : InstrItinClass; +def IIC_SprMFCRF : InstrItinClass; +def IIC_SprMFMSR : InstrItinClass; +def IIC_SprMFSPR : InstrItinClass; +def IIC_SprMFTB : InstrItinClass; +def IIC_SprMTSPR : InstrItinClass; +def IIC_SprMTSRIN : InstrItinClass; +def IIC_SprRFI : InstrItinClass; +def IIC_SprSC : InstrItinClass; +def IIC_FPGeneral : InstrItinClass; +def IIC_FPDGeneral : InstrItinClass; +def IIC_FPSGeneral : InstrItinClass; +def IIC_FPAddSub : InstrItinClass; +def IIC_FPCompare : InstrItinClass; +def IIC_FPDivD : InstrItinClass; +def IIC_FPDivS : InstrItinClass; +def IIC_FPFused : InstrItinClass; +def IIC_FPRes : InstrItinClass; +def IIC_FPSqrtD : InstrItinClass; +def IIC_FPSqrtS : InstrItinClass; +def IIC_VecGeneral : InstrItinClass; +def IIC_VecFP : InstrItinClass; +def IIC_VecFPCompare : InstrItinClass; +def IIC_VecComplex : InstrItinClass; +def IIC_VecPerm : InstrItinClass; +def 
IIC_VecFPRound : InstrItinClass; +def IIC_VecVSL : InstrItinClass; +def IIC_VecVSR : InstrItinClass; +def IIC_SprMTMSRD : InstrItinClass; +def IIC_SprSLIE : InstrItinClass; +def IIC_SprSLBIE : InstrItinClass; +def IIC_SprSLBIEG : InstrItinClass; +def IIC_SprSLBMTE : InstrItinClass; +def IIC_SprSLBMFEE : InstrItinClass; +def IIC_SprSLBMFEV : InstrItinClass; +def IIC_SprSLBIA : InstrItinClass; +def IIC_SprSLBSYNC : InstrItinClass; +def IIC_SprTLBIA : InstrItinClass; +def IIC_SprTLBIEL : InstrItinClass; +def IIC_SprTLBIE : InstrItinClass; +def IIC_SprABORT : InstrItinClass; +def IIC_SprMSGSYNC : InstrItinClass; +def IIC_SprSTOP : InstrItinClass; +def IIC_SprMFPMR : InstrItinClass; +def IIC_SprMTPMR : InstrItinClass; + +//===----------------------------------------------------------------------===// +// Processor instruction itineraries. + +include "PPCScheduleG3.td" +include "PPCSchedule440.td" +include "PPCScheduleG4.td" +include "PPCScheduleG4Plus.td" +include "PPCScheduleG5.td" +include "PPCScheduleP7.td" +include "PPCScheduleP8.td" +include "PPCScheduleP9.td" +include "PPCScheduleA2.td" +include "PPCScheduleE500.td" +include "PPCScheduleE500mc.td" +include "PPCScheduleE5500.td" diff --git a/capstone/suite/synctools/tablegen/PPC/PPCSchedule440.td b/capstone/suite/synctools/tablegen/PPC/PPCSchedule440.td new file mode 100644 index 000000000..2455e5e52 --- /dev/null +++ b/capstone/suite/synctools/tablegen/PPC/PPCSchedule440.td @@ -0,0 +1,608 @@ +//===-- PPCSchedule440.td - PPC 440 Scheduling Definitions -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// Primary reference: +// PowerPC 440x6 Embedded Processor Core User's Manual. +// IBM (as updated in) 2010. + +// The basic PPC 440 does not include a floating-point unit; the pipeline +// timings here are constructed to match the FP2 unit shipped with the +// PPC-440- and PPC-450-based Blue Gene (L and P) supercomputers. +// References: +// S. Chatterjee, et al. Design and exploitation of a high-performance +// SIMD floating-point unit for Blue Gene/L. +// IBM J. Res. & Dev. 49 (2/3) March/May 2005. +// also: +// Carlos Sosa and Brant Knudson. IBM System Blue Gene Solution: +// Blue Gene/P Application Development. +// IBM (as updated in) 2009. 
+
+//===----------------------------------------------------------------------===//
+// Functional units on the PowerPC 440/450 chip sets
+//
+def P440_DISS1 : FuncUnit; // Issue unit 1
+def P440_DISS2 : FuncUnit; // Issue unit 2
+def P440_LRACC : FuncUnit; // Register access and dispatch for
+                           // the simple integer (J-pipe) and
+                           // load/store (L-pipe) pipelines
+def P440_IRACC : FuncUnit; // Register access and dispatch for
+                           // the complex integer (I-pipe) pipeline
+def P440_FRACC : FuncUnit; // Register access and dispatch for
+                           // the floating-point execution (F-pipe) pipeline
+def P440_IEXE1 : FuncUnit; // Execution stage 1 for the I pipeline
+def P440_IEXE2 : FuncUnit; // Execution stage 2 for the I pipeline
+def P440_IWB   : FuncUnit; // Write-back unit for the I pipeline
+def P440_JEXE1 : FuncUnit; // Execution stage 1 for the J pipeline
+def P440_JEXE2 : FuncUnit; // Execution stage 2 for the J pipeline
+def P440_JWB   : FuncUnit; // Write-back unit for the J pipeline
+def P440_AGEN  : FuncUnit; // Address generation for the L pipeline
+def P440_CRD   : FuncUnit; // D-cache access for the L pipeline
+def P440_LWB   : FuncUnit; // Write-back unit for the L pipeline
+def P440_FEXE1 : FuncUnit; // Execution stage 1 for the F pipeline
+def P440_FEXE2 : FuncUnit; // Execution stage 2 for the F pipeline
+def P440_FEXE3 : FuncUnit; // Execution stage 3 for the F pipeline
+def P440_FEXE4 : FuncUnit; // Execution stage 4 for the F pipeline
+def P440_FEXE5 : FuncUnit; // Execution stage 5 for the F pipeline
+def P440_FEXE6 : FuncUnit; // Execution stage 6 for the F pipeline
+def P440_FWB   : FuncUnit; // Write-back unit for the F pipeline
+
+def P440_LWARX_Hold : FuncUnit; // This is a pseudo-unit which is used
+                                // to make sure that no lwarx/stwcx.
+                                // instructions are issued while another
+                                // lwarx/stwcx. is in the L pipe.
+
+def P440_GPR_Bypass : Bypass; // The bypass for general-purpose regs.
+def P440_FPR_Bypass : Bypass; // The bypass for floating-point regs.
+
+// Notes:
+// Instructions are held in the FRACC, LRACC and IRACC pipeline
+// stages until their source operands become ready. Exceptions:
+//  - Store instructions will hold in the AGEN stage
+//  - The integer multiply-accumulate instruction will hold in
+//    the IEXE1 stage
+//
+// For most I-pipe operations, the result is available at the end of
+// the IEXE1 stage. Operations such as multiply and divide must
+// continue to execute in IEXE2 and IWB. Divide resides in IWB for
+// 33 cycles (multiply also calculates its result in IWB). For all
+// J-pipe instructions, the result is available
+// at the end of the JEXE1 stage. Loads have a 3-cycle latency
+// (data is not available until after the LWB stage).
+//
+// The L1 cache hit latency is four cycles for floating point loads
+// and three cycles for integer loads.
+//
+// The stwcx. instruction requires both the LRACC and the IRACC
+// dispatch stages. It must be issued from DISS1 (the itineraries below
+// therefore use P440_DISS1 alone for it).
+//
+// All lwarx/stwcx. instructions hold in LRACC if another
+// uncommitted lwarx/stwcx. is in AGEN, CRD, or LWB.
+//
+// msync (a.k.a. sync) and mbar will hold in LWB until all load/store
+// resources are empty. AGEN and CRD are held empty until the msync/mbar
+// commits.
+//
+// Most floating-point instructions, computational and move,
+// have a 5-cycle latency. Divide takes longer (30 cycles). Instructions that
+// update the CR take 2 cycles. Stores take 3 cycles and, as mentioned above,
+// loads take 4 cycles (for L1 hit).
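+//
+// How to read the itinerary entries below: an entry such as
+//   InstrItinData<IIC_IntSimple, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+//                                 ...,
+//                                 InstrStage<1, [P440_IWB, P440_JWB]>],
+//                 [2, 0, 0],
+//                 [P440_GPR_Bypass, P440_GPR_Bypass, P440_GPR_Bypass]>
+// says the instruction occupies each listed stage for one cycle (using any
+// one unit from each bracketed set), its result (operand 0) is available 2
+// cycles after issue, its source operands are read at cycle 0, and all three
+// operands may use the GPR bypass.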
+ +// +// This file defines the itinerary class data for the PPC 440 processor. +// +//===----------------------------------------------------------------------===// + + +def PPC440Itineraries : ProcessorItineraries< + [P440_DISS1, P440_DISS2, P440_FRACC, P440_IRACC, P440_IEXE1, P440_IEXE2, + P440_IWB, P440_LRACC, P440_JEXE1, P440_JEXE2, P440_JWB, P440_AGEN, P440_CRD, + P440_LWB, P440_FEXE1, P440_FEXE2, P440_FEXE3, P440_FEXE4, P440_FEXE5, + P440_FEXE6, P440_FWB, P440_LWARX_Hold], + [P440_GPR_Bypass, P440_FPR_Bypass], [ + InstrItinData<IIC_IntSimple, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC, P440_LRACC]>, + InstrStage<1, [P440_IEXE1, P440_JEXE1]>, + InstrStage<1, [P440_IEXE2, P440_JEXE2]>, + InstrStage<1, [P440_IWB, P440_JWB]>], + [2, 0, 0], + [P440_GPR_Bypass, + P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_IntGeneral, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC, P440_LRACC]>, + InstrStage<1, [P440_IEXE1, P440_JEXE1]>, + InstrStage<1, [P440_IEXE2, P440_JEXE2]>, + InstrStage<1, [P440_IWB, P440_JWB]>], + [2, 0, 0], + [P440_GPR_Bypass, + P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_IntISEL, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC, P440_LRACC]>, + InstrStage<1, [P440_IEXE1, P440_JEXE1]>, + InstrStage<1, [P440_IEXE2, P440_JEXE2]>, + InstrStage<1, [P440_IWB, P440_JWB]>], + [2, 0, 0, 0], + [P440_GPR_Bypass, + P440_GPR_Bypass, P440_GPR_Bypass, NoBypass]>, + InstrItinData<IIC_IntCompare, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC, P440_LRACC]>, + InstrStage<1, [P440_IEXE1, P440_JEXE1]>, + InstrStage<1, [P440_IEXE2, P440_JEXE2]>, + InstrStage<1, [P440_IWB, P440_JWB]>], + [2, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_IntDivW, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<33, [P440_IWB]>], + [36, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_IntMFFS, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [3, 0, 0], + [P440_GPR_Bypass, + P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_IntMTFSB0, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [3, 0, 0], + [P440_GPR_Bypass, + P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_IntMulHW, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_IntMulHWU, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_IntMulLI, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_IntRotate, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC, P440_LRACC]>, + InstrStage<1, [P440_IEXE1, P440_JEXE1]>, + InstrStage<1, [P440_IEXE2, P440_JEXE2]>, + 
InstrStage<1, [P440_IWB, P440_JWB]>], + [2, 0, 0], + [P440_GPR_Bypass, + P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_IntShift, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC, P440_LRACC]>, + InstrStage<1, [P440_IEXE1, P440_JEXE1]>, + InstrStage<1, [P440_IEXE2, P440_JEXE2]>, + InstrStage<1, [P440_IWB, P440_JWB]>], + [2, 0, 0], + [P440_GPR_Bypass, + P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_IntTrapW, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [2, 0], + [P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_BrB, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_BrCR, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_BrMCR, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_BrMCRX, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStDCBA, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStDCBF, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStDCBI, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStLoad, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [5, 1, 1], + [P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStLoadUpd,[InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [5, 2, 1, 1], + [P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStLoadUpdX,[InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [5, 2, 1, 1], + [P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStStore, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [1, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [2, 1, 1, 1], + 
[NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStICBI, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStSTFD, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [1, 1, 1], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStSTFDU, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [2, 1, 1, 1], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStLFD, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [5, 1, 1], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStLFDU, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [5, 2, 1, 1], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStLFDUX, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [5, 2, 1, 1], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStLHA, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStLHAU, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStLHAUX, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStLMW, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStLWARX, [InstrStage<1, [P440_DISS1]>, + InstrStage<1, [P440_IRACC], 0>, + InstrStage<4, [P440_LWARX_Hold], 0>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStSTD, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStSTDU, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [2, 1, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [2, 1, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStSTDCX, 
[InstrStage<1, [P440_DISS1]>, + InstrStage<1, [P440_IRACC], 0>, + InstrStage<4, [P440_LWARX_Hold], 0>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStSTWCX, [InstrStage<1, [P440_DISS1]>, + InstrStage<1, [P440_IRACC], 0>, + InstrStage<4, [P440_LWARX_Hold], 0>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_LdStSync, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_LRACC]>, + InstrStage<3, [P440_AGEN], 1>, + InstrStage<2, [P440_CRD], 1>, + InstrStage<1, [P440_LWB]>]>, + InstrItinData<IIC_SprISYNC, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_FRACC], 0>, + InstrStage<1, [P440_LRACC], 0>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_FEXE1], 0>, + InstrStage<1, [P440_AGEN], 0>, + InstrStage<1, [P440_JEXE1], 0>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_FEXE2], 0>, + InstrStage<1, [P440_CRD], 0>, + InstrStage<1, [P440_JEXE2], 0>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<6, [P440_FEXE3], 0>, + InstrStage<6, [P440_LWB], 0>, + InstrStage<6, [P440_JWB], 0>, + InstrStage<6, [P440_IWB]>]>, + InstrItinData<IIC_SprMFSR, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [2, 0], + [P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_SprMTMSR, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [2, 0], + [P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_SprMTSR, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<3, [P440_IWB]>], + [5, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_SprTLBSYNC, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>]>, + InstrItinData<IIC_SprMFCR, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_SprMFMSR, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [3, 0], + [P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData<IIC_SprMFSPR, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<3, [P440_IWB]>], + [6, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_SprMFTB, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<3, [P440_IWB]>], + [6, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_SprMTSPR, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<3, [P440_IWB]>], + [6, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_SprMTSRIN, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + 
InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<3, [P440_IWB]>], + [6, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_SprRFI, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_SprSC, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData<IIC_FPGeneral, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_FRACC]>, + InstrStage<1, [P440_FEXE1]>, + InstrStage<1, [P440_FEXE2]>, + InstrStage<1, [P440_FEXE3]>, + InstrStage<1, [P440_FEXE4]>, + InstrStage<1, [P440_FEXE5]>, + InstrStage<1, [P440_FEXE6]>, + InstrStage<1, [P440_FWB]>], + [6, 0, 0], + [P440_FPR_Bypass, + P440_FPR_Bypass, P440_FPR_Bypass]>, + InstrItinData<IIC_FPAddSub, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_FRACC]>, + InstrStage<1, [P440_FEXE1]>, + InstrStage<1, [P440_FEXE2]>, + InstrStage<1, [P440_FEXE3]>, + InstrStage<1, [P440_FEXE4]>, + InstrStage<1, [P440_FEXE5]>, + InstrStage<1, [P440_FEXE6]>, + InstrStage<1, [P440_FWB]>], + [6, 0, 0], + [P440_FPR_Bypass, + P440_FPR_Bypass, P440_FPR_Bypass]>, + InstrItinData<IIC_FPCompare, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_FRACC]>, + InstrStage<1, [P440_FEXE1]>, + InstrStage<1, [P440_FEXE2]>, + InstrStage<1, [P440_FEXE3]>, + InstrStage<1, [P440_FEXE4]>, + InstrStage<1, [P440_FEXE5]>, + InstrStage<1, [P440_FEXE6]>, + InstrStage<1, [P440_FWB]>], + [6, 0, 0], + [P440_FPR_Bypass, P440_FPR_Bypass, + P440_FPR_Bypass]>, + InstrItinData<IIC_FPDivD, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_FRACC]>, + InstrStage<1, [P440_FEXE1]>, + InstrStage<1, [P440_FEXE2]>, + InstrStage<1, [P440_FEXE3]>, + InstrStage<1, [P440_FEXE4]>, + InstrStage<1, [P440_FEXE5]>, + InstrStage<1, [P440_FEXE6]>, + InstrStage<25, [P440_FWB]>], + [31, 0, 0], + [NoBypass, P440_FPR_Bypass, P440_FPR_Bypass]>, + InstrItinData<IIC_FPDivS, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_FRACC]>, + InstrStage<1, [P440_FEXE1]>, + InstrStage<1, [P440_FEXE2]>, + InstrStage<1, [P440_FEXE3]>, + InstrStage<1, [P440_FEXE4]>, + InstrStage<1, [P440_FEXE5]>, + InstrStage<1, [P440_FEXE6]>, + InstrStage<13, [P440_FWB]>], + [19, 0, 0], + [NoBypass, P440_FPR_Bypass, P440_FPR_Bypass]>, + InstrItinData<IIC_FPFused, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_FRACC]>, + InstrStage<1, [P440_FEXE1]>, + InstrStage<1, [P440_FEXE2]>, + InstrStage<1, [P440_FEXE3]>, + InstrStage<1, [P440_FEXE4]>, + InstrStage<1, [P440_FEXE5]>, + InstrStage<1, [P440_FEXE6]>, + InstrStage<1, [P440_FWB]>], + [6, 0, 0, 0], + [P440_FPR_Bypass, + P440_FPR_Bypass, P440_FPR_Bypass, + P440_FPR_Bypass]>, + InstrItinData<IIC_FPRes, [InstrStage<1, [P440_DISS1, P440_DISS2]>, + InstrStage<1, [P440_FRACC]>, + InstrStage<1, [P440_FEXE1]>, + InstrStage<1, [P440_FEXE2]>, + InstrStage<1, [P440_FEXE3]>, + InstrStage<1, [P440_FEXE4]>, + InstrStage<1, [P440_FEXE5]>, + InstrStage<1, [P440_FEXE6]>, + InstrStage<1, [P440_FWB]>], + [6, 0], + [P440_FPR_Bypass, P440_FPR_Bypass]> +]>; + +// ===---------------------------------------------------------------------===// +// PPC440 machine model for scheduling and other instruction cost heuristics. 
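+//
+// Note: CompleteModel = 0 below declares the model incomplete, so TableGen
+// does not require scheduling information for every opcode; instructions
+// without itinerary data simply fall back to the defaults in this model.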
+
+def PPC440Model : SchedMachineModel {
+  let IssueWidth = 2;  // 2 instructions are dispatched per cycle.
+  let LoadLatency = 5; // Optimistic load latency assuming bypass.
+                       // This is overridden by OperandCycles if the
+                       // Itineraries are queried instead.
+
+  let CompleteModel = 0;
+
+  let Itineraries = PPC440Itineraries;
+}
+
diff --git a/capstone/suite/synctools/tablegen/PPC/PPCScheduleA2.td b/capstone/suite/synctools/tablegen/PPC/PPCScheduleA2.td
new file mode 100644
index 000000000..54cfae5d7
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/PPC/PPCScheduleA2.td
@@ -0,0 +1,172 @@
+//===- PPCScheduleA2.td - PPC A2 Scheduling Definitions --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Primary reference:
+// A2 Processor User's Manual.
+// IBM (as updated in) 2010.
+
+//===----------------------------------------------------------------------===//
+// Functional units on the PowerPC A2 chip sets
+//
+def A2_XU : FuncUnit; // A2_XU pipeline
+def A2_FU : FuncUnit; // FP pipeline
+
+//
+// This file defines the itinerary class data for the PPC A2 processor.
+//
+//===----------------------------------------------------------------------===//
+
+
+def PPCA2Itineraries : ProcessorItineraries<
+  [A2_XU, A2_FU], [], [
+  InstrItinData<IIC_IntSimple,   [InstrStage<1, [A2_XU]>],
+                                 [1, 0, 0]>,
+  InstrItinData<IIC_IntGeneral,  [InstrStage<1, [A2_XU]>],
+                                 [2, 0, 0]>,
+  InstrItinData<IIC_IntISEL,     [InstrStage<1, [A2_XU]>],
+                                 [2, 0, 0, 0]>,
+  InstrItinData<IIC_IntCompare,  [InstrStage<1, [A2_XU]>],
+                                 [2, 0, 0]>,
+  InstrItinData<IIC_IntDivW,     [InstrStage<1, [A2_XU]>],
+                                 [39, 0, 0]>,
+  InstrItinData<IIC_IntDivD,     [InstrStage<1, [A2_XU]>],
+                                 [71, 0, 0]>,
+  InstrItinData<IIC_IntMulHW,    [InstrStage<1, [A2_XU]>],
+                                 [5, 0, 0]>,
+  InstrItinData<IIC_IntMulHWU,   [InstrStage<1, [A2_XU]>],
+                                 [5, 0, 0]>,
+  InstrItinData<IIC_IntMulLI,    [InstrStage<1, [A2_XU]>],
+                                 [6, 0, 0]>,
+  InstrItinData<IIC_IntRotate,   [InstrStage<1, [A2_XU]>],
+                                 [2, 0, 0]>,
+  InstrItinData<IIC_IntRotateD,  [InstrStage<1, [A2_XU]>],
+                                 [2, 0, 0]>,
+  InstrItinData<IIC_IntRotateDI, [InstrStage<1, [A2_XU]>],
+                                 [2, 0, 0]>,
+  InstrItinData<IIC_IntShift,    [InstrStage<1, [A2_XU]>],
+                                 [2, 0, 0]>,
+  InstrItinData<IIC_IntTrapW,    [InstrStage<1, [A2_XU]>],
+                                 [2, 0]>,
+  InstrItinData<IIC_IntTrapD,    [InstrStage<1, [A2_XU]>],
+                                 [2, 0]>,
+  InstrItinData<IIC_BrB,         [InstrStage<1, [A2_XU]>],
+                                 [6, 0, 0]>,
+  InstrItinData<IIC_BrCR,        [InstrStage<1, [A2_XU]>],
+                                 [1, 0, 0]>,
+  InstrItinData<IIC_BrMCR,       [InstrStage<1, [A2_XU]>],
+                                 [5, 0, 0]>,
+  InstrItinData<IIC_BrMCRX,      [InstrStage<1, [A2_XU]>],
+                                 [1, 0, 0]>,
+  InstrItinData<IIC_LdStDCBA,    [InstrStage<1, [A2_XU]>],
+                                 [1, 0, 0]>,
+  InstrItinData<IIC_LdStDCBF,    [InstrStage<1, [A2_XU]>],
+                                 [1, 0, 0]>,
+  InstrItinData<IIC_LdStDCBI,    [InstrStage<1, [A2_XU]>],
+                                 [1, 0, 0]>,
+  InstrItinData<IIC_LdStLoad,    [InstrStage<1, [A2_XU]>],
+                                 [6, 0, 0]>,
+  InstrItinData<IIC_LdStLoadUpd, [InstrStage<1, [A2_XU]>],
+                                 [6, 8, 0, 0]>,
+  InstrItinData<IIC_LdStLoadUpdX,[InstrStage<1, [A2_XU]>],
+                                 [6, 8, 0, 0]>,
+  InstrItinData<IIC_LdStLDU,     [InstrStage<1, [A2_XU]>],
+                                 [6, 0, 0]>,
+  InstrItinData<IIC_LdStLDUX,    [InstrStage<1, [A2_XU]>],
+                                 [6, 0, 0]>,
+  InstrItinData<IIC_LdStStore,   [InstrStage<1, [A2_XU]>],
+                                 [0, 0, 0]>,
+  InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [A2_XU]>],
+                                 [2, 0, 0, 0]>,
+  InstrItinData<IIC_LdStICBI,    [InstrStage<1,
[A2_XU]>], + [16, 0, 0]>, + InstrItinData<IIC_LdStSTFD, [InstrStage<1, [A2_XU]>], + [0, 0, 0]>, + InstrItinData<IIC_LdStSTFDU, [InstrStage<1, [A2_XU]>], + [2, 0, 0, 0]>, + InstrItinData<IIC_LdStLFD, [InstrStage<1, [A2_XU]>], + [7, 0, 0]>, + InstrItinData<IIC_LdStLFDU, [InstrStage<1, [A2_XU]>], + [7, 9, 0, 0]>, + InstrItinData<IIC_LdStLFDUX, [InstrStage<1, [A2_XU]>], + [7, 9, 0, 0]>, + InstrItinData<IIC_LdStLHA, [InstrStage<1, [A2_XU]>], + [6, 0, 0]>, + InstrItinData<IIC_LdStLHAU, [InstrStage<1, [A2_XU]>], + [6, 8, 0, 0]>, + InstrItinData<IIC_LdStLHAUX, [InstrStage<1, [A2_XU]>], + [6, 8, 0, 0]>, + InstrItinData<IIC_LdStLWARX, [InstrStage<1, [A2_XU]>], + [82, 0, 0]>, // L2 latency + InstrItinData<IIC_LdStSTD, [InstrStage<1, [A2_XU]>], + [0, 0, 0]>, + InstrItinData<IIC_LdStSTDU, [InstrStage<1, [A2_XU]>], + [2, 0, 0, 0]>, + InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [A2_XU]>], + [2, 0, 0, 0]>, + InstrItinData<IIC_LdStSTDCX, [InstrStage<1, [A2_XU]>], + [82, 0, 0]>, // L2 latency + InstrItinData<IIC_LdStSTWCX, [InstrStage<1, [A2_XU]>], + [82, 0, 0]>, // L2 latency + InstrItinData<IIC_LdStSync, [InstrStage<1, [A2_XU]>], + [6]>, + InstrItinData<IIC_SprISYNC, [InstrStage<1, [A2_XU]>], + [16]>, + InstrItinData<IIC_SprMTMSR, [InstrStage<1, [A2_XU]>], + [16, 0]>, + InstrItinData<IIC_SprMFCR, [InstrStage<1, [A2_XU]>], + [6, 0]>, + InstrItinData<IIC_SprMFCRF, [InstrStage<1, [A2_XU]>], + [1, 0]>, + InstrItinData<IIC_SprMFMSR, [InstrStage<1, [A2_XU]>], + [4, 0]>, + InstrItinData<IIC_SprMFSPR, [InstrStage<1, [A2_XU]>], + [6, 0]>, + InstrItinData<IIC_SprMFTB, [InstrStage<1, [A2_XU]>], + [4, 0]>, + InstrItinData<IIC_SprMTSPR, [InstrStage<1, [A2_XU]>], + [6, 0]>, + InstrItinData<IIC_SprRFI, [InstrStage<1, [A2_XU]>], + [16]>, + InstrItinData<IIC_SprSC, [InstrStage<1, [A2_XU]>], + [16]>, + InstrItinData<IIC_FPGeneral, [InstrStage<1, [A2_FU]>], + [6, 0, 0]>, + InstrItinData<IIC_FPAddSub, [InstrStage<1, [A2_FU]>], + [6, 0, 0]>, + InstrItinData<IIC_FPCompare, [InstrStage<1, [A2_FU]>], + [5, 0, 0]>, + InstrItinData<IIC_FPDivD, [InstrStage<1, [A2_FU]>], + [72, 0, 0]>, + InstrItinData<IIC_FPDivS, [InstrStage<1, [A2_FU]>], + [59, 0, 0]>, + InstrItinData<IIC_FPSqrtD, [InstrStage<1, [A2_FU]>], + [69, 0, 0]>, + InstrItinData<IIC_FPSqrtS, [InstrStage<1, [A2_FU]>], + [65, 0, 0]>, + InstrItinData<IIC_FPFused, [InstrStage<1, [A2_FU]>], + [6, 0, 0, 0]>, + InstrItinData<IIC_FPRes, [InstrStage<1, [A2_FU]>], + [6, 0]> +]>; + +// ===---------------------------------------------------------------------===// +// A2 machine model for scheduling and other instruction cost heuristics. + +def PPCA2Model : SchedMachineModel { + let IssueWidth = 1; // 1 instruction is dispatched per cycle. + let LoadLatency = 6; // Optimistic load latency assuming bypass. + // This is overriden by OperandCycles if the + // Itineraries are queried instead. + let MispredictPenalty = 13; + + let CompleteModel = 0; + + let Itineraries = PPCA2Itineraries; +} + diff --git a/capstone/suite/synctools/tablegen/PPC/PPCScheduleE500.td b/capstone/suite/synctools/tablegen/PPC/PPCScheduleE500.td new file mode 100644 index 000000000..d7c2bd15a --- /dev/null +++ b/capstone/suite/synctools/tablegen/PPC/PPCScheduleE500.td @@ -0,0 +1,274 @@ +//===-- PPCScheduleE500.td - e500 Scheduling Defs ------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the Freescale e500 32-bit
+// Power processor.
+//
+// All information is derived from the "e500 Core Reference Manual",
+// Freescale Document Number E500MCRM, Rev. 1, 03/2012.
+//
+//===----------------------------------------------------------------------===//
+// Relevant functional units in the Freescale e500 core:
+//
+// * Decode & Dispatch
+//   Can dispatch up to 2 instructions per clock cycle to either the GPR Issue
+//   queues (GIQx) or Branch issue queue (BIQ).
def E500_DIS0 : FuncUnit; // Dispatch stage - insn 1
+def E500_DIS1 : FuncUnit; // Dispatch stage - insn 2
+
+// * Execute
+//   5 pipelined execution units: SU0, SU1, BU, MU, LSU.
+//   Some instructions can execute only in SU0, not SU1.
+def E500_SU0 : FuncUnit; // Simple unit 0
+def E500_SU1 : FuncUnit; // Simple unit 1
+def E500_BU  : FuncUnit; // Branch unit
+def E500_MU  : FuncUnit; // MU pipeline
+def E500_LSU_0 : FuncUnit; // LSU pipeline
+
+def E500_GPR_Bypass : Bypass;
+def E500_CR_Bypass : Bypass;
+def E500_DivBypass : Bypass;
+
+def PPCE500Itineraries : ProcessorItineraries<
+  [E500_DIS0, E500_DIS1, E500_SU0, E500_SU1, E500_BU,
+   E500_MU, E500_LSU_0],
+  [E500_CR_Bypass, E500_GPR_Bypass, E500_DivBypass], [
+  InstrItinData<IIC_IntSimple,   [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+                                  InstrStage<1, [E500_SU0, E500_SU1]>],
+                                 [4, 1, 1], // Latency = 1
+                                 [E500_GPR_Bypass,
+                                  E500_GPR_Bypass, E500_GPR_Bypass]>,
+  InstrItinData<IIC_IntGeneral,  [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+                                  InstrStage<1, [E500_SU0, E500_SU1]>],
+                                 [4, 1, 1], // Latency = 1
+                                 [E500_GPR_Bypass,
+                                  E500_GPR_Bypass, E500_GPR_Bypass]>,
+  InstrItinData<IIC_IntISEL,     [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+                                  InstrStage<1, [E500_SU0, E500_SU1]>],
+                                 [4, 1, 1, 1], // Latency = 1
+                                 [E500_GPR_Bypass,
+                                  E500_GPR_Bypass, E500_GPR_Bypass,
+                                  E500_CR_Bypass]>,
+  InstrItinData<IIC_IntCompare,  [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+                                  InstrStage<1, [E500_SU0, E500_SU1]>],
+                                 [5, 1, 1], // Latency = 1 or 2
+                                 [E500_CR_Bypass,
+                                  E500_GPR_Bypass, E500_GPR_Bypass]>,
+  InstrItinData<IIC_IntDivW,     [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+                                  InstrStage<1, [E500_MU], 0>,
+                                  InstrStage<14, [E500_MU]>],
+                                 [17, 1, 1], // Latency = 4..35, Repeat = 4..35
+                                 [E500_GPR_Bypass,
+                                  E500_GPR_Bypass, E500_GPR_Bypass]>,
+  InstrItinData<IIC_IntMulHW,    [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+                                  InstrStage<4, [E500_MU]>],
+                                 [7, 1, 1], // Latency = 4, Repeat rate = 1
+                                 [E500_GPR_Bypass,
+                                  E500_GPR_Bypass, E500_GPR_Bypass]>,
+  InstrItinData<IIC_IntMulHWU,   [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+                                  InstrStage<4, [E500_MU]>],
+                                 [7, 1, 1], // Latency = 4, Repeat rate = 1
+                                 [E500_GPR_Bypass,
+                                  E500_GPR_Bypass, E500_GPR_Bypass]>,
+  InstrItinData<IIC_IntMulLI,    [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+                                  InstrStage<4, [E500_MU]>],
+                                 [7, 1, 1], // Latency = 4, Repeat rate = 1
+                                 [E500_GPR_Bypass,
+                                  E500_GPR_Bypass, E500_GPR_Bypass]>,
+  InstrItinData<IIC_IntRotate,   [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+                                  InstrStage<1, [E500_SU0, E500_SU1]>],
+                                 [4, 1, 1], // Latency = 1
+                                 [E500_GPR_Bypass,
+                                  E500_GPR_Bypass, E500_GPR_Bypass]>,
+  InstrItinData<IIC_IntShift,    [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+                                  InstrStage<1, [E500_SU0, E500_SU1]>],
+                                 [4, 1, 1], // Latency = 1
+                                 [E500_GPR_Bypass,
+                                  E500_GPR_Bypass, E500_GPR_Bypass]>,
+  InstrItinData<IIC_IntTrapW,    [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+                                  InstrStage<2, [E500_SU0]>],
+                                 [5, 1], // Latency = 2, Repeat
rate = 2 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_BrB, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_BU]>], + [4, 1], // Latency = 1 + [NoBypass, E500_GPR_Bypass]>, + InstrItinData<IIC_BrCR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_BU]>], + [4, 1, 1], // Latency = 1 + [E500_CR_Bypass, + E500_CR_Bypass, E500_CR_Bypass]>, + InstrItinData<IIC_BrMCR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_BU]>], + [4, 1], // Latency = 1 + [E500_CR_Bypass, E500_CR_Bypass]>, + InstrItinData<IIC_BrMCRX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SU0, E500_SU1]>], + [4, 1, 1], // Latency = 1 + [E500_CR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_LdStDCBA, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3, Repeat rate = 1 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_LdStDCBF, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_LdStDCBI, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_LdStLoad, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_LdStLoadUpd, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SU0, E500_SU1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStLoadUpdX,[InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SU0, E500_SU1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStStore, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, E500_GPR_Bypass]>, + InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SU0, E500_SU1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, E500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStICBI, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, E500_GPR_Bypass]>, + InstrItinData<IIC_LdStLHA, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_LdStLHAU, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SU0, E500_SU1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_LdStLHAUX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SU0, E500_SU1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_LdStLMW, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_LSU_0]>], + [7, 1], // Latency = r+3 + [NoBypass, E500_GPR_Bypass]>, + InstrItinData<IIC_LdStLWARX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<3, [E500_LSU_0]>], + [6, 1, 1], // Latency = 3, Repeat rate = 3 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_LdStSTWCX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, 
E500_GPR_Bypass]>, + InstrItinData<IIC_LdStSync, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_LSU_0]>]>, + InstrItinData<IIC_SprMFSR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<4, [E500_SU0]>], + [7, 1], + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_SprMTMSR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<2, [E500_SU0, E500_SU1]>], + [5, 1], // Latency = 2, Repeat rate = 4 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_SprMTSR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SU0]>], + [5, 1], + [NoBypass, E500_GPR_Bypass]>, + InstrItinData<IIC_SprTLBSYNC, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_LSU_0], 0>]>, + InstrItinData<IIC_SprMFCR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<5, [E500_SU0]>], + [8, 1], + [E500_GPR_Bypass, E500_CR_Bypass]>, + InstrItinData<IIC_SprMFCRF, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<5, [E500_SU0]>], + [8, 1], + [E500_GPR_Bypass, E500_CR_Bypass]>, + InstrItinData<IIC_SprMFPMR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<4, [E500_SU0]>], + [7, 1], // Latency = 4, Repeat rate = 4 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_SprMFMSR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<4, [E500_SU0]>], + [7, 1], // Latency = 4, Repeat rate = 4 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_SprMFSPR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SU0, E500_SU1]>], + [4, 1], // Latency = 1, Repeat rate = 1 + [E500_GPR_Bypass, E500_CR_Bypass]>, + InstrItinData<IIC_SprMTPMR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SU0]>], + [4, 1], // Latency = 1, Repeat rate = 1 + [E500_CR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_SprMFTB, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<4, [E500_SU0]>], + [7, 1], // Latency = 4, Repeat rate = 4 + [NoBypass, E500_GPR_Bypass]>, + InstrItinData<IIC_SprMTSPR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SU0, E500_SU1]>], + [4, 1], // Latency = 1, Repeat rate = 1 + [E500_CR_Bypass, E500_GPR_Bypass]>, + InstrItinData<IIC_SprMTSRIN, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SU0]>], + [4, 1], + [NoBypass, E500_GPR_Bypass]>, + InstrItinData<IIC_FPDGeneral, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<6, [E500_MU]>], + [9, 1, 1], // Latency = 6, Repeat rate = 1 + [NoBypass]>, + InstrItinData<IIC_FPSGeneral, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<4, [E500_MU]>], + [7, 1, 1], // Latency = 4, Repeat rate = 1 + [NoBypass]>, + InstrItinData<IIC_FPDivD, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<32, [E500_MU]>], + [35, 1, 1], // Latency = 32, Repeat rate = 32 + [E500_DivBypass]>, + InstrItinData<IIC_FPDivS, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<29, [E500_MU]>], + [32, 1, 1], // Latency = 29, Repeat rate = 29 + [E500_DivBypass]>, + InstrItinData<IIC_VecGeneral, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<1, [E500_SU0]>], + [4, 1, 1], // Latency = 1, Repeat rate = 1 + [NoBypass]>, + InstrItinData<IIC_VecComplex, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>, + InstrStage<4, [E500_MU]>], + [7, 1, 1], // Latency = 4, Repeat rate = 1 + [NoBypass]> +]>; + +// ===---------------------------------------------------------------------===// +// e500 machine model for scheduling and other instruction cost heuristics. + +def PPCE500Model : SchedMachineModel { + let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. 
+  let LoadLatency = 5; // Optimistic load latency assuming bypass.
+                       // This is overridden by OperandCycles if the
+                       // Itineraries are queried instead.
+
+  let CompleteModel = 0;
+
+  let Itineraries = PPCE500Itineraries;
+}
diff --git a/capstone/suite/synctools/tablegen/PPC/PPCScheduleE500mc.td b/capstone/suite/synctools/tablegen/PPC/PPCScheduleE500mc.td
new file mode 100644
index 000000000..5f95f2a79
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/PPC/PPCScheduleE500mc.td
@@ -0,0 +1,329 @@
+//===-- PPCScheduleE500mc.td - e500mc Scheduling Defs ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the Freescale e500mc 32-bit
+// Power processor.
+//
+// All information is derived from the "e500mc Core Reference Manual",
+// Freescale Document Number E500MCRM, Rev. 1, 03/2012.
+//
+//===----------------------------------------------------------------------===//
+// Relevant functional units in the Freescale e500mc core:
+//
+//  * Decode & Dispatch
+//    Can dispatch up to 2 instructions per clock cycle to either the GPR Issue
+//    queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ).
+def E500mc_DIS0 : FuncUnit; // Dispatch stage - insn 1
+def E500mc_DIS1 : FuncUnit; // Dispatch stage - insn 2
+
+//  * Execute
+//    6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX.
+//    Some instructions can only execute in SFX0 but not SFX1.
+//    The CFX has a bypass path, allowing non-divide instructions to execute
+//    while a divide instruction is executed.
+def E500mc_SFX0 : FuncUnit; // Simple unit 0
+def E500mc_SFX1 : FuncUnit; // Simple unit 1
+def E500mc_BU   : FuncUnit; // Branch unit
+def E500mc_CFX_DivBypass
+    : FuncUnit; // CFX divide bypass path
+def E500mc_CFX_0 : FuncUnit; // CFX pipeline
+def E500mc_LSU_0 : FuncUnit; // LSU pipeline
+def E500mc_FPU_0 : FuncUnit; // FPU pipeline
+
+def E500mc_GPR_Bypass : Bypass;
+def E500mc_FPR_Bypass : Bypass;
+def E500mc_CR_Bypass : Bypass;
+
+def PPCE500mcItineraries : ProcessorItineraries<
+  [E500mc_DIS0, E500mc_DIS1, E500mc_SFX0, E500mc_SFX1, E500mc_BU, E500mc_CFX_DivBypass,
+   E500mc_CFX_0, E500mc_LSU_0, E500mc_FPU_0],
+  [E500mc_CR_Bypass, E500mc_GPR_Bypass, E500mc_FPR_Bypass], [
+  InstrItinData<IIC_IntSimple,   [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>,
+                                  InstrStage<1, [E500mc_SFX0, E500mc_SFX1]>],
+                                 [4, 1, 1], // Latency = 1
+                                 [E500mc_GPR_Bypass,
+                                  E500mc_GPR_Bypass, E500mc_GPR_Bypass]>,
+  InstrItinData<IIC_IntGeneral,  [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>,
+                                  InstrStage<1, [E500mc_SFX0, E500mc_SFX1]>],
+                                 [4, 1, 1], // Latency = 1
+                                 [E500mc_GPR_Bypass,
+                                  E500mc_GPR_Bypass, E500mc_GPR_Bypass]>,
+  InstrItinData<IIC_IntISEL,     [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>,
+                                  InstrStage<1, [E500mc_SFX0, E500mc_SFX1]>],
+                                 [4, 1, 1, 1], // Latency = 1
+                                 [E500mc_GPR_Bypass,
+                                  E500mc_GPR_Bypass, E500mc_GPR_Bypass,
+                                  E500mc_CR_Bypass]>,
+  InstrItinData<IIC_IntCompare,  [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>,
+                                  InstrStage<1, [E500mc_SFX0, E500mc_SFX1]>],
+                                 [5, 1, 1], // Latency = 1 or 2
+                                 [E500mc_CR_Bypass,
+                                  E500mc_GPR_Bypass, E500mc_GPR_Bypass]>,
+  InstrItinData<IIC_IntDivW,     [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>,
+                                  InstrStage<1, [E500mc_CFX_0], 0>,
+                                  InstrStage<14, [E500mc_CFX_DivBypass]>],
+                                 [17, 1, 1], // Latency = 4..35, Repeat rate = 4..35
+
[E500mc_GPR_Bypass, + E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_IntMFFS, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<8, [E500mc_FPU_0]>], + [11], // Latency = 8 + [E500mc_FPR_Bypass]>, + InstrItinData<IIC_IntMTFSB0, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<8, [E500mc_FPU_0]>], + [11, 1, 1], // Latency = 8 + [NoBypass, NoBypass, NoBypass]>, + InstrItinData<IIC_IntMulHW, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_CFX_0]>], + [7, 1, 1], // Latency = 4, Repeat rate = 1 + [E500mc_GPR_Bypass, + E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_IntMulHWU, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_CFX_0]>], + [7, 1, 1], // Latency = 4, Repeat rate = 1 + [E500mc_GPR_Bypass, + E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_IntMulLI, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_CFX_0]>], + [7, 1, 1], // Latency = 4, Repeat rate = 1 + [E500mc_GPR_Bypass, + E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_IntRotate, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1]>], + [4, 1, 1], // Latency = 1 + [E500mc_GPR_Bypass, + E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_IntShift, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1]>], + [4, 1, 1], // Latency = 1 + [E500mc_GPR_Bypass, + E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_IntTrapW, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<2, [E500mc_SFX0]>], + [5, 1], // Latency = 2, Repeat rate = 2 + [E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_BrB, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_BU]>], + [4, 1], // Latency = 1 + [NoBypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_BrCR, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_BU]>], + [4, 1, 1], // Latency = 1 + [E500mc_CR_Bypass, + E500mc_CR_Bypass, E500mc_CR_Bypass]>, + InstrItinData<IIC_BrMCR, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_BU]>], + [4, 1], // Latency = 1 + [E500mc_CR_Bypass, E500mc_CR_Bypass]>, + InstrItinData<IIC_BrMCRX, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1]>], + [4, 1, 1], // Latency = 1 + [E500mc_CR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_LdStDCBA, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_LSU_0]>], + [6, 1], // Latency = 3, Repeat rate = 1 + [E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_LdStDCBF, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_LSU_0]>], + [6, 1], // Latency = 3 + [E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_LdStDCBI, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_LSU_0]>], + [6, 1], // Latency = 3 + [E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_LdStLoad, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_LSU_0]>], + [6, 1], // Latency = 3 + [E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_LdStLoadUpd, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1], 0>, + InstrStage<1, [E500mc_LSU_0]>], + [6, 1], // Latency = 3 + [E500mc_GPR_Bypass, E500mc_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStLoadUpdX,[InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1], 0>, + InstrStage<1, [E500mc_LSU_0]>], + [6, 1], 
// Latency = 3 + [E500mc_GPR_Bypass, E500mc_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStStore, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1], 0>, + InstrStage<1, [E500mc_LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, E500mc_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStICBI, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_LdStSTFD, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_LSU_0]>], + [6, 1, 1], // Latency = 3 + [E500mc_GPR_Bypass, + E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_LdStSTFDU, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1], 0>, + InstrStage<1, [E500mc_LSU_0]>], + [6, 1, 1], // Latency = 3 + [E500mc_GPR_Bypass, + E500mc_GPR_Bypass, E500mc_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStLFD, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_LSU_0]>], + [7, 1, 1], // Latency = 4 + [E500mc_FPR_Bypass, + E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_LdStLFDU, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1], 0>, + InstrStage<1, [E500mc_LSU_0]>], + [7, 1, 1], // Latency = 4 + [E500mc_FPR_Bypass, + E500mc_GPR_Bypass, E500mc_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStLFDUX, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1], 0>, + InstrStage<1, [E500mc_LSU_0]>], + [7, 1, 1], // Latency = 4 + [E500mc_FPR_Bypass, + E500mc_GPR_Bypass, E500mc_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStLHA, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_LSU_0]>], + [6, 1], // Latency = 3 + [E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_LdStLHAU, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1], 0>, + InstrStage<1, [E500mc_LSU_0]>], + [6, 1], // Latency = 3 + [E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_LdStLHAUX, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1], 0>, + InstrStage<1, [E500mc_LSU_0]>], + [6, 1], // Latency = 3 + [E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_LdStLMW, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_LSU_0]>], + [7, 1], // Latency = r+3 + [NoBypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_LdStLWARX, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<3, [E500mc_LSU_0]>], + [6, 1, 1], // Latency = 3, Repeat rate = 3 + [E500mc_GPR_Bypass, + E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_LdStSTWCX, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_LdStSync, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_LSU_0]>]>, + InstrItinData<IIC_SprMFSR, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<4, [E500mc_SFX0]>], + [7, 1], + [E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_SprMTMSR, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<2, [E500mc_SFX0, E500mc_SFX1]>], + [5, 1], // Latency = 2, Repeat rate = 4 + [E500mc_GPR_Bypass, 
E500mc_GPR_Bypass]>, + InstrItinData<IIC_SprMTSR, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_SFX0]>], + [5, 1], + [NoBypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_SprTLBSYNC, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_LSU_0], 0>]>, + InstrItinData<IIC_SprMFCR, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<5, [E500mc_SFX0]>], + [8, 1], + [E500mc_GPR_Bypass, E500mc_CR_Bypass]>, + InstrItinData<IIC_SprMFCRF, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<5, [E500mc_SFX0]>], + [8, 1], + [E500mc_GPR_Bypass, E500mc_CR_Bypass]>, + InstrItinData<IIC_SprMFPMR, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<4, [E500mc_SFX0]>], + [7, 1], // Latency = 4, Repeat rate = 4 + [E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_SprMFMSR, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<4, [E500mc_SFX0]>], + [7, 1], // Latency = 4, Repeat rate = 4 + [E500mc_GPR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_SprMFSPR, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1]>], + [4, 1], // Latency = 1, Repeat rate = 1 + [E500mc_GPR_Bypass, E500mc_CR_Bypass]>, + InstrItinData<IIC_SprMTPMR, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_SFX0]>], + [4, 1], // Latency = 1, Repeat rate = 1 + [E500mc_CR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_SprMFTB, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<4, [E500mc_SFX0]>], + [7, 1], // Latency = 4, Repeat rate = 4 + [NoBypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_SprMTSPR, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_SFX0, E500mc_SFX1]>], + [4, 1], // Latency = 1, Repeat rate = 1 + [E500mc_CR_Bypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_SprMTSRIN, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<1, [E500mc_SFX0]>], + [4, 1], + [NoBypass, E500mc_GPR_Bypass]>, + InstrItinData<IIC_FPGeneral, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<2, [E500mc_FPU_0]>], + [11, 1, 1], // Latency = 8, Repeat rate = 2 + [E500mc_FPR_Bypass, + E500mc_FPR_Bypass, E500mc_FPR_Bypass]>, + InstrItinData<IIC_FPAddSub, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<4, [E500mc_FPU_0]>], + [13, 1, 1], // Latency = 10, Repeat rate = 4 + [E500mc_FPR_Bypass, + E500mc_FPR_Bypass, E500mc_FPR_Bypass]>, + InstrItinData<IIC_FPCompare, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<2, [E500mc_FPU_0]>], + [11, 1, 1], // Latency = 8, Repeat rate = 2 + [E500mc_CR_Bypass, + E500mc_FPR_Bypass, E500mc_FPR_Bypass]>, + InstrItinData<IIC_FPDivD, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<68, [E500mc_FPU_0]>], + [71, 1, 1], // Latency = 68, Repeat rate = 68 + [E500mc_FPR_Bypass, + E500mc_FPR_Bypass, E500mc_FPR_Bypass]>, + InstrItinData<IIC_FPDivS, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<38, [E500mc_FPU_0]>], + [41, 1, 1], // Latency = 38, Repeat rate = 38 + [E500mc_FPR_Bypass, + E500mc_FPR_Bypass, E500mc_FPR_Bypass]>, + InstrItinData<IIC_FPFused, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<4, [E500mc_FPU_0]>], + [13, 1, 1, 1], // Latency = 10, Repeat rate = 4 + [E500mc_FPR_Bypass, + E500mc_FPR_Bypass, E500mc_FPR_Bypass, + E500mc_FPR_Bypass]>, + InstrItinData<IIC_FPRes, [InstrStage<1, [E500mc_DIS0, E500mc_DIS1], 0>, + InstrStage<38, [E500mc_FPU_0]>], + [41, 1], // Latency = 38, Repeat rate = 38 + [E500mc_FPR_Bypass, E500mc_FPR_Bypass]> +]>; + +// 
===---------------------------------------------------------------------===//
+// e500mc machine model for scheduling and other instruction cost heuristics.
+
+def PPCE500mcModel : SchedMachineModel {
+  let IssueWidth = 2;  // 2 micro-ops are dispatched per cycle.
+  let LoadLatency = 5; // Optimistic load latency assuming bypass.
+                       // This is overridden by OperandCycles if the
+                       // Itineraries are queried instead.
+
+  let CompleteModel = 0;
+
+  let Itineraries = PPCE500mcItineraries;
+}
diff --git a/capstone/suite/synctools/tablegen/PPC/PPCScheduleE5500.td b/capstone/suite/synctools/tablegen/PPC/PPCScheduleE5500.td
new file mode 100644
index 000000000..32f8e652d
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/PPC/PPCScheduleE5500.td
@@ -0,0 +1,385 @@
+//===-- PPCScheduleE5500.td - e5500 Scheduling Defs --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the Freescale e5500 64-bit
+// Power processor.
+//
+// All information is derived from the "e5500 Core Reference Manual",
+// Freescale Document Number e5500RM, Rev. 1, 03/2012.
+//
+//===----------------------------------------------------------------------===//
+// Relevant functional units in the Freescale e5500 core
+// (These are the same as for the e500mc)
+//
+//  * Decode & Dispatch
+//    Can dispatch up to 2 instructions per clock cycle to either the GPR Issue
+//    queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ).
+def E5500_DIS0 : FuncUnit;
+def E5500_DIS1 : FuncUnit;
+
+//  * Execute
+//    6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX.
+//    The CFX has a bypass path, allowing non-divide instructions to execute
+//    while a divide instruction is being executed.
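+//
+// How to read the itinerary entries that follow (an annotated sketch, kept
+// as a comment so it defines nothing; it simply restates the IIC_IntDivW
+// entry that appears later in this file):
+//
+//   InstrItinData<IIC_IntDivW,
+//     [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, // one dispatch slot; the
+//                                                  // trailing 0 lets the next
+//                                                  // stage begin the same cycle
+//      InstrStage<1, [E5500_CFX_0], 0>,            // CFX stage 0 is held for
+//                                                  // a single cycle only
+//      InstrStage<16, [E5500_CFX_DivBypass]>],     // the long occupancy lands
+//                                                  // on the bypass unit
+//     [20, 2, 2]>                                  // operand cycles: result
+//                                                  // defined at 20, sources
+//                                                  // read at 2
+//
+// Because the divide ties up E5500_CFX_DivBypass rather than E5500_CFX_0,
+// the model lets non-divide CFX instructions keep issuing while the divide
+// completes -- exactly the bypass behavior described above.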
+def E5500_SFX0 : FuncUnit; // Simple unit 0 +def E5500_SFX1 : FuncUnit; // Simple unit 1 +def E5500_BU : FuncUnit; // Branch unit +def E5500_CFX_DivBypass + : FuncUnit; // CFX divide bypass path +def E5500_CFX_0 : FuncUnit; // CFX pipeline stage 0 + +def E5500_CFX_1 : FuncUnit; // CFX pipeline stage 1 + +def E5500_LSU_0 : FuncUnit; // LSU pipeline +def E5500_FPU_0 : FuncUnit; // FPU pipeline + +def E5500_GPR_Bypass : Bypass; +def E5500_FPR_Bypass : Bypass; +def E5500_CR_Bypass : Bypass; + +def PPCE5500Itineraries : ProcessorItineraries< + [E5500_DIS0, E5500_DIS1, E5500_SFX0, E5500_SFX1, E5500_BU, + E5500_CFX_DivBypass, E5500_CFX_0, E5500_CFX_1, + E5500_LSU_0, E5500_FPU_0], + [E5500_CR_Bypass, E5500_GPR_Bypass, E5500_FPR_Bypass], [ + InstrItinData<IIC_IntSimple, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1]>], + [5, 2, 2], // Latency = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_IntGeneral, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1]>], + [5, 2, 2], // Latency = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_IntISEL, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1]>], + [5, 2, 2, 2], // Latency = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass, + E5500_CR_Bypass]>, + InstrItinData<IIC_IntCompare, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1]>], + [6, 2, 2], // Latency = 1 or 2 + [E5500_CR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_IntDivD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_CFX_0], 0>, + InstrStage<26, [E5500_CFX_DivBypass]>], + [30, 2, 2], // Latency= 4..26, Repeat rate= 4..26 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_IntDivW, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_CFX_0], 0>, + InstrStage<16, [E5500_CFX_DivBypass]>], + [20, 2, 2], // Latency= 4..16, Repeat rate= 4..16 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_IntMFFS, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_FPU_0]>], + [11], // Latency = 7, Repeat rate = 1 + [E5500_FPR_Bypass]>, + InstrItinData<IIC_IntMTFSB0, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<7, [E5500_FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 7 + [NoBypass, NoBypass, NoBypass]>, + InstrItinData<IIC_IntMulHD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_CFX_0], 0>, + InstrStage<2, [E5500_CFX_1]>], + [9, 2, 2], // Latency = 4..7, Repeat rate = 2..4 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_IntMulHW, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_CFX_0], 0>, + InstrStage<1, [E5500_CFX_1]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_IntMulHWU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_CFX_0], 0>, + InstrStage<1, [E5500_CFX_1]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_IntMulLI, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_CFX_0], 0>, + InstrStage<2, [E5500_CFX_1]>], + [8, 2, 2], // Latency = 4 or 5, Repeat = 2 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_IntRotate, [InstrStage<1, [E5500_DIS0, 
E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1]>], + [5, 2, 2], // Latency = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_IntRotateD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<2, [E5500_SFX0, E5500_SFX1]>], + [6, 2, 2], // Latency = 2, Repeat rate = 2 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_IntRotateDI, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1]>], + [5, 2, 2], // Latency = 1, Repeat rate = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_IntShift, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<2, [E5500_SFX0, E5500_SFX1]>], + [6, 2, 2], // Latency = 2, Repeat rate = 2 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_IntTrapW, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<2, [E5500_SFX0]>], + [6, 2], // Latency = 2, Repeat rate = 2 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_BrB, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_BU]>], + [5, 2], // Latency = 1 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_BrCR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_BU]>], + [5, 2, 2], // Latency = 1 + [E5500_CR_Bypass, + E5500_CR_Bypass, E5500_CR_Bypass]>, + InstrItinData<IIC_BrMCR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_BU]>], + [5, 2], // Latency = 1 + [E5500_CR_Bypass, E5500_CR_Bypass]>, + InstrItinData<IIC_BrMCRX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_CFX_0]>], + [5, 2, 2], // Latency = 1 + [E5500_CR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStDCBA, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStDCBF, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStDCBI, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStLoad, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStLoadUpd, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStLoadUpdX,[InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStLD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStLDARX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<3, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 3 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStLDU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + 
[E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStLDUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStStore, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStICBI, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStSTFD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStSTFDU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStLFD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [E5500_FPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStLFDU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [E5500_FPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStLFDUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [E5500_FPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStLHA, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStLHAU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStLHAUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStLMW, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<4, [E5500_LSU_0]>], + [8, 2], // Latency = r+3, Repeat rate = r+3 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStLWARX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<3, [E5500_LSU_0]>], + [7, 2, 2], // Latency = 3, Repeat rate = 3 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStSTD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStSTDCX, [InstrStage<1, [E5500_DIS0, 
E5500_DIS1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStSTDU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<IIC_LdStSTWCX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_LdStSync, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_LSU_0]>]>, + InstrItinData<IIC_SprMTMSR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<2, [E5500_CFX_0]>], + [6, 2], // Latency = 2, Repeat rate = 4 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_SprTLBSYNC, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_LSU_0], 0>]>, + InstrItinData<IIC_SprMFCR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<5, [E5500_CFX_0]>], + [9, 2], // Latency = 5, Repeat rate = 5 + [E5500_GPR_Bypass, E5500_CR_Bypass]>, + InstrItinData<IIC_SprMFCRF, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<5, [E5500_CFX_0]>], + [9, 2], // Latency = 5, Repeat rate = 5 + [E5500_GPR_Bypass, E5500_CR_Bypass]>, + InstrItinData<IIC_SprMFPMR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<4, [E5500_CFX_0]>], + [8, 2], // Latency = 4, Repeat rate = 4 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_SprMFSPR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_CFX_0]>], + [5], // Latency = 1, Repeat rate = 1 + [E5500_GPR_Bypass]>, + InstrItinData<IIC_SprMTPMR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_CFX_0]>], + [5], // Latency = 1, Repeat rate = 1 + [E5500_GPR_Bypass]>, + InstrItinData<IIC_SprMFTB, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<4, [E5500_CFX_0]>], + [8, 2], // Latency = 4, Repeat rate = 4 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData<IIC_SprMTSPR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_CFX_0]>], + [5], // Latency = 1, Repeat rate = 1 + [E5500_GPR_Bypass]>, + InstrItinData<IIC_FPGeneral, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 1 + [E5500_FPR_Bypass, + E5500_FPR_Bypass, E5500_FPR_Bypass]>, + InstrItinData<IIC_FPAddSub, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 1 + [E5500_FPR_Bypass, + E5500_FPR_Bypass, E5500_FPR_Bypass]>, + InstrItinData<IIC_FPCompare, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<1, [E5500_FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 1 + [E5500_CR_Bypass, + E5500_FPR_Bypass, E5500_FPR_Bypass]>, + InstrItinData<IIC_FPDivD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<31, [E5500_FPU_0]>], + [39, 2, 2], // Latency = 35, Repeat rate = 31 + [E5500_FPR_Bypass, + E5500_FPR_Bypass, E5500_FPR_Bypass]>, + InstrItinData<IIC_FPDivS, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>, + InstrStage<16, [E5500_FPU_0]>], + [24, 2, 2], // Latency = 20, Repeat rate = 16 + [E5500_FPR_Bypass, + E5500_FPR_Bypass, 
E5500_FPR_Bypass]>,
+  InstrItinData<IIC_FPFused,     [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+                                  InstrStage<1, [E5500_FPU_0]>],
+                                 [11, 2, 2, 2], // Latency = 7, Repeat rate = 1
+                                 [E5500_FPR_Bypass,
+                                  E5500_FPR_Bypass, E5500_FPR_Bypass,
+                                  E5500_FPR_Bypass]>,
+  InstrItinData<IIC_FPRes,       [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+                                  InstrStage<2, [E5500_FPU_0]>],
+                                 [12, 2], // Latency = 8, Repeat rate = 2
+                                 [E5500_FPR_Bypass, E5500_FPR_Bypass]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// e5500 machine model for scheduling and other instruction cost heuristics.
+
+def PPCE5500Model : SchedMachineModel {
+  let IssueWidth = 2;  // 2 micro-ops are dispatched per cycle.
+  let LoadLatency = 6; // Optimistic load latency assuming bypass.
+                       // This is overridden by OperandCycles if the
+                       // Itineraries are queried instead.
+
+  let CompleteModel = 0;
+
+  let Itineraries = PPCE5500Itineraries;
+}
diff --git a/capstone/suite/synctools/tablegen/PPC/PPCScheduleG3.td b/capstone/suite/synctools/tablegen/PPC/PPCScheduleG3.td
new file mode 100644
index 000000000..21efd8f8f
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/PPC/PPCScheduleG3.td
@@ -0,0 +1,80 @@
+//===-- PPCScheduleG3.td - PPC G3 Scheduling Definitions ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G3 (750) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def G3_BPU  : FuncUnit; // Branch unit
+def G3_SLU  : FuncUnit; // Store/load unit
+def G3_SRU  : FuncUnit; // special register unit
+def G3_IU1  : FuncUnit; // integer unit 1 (simple)
+def G3_IU2  : FuncUnit; // integer unit 2 (complex)
+def G3_FPU1 : FuncUnit; // floating point unit 1
+
+def G3Itineraries : ProcessorItineraries<
+  [G3_IU1, G3_IU2, G3_FPU1, G3_BPU, G3_SRU, G3_SLU], [], [
+  InstrItinData<IIC_IntSimple   , [InstrStage<1, [G3_IU1, G3_IU2]>]>,
+  InstrItinData<IIC_IntGeneral  , [InstrStage<1, [G3_IU1, G3_IU2]>]>,
+  InstrItinData<IIC_IntCompare  , [InstrStage<1, [G3_IU1, G3_IU2]>]>,
+  InstrItinData<IIC_IntDivW     , [InstrStage<19, [G3_IU1]>]>,
+  InstrItinData<IIC_IntMFFS     , [InstrStage<1, [G3_FPU1]>]>,
+  InstrItinData<IIC_IntMTFSB0   , [InstrStage<3, [G3_FPU1]>]>,
+  InstrItinData<IIC_IntMulHW    , [InstrStage<5, [G3_IU1]>]>,
+  InstrItinData<IIC_IntMulHWU   , [InstrStage<6, [G3_IU1]>]>,
+  InstrItinData<IIC_IntMulLI    , [InstrStage<3, [G3_IU1]>]>,
+  InstrItinData<IIC_IntRotate   , [InstrStage<1, [G3_IU1, G3_IU2]>]>,
+  InstrItinData<IIC_IntShift    , [InstrStage<1, [G3_IU1, G3_IU2]>]>,
+  InstrItinData<IIC_IntTrapW    , [InstrStage<2, [G3_IU1, G3_IU2]>]>,
+  InstrItinData<IIC_BrB         , [InstrStage<1, [G3_BPU]>]>,
+  InstrItinData<IIC_BrCR        , [InstrStage<1, [G3_SRU]>]>,
+  InstrItinData<IIC_BrMCR       , [InstrStage<1, [G3_SRU]>]>,
+  InstrItinData<IIC_BrMCRX      , [InstrStage<1, [G3_SRU]>]>,
+  InstrItinData<IIC_LdStDCBA    , [InstrStage<2, [G3_SLU]>]>,
+  InstrItinData<IIC_LdStDCBF    , [InstrStage<3, [G3_SLU]>]>,
+  InstrItinData<IIC_LdStDCBI    , [InstrStage<3, [G3_SLU]>]>,
+  InstrItinData<IIC_LdStLoad    , [InstrStage<2, [G3_SLU]>]>,
+  InstrItinData<IIC_LdStLoadUpd , [InstrStage<2, [G3_SLU]>]>,
+  InstrItinData<IIC_LdStLoadUpdX, [InstrStage<2, [G3_SLU]>]>,
+  InstrItinData<IIC_LdStStore   , [InstrStage<2, [G3_SLU]>]>,
+  InstrItinData<IIC_LdStStoreUpd, [InstrStage<2,
[G3_SLU]>]>, + InstrItinData<IIC_LdStICBI , [InstrStage<3, [G3_SLU]>]>, + InstrItinData<IIC_LdStSTFD , [InstrStage<2, [G3_SLU]>]>, + InstrItinData<IIC_LdStSTFDU , [InstrStage<2, [G3_SLU]>]>, + InstrItinData<IIC_LdStLFD , [InstrStage<2, [G3_SLU]>]>, + InstrItinData<IIC_LdStLFDU , [InstrStage<2, [G3_SLU]>]>, + InstrItinData<IIC_LdStLFDUX , [InstrStage<2, [G3_SLU]>]>, + InstrItinData<IIC_LdStLHA , [InstrStage<2, [G3_SLU]>]>, + InstrItinData<IIC_LdStLHAU , [InstrStage<2, [G3_SLU]>]>, + InstrItinData<IIC_LdStLHAUX , [InstrStage<2, [G3_SLU]>]>, + InstrItinData<IIC_LdStLMW , [InstrStage<34, [G3_SLU]>]>, + InstrItinData<IIC_LdStLWARX , [InstrStage<3, [G3_SLU]>]>, + InstrItinData<IIC_LdStSTWCX , [InstrStage<8, [G3_SLU]>]>, + InstrItinData<IIC_LdStSync , [InstrStage<3, [G3_SLU]>]>, + InstrItinData<IIC_SprISYNC , [InstrStage<2, [G3_SRU]>]>, + InstrItinData<IIC_SprMFSR , [InstrStage<3, [G3_SRU]>]>, + InstrItinData<IIC_SprMTMSR , [InstrStage<1, [G3_SRU]>]>, + InstrItinData<IIC_SprMTSR , [InstrStage<2, [G3_SRU]>]>, + InstrItinData<IIC_SprTLBSYNC , [InstrStage<3, [G3_SRU]>]>, + InstrItinData<IIC_SprMFCR , [InstrStage<1, [G3_SRU]>]>, + InstrItinData<IIC_SprMFMSR , [InstrStage<1, [G3_SRU]>]>, + InstrItinData<IIC_SprMFSPR , [InstrStage<3, [G3_SRU]>]>, + InstrItinData<IIC_SprMFTB , [InstrStage<3, [G3_SRU]>]>, + InstrItinData<IIC_SprMTSPR , [InstrStage<2, [G3_SRU]>]>, + InstrItinData<IIC_SprMTSRIN , [InstrStage<2, [G3_SRU]>]>, + InstrItinData<IIC_SprRFI , [InstrStage<2, [G3_SRU]>]>, + InstrItinData<IIC_SprSC , [InstrStage<2, [G3_SRU]>]>, + InstrItinData<IIC_FPGeneral , [InstrStage<1, [G3_FPU1]>]>, + InstrItinData<IIC_FPAddSub , [InstrStage<1, [G3_FPU1]>]>, + InstrItinData<IIC_FPCompare , [InstrStage<1, [G3_FPU1]>]>, + InstrItinData<IIC_FPDivD , [InstrStage<31, [G3_FPU1]>]>, + InstrItinData<IIC_FPDivS , [InstrStage<17, [G3_FPU1]>]>, + InstrItinData<IIC_FPFused , [InstrStage<2, [G3_FPU1]>]>, + InstrItinData<IIC_FPRes , [InstrStage<10, [G3_FPU1]>]> +]>; diff --git a/capstone/suite/synctools/tablegen/PPC/PPCScheduleG4.td b/capstone/suite/synctools/tablegen/PPC/PPCScheduleG4.td new file mode 100644 index 000000000..340773ef7 --- /dev/null +++ b/capstone/suite/synctools/tablegen/PPC/PPCScheduleG4.td @@ -0,0 +1,96 @@ +//===-- PPCScheduleG4.td - PPC G4 Scheduling Definitions ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the G4 (7400) processor. 
+// +//===----------------------------------------------------------------------===// + +def G4_BPU : FuncUnit; // Branch unit +def G4_SLU : FuncUnit; // Store/load unit +def G4_SRU : FuncUnit; // special register unit +def G4_IU1 : FuncUnit; // integer unit 1 (simple) +def G4_IU2 : FuncUnit; // integer unit 2 (complex) +def G4_FPU1 : FuncUnit; // floating point unit 1 +def G4_VPU : FuncUnit; // vector permutation unit +def G4_VIU1 : FuncUnit; // vector integer unit 1 (simple) +def G4_VIU2 : FuncUnit; // vector integer unit 2 (complex) +def G4_VFPU : FuncUnit; // vector floating point unit + +def G4Itineraries : ProcessorItineraries< + [G4_IU1, G4_IU2, G4_SLU, G4_SRU, G4_BPU, G4_FPU1, + G4_VIU1, G4_VIU2, G4_VPU, G4_VFPU], [], [ + InstrItinData<IIC_IntSimple , [InstrStage<1, [G4_IU1, G4_IU2]>]>, + InstrItinData<IIC_IntGeneral , [InstrStage<1, [G4_IU1, G4_IU2]>]>, + InstrItinData<IIC_IntCompare , [InstrStage<1, [G4_IU1, G4_IU2]>]>, + InstrItinData<IIC_IntDivW , [InstrStage<19, [G4_IU1]>]>, + InstrItinData<IIC_IntMFFS , [InstrStage<3, [G4_FPU1]>]>, + InstrItinData<IIC_IntMFVSCR , [InstrStage<1, [G4_VIU1]>]>, + InstrItinData<IIC_IntMTFSB0 , [InstrStage<3, [G4_FPU1]>]>, + InstrItinData<IIC_IntMulHW , [InstrStage<5, [G4_IU1]>]>, + InstrItinData<IIC_IntMulHWU , [InstrStage<6, [G4_IU1]>]>, + InstrItinData<IIC_IntMulLI , [InstrStage<3, [G4_IU1]>]>, + InstrItinData<IIC_IntRotate , [InstrStage<1, [G4_IU1, G4_IU2]>]>, + InstrItinData<IIC_IntShift , [InstrStage<1, [G4_IU1, G4_IU2]>]>, + InstrItinData<IIC_IntTrapW , [InstrStage<2, [G4_IU1, G4_IU2]>]>, + InstrItinData<IIC_BrB , [InstrStage<1, [G4_BPU]>]>, + InstrItinData<IIC_BrCR , [InstrStage<1, [G4_SRU]>]>, + InstrItinData<IIC_BrMCR , [InstrStage<1, [G4_SRU]>]>, + InstrItinData<IIC_BrMCRX , [InstrStage<1, [G4_SRU]>]>, + InstrItinData<IIC_LdStDCBF , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStDCBI , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStLoad , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStLoadUpd , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStLoadUpdX, [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStStore , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStStoreUpd, [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStDSS , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStICBI , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStSTFD , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStSTFDU , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStLFD , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStLFDU , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStLFDUX , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStLHA , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStLHAU , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStLHAUX , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStLMW , [InstrStage<34, [G4_SLU]>]>, + InstrItinData<IIC_LdStLVecX , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStLWARX , [InstrStage<3, [G4_SLU]>]>, + InstrItinData<IIC_LdStSTVEBX , [InstrStage<2, [G4_SLU]>]>, + InstrItinData<IIC_LdStSTWCX , [InstrStage<5, [G4_SLU]>]>, + InstrItinData<IIC_LdStSync , [InstrStage<8, [G4_SLU]>]>, + InstrItinData<IIC_SprISYNC , [InstrStage<2, [G4_SRU]>]>, + InstrItinData<IIC_SprMFSR , [InstrStage<3, [G4_SRU]>]>, + InstrItinData<IIC_SprMTMSR , [InstrStage<1, [G4_SRU]>]>, + InstrItinData<IIC_SprMTSR , [InstrStage<2, [G4_SRU]>]>, + InstrItinData<IIC_SprTLBSYNC , [InstrStage<8, [G4_SRU]>]>, + InstrItinData<IIC_SprMFCR , [InstrStage<1, [G4_SRU]>]>, + InstrItinData<IIC_SprMFMSR , 
[InstrStage<1, [G4_SRU]>]>, + InstrItinData<IIC_SprMFSPR , [InstrStage<3, [G4_SRU]>]>, + InstrItinData<IIC_SprMFTB , [InstrStage<1, [G4_SRU]>]>, + InstrItinData<IIC_SprMTSPR , [InstrStage<2, [G4_SRU]>]>, + InstrItinData<IIC_SprMTSRIN , [InstrStage<2, [G4_SRU]>]>, + InstrItinData<IIC_SprRFI , [InstrStage<2, [G4_SRU]>]>, + InstrItinData<IIC_SprSC , [InstrStage<2, [G4_SRU]>]>, + InstrItinData<IIC_FPGeneral , [InstrStage<1, [G4_FPU1]>]>, + InstrItinData<IIC_FPAddSub , [InstrStage<1, [G4_FPU1]>]>, + InstrItinData<IIC_FPCompare , [InstrStage<1, [G4_FPU1]>]>, + InstrItinData<IIC_FPDivD , [InstrStage<31, [G4_FPU1]>]>, + InstrItinData<IIC_FPDivS , [InstrStage<17, [G4_FPU1]>]>, + InstrItinData<IIC_FPFused , [InstrStage<1, [G4_FPU1]>]>, + InstrItinData<IIC_FPRes , [InstrStage<10, [G4_FPU1]>]>, + InstrItinData<IIC_VecGeneral , [InstrStage<1, [G4_VIU1]>]>, + InstrItinData<IIC_VecFP , [InstrStage<4, [G4_VFPU]>]>, + InstrItinData<IIC_VecFPCompare, [InstrStage<1, [G4_VIU1]>]>, + InstrItinData<IIC_VecComplex , [InstrStage<3, [G4_VIU2]>]>, + InstrItinData<IIC_VecPerm , [InstrStage<1, [G4_VPU]>]>, + InstrItinData<IIC_VecFPRound , [InstrStage<4, [G4_VFPU]>]>, + InstrItinData<IIC_VecVSL , [InstrStage<1, [G4_VIU1]>]>, + InstrItinData<IIC_VecVSR , [InstrStage<1, [G4_VIU1]>]> +]>; diff --git a/capstone/suite/synctools/tablegen/PPC/PPCScheduleG4Plus.td b/capstone/suite/synctools/tablegen/PPC/PPCScheduleG4Plus.td new file mode 100644 index 000000000..1d9f13fcb --- /dev/null +++ b/capstone/suite/synctools/tablegen/PPC/PPCScheduleG4Plus.td @@ -0,0 +1,112 @@ +//===-- PPCScheduleG4Plus.td - PPC G4+ Scheduling Defs. ----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the G4+ (7450) processor. 
+// +//===----------------------------------------------------------------------===// + +def G4P_BPU : FuncUnit; // Branch unit +def G4P_SLU : FuncUnit; // Store/load unit +def G4P_SRU : FuncUnit; // special register unit +def G4P_IU1 : FuncUnit; // integer unit 1 (simple) +def G4P_IU2 : FuncUnit; // integer unit 2 (complex) +def G4P_IU3 : FuncUnit; // integer unit 3 (simple) +def G4P_IU4 : FuncUnit; // integer unit 4 (simple) +def G4P_FPU1 : FuncUnit; // floating point unit 1 +def G4P_VPU : FuncUnit; // vector permutation unit +def G4P_VIU1 : FuncUnit; // vector integer unit 1 (simple) +def G4P_VIU2 : FuncUnit; // vector integer unit 2 (complex) +def G4P_VFPU : FuncUnit; // vector floating point unit + +def G4PlusItineraries : ProcessorItineraries< + [G4P_IU1, G4P_IU2, G4P_IU3, G4P_IU4, G4P_BPU, G4P_SLU, G4P_FPU1, + G4P_VFPU, G4P_VIU1, G4P_VIU2, G4P_VPU], [], [ + InstrItinData<IIC_IntSimple , [InstrStage<1, [G4P_IU1, G4P_IU2, + G4P_IU3, G4P_IU4]>]>, + InstrItinData<IIC_IntGeneral , [InstrStage<1, [G4P_IU1, G4P_IU2, + G4P_IU3, G4P_IU4]>]>, + InstrItinData<IIC_IntCompare , [InstrStage<1, [G4P_IU1, G4P_IU2, + G4P_IU3, G4P_IU4]>]>, + InstrItinData<IIC_IntDivW , [InstrStage<23, [G4P_IU2]>]>, + InstrItinData<IIC_IntMFFS , [InstrStage<5, [G4P_FPU1]>]>, + InstrItinData<IIC_IntMFVSCR , [InstrStage<2, [G4P_VFPU]>]>, + InstrItinData<IIC_IntMTFSB0 , [InstrStage<5, [G4P_FPU1]>]>, + InstrItinData<IIC_IntMulHW , [InstrStage<4, [G4P_IU2]>]>, + InstrItinData<IIC_IntMulHWU , [InstrStage<4, [G4P_IU2]>]>, + InstrItinData<IIC_IntMulLI , [InstrStage<3, [G4P_IU2]>]>, + InstrItinData<IIC_IntRotate , [InstrStage<1, [G4P_IU1, G4P_IU2, + G4P_IU3, G4P_IU4]>]>, + InstrItinData<IIC_IntShift , [InstrStage<2, [G4P_IU1, G4P_IU2, + G4P_IU3, G4P_IU4]>]>, + InstrItinData<IIC_IntTrapW , [InstrStage<2, [G4P_IU1, G4P_IU2, + G4P_IU3, G4P_IU4]>]>, + InstrItinData<IIC_BrB , [InstrStage<1, [G4P_BPU]>]>, + InstrItinData<IIC_BrCR , [InstrStage<2, [G4P_IU2]>]>, + InstrItinData<IIC_BrMCR , [InstrStage<2, [G4P_IU2]>]>, + InstrItinData<IIC_BrMCRX , [InstrStage<2, [G4P_IU2]>]>, + InstrItinData<IIC_LdStDCBF , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStDCBI , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStLoad , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStLoadUpd , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStLoadUpdX, [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStStore , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStStoreUpd, [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStDSS , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStICBI , [InstrStage<3, [G4P_IU2]>]>, + InstrItinData<IIC_LdStSTFD , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStSTFDU , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStLFD , [InstrStage<4, [G4P_SLU]>]>, + InstrItinData<IIC_LdStLFDU , [InstrStage<4, [G4P_SLU]>]>, + InstrItinData<IIC_LdStLFDUX , [InstrStage<4, [G4P_SLU]>]>, + InstrItinData<IIC_LdStLHA , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStLHAU , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStLHAUX , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStLMW , [InstrStage<37, [G4P_SLU]>]>, + InstrItinData<IIC_LdStLVecX , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStLWA , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStLWARX , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStSTD , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStSTDCX , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStSTDU , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStSTDUX , 
[InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStSTVEBX , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStSTWCX , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_LdStSync , [InstrStage<35, [G4P_SLU]>]>, + InstrItinData<IIC_SprISYNC , [InstrStage<0, [G4P_IU1, G4P_IU2, + G4P_IU3, G4P_IU4]>]>, + InstrItinData<IIC_SprMFSR , [InstrStage<4, [G4P_IU2]>]>, + InstrItinData<IIC_SprMTMSR , [InstrStage<2, [G4P_IU2]>]>, + InstrItinData<IIC_SprMTSR , [InstrStage<2, [G4P_IU2]>]>, + InstrItinData<IIC_SprTLBSYNC , [InstrStage<3, [G4P_SLU]>]>, + InstrItinData<IIC_SprMFCR , [InstrStage<2, [G4P_IU2]>]>, + InstrItinData<IIC_SprMFMSR , [InstrStage<3, [G4P_IU2]>]>, + InstrItinData<IIC_SprMFSPR , [InstrStage<4, [G4P_IU2]>]>, + InstrItinData<IIC_SprMFTB , [InstrStage<5, [G4P_IU2]>]>, + InstrItinData<IIC_SprMTSPR , [InstrStage<2, [G4P_IU2]>]>, + InstrItinData<IIC_SprMTSRIN , [InstrStage<2, [G4P_IU2]>]>, + InstrItinData<IIC_SprRFI , [InstrStage<1, [G4P_IU1, G4P_IU2, + G4P_IU3, G4P_IU4]>]>, + InstrItinData<IIC_SprSC , [InstrStage<0, [G4P_IU1, G4P_IU2, + G4P_IU3, G4P_IU4]>]>, + InstrItinData<IIC_FPGeneral , [InstrStage<5, [G4P_FPU1]>]>, + InstrItinData<IIC_FPAddSub , [InstrStage<5, [G4P_FPU1]>]>, + InstrItinData<IIC_FPCompare , [InstrStage<5, [G4P_FPU1]>]>, + InstrItinData<IIC_FPDivD , [InstrStage<35, [G4P_FPU1]>]>, + InstrItinData<IIC_FPDivS , [InstrStage<21, [G4P_FPU1]>]>, + InstrItinData<IIC_FPFused , [InstrStage<5, [G4P_FPU1]>]>, + InstrItinData<IIC_FPRes , [InstrStage<14, [G4P_FPU1]>]>, + InstrItinData<IIC_VecGeneral , [InstrStage<1, [G4P_VIU1]>]>, + InstrItinData<IIC_VecFP , [InstrStage<4, [G4P_VFPU]>]>, + InstrItinData<IIC_VecFPCompare, [InstrStage<2, [G4P_VFPU]>]>, + InstrItinData<IIC_VecComplex , [InstrStage<4, [G4P_VIU2]>]>, + InstrItinData<IIC_VecPerm , [InstrStage<2, [G4P_VPU]>]>, + InstrItinData<IIC_VecFPRound , [InstrStage<4, [G4P_VIU1]>]>, + InstrItinData<IIC_VecVSL , [InstrStage<2, [G4P_VPU]>]>, + InstrItinData<IIC_VecVSR , [InstrStage<2, [G4P_VPU]>]> +]>; diff --git a/capstone/suite/synctools/tablegen/PPC/PPCScheduleG5.td b/capstone/suite/synctools/tablegen/PPC/PPCScheduleG5.td new file mode 100644 index 000000000..b5a9f96d4 --- /dev/null +++ b/capstone/suite/synctools/tablegen/PPC/PPCScheduleG5.td @@ -0,0 +1,130 @@ +//===-- PPCScheduleG5.td - PPC G5 Scheduling Definitions ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the G5 (970) processor. 
+// +//===----------------------------------------------------------------------===// + +def G5_BPU : FuncUnit; // Branch unit +def G5_SLU : FuncUnit; // Store/load unit +def G5_SRU : FuncUnit; // special register unit +def G5_IU1 : FuncUnit; // integer unit 1 (simple) +def G5_IU2 : FuncUnit; // integer unit 2 (complex) +def G5_FPU1 : FuncUnit; // floating point unit 1 +def G5_FPU2 : FuncUnit; // floating point unit 2 +def G5_VPU : FuncUnit; // vector permutation unit +def G5_VIU1 : FuncUnit; // vector integer unit 1 (simple) +def G5_VIU2 : FuncUnit; // vector integer unit 2 (complex) +def G5_VFPU : FuncUnit; // vector floating point unit + +def G5Itineraries : ProcessorItineraries< + [G5_IU1, G5_IU2, G5_SLU, G5_BPU, G5_FPU1, G5_FPU2, + G5_VFPU, G5_VIU1, G5_VIU2, G5_VPU], [], [ + InstrItinData<IIC_IntSimple , [InstrStage<2, [G5_IU1, G5_IU2]>]>, + InstrItinData<IIC_IntGeneral , [InstrStage<2, [G5_IU1, G5_IU2]>]>, + InstrItinData<IIC_IntCompare , [InstrStage<3, [G5_IU1, G5_IU2]>]>, + InstrItinData<IIC_IntDivD , [InstrStage<68, [G5_IU1]>]>, + InstrItinData<IIC_IntDivW , [InstrStage<36, [G5_IU1]>]>, + InstrItinData<IIC_IntMFFS , [InstrStage<6, [G5_IU2]>]>, + InstrItinData<IIC_IntMFVSCR , [InstrStage<1, [G5_VFPU]>]>, + InstrItinData<IIC_IntMTFSB0 , [InstrStage<6, [G5_FPU1, G5_FPU2]>]>, + InstrItinData<IIC_IntMulHD , [InstrStage<7, [G5_IU1, G5_IU2]>]>, + InstrItinData<IIC_IntMulHW , [InstrStage<5, [G5_IU1, G5_IU2]>]>, + InstrItinData<IIC_IntMulHWU , [InstrStage<5, [G5_IU1, G5_IU2]>]>, + InstrItinData<IIC_IntMulLI , [InstrStage<4, [G5_IU1, G5_IU2]>]>, + InstrItinData<IIC_IntRFID , [InstrStage<1, [G5_IU2]>]>, + InstrItinData<IIC_IntRotateD , [InstrStage<2, [G5_IU1, G5_IU2]>]>, + InstrItinData<IIC_IntRotateDI , [InstrStage<2, [G5_IU1, G5_IU2]>]>, + InstrItinData<IIC_IntRotate , [InstrStage<4, [G5_IU1, G5_IU2]>]>, + InstrItinData<IIC_IntShift , [InstrStage<2, [G5_IU1, G5_IU2]>]>, + InstrItinData<IIC_IntTrapD , [InstrStage<1, [G5_IU1, G5_IU2]>]>, + InstrItinData<IIC_IntTrapW , [InstrStage<1, [G5_IU1, G5_IU2]>]>, + InstrItinData<IIC_BrB , [InstrStage<1, [G5_BPU]>]>, + InstrItinData<IIC_BrCR , [InstrStage<4, [G5_BPU]>]>, + InstrItinData<IIC_BrMCR , [InstrStage<2, [G5_BPU]>]>, + InstrItinData<IIC_BrMCRX , [InstrStage<3, [G5_BPU]>]>, + InstrItinData<IIC_LdStDCBF , [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_LdStLoad , [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_LdStLoadUpd , [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_LdStLoadUpdX, [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_LdStStore , [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_LdStStoreUpd, [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_LdStDSS , [InstrStage<10, [G5_SLU]>]>, + InstrItinData<IIC_LdStICBI , [InstrStage<40, [G5_SLU]>]>, + InstrItinData<IIC_LdStSTFD , [InstrStage<4, [G5_SLU]>]>, + InstrItinData<IIC_LdStSTFDU , [InstrStage<4, [G5_SLU]>]>, + InstrItinData<IIC_LdStLD , [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_LdStLDU , [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_LdStLDUX , [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_LdStLDARX , [InstrStage<11, [G5_SLU]>]>, + InstrItinData<IIC_LdStLFD , [InstrStage<3, [G5_SLU]>]>, + InstrItinData<IIC_LdStLFDU , [InstrStage<5, [G5_SLU]>]>, + InstrItinData<IIC_LdStLFDUX , [InstrStage<5, [G5_SLU]>]>, + InstrItinData<IIC_LdStLHA , [InstrStage<5, [G5_SLU]>]>, + InstrItinData<IIC_LdStLHAU , [InstrStage<5, [G5_SLU]>]>, + InstrItinData<IIC_LdStLHAUX , [InstrStage<5, [G5_SLU]>]>, + InstrItinData<IIC_LdStLMW , [InstrStage<64, [G5_SLU]>]>, + InstrItinData<IIC_LdStLVecX , 
[InstrStage<3, [G5_SLU]>]>,
+  InstrItinData<IIC_LdStLWA     , [InstrStage<5, [G5_SLU]>]>,
+  InstrItinData<IIC_LdStLWARX   , [InstrStage<11, [G5_SLU]>]>,
+  InstrItinData<IIC_LdStSLBIA   , [InstrStage<40, [G5_SLU]>]>, // needs work
+  InstrItinData<IIC_LdStSLBIE   , [InstrStage<2, [G5_SLU]>]>,
+  InstrItinData<IIC_LdStSTD     , [InstrStage<3, [G5_SLU]>]>,
+  InstrItinData<IIC_LdStSTDU    , [InstrStage<3, [G5_SLU]>]>,
+  InstrItinData<IIC_LdStSTDUX   , [InstrStage<3, [G5_SLU]>]>,
+  InstrItinData<IIC_LdStSTDCX   , [InstrStage<11, [G5_SLU]>]>,
+  InstrItinData<IIC_LdStSTVEBX  , [InstrStage<5, [G5_SLU]>]>,
+  InstrItinData<IIC_LdStSTWCX   , [InstrStage<11, [G5_SLU]>]>,
+  InstrItinData<IIC_LdStSync    , [InstrStage<35, [G5_SLU]>]>,
+  InstrItinData<IIC_SprISYNC    , [InstrStage<40, [G5_SLU]>]>, // needs work
+  InstrItinData<IIC_SprMFSR     , [InstrStage<3, [G5_SLU]>]>,
+  InstrItinData<IIC_SprMTMSR    , [InstrStage<3, [G5_SLU]>]>,
+  InstrItinData<IIC_SprMTSR     , [InstrStage<3, [G5_SLU]>]>,
+  InstrItinData<IIC_SprTLBSYNC  , [InstrStage<3, [G5_SLU]>]>,
+  InstrItinData<IIC_SprMFCR     , [InstrStage<2, [G5_IU2]>]>,
+  InstrItinData<IIC_SprMFCRF    , [InstrStage<2, [G5_IU2]>]>,
+  InstrItinData<IIC_SprMFMSR    , [InstrStage<3, [G5_IU2]>]>,
+  InstrItinData<IIC_SprMFSPR    , [InstrStage<3, [G5_IU2]>]>,
+  InstrItinData<IIC_SprMFTB     , [InstrStage<10, [G5_IU2]>]>,
+  InstrItinData<IIC_SprMTSPR    , [InstrStage<8, [G5_IU2]>]>,
+  InstrItinData<IIC_SprSC       , [InstrStage<1, [G5_IU2]>]>,
+  InstrItinData<IIC_FPGeneral   , [InstrStage<6, [G5_FPU1, G5_FPU2]>]>,
+  InstrItinData<IIC_FPAddSub    , [InstrStage<6, [G5_FPU1, G5_FPU2]>]>,
+  InstrItinData<IIC_FPCompare   , [InstrStage<8, [G5_FPU1, G5_FPU2]>]>,
+  InstrItinData<IIC_FPDivD      , [InstrStage<33, [G5_FPU1, G5_FPU2]>]>,
+  InstrItinData<IIC_FPDivS      , [InstrStage<33, [G5_FPU1, G5_FPU2]>]>,
+  InstrItinData<IIC_FPFused     , [InstrStage<6, [G5_FPU1, G5_FPU2]>]>,
+  InstrItinData<IIC_FPRes       , [InstrStage<6, [G5_FPU1, G5_FPU2]>]>,
+  InstrItinData<IIC_FPSqrtD     , [InstrStage<40, [G5_FPU1, G5_FPU2]>]>,
+  InstrItinData<IIC_FPSqrtS     , [InstrStage<40, [G5_FPU1, G5_FPU2]>]>,
+  InstrItinData<IIC_VecGeneral  , [InstrStage<2, [G5_VIU1]>]>,
+  InstrItinData<IIC_VecFP       , [InstrStage<8, [G5_VFPU]>]>,
+  InstrItinData<IIC_VecFPCompare, [InstrStage<2, [G5_VFPU]>]>,
+  InstrItinData<IIC_VecComplex  , [InstrStage<5, [G5_VIU2]>]>,
+  InstrItinData<IIC_VecPerm     , [InstrStage<3, [G5_VPU]>]>,
+  InstrItinData<IIC_VecFPRound  , [InstrStage<8, [G5_VFPU]>]>,
+  InstrItinData<IIC_VecVSL      , [InstrStage<2, [G5_VIU1]>]>,
+  InstrItinData<IIC_VecVSR      , [InstrStage<3, [G5_VPU]>]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// G5 machine model for scheduling and other instruction cost heuristics.
+
+def G5Model : SchedMachineModel {
+  let IssueWidth = 4;  // 4 (non-branch) instructions are dispatched per cycle.
+  let LoadLatency = 3; // Optimistic load latency assuming bypass.
+                       // This is overridden by OperandCycles if the
+                       // Itineraries are queried instead.
+  let MispredictPenalty = 16;
+
+  let CompleteModel = 0;
+
+  let Itineraries = G5Itineraries;
+}
+
diff --git a/capstone/suite/synctools/tablegen/PPC/PPCScheduleP7.td b/capstone/suite/synctools/tablegen/PPC/PPCScheduleP7.td
new file mode 100644
index 000000000..a8678f569
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/PPC/PPCScheduleP7.td
@@ -0,0 +1,397 @@
+//===-- PPCScheduleP7.td - PPC P7 Scheduling Definitions ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the POWER7 processor. +// +//===----------------------------------------------------------------------===// + +// Primary reference: +// IBM POWER7 multicore server processor +// B. Sinharoy, et al. +// IBM J. Res. & Dev. (55) 3. May/June 2011. + +// Scheduling for the P7 involves tracking two types of resources: +// 1. The dispatch bundle slots +// 2. The functional unit resources + +// Dispatch units: +def P7_DU1 : FuncUnit; +def P7_DU2 : FuncUnit; +def P7_DU3 : FuncUnit; +def P7_DU4 : FuncUnit; +def P7_DU5 : FuncUnit; +def P7_DU6 : FuncUnit; + +def P7_LS1 : FuncUnit; // Load/Store pipeline 1 +def P7_LS2 : FuncUnit; // Load/Store pipeline 2 + +def P7_FX1 : FuncUnit; // FX pipeline 1 +def P7_FX2 : FuncUnit; // FX pipeline 2 + +// VS pipeline 1 (vector integer ops. always here) +def P7_VS1 : FuncUnit; // VS pipeline 1 +// VS pipeline 2 (128-bit stores and perms. here) +def P7_VS2 : FuncUnit; // VS pipeline 2 + +def P7_CRU : FuncUnit; // CR unit (CR logicals and move-from-SPRs) +def P7_BRU : FuncUnit; // BR unit + +// Notes: +// Each LSU pipeline can also execute FX add and logical instructions. +// Each LSU pipeline can complete a load or store in one cycle. +// +// Each store is broken into two parts, AGEN goes to the LSU while a +// "data steering" op. goes to the FXU or VSU. +// +// FX loads have a two cycle load-to-use latency (so one "bubble" cycle). +// VSU loads have a three cycle load-to-use latency (so two "bubble" cycle). +// +// Frequent FX ops. take only one cycle and results can be used again in the +// next cycle (there is a self-bypass). Getting results from the other FX +// pipeline takes an additional cycle. +// +// The VSU XS is similar to the POWER6, but with a pipeline length of 2 cycles +// (instead of 3 cycles on the POWER6). VSU XS handles vector FX-style ops. +// Dispatch of an instruction to VS1 that uses four single prec. inputs +// (either to a float or XC op). prevents dispatch in that cycle to VS2 of any +// floating point instruction. +// +// The VSU PM is similar to the POWER6, but with a pipeline length of 3 cycles +// (instead of 4 cycles on the POWER6). vsel is handled by the PM pipeline +// (unlike on the POWER6). +// +// FMA from the VSUs can forward results in 6 cycles. VS1 XS and vector FP +// share the same write-back, and have a 5-cycle latency difference, so the +// IFU/IDU will not dispatch an XS instructon 5 cycles after a vector FP +// op. has been dispatched to VS1. +// +// Three cycles after an L1 cache hit, a dependent VSU instruction can issue. +// +// Instruction dispatch groups have (at most) four non-branch instructions, and +// two branches. Unlike on the POWER4/5, a branch does not automatically +// end the dispatch group, but a second branch must be the last in the group. 
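+
+// As an illustrative reading of the entries below (this is a convention of
+// the itinerary format, not an addition to the model): in
+//
+//   InstrItinData<IIC_LdStLoad, [InstrStage<1, [P7_DU1, P7_DU2,
+//                                               P7_DU3, P7_DU4], 0>,
+//                                InstrStage<1, [P7_LS1, P7_LS2]>],
+//                               [2, 1, 1]>
+//
+// the trailing operand-cycle list [2, 1, 1] encodes the two cycle FX
+// load-to-use latency described above: the defined register is ready after
+// 2 cycles, while both address operands are read in cycle 1.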
+ +def P7Itineraries : ProcessorItineraries< + [P7_DU1, P7_DU2, P7_DU3, P7_DU4, P7_DU5, P7_DU6, + P7_LS1, P7_LS2, P7_FX1, P7_FX2, P7_VS1, P7_VS2, P7_CRU, P7_BRU], [], [ + InstrItinData<IIC_IntSimple , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_FX1, P7_FX2, + P7_LS1, P7_LS2]>], + [1, 1, 1]>, + InstrItinData<IIC_IntGeneral , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [1, 1, 1]>, + InstrItinData<IIC_IntISEL, [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_FX1, P7_FX2], 0>, + InstrStage<1, [P7_BRU]>], + [1, 1, 1, 1]>, + InstrItinData<IIC_IntCompare , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [1, 1, 1]>, + // FIXME: Add record-form itinerary data. + InstrItinData<IIC_IntDivW , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<36, [P7_FX1, P7_FX2]>], + [36, 1, 1]>, + InstrItinData<IIC_IntDivD , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<68, [P7_FX1, P7_FX2]>], + [68, 1, 1]>, + InstrItinData<IIC_IntMulHW , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [4, 1, 1]>, + InstrItinData<IIC_IntMulHWU , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [4, 1, 1]>, + InstrItinData<IIC_IntMulLI , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [4, 1, 1]>, + InstrItinData<IIC_IntRotate , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [1, 1, 1]>, + InstrItinData<IIC_IntRotateD , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [1, 1, 1]>, + InstrItinData<IIC_IntShift , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [1, 1, 1]>, + InstrItinData<IIC_IntTrapW , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [1, 1]>, + InstrItinData<IIC_IntTrapD , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [1, 1]>, + InstrItinData<IIC_BrB , [InstrStage<1, [P7_DU5, P7_DU6], 0>, + InstrStage<1, [P7_BRU]>], + [3, 1, 1]>, + InstrItinData<IIC_BrCR , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_CRU]>], + [3, 1, 1]>, + InstrItinData<IIC_BrMCR , [InstrStage<1, [P7_DU5, P7_DU6], 0>, + InstrStage<1, [P7_BRU]>], + [3, 1, 1]>, + InstrItinData<IIC_BrMCRX , [InstrStage<1, [P7_DU5, P7_DU6], 0>, + InstrStage<1, [P7_BRU]>], + [3, 1, 1]>, + InstrItinData<IIC_LdStLoad , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>], + [2, 1, 1]>, + InstrItinData<IIC_LdStLoadUpd , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [2, 2, 1, 1]>, + InstrItinData<IIC_LdStLoadUpdX, [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [3, 3, 1, 1]>, + InstrItinData<IIC_LdStLD , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>], + [2, 1, 1]>, + InstrItinData<IIC_LdStLDU , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [2, 2, 1, 1]>, + InstrItinData<IIC_LdStLDUX , [InstrStage<1, [P7_DU1], 0>, + 
InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [3, 3, 1, 1]>, + InstrItinData<IIC_LdStLFD , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>], + [3, 1, 1]>, + InstrItinData<IIC_LdStLVecX , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>], + [3, 1, 1]>, + InstrItinData<IIC_LdStLFDU , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [3, 3, 1, 1]>, + InstrItinData<IIC_LdStLFDUX , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [3, 3, 1, 1]>, + InstrItinData<IIC_LdStLHA , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [3, 1, 1]>, + InstrItinData<IIC_LdStLHAU , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [4, 4, 1, 1]>, + InstrItinData<IIC_LdStLHAUX , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [4, 4, 1, 1]>, + InstrItinData<IIC_LdStLWA , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [3, 1, 1]>, + InstrItinData<IIC_LdStLWARX, [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>], + [3, 1, 1]>, + InstrItinData<IIC_LdStLDARX, [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>], + [3, 1, 1]>, + InstrItinData<IIC_LdStLMW , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>], + [2, 1, 1]>, + InstrItinData<IIC_LdStStore , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [1, 1, 1]>, + InstrItinData<IIC_LdStSTD , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [1, 1, 1]>, + InstrItinData<IIC_LdStSTDU , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [2, 1, 1, 1]>, + InstrItinData<IIC_LdStSTDUX , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [2, 1, 1, 1]>, + InstrItinData<IIC_LdStSTFD , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [1, 1, 1]>, + InstrItinData<IIC_LdStSTFDU , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [2, 1, 1, 1]>, + InstrItinData<IIC_LdStSTVEBX , [InstrStage<1, [P7_DU1, 
P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_VS2]>], + [1, 1, 1]>, + InstrItinData<IIC_LdStSTDCX , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>], + [1, 1, 1]>, + InstrItinData<IIC_LdStSTWCX , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>], + [1, 1, 1]>, + InstrItinData<IIC_BrMCRX , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_CRU]>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [3, 1]>, // mtcr + InstrItinData<IIC_SprMFCR , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_CRU]>], + [6, 1]>, + InstrItinData<IIC_SprMFCRF , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_CRU]>], + [3, 1]>, + InstrItinData<IIC_SprMTSPR , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_FX1]>], + [4, 1]>, // mtctr + InstrItinData<IIC_FPGeneral , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [5, 1, 1]>, + InstrItinData<IIC_FPAddSub , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [5, 1, 1]>, + InstrItinData<IIC_FPCompare , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [8, 1, 1]>, + InstrItinData<IIC_FPDivD , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [33, 1, 1]>, + InstrItinData<IIC_FPDivS , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [27, 1, 1]>, + InstrItinData<IIC_FPSqrtD , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [44, 1, 1]>, + InstrItinData<IIC_FPSqrtS , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [32, 1, 1]>, + InstrItinData<IIC_FPFused , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [5, 1, 1, 1]>, + InstrItinData<IIC_FPRes , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [5, 1, 1]>, + InstrItinData<IIC_VecGeneral , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_VS1]>], + [2, 1, 1]>, + InstrItinData<IIC_VecVSL , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_VS1]>], + [2, 1, 1]>, + InstrItinData<IIC_VecVSR , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_VS1]>], + [2, 1, 1]>, + InstrItinData<IIC_VecFP , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [6, 1, 1]>, + InstrItinData<IIC_VecFPCompare, [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [6, 1, 1]>, + InstrItinData<IIC_VecFPRound , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [6, 1, 1]>, + InstrItinData<IIC_VecComplex , [InstrStage<1, [P7_DU1], 0>, + InstrStage<1, [P7_VS1]>], + [7, 1, 1]>, + InstrItinData<IIC_VecPerm , [InstrStage<1, [P7_DU1, P7_DU2], 0>, + InstrStage<1, [P7_VS2]>], + [3, 1, 1]> +]>; + +// ===---------------------------------------------------------------------===// +// P7 machine model for scheduling and other instruction cost heuristics. + +def P7Model : SchedMachineModel { + let IssueWidth = 6; // 4 (non-branch) instructions are dispatched per cycle. + // Note that the dispatch bundle size is 6 (including + // branches), but the total internal issue bandwidth per + // cycle (from all queues) is 8. 
+
+  let LoadLatency = 3; // Optimistic load latency assuming bypass.
+                       // This is overridden by OperandCycles if the
+                       // Itineraries are queried instead.
+  let MispredictPenalty = 16;
+
+  // Try to make sure we have at least 10 dispatch groups in a loop.
+  let LoopMicroOpBufferSize = 40;
+
+  let CompleteModel = 0;
+
+  let Itineraries = P7Itineraries;
+}
+
diff --git a/capstone/suite/synctools/tablegen/PPC/PPCScheduleP8.td b/capstone/suite/synctools/tablegen/PPC/PPCScheduleP8.td
new file mode 100644
index 000000000..79963dd6a
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/PPC/PPCScheduleP8.td
@@ -0,0 +1,406 @@
+//===-- PPCScheduleP8.td - PPC P8 Scheduling Definitions ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the POWER8 processor.
+//
+//===----------------------------------------------------------------------===//
+
+// Scheduling for the P8 involves tracking two types of resources:
+// 1. The dispatch bundle slots
+// 2. The functional unit resources
+
+// Dispatch units:
+def P8_DU1 : FuncUnit;
+def P8_DU2 : FuncUnit;
+def P8_DU3 : FuncUnit;
+def P8_DU4 : FuncUnit;
+def P8_DU5 : FuncUnit;
+def P8_DU6 : FuncUnit;
+def P8_DU7 : FuncUnit; // Only branch instructions will use DU7,DU8
+def P8_DU8 : FuncUnit;
+
+// 10 insns per cycle (2-LU, 2-LSU, 2-FXU, 2-FPU, 1-CRU, 1-BRU).
+
+def P8_LU1 : FuncUnit; // Loads or fixed-point operations 1
+def P8_LU2 : FuncUnit; // Loads or fixed-point operations 2
+
+// Load/Store pipelines can handle Stores, fixed-point loads, and simple
+// fixed-point operations.
+def P8_LSU1 : FuncUnit; // Load/Store pipeline 1
+def P8_LSU2 : FuncUnit; // Load/Store pipeline 2
+
+// Fixed Point unit
+def P8_FXU1 : FuncUnit; // FX pipeline 1
+def P8_FXU2 : FuncUnit; // FX pipeline 2
+
+// The Floating-Point Unit (FPU) and Vector Media Extension (VMX) units
+// are combined on P7 and newer into a Vector Scalar Unit (VSU).
+// The P8 instruction latency documents still refer to the unit as the
+// FPU, so keep in mind that FPU==VSU.
+// In contrast to the P7, the VMX units on P8 are symmetric, so there is no
+// need to split vector integer ops or 128-bit load/store/perms to specific
+// units.
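+//
+// For example (an illustrative cross-reference, not an addition to the
+// model): compare the IIC_VecPerm entries in this file and in
+// PPCScheduleP7.td. On the P7 a permute can only issue to VS2,
+//
+//   InstrStage<1, [P7_VS2]>
+//
+// while on the P8 it may use either pipeline:
+//
+//   InstrStage<1, [P8_FPU1, P8_FPU2]>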
+def P8_FPU1 : FuncUnit; // VS pipeline 1 +def P8_FPU2 : FuncUnit; // VS pipeline 2 + +def P8_CRU : FuncUnit; // CR unit (CR logicals and move-from-SPRs) +def P8_BRU : FuncUnit; // BR unit + +def P8Itineraries : ProcessorItineraries< + [P8_DU1, P8_DU2, P8_DU3, P8_DU4, P8_DU5, P8_DU6, P8_DU7, P8_DU8, + P8_LU1, P8_LU2, P8_LSU1, P8_LSU2, P8_FXU1, P8_FXU2, + P8_FPU1, P8_FPU2, P8_CRU, P8_BRU], [], [ + InstrItinData<IIC_IntSimple , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, + P8_DU4, P8_DU5, P8_DU6], 0>, + InstrStage<1, [P8_FXU1, P8_FXU2, + P8_LU1, P8_LU2, + P8_LSU1, P8_LSU2]>], + [1, 1, 1]>, + InstrItinData<IIC_IntGeneral , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, + P8_DU4, P8_DU5, P8_DU6], 0>, + InstrStage<1, [P8_FXU1, P8_FXU2, P8_LU1, + P8_LU2, P8_LSU1, P8_LSU2]>], + [1, 1, 1]>, + InstrItinData<IIC_IntISEL, [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_FXU1, P8_FXU2], 0>, + InstrStage<1, [P8_BRU]>], + [1, 1, 1, 1]>, + InstrItinData<IIC_IntCompare , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, + P8_DU4, P8_DU5, P8_DU6], 0>, + InstrStage<1, [P8_FXU1, P8_FXU2]>], + [1, 1, 1]>, + InstrItinData<IIC_IntDivW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, + P8_DU4, P8_DU5, P8_DU6], 0>, + InstrStage<15, [P8_FXU1, P8_FXU2]>], + [15, 1, 1]>, + InstrItinData<IIC_IntDivD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, + P8_DU4, P8_DU5, P8_DU6], 0>, + InstrStage<23, [P8_FXU1, P8_FXU2]>], + [23, 1, 1]>, + InstrItinData<IIC_IntMulHW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, + P8_DU4, P8_DU5, P8_DU6], 0>, + InstrStage<1, [P8_FXU1, P8_FXU2]>], + [4, 1, 1]>, + InstrItinData<IIC_IntMulHWU , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, + P8_DU4, P8_DU5, P8_DU6], 0>, + InstrStage<1, [P8_FXU1, P8_FXU2]>], + [4, 1, 1]>, + InstrItinData<IIC_IntMulLI , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, + P8_DU4, P8_DU5, P8_DU6], 0>, + InstrStage<1, [P8_FXU1, P8_FXU2]>], + [4, 1, 1]>, + InstrItinData<IIC_IntRotate , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, + P8_DU4, P8_DU5, P8_DU6], 0>, + InstrStage<1, [P8_FXU1, P8_FXU2]>], + [1, 1, 1]>, + InstrItinData<IIC_IntRotateD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, + P8_DU4, P8_DU5, P8_DU6], 0>, + InstrStage<1, [P8_FXU1, P8_FXU2]>], + [1, 1, 1]>, + InstrItinData<IIC_IntShift , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, + P8_DU4, P8_DU5, P8_DU6], 0>, + InstrStage<1, [P8_FXU1, P8_FXU2]>], + [1, 1, 1]>, + InstrItinData<IIC_IntTrapW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, + P8_DU4, P8_DU5, P8_DU6], 0>, + InstrStage<1, [P8_FXU1, P8_FXU2]>], + [1, 1]>, + InstrItinData<IIC_IntTrapD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, + P8_DU4, P8_DU5, P8_DU6], 0>, + InstrStage<1, [P8_FXU1, P8_FXU2]>], + [1, 1]>, + InstrItinData<IIC_BrB , [InstrStage<1, [P8_DU7, P8_DU8], 0>, + InstrStage<1, [P8_BRU]>], + [3, 1, 1]>, + // FIXME - the Br* groups below are not branch related, so should probably + // be renamed. + // IIC_BrCR consists of the cr* instructions. (crand,crnor,creqv, etc). + // and should be 'First' in dispatch. + InstrItinData<IIC_BrCR , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_CRU]>], + [3, 1, 1]>, + // IIC_BrMCR consists of the mcrf instruction. + InstrItinData<IIC_BrMCR , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, + P8_DU4, P8_DU5, P8_DU6], 0>, + InstrStage<1, [P8_CRU]>], + [3, 1, 1]>, + // IIC_BrMCRX consists of mcrxr (obsolete instruction) and mtcrf, which + // should be first in the dispatch group. 
+ InstrItinData<IIC_BrMCRX , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_FXU1, P8_FXU2]>], + [3, 1, 1]>, + InstrItinData<IIC_BrMCRX , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_FXU1, P8_FXU2]>], + [3, 1]>, + InstrItinData<IIC_LdStLoad , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, + P8_DU4, P8_DU5, P8_DU6], 0>, + InstrStage<1, [P8_LSU1, P8_LSU2, + P8_LU1, P8_LU2]>], + [2, 1, 1]>, + InstrItinData<IIC_LdStLoadUpd , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_DU2], 0>, + InstrStage<1, [P8_LSU1, P8_LSU2, + P8_LU1, P8_LU2 ], 0>, + InstrStage<1, [P8_FXU1, P8_FXU2]>], + [2, 2, 1, 1]>, + // Update-Indexed form loads/stores are no longer first and last in the + // dispatch group. They are simply cracked, so require DU1,DU2. + InstrItinData<IIC_LdStLoadUpdX, [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_DU2], 0>, + InstrStage<1, [P8_LSU1, P8_LSU2, + P8_LU1, P8_LU2], 0>, + InstrStage<1, [P8_FXU1, P8_FXU2]>], + [3, 3, 1, 1]>, + InstrItinData<IIC_LdStLD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, + P8_DU4, P8_DU5, P8_DU6], 0>, + InstrStage<1, [P8_LSU1, P8_LSU2, + P8_LU1, P8_LU2]>], + [2, 1, 1]>, + InstrItinData<IIC_LdStLDU , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_DU2], 0>, + InstrStage<1, [P8_LSU1, P8_LSU2, + P8_LU1, P8_LU2], 0>, + InstrStage<1, [P8_FXU1, P8_FXU2]>], + [2, 2, 1, 1]>, + InstrItinData<IIC_LdStLDUX , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_DU2], 0>, + InstrStage<1, [P8_LSU1, P8_LSU2, + P8_LU1, P8_LU2], 0>, + InstrStage<1, [P8_FXU1, P8_FXU2]>], + [3, 3, 1, 1]>, + InstrItinData<IIC_LdStLFD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, + P8_DU4, P8_DU5, P8_DU6], 0>, + InstrStage<1, [P8_LU1, P8_LU2]>], + [3, 1, 1]>, + InstrItinData<IIC_LdStLVecX , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, + P8_DU4, P8_DU5, P8_DU6], 0>, + InstrStage<1, [P8_LU1, P8_LU2]>], + [3, 1, 1]>, + InstrItinData<IIC_LdStLFDU , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_DU2], 0>, + InstrStage<1, [P8_LU1, P8_LU2], 0>, + InstrStage<1, [P8_FXU1, P8_FXU2]>], + [3, 3, 1, 1]>, + InstrItinData<IIC_LdStLFDUX , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_DU2], 0>, + InstrStage<1, [P8_LU1, P8_LU2], 0>, + InstrStage<1, [P8_FXU1, P8_FXU2]>], + [3, 3, 1, 1]>, + InstrItinData<IIC_LdStLHA , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_DU2], 0>, + InstrStage<1, [P8_LSU1, P8_LSU2, + P8_LU1, P8_LU2], 0>, + InstrStage<1, [P8_FXU1, P8_FXU2, + P8_LU1, P8_LU2]>], + [3, 1, 1]>, + InstrItinData<IIC_LdStLHAU , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_DU2], 0>, + InstrStage<1, [P8_LSU1, P8_LSU2, + P8_LU1, P8_LU2], 0>, + InstrStage<1, [P8_FXU1, P8_FXU2]>, + InstrStage<1, [P8_FXU1, P8_FXU2]>], + [4, 4, 1, 1]>, + // first+last in dispatch group. 
+  InstrItinData<IIC_LdStLHAUX , [InstrStage<1, [P8_DU1], 0>,
+                                 InstrStage<1, [P8_DU2], 0>,
+                                 InstrStage<1, [P8_DU3], 0>,
+                                 InstrStage<1, [P8_DU4], 0>,
+                                 InstrStage<1, [P8_DU5], 0>,
+                                 InstrStage<1, [P8_DU6], 0>,
+                                 InstrStage<1, [P8_LSU1, P8_LSU2,
+                                                P8_LU1, P8_LU2], 0>,
+                                 InstrStage<1, [P8_FXU1, P8_FXU2]>,
+                                 InstrStage<1, [P8_FXU1, P8_FXU2]>],
+                                [4, 4, 1, 1]>,
+  InstrItinData<IIC_LdStLWA , [InstrStage<1, [P8_DU1], 0>,
+                               InstrStage<1, [P8_DU2], 0>,
+                               InstrStage<1, [P8_LSU1, P8_LSU2,
+                                              P8_LU1, P8_LU2]>,
+                               InstrStage<1, [P8_FXU1, P8_FXU2]>],
+                              [3, 1, 1]>,
+  InstrItinData<IIC_LdStLWARX, [InstrStage<1, [P8_DU1], 0>,
+                                InstrStage<1, [P8_DU2], 0>,
+                                InstrStage<1, [P8_DU3], 0>,
+                                InstrStage<1, [P8_DU4], 0>,
+                                InstrStage<1, [P8_LSU1, P8_LSU2,
+                                               P8_LU1, P8_LU2]>],
+                               [3, 1, 1]>,
+  // first+last
+  InstrItinData<IIC_LdStLDARX, [InstrStage<1, [P8_DU1], 0>,
+                                InstrStage<1, [P8_DU2], 0>,
+                                InstrStage<1, [P8_DU3], 0>,
+                                InstrStage<1, [P8_DU4], 0>,
+                                InstrStage<1, [P8_DU5], 0>,
+                                InstrStage<1, [P8_DU6], 0>,
+                                InstrStage<1, [P8_LSU1, P8_LSU2,
+                                               P8_LU1, P8_LU2]>],
+                               [3, 1, 1]>,
+  InstrItinData<IIC_LdStLMW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+                                              P8_DU4, P8_DU5, P8_DU6], 0>,
+                               InstrStage<1, [P8_LSU1, P8_LSU2,
+                                              P8_LU1, P8_LU2]>],
+                              [2, 1, 1]>,
+// Stores are dual-issued from the issue queue, so may only take up one
+// dispatch slot. The instruction will be broken into two IOPS. The agen
+// op is issued to the LSU, and the data op (register fetch) is issued
+// to either the LU (GPR store) or the VSU (FPR store).
+  InstrItinData<IIC_LdStStore , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+                                                P8_DU4, P8_DU5, P8_DU6], 0>,
+                                 InstrStage<1, [P8_LSU1, P8_LSU2]>,
+                                 InstrStage<1, [P8_LU1, P8_LU2]>],
+                                [1, 1, 1]>,
+  InstrItinData<IIC_LdStSTD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+                                              P8_DU4, P8_DU5, P8_DU6], 0>,
+                               InstrStage<1, [P8_LU1, P8_LU2,
+                                              P8_LSU1, P8_LSU2]>],
+                              [1, 1, 1]>,
+  InstrItinData<IIC_LdStSTDU , [InstrStage<1, [P8_DU1], 0>,
+                                InstrStage<1, [P8_DU2], 0>,
+                                InstrStage<1, [P8_LU1, P8_LU2,
+                                               P8_LSU1, P8_LSU2], 0>,
+                                InstrStage<1, [P8_FXU1, P8_FXU2]>],
+                               [2, 1, 1, 1]>,
+  // First+last
+  InstrItinData<IIC_LdStSTDUX , [InstrStage<1, [P8_DU1], 0>,
+                                 InstrStage<1, [P8_DU2], 0>,
+                                 InstrStage<1, [P8_DU3], 0>,
+                                 InstrStage<1, [P8_DU4], 0>,
+                                 InstrStage<1, [P8_DU5], 0>,
+                                 InstrStage<1, [P8_DU6], 0>,
+                                 InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
+                                 InstrStage<1, [P8_FXU1, P8_FXU2]>,
+                                 InstrStage<1, [P8_FXU1, P8_FXU2]>],
+                                [2, 1, 1, 1]>,
+  InstrItinData<IIC_LdStSTFD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+                                               P8_DU4, P8_DU5, P8_DU6], 0>,
+                                InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
+                                InstrStage<1, [P8_FPU1, P8_FPU2]>],
+                               [1, 1, 1]>,
+  InstrItinData<IIC_LdStSTFDU , [InstrStage<1, [P8_DU1], 0>,
+                                 InstrStage<1, [P8_DU2], 0>,
+                                 InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
+                                 InstrStage<1, [P8_FXU1, P8_FXU2], 0>,
+                                 InstrStage<1, [P8_FPU1, P8_FPU2]>],
+                                [2, 1, 1, 1]>,
+  InstrItinData<IIC_LdStSTVEBX , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+                                                 P8_DU4, P8_DU5, P8_DU6], 0>,
+                                  InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
+                                  InstrStage<1, [P8_FPU1, P8_FPU2]>],
+                                 [1, 1, 1]>,
+  InstrItinData<IIC_LdStSTDCX , [InstrStage<1, [P8_DU1], 0>,
+                                 InstrStage<1, [P8_DU2], 0>,
+                                 InstrStage<1, [P8_DU3], 0>,
+                                 InstrStage<1, [P8_DU4], 0>,
+                                 InstrStage<1, [P8_DU5], 0>,
+                                 InstrStage<1, [P8_DU6], 0>,
+                                 InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
+                                 InstrStage<1, [P8_LU1, P8_LU2]>],
+                                [1, 1, 1]>,
+  InstrItinData<IIC_LdStSTWCX , [InstrStage<1, [P8_DU1], 0>,
+                                 InstrStage<1, [P8_DU2], 0>,
+                                 InstrStage<1, [P8_DU3], 0>,
+                                 InstrStage<1, [P8_DU4], 0>,
+                                 InstrStage<1, [P8_DU5], 0>,
+                                 InstrStage<1, [P8_DU6], 0>,
+                                 InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
+                                 InstrStage<1, [P8_LU1, P8_LU2]>],
+                                [1, 1, 1]>,
+  InstrItinData<IIC_SprMFCR , [InstrStage<1, [P8_DU1], 0>,
+                               InstrStage<1, [P8_CRU]>],
+                              [6, 1]>,
+  InstrItinData<IIC_SprMFCRF , [InstrStage<1, [P8_DU1], 0>,
+                                InstrStage<1, [P8_CRU]>],
+                               [3, 1]>,
+  InstrItinData<IIC_SprMTSPR , [InstrStage<1, [P8_DU1], 0>,
+                                InstrStage<1, [P8_FXU1, P8_FXU2]>],
+                               [4, 1]>, // mtctr
+  InstrItinData<IIC_FPGeneral , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+                                                P8_DU4, P8_DU5, P8_DU6], 0>,
+                                 InstrStage<1, [P8_FPU1, P8_FPU2]>],
+                                [5, 1, 1]>,
+  InstrItinData<IIC_FPAddSub , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+                                               P8_DU4, P8_DU5, P8_DU6], 0>,
+                                InstrStage<1, [P8_FPU1, P8_FPU2]>],
+                               [5, 1, 1]>,
+  InstrItinData<IIC_FPCompare , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+                                                P8_DU4, P8_DU5, P8_DU6], 0>,
+                                 InstrStage<1, [P8_FPU1, P8_FPU2]>],
+                                [8, 1, 1]>,
+  InstrItinData<IIC_FPDivD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+                                             P8_DU4, P8_DU5, P8_DU6], 0>,
+                              InstrStage<1, [P8_FPU1, P8_FPU2]>],
+                             [33, 1, 1]>,
+  InstrItinData<IIC_FPDivS , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+                                             P8_DU4, P8_DU5, P8_DU6], 0>,
+                              InstrStage<1, [P8_FPU1, P8_FPU2]>],
+                             [27, 1, 1]>,
+  InstrItinData<IIC_FPSqrtD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+                                              P8_DU4, P8_DU5, P8_DU6], 0>,
+                               InstrStage<1, [P8_FPU1, P8_FPU2]>],
+                              [44, 1, 1]>,
+  InstrItinData<IIC_FPSqrtS , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+                                              P8_DU4, P8_DU5, P8_DU6], 0>,
+                               InstrStage<1, [P8_FPU1, P8_FPU2]>],
+                              [32, 1, 1]>,
+  InstrItinData<IIC_FPFused , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+                                              P8_DU4, P8_DU5, P8_DU6], 0>,
+                               InstrStage<1, [P8_FPU1, P8_FPU2]>],
+                              [5, 1, 1, 1]>,
+  InstrItinData<IIC_FPRes , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
+                                            P8_DU4, P8_DU5, P8_DU6], 0>,
+                             InstrStage<1, [P8_FPU1, P8_FPU2]>],
+                            [5, 1, 1]>,
+  InstrItinData<IIC_VecGeneral , [InstrStage<1, [P8_DU1], 0>,
+                                  InstrStage<1, [P8_FPU1, P8_FPU2]>],
+                                 [2, 1, 1]>,
+  InstrItinData<IIC_VecVSL , [InstrStage<1, [P8_DU1], 0>,
+                              InstrStage<1, [P8_FPU1, P8_FPU2]>],
+                             [2, 1, 1]>,
+  InstrItinData<IIC_VecVSR , [InstrStage<1, [P8_DU1], 0>,
+                              InstrStage<1, [P8_FPU1, P8_FPU2]>],
+                             [2, 1, 1]>,
+  InstrItinData<IIC_VecFP , [InstrStage<1, [P8_DU1], 0>,
+                             InstrStage<1, [P8_FPU1, P8_FPU2]>],
+                            [6, 1, 1]>,
+  InstrItinData<IIC_VecFPCompare, [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_FPU1, P8_FPU2]>],
+                                  [6, 1, 1]>,
+  InstrItinData<IIC_VecFPRound , [InstrStage<1, [P8_DU1], 0>,
+                                  InstrStage<1, [P8_FPU1, P8_FPU2]>],
+                                 [6, 1, 1]>,
+  InstrItinData<IIC_VecComplex , [InstrStage<1, [P8_DU1], 0>,
+                                  InstrStage<1, [P8_FPU1, P8_FPU2]>],
+                                 [7, 1, 1]>,
+  InstrItinData<IIC_VecPerm , [InstrStage<1, [P8_DU1, P8_DU2], 0>,
+                               InstrStage<1, [P8_FPU1, P8_FPU2]>],
+                              [3, 1, 1]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// P8 machine model for scheduling and other instruction cost heuristics.
+// P8 has an 8 insn dispatch group (6 non-branch, 2 branch) and can issue up
+// to 10 insns per cycle (2-LU, 2-LSU, 2-FXU, 2-FPU, 1-CRU, 1-BRU).
+
+def P8Model : SchedMachineModel {
+  let IssueWidth = 8;  // up to 8 instructions dispatched per cycle.
+                       // up to six non-branch instructions.
+                       // up to two branches in a dispatch group.
+
+  let LoadLatency = 3; // Optimistic load latency assuming bypass.
+                       // This is overridden by OperandCycles if the
+                       // Itineraries are queried instead.
+  let MispredictPenalty = 16;
+
+  // Try to make sure we have at least 10 dispatch groups in a loop.
+  let LoopMicroOpBufferSize = 60;
+
+  let CompleteModel = 0;
+
+  let Itineraries = P8Itineraries;
+}
+
diff --git a/capstone/suite/synctools/tablegen/PPC/PPCScheduleP9.td b/capstone/suite/synctools/tablegen/PPC/PPCScheduleP9.td
new file mode 100644
index 000000000..e1a480117
--- /dev/null
+++ b/capstone/suite/synctools/tablegen/PPC/PPCScheduleP9.td
@@ -0,0 +1,400 @@
+//===-- PPCScheduleP9.td - PPC P9 Scheduling Definitions ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the POWER9 processor.
+//
+//===----------------------------------------------------------------------===//
+include "PPCInstrInfo.td"
+
+def P9Model : SchedMachineModel {
+  // The maximum number of instructions to be issued at the same time.
+  // A value of 8 is technically correct, since 8 instructions can be fetched
+  // from the instruction cache; however, only 6 instructions may actually be
+  // dispatched at a time.
+  let IssueWidth = 8;
+
+  // Load latency is 4 or 5 cycles depending on the load. This latency assumes
+  // that we have a cache hit. For a cache miss the load latency will be more.
+  // There are two instructions (lxvl, lxvll) that have a latency of 6 cycles.
+  // However, it is not worth bumping this value up to 6 when the vast majority
+  // of instructions are 4 or 5 cycles.
+  let LoadLatency = 5;
+
+  // A total of 16 cycles to recover from a branch mispredict.
+  let MispredictPenalty = 16;
+
+  // Try to make sure we have at least 10 dispatch groups in a loop.
+  // A dispatch group is 6 instructions.
+  let LoopMicroOpBufferSize = 60;
+
+  let CompleteModel = 1;
+
+  // Do not support QPX (Quad Processing eXtension) or SPE (Signal Processing
+  // Engine) on Power 9.
+  let UnsupportedFeatures = [HasQPX, HasSPE];
+
+}
+
+let SchedModel = P9Model in {
+
+  // ***************** Processor Resources *****************
+
+  //Dispatcher:
+  def DISPATCHER : ProcResource<12>;
+
+  // Issue Ports
+  // An instruction can go down one of two issue queues.
+  // Address Generation (AGEN) mainly for loads and stores.
+  // Execution (EXEC) for most other instructions.
+  // Some instructions cannot be run on just any issue queue and may require an
+  // Even or an Odd queue. The EXECE represents the even queues and the EXECO
+  // represents the odd queues.
+  def IP_AGEN : ProcResource<4>;
+  def IP_EXEC : ProcResource<4>;
+  def IP_EXECE : ProcResource<2> {
+    //Even Exec Ports
+    let Super = IP_EXEC;
+  }
+  def IP_EXECO : ProcResource<2> {
+    //Odd Exec Ports
+    let Super = IP_EXEC;
+  }
+
+  // Pipeline Groups
+  // Four ALU (Fixed Point Arithmetic) units in total. Two even, two Odd.
+  def ALU : ProcResource<4>;
+  def ALUE : ProcResource<2> {
+    //Even ALU pipelines
+    let Super = ALU;
+  }
+  def ALUO : ProcResource<2> {
+    //Odd ALU pipelines
+    let Super = ALU;
+  }
+
+  // Two DIV (Fixed Point Divide) units.
+  def DIV : ProcResource<2>;
+
+  // Four DP (Floating Point) units in total. Two even, two Odd.
+  def DP : ProcResource<4>;
+  def DPE : ProcResource<2> {
+    //Even DP pipelines
+    let Super = DP;
+  }
+  def DPO : ProcResource<2> {
+    //Odd DP pipelines
+    let Super = DP;
+  }
+
+  // Four LS (Load or Store) units.
+  def LS : ProcResource<4>;
+
+  // Two PM (Permute) units.
+  def PM : ProcResource<2>;
+
+  // Only one DFU (Decimal Floating Point and Quad Precision) unit.
+  def DFU : ProcResource<1>;
+
+  // Only one Branch unit.
+  def BR : ProcResource<1> {
+    let BufferSize = 16;
+  }
+
+  // Only one CY (Crypto) unit.
+  def CY : ProcResource<1>;
+
+  // ***************** SchedWriteRes Definitions *****************
+
+  //Dispatcher
+  def DISP_1C : SchedWriteRes<[DISPATCHER]> {
+    let NumMicroOps = 0;
+    let Latency = 1;
+  }
+
+  // Issue Ports
+  def IP_AGEN_1C : SchedWriteRes<[IP_AGEN]> {
+    let NumMicroOps = 0;
+    let Latency = 1;
+  }
+
+  def IP_EXEC_1C : SchedWriteRes<[IP_EXEC]> {
+    let NumMicroOps = 0;
+    let Latency = 1;
+  }
+
+  def IP_EXECE_1C : SchedWriteRes<[IP_EXECE]> {
+    let NumMicroOps = 0;
+    let Latency = 1;
+  }
+
+  def IP_EXECO_1C : SchedWriteRes<[IP_EXECO]> {
+    let NumMicroOps = 0;
+    let Latency = 1;
+  }
+
+  //Pipeline Groups
+
+  // ALU Units
+  // An ALU may take either 2 or 3 cycles to complete the operation.
+  // However, the ALU unit is only ever busy for 1 cycle at a time and may
+  // receive new instructions each cycle.
+  def P9_ALU_2C : SchedWriteRes<[ALU]> {
+    let Latency = 2;
+  }
+
+  def P9_ALUE_2C : SchedWriteRes<[ALUE]> {
+    let Latency = 2;
+  }
+
+  def P9_ALUO_2C : SchedWriteRes<[ALUO]> {
+    let Latency = 2;
+  }
+
+  def P9_ALU_3C : SchedWriteRes<[ALU]> {
+    let Latency = 3;
+  }
+
+  def P9_ALUE_3C : SchedWriteRes<[ALUE]> {
+    let Latency = 3;
+  }
+
+  def P9_ALUO_3C : SchedWriteRes<[ALUO]> {
+    let Latency = 3;
+  }
+
+  // DIV Unit
+  // A DIV unit may take from 5 to 40 cycles to complete.
+  // Some DIV operations may keep the unit busy for up to 8 cycles.
+  def P9_DIV_5C : SchedWriteRes<[DIV]> {
+    let Latency = 5;
+  }
+
+  def P9_DIV_12C : SchedWriteRes<[DIV]> {
+    let Latency = 12;
+  }
+
+  def P9_DIV_16C_8 : SchedWriteRes<[DIV]> {
+    let ResourceCycles = [8];
+    let Latency = 16;
+  }
+
+  def P9_DIV_24C_8 : SchedWriteRes<[DIV]> {
+    let ResourceCycles = [8];
+    let Latency = 24;
+  }
+
+  def P9_DIV_40C_8 : SchedWriteRes<[DIV]> {
+    let ResourceCycles = [8];
+    let Latency = 40;
+  }
+
+  // DP Unit
+  // A DP unit may take from 2 to 36 cycles to complete.
+  // Some DP operations keep the unit busy for up to 10 cycles.
+  def P9_DP_2C : SchedWriteRes<[DP]> {
+    let Latency = 2;
+  }
+
+  def P9_DP_5C : SchedWriteRes<[DP]> {
+    let Latency = 5;
+  }
+
+  def P9_DP_7C : SchedWriteRes<[DP]> {
+    let Latency = 7;
+  }
+
+  def P9_DPE_7C : SchedWriteRes<[DPE]> {
+    let Latency = 7;
+  }
+
+  def P9_DPO_7C : SchedWriteRes<[DPO]> {
+    let Latency = 7;
+  }
+
+  def P9_DP_22C_5 : SchedWriteRes<[DP]> {
+    let ResourceCycles = [5];
+    let Latency = 22;
+  }
+
+  def P9_DP_24C_8 : SchedWriteRes<[DP]> {
+    let ResourceCycles = [8];
+    let Latency = 24;
+  }
+
+  def P9_DPO_24C_8 : SchedWriteRes<[DPO]> {
+    let ResourceCycles = [8];
+    let Latency = 24;
+  }
+
+  def P9_DPE_24C_8 : SchedWriteRes<[DPE]> {
+    let ResourceCycles = [8];
+    let Latency = 24;
+  }
+
+  // Latency matches the name (26 cycles); the P9_DPOpAndALU2Op_29C_5
+  // sequence below depends on 26 + 3.
+  def P9_DP_26C_5 : SchedWriteRes<[DP]> {
+    let ResourceCycles = [5];
+    let Latency = 26;
+  }
+
+  def P9_DP_27C_7 : SchedWriteRes<[DP]> {
+    let ResourceCycles = [7];
+    let Latency = 27;
+  }
+
+  def P9_DPE_27C_10 : SchedWriteRes<[DPE]> {
+    let ResourceCycles = [10];
+    let Latency = 27;
+  }
+
+  def P9_DPO_27C_10 : SchedWriteRes<[DPO]> {
+    let ResourceCycles = [10];
+    let Latency = 27;
+  }
+
+  def P9_DP_33C_8 : SchedWriteRes<[DP]> {
+    let ResourceCycles = [8];
+    let Latency = 33;
+  }
+
+  def P9_DPE_33C_8 : SchedWriteRes<[DPE]> {
+    let ResourceCycles = [8];
+    let Latency = 33;
+  }
+
+  def P9_DPO_33C_8 : SchedWriteRes<[DPO]> {
+    let ResourceCycles = [8];
+    let Latency = 33;
+  }
+
+  def P9_DP_36C_10 : SchedWriteRes<[DP]> {
+    let ResourceCycles = [10];
+    let Latency = 36;
+  }
+
+  def P9_DPE_36C_10 : SchedWriteRes<[DPE]> {
+    let ResourceCycles = [10];
+    let Latency = 36;
+  }
+
+  def P9_DPO_36C_10 : SchedWriteRes<[DPO]> {
+    let ResourceCycles = [10];
+    let Latency = 36;
+  }
+
+  // PM Unit
+  // Three cycle permute operations.
+  def P9_PM_3C : SchedWriteRes<[PM]> {
+    let Latency = 3;
+  }
+
+  // Load and Store Units
+  // Loads can have 4, 5 or 6 cycles of latency.
+  // Stores are listed as having a single cycle of latency. This is not
+  // completely accurate since it takes more than 1 cycle to actually store
+  // the value. However, since the store does not produce a result it can be
+  // considered complete after one cycle.
+  def P9_LS_1C : SchedWriteRes<[LS]> {
+    let Latency = 1;
+  }
+
+  def P9_LS_4C : SchedWriteRes<[LS]> {
+    let Latency = 4;
+  }
+
+  def P9_LS_5C : SchedWriteRes<[LS]> {
+    let Latency = 5;
+  }
+
+  def P9_LS_6C : SchedWriteRes<[LS]> {
+    let Latency = 6;
+  }
+
+  // DFU Unit
+  // Some of the most expensive ops use the DFU.
+  // Can take from 12 cycles to 76 cycles to obtain a result.
+  // The unit may be busy for up to 62 cycles.
+  def P9_DFU_12C : SchedWriteRes<[DFU]> {
+    let Latency = 12;
+  }
+
+  def P9_DFU_23C : SchedWriteRes<[DFU]> {
+    let Latency = 23;
+    let ResourceCycles = [11];
+  }
+
+  def P9_DFU_24C : SchedWriteRes<[DFU]> {
+    let Latency = 24;
+    let ResourceCycles = [12];
+  }
+
+  def P9_DFU_37C : SchedWriteRes<[DFU]> {
+    let Latency = 37;
+    let ResourceCycles = [25];
+  }
+
+  def P9_DFU_58C : SchedWriteRes<[DFU]> {
+    let Latency = 58;
+    let ResourceCycles = [44];
+  }
+
+  def P9_DFU_76C : SchedWriteRes<[DFU]> {
+    let Latency = 76;
+    let ResourceCycles = [62];
+  }
+
+  // 2 or 5 cycle latencies for the branch unit.
+  def P9_BR_2C : SchedWriteRes<[BR]> {
+    let Latency = 2;
+  }
+
+  def P9_BR_5C : SchedWriteRes<[BR]> {
+    let Latency = 5;
+  }
+
+  // 6 cycle latency for the crypto unit
+  def P9_CY_6C : SchedWriteRes<[CY]> {
+    let Latency = 6;
+  }
+
+  // ***************** WriteSeq Definitions *****************
+
+  // These are combinations of the resources listed above.
+  // The idea is that some cracked instructions cannot be done in parallel and
+  // so the latencies for their resources must be added.
+  def P9_LoadAndALUOp_6C : WriteSequence<[P9_LS_4C, P9_ALU_2C]>;
+  def P9_LoadAndALUOp_7C : WriteSequence<[P9_LS_5C, P9_ALU_2C]>;
+  def P9_LoadAndALU2Op_7C : WriteSequence<[P9_LS_4C, P9_ALU_3C]>;
+  def P9_LoadAndALU2Op_8C : WriteSequence<[P9_LS_5C, P9_ALU_3C]>;
+  def P9_LoadAndPMOp_8C : WriteSequence<[P9_LS_5C, P9_PM_3C]>;
+  def P9_LoadAndLoadOp_8C : WriteSequence<[P9_LS_4C, P9_LS_4C]>;
+  def P9_IntDivAndALUOp_18C_8 : WriteSequence<[P9_DIV_16C_8, P9_ALU_2C]>;
+  def P9_IntDivAndALUOp_26C_8 : WriteSequence<[P9_DIV_24C_8, P9_ALU_2C]>;
+  def P9_IntDivAndALUOp_42C_8 : WriteSequence<[P9_DIV_40C_8, P9_ALU_2C]>;
+  def P9_StoreAndALUOp_3C : WriteSequence<[P9_LS_1C, P9_ALU_2C]>;
+  def P9_StoreAndALUOp_4C : WriteSequence<[P9_LS_1C, P9_ALU_3C]>;
+  def P9_ALUOpAndALUOp_4C : WriteSequence<[P9_ALU_2C, P9_ALU_2C]>;
+  def P9_ALU2OpAndALU2Op_6C : WriteSequence<[P9_ALU_3C, P9_ALU_3C]>;
+  def P9_ALUOpAndALUOpAndALUOp_6C :
+    WriteSequence<[P9_ALU_2C, P9_ALU_2C, P9_ALU_2C]>;
+  def P9_DPOpAndALUOp_7C : WriteSequence<[P9_DP_5C, P9_ALU_2C]>;
+  def P9_DPOpAndALUOp_9C : WriteSequence<[P9_DP_7C, P9_ALU_2C]>;
+  def P9_DPOpAndALU2Op_10C : WriteSequence<[P9_DP_7C, P9_ALU_3C]>;
+  def P9_DPOpAndALUOp_24C_5 : WriteSequence<[P9_DP_22C_5, P9_ALU_2C]>;
+  def P9_DPOpAndALUOp_35C_8 : WriteSequence<[P9_DP_33C_8, P9_ALU_2C]>;
+  def P9_DPOpAndALU2Op_25C_5 : WriteSequence<[P9_DP_22C_5, P9_ALU_3C]>;
+  def P9_DPOpAndALU2Op_29C_5 : WriteSequence<[P9_DP_26C_5, P9_ALU_3C]>;
+  def P9_DPOpAndALU2Op_36C_8 : WriteSequence<[P9_DP_33C_8, P9_ALU_3C]>;
+  def P9_DPOpAndALU2Op_39C_10 : WriteSequence<[P9_DP_36C_10, P9_ALU_3C]>;
+  def P9_BROpAndALUOp_7C : WriteSequence<[P9_BR_5C, P9_ALU_2C]>;
+
+  // Include the resource requirements of individual instructions.
+  include "P9InstrResources.td"
+
+}
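+
+// Illustrative sketch only: the InstrRW records that consume these
+// WriteSequences live in P9InstrResources.td, and the opcode named here is a
+// hypothetical placeholder rather than an entry taken from that file. A
+// cracked update-form load, split into a load micro-op and a dependent ALU
+// micro-op, would be written in the same style as the records in that file:
+//
+//   def : InstrRW<[P9_LoadAndALUOp_6C, IP_AGEN_1C, IP_EXEC_1C,
+//                  DISP_1C, DISP_1C], (instrs SomeCrackedLoadOp)>;
+//
+// The 6 cycle latency of P9_LoadAndALUOp_6C is simply the 4 cycle P9_LS_4C
+// latency plus the 2 cycle P9_ALU_2C latency, since the ALU micro-op cannot
+// start until the load completes.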