Name |
Date |
Size |
#Lines |
LOC |
||
---|---|---|---|---|---|---|
.. | - | - | ||||
AsmParser/ | H | - | - | 1,944 | 1,627 | |
Disassembler/ | H | - | - | 407 | 305 | |
GISel/ | H | - | - | 1,621 | 1,138 | |
MCTargetDesc/ | H | - | - | 4,075 | 3,015 | |
TargetInfo/ | H | - | - | 67 | 39 | |
P10InstrResources.td | H A D | 28-Jul-2024 | 44.3 KiB | 2,057 | 1,923 | |
P9InstrResources.td | H A D | 02-Sep-2023 | 37.6 KiB | 1,440 | 1,353 | |
PPC.h | H A D | 28-Jul-2024 | 7.7 KiB | 210 | 106 | |
PPC.td | H A D | 30-Jul-2024 | 38.6 KiB | 750 | 702 | |
PPCAsmPrinter.cpp | H A D | 28-Jul-2024 | 128.5 KiB | 3,340 | 2,400 | |
PPCBack2BackFusion.def | H A D | 28-Jul-2024 | 13.7 KiB | 1,054 | 1,053 | |
PPCBoolRetToInt.cpp | H A D | 28-Jul-2024 | 10 KiB | 291 | 194 | |
PPCBranchCoalescing.cpp | H A D | 28-Jul-2024 | 30.2 KiB | 791 | 397 | |
PPCBranchSelector.cpp | H A D | 02-Dec-2021 | 15.8 KiB | 414 | 228 | |
PPCCCState.cpp | H A D | 13-Jun-2021 | 1.1 KiB | 36 | 23 | |
PPCCCState.h | H A D | 22-Aug-2021 | 2.2 KiB | 74 | 46 | |
PPCCTRLoops.cpp | H A D | 28-Jul-2024 | 12.2 KiB | 362 | 238 | |
PPCCTRLoopsVerify.cpp | H A D | 28-Jul-2024 | 5.6 KiB | 184 | 136 | |
PPCCallingConv.cpp | H A D | 02-Sep-2023 | 7.9 KiB | 199 | 129 | |
PPCCallingConv.h | H A D | 02-Sep-2023 | 2.1 KiB | 51 | 31 | |
PPCCallingConv.td | H A D | 02-Sep-2023 | 18.6 KiB | 431 | 348 | |
PPCEarlyReturn.cpp | H A D | 28-Jul-2024 | 7.1 KiB | 211 | 148 | |
PPCExpandAtomicPseudoInsts.cpp | H A D | 28-Jul-2024 | 11 KiB | 303 | 241 | |
PPCExpandISEL.cpp | H A D | 27-Aug-2020 | 17.9 KiB | 492 | 308 | |
PPCFastISel.cpp | H A D | 28-Jul-2024 | 85.4 KiB | 2,477 | 1,698 | |
PPCFrameLowering.cpp | H A D | 25-Aug-2024 | 107.7 KiB | 2,812 | 1,953 | |
PPCFrameLowering.h | H A D | 28-Jul-2024 | 7.8 KiB | 183 | 75 | |
PPCGenRegisterBankInfo.def | H A D | 18-Dec-2023 | 4.6 KiB | 113 | 101 | |
PPCGenScalarMASSEntries.cpp | H A D | 04-Jul-2022 | 4.5 KiB | 150 | 88 | |
PPCHazardRecognizers.cpp | H A D | 28-Jul-2024 | 14.1 KiB | 436 | 281 | |
PPCHazardRecognizers.h | H A D | 20-Dec-2019 | 3.8 KiB | 102 | 52 | |
PPCISelDAGToDAG.cpp | H A D | 28-Jul-2024 | 307.4 KiB | 7,939 | 5,708 | |
PPCISelLowering.cpp | H A D | 22-Sep-2024 | 740.1 KiB | 18,862 | 13,542 | |
PPCISelLowering.h | H A D | 28-Jul-2024 | 65.3 KiB | 1,513 | 737 | |
PPCInstr64Bit.td | H A D | 04-Sep-2024 | 95.8 KiB | 2,025 | 1,796 | |
PPCInstrAltivec.td | H A D | 02-Sep-2023 | 80.1 KiB | 1,654 | 1,460 | |
PPCInstrBuilder.h | H A D | 20-Dec-2019 | 1.5 KiB | 43 | 14 | |
PPCInstrDFP.td | H A D | 02-Sep-2023 | 10.1 KiB | 194 | 165 | |
PPCInstrFormats.td | H A D | 18-Dec-2023 | 61.4 KiB | 2,350 | 1,939 | |
PPCInstrFuture.td | H A D | 02-Sep-2023 | 3.3 KiB | 89 | 74 | |
PPCInstrFutureMMA.td | H A D | 14-Apr-2023 | 3.8 KiB | 117 | 98 | |
PPCInstrHTM.td | H A D | 02-Sep-2023 | 5.4 KiB | 176 | 128 | |
PPCInstrInfo.cpp | H A D | 30-Jul-2024 | 204.7 KiB | 5,581 | 4,110 | |
PPCInstrInfo.h | H A D | 28-Jul-2024 | 31.2 KiB | 681 | 447 | |
PPCInstrInfo.td | H A D | 04-Sep-2024 | 243.1 KiB | 5,336 | 4,711 | |
PPCInstrMMA.td | H A D | 02-Sep-2023 | 55.9 KiB | 1,107 | 1,048 | |
PPCInstrP10.td | H A D | 28-Jul-2024 | 109.9 KiB | 2,525 | 2,285 | |
PPCInstrSPE.td | H A D | 02-Sep-2023 | 49.7 KiB | 882 | 775 | |
PPCInstrVSX.td | H A D | 28-Jul-2024 | 248.8 KiB | 5,153 | 4,722 | |
PPCLoopInstrFormPrep.cpp | H A D | 28-Jul-2024 | 55.4 KiB | 1,499 | 936 | |
PPCLowerMASSVEntries.cpp | H A D | 28-Jul-2024 | 6.5 KiB | 200 | 123 | |
PPCMCInstLower.cpp | H A D | 28-Jul-2024 | 8.6 KiB | 231 | 185 | |
PPCMIPeephole.cpp | H A D | 28-Jul-2024 | 81.2 KiB | 2,048 | 1,475 | |
PPCMachineFunctionInfo.cpp | H A D | 14-Apr-2023 | 6.6 KiB | 196 | 160 | |
PPCMachineFunctionInfo.h | H A D | 28-Jul-2024 | 10.8 KiB | 292 | 134 | |
PPCMachineScheduler.cpp | H A D | 22-Aug-2021 | 9.5 KiB | 252 | 153 | |
PPCMachineScheduler.h | H A D | 22-Aug-2021 | 1.8 KiB | 54 | 30 | |
PPCMacroFusion.cpp | H A D | 18-Dec-2023 | 9.9 KiB | 294 | 191 | |
PPCMacroFusion.def | H A D | 28-Jul-2024 | 6.6 KiB | 160 | 128 | |
PPCMacroFusion.h | H A D | 22-Aug-2021 | 1 KiB | 28 | 7 | |
PPCMergeStringPool.cpp | H A D | 28-Jul-2024 | 12.3 KiB | 334 | 199 | |
PPCPerfectShuffle.h | H A D | 20-Dec-2019 | 397.6 KiB | 6,591 | 6,567 | |
PPCPfmCounters.td | H A D | 20-Dec-2019 | 705 | 19 | 16 | |
PPCPreEmitPeephole.cpp | H A D | 28-Jul-2024 | 24.3 KiB | 609 | 448 | |
PPCReduceCRLogicals.cpp | H A D | 28-Jul-2024 | 28.7 KiB | 740 | 573 | |
PPCRegisterInfo.cpp | H A D | 28-Jul-2024 | 77.6 KiB | 1,966 | 1,396 | |
PPCRegisterInfo.h | H A D | 18-Dec-2023 | 7.5 KiB | 235 | 170 | |
PPCRegisterInfo.td | H A D | 06-Aug-2024 | 39.8 KiB | 1,145 | 1,040 | |
PPCRegisterInfoDMR.td | H A D | 14-Apr-2023 | 5.5 KiB | 165 | 145 | |
PPCRegisterInfoMMA.td | H A D | 14-Apr-2023 | 4.8 KiB | 111 | 98 | |
PPCSchedPredicates.td | H A D | 14-Apr-2023 | 8.9 KiB | 296 | 292 | |
PPCSchedule.td | H A D | 20-Mar-2022 | 5.4 KiB | 147 | 143 | |
PPCSchedule440.td | H A D | 20-Dec-2019 | 34.6 KiB | 601 | 586 | |
PPCScheduleA2.td | H A D | 20-Dec-2019 | 7.9 KiB | 170 | 159 | |
PPCScheduleE500.td | H A D | 20-Dec-2019 | 16.6 KiB | 280 | 272 | |
PPCScheduleE500mc.td | H A D | 20-Dec-2019 | 20.9 KiB | 335 | 327 | |
PPCScheduleE5500.td | H A D | 20-Dec-2019 | 23.6 KiB | 379 | 369 | |
PPCScheduleG3.td | H A D | 20-Dec-2019 | 4.5 KiB | 81 | 78 | |
PPCScheduleG4.td | H A D | 20-Dec-2019 | 5.4 KiB | 97 | 94 | |
PPCScheduleG4Plus.td | H A D | 20-Dec-2019 | 6.4 KiB | 111 | 108 | |
PPCScheduleG5.td | H A D | 20-Dec-2019 | 7.1 KiB | 129 | 121 | |
PPCScheduleP10.td | H A D | 18-Dec-2023 | 12.8 KiB | 411 | 328 | |
PPCScheduleP7.td | H A D | 18-Dec-2023 | 14.8 KiB | 405 | 388 | |
PPCScheduleP8.td | H A D | 18-Dec-2023 | 15.9 KiB | 414 | 397 | |
PPCScheduleP9.td | H A D | 18-Dec-2023 | 12.3 KiB | 429 | 356 | |
PPCSubtarget.cpp | H A D | 28-Jul-2024 | 9.2 KiB | 269 | 179 | |
PPCSubtarget.h | H A D | 30-Jul-2024 | 9.9 KiB | 319 | 204 | |
PPCTLSDynamicCall.cpp | H A D | 28-Jul-2024 | 14.6 KiB | 345 | 229 | |
PPCTOCRegDeps.cpp | H A D | 28-Jul-2024 | 5.2 KiB | 153 | 69 | |
PPCTargetMachine.cpp | H A D | 28-Jul-2024 | 22.2 KiB | 647 | 459 | |
PPCTargetMachine.h | H A D | 28-Jul-2024 | 2.9 KiB | 87 | 50 | |
PPCTargetObjectFile.cpp | H A D | 22-Aug-2021 | 2.5 KiB | 60 | 30 | |
PPCTargetObjectFile.h | H A D | 20-Dec-2019 | 1.2 KiB | 34 | 14 | |
PPCTargetStreamer.h | H A D | 14-Apr-2023 | 1.1 KiB | 37 | 21 | |
PPCTargetTransformInfo.cpp | H A D | 30-Jul-2024 | 41.1 KiB | 1,100 | 770 | |
PPCTargetTransformInfo.h | H A D | 28-Jul-2024 | 7.6 KiB | 162 | 121 | |
PPCVSXCopy.cpp | H A D | 20-Mar-2022 | 5.6 KiB | 170 | 116 | |
PPCVSXFMAMutate.cpp | H A D | 28-Jul-2024 | 14.9 KiB | 392 | 223 | |
PPCVSXSwapRemoval.cpp | H A D | 02-Sep-2023 | 38 KiB | 1,075 | 679 | |
README_P9.txt | H A D | 04-Jul-2022 | 22 KiB | 601 | 475 |
README_P9.txt
1//===- README_P9.txt - Notes for improving Power9 code gen ----------------===// 2 3TODO: Instructions That Need Intrinsics Implemented or a Mapping to LLVM IR 4 5Altivec: 6- Vector Compare Not Equal (Zero): 7 vcmpneb(.) vcmpneh(.) vcmpnew(.) 8 vcmpnezb(.) vcmpnezh(.) vcmpnezw(.) 9 . Same as other VCMP*, use VCMP/VCMPo form (support intrinsic) 10 11- Vector Extract Unsigned: vextractub vextractuh vextractuw vextractd 12 . Don't use llvm extractelement because they have different semantics 13 . Use intrinsics: 14 (set v2i64:$vD, (int_ppc_altivec_vextractub v16i8:$vA, imm:$UIMM)) 15 (set v2i64:$vD, (int_ppc_altivec_vextractuh v8i16:$vA, imm:$UIMM)) 16 (set v2i64:$vD, (int_ppc_altivec_vextractuw v4i32:$vA, imm:$UIMM)) 17 (set v2i64:$vD, (int_ppc_altivec_vextractd v2i64:$vA, imm:$UIMM)) 18 19- Vector Extract Unsigned Byte Left/Right-Indexed: 20 vextublx vextubrx vextuhlx vextuhrx vextuwlx vextuwrx 21 . Use intrinsics: 22 // Left-Indexed 23 (set i64:$rD, (int_ppc_altivec_vextublx i64:$rA, v16i8:$vB)) 24 (set i64:$rD, (int_ppc_altivec_vextuhlx i64:$rA, v8i16:$vB)) 25 (set i64:$rD, (int_ppc_altivec_vextuwlx i64:$rA, v4i32:$vB)) 26 27 // Right-Indexed 28 (set i64:$rD, (int_ppc_altivec_vextubrx i64:$rA, v16i8:$vB)) 29 (set i64:$rD, (int_ppc_altivec_vextuhrx i64:$rA, v8i16:$vB)) 30 (set i64:$rD, (int_ppc_altivec_vextuwrx i64:$rA, v4i32:$vB)) 31 32- Vector Insert Element Instructions: vinsertb vinsertd vinserth vinsertw 33 (set v16i8:$vD, (int_ppc_altivec_vinsertb v16i8:$vA, imm:$UIMM)) 34 (set v2i64:$vD, (int_ppc_altivec_vinsertd v2i64:$vA, imm:$UIMM)) 35 (set v8i16:$vD, (int_ppc_altivec_vinserth v8i16:$vA, imm:$UIMM)) 36 (set v4i32:$vD, (int_ppc_altivec_vinsertw v4i32:$vA, imm:$UIMM)) 37 38- Vector Count Leading/Trailing Zero LSB. Result is placed into GPR[rD]: 39 vclzlsbb vctzlsbb 40 . Use intrinsic: 41 (set i64:$rD, (int_ppc_altivec_vclzlsbb v16i8:$vB)) 42 (set i64:$rD, (int_ppc_altivec_vctzlsbb v16i8:$vB)) 43 44- Vector Count Trailing Zeros: vctzb vctzh vctzw vctzd 45 . 
Map to llvm cttz 46 (set v16i8:$vD, (cttz v16i8:$vB)) // vctzb 47 (set v8i16:$vD, (cttz v8i16:$vB)) // vctzh 48 (set v4i32:$vD, (cttz v4i32:$vB)) // vctzw 49 (set v2i64:$vD, (cttz v2i64:$vB)) // vctzd 50 51- Vector Extend Sign: vextsb2w vextsh2w vextsb2d vextsh2d vextsw2d 52 . vextsb2w: 53 (set v4i32:$vD, (sext v4i8:$vB)) 54 55 // PowerISA_V3.0: 56 do i = 0 to 3 57 VR[VRT].word[i] ← EXTS32(VR[VRB].word[i].byte[3]) 58 end 59 60 . vextsh2w: 61 (set v4i32:$vD, (sext v4i16:$vB)) 62 63 // PowerISA_V3.0: 64 do i = 0 to 3 65 VR[VRT].word[i] ← EXTS32(VR[VRB].word[i].hword[1]) 66 end 67 68 . vextsb2d 69 (set v2i64:$vD, (sext v2i8:$vB)) 70 71 // PowerISA_V3.0: 72 do i = 0 to 1 73 VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].byte[7]) 74 end 75 76 . vextsh2d 77 (set v2i64:$vD, (sext v2i16:$vB)) 78 79 // PowerISA_V3.0: 80 do i = 0 to 1 81 VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].hword[3]) 82 end 83 84 . vextsw2d 85 (set v2i64:$vD, (sext v2i32:$vB)) 86 87 // PowerISA_V3.0: 88 do i = 0 to 1 89 VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].word[1]) 90 end 91 92- Vector Integer Negate: vnegw vnegd 93 . Map to llvm ineg 94 (set v4i32:$rT, (ineg v4i32:$rA)) // vnegw 95 (set v2i64:$rT, (ineg v2i64:$rA)) // vnegd 96 97- Vector Parity Byte: vprtybw vprtybd vprtybq 98 . Use intrinsic: 99 (set v4i32:$rD, (int_ppc_altivec_vprtybw v4i32:$vB)) 100 (set v2i64:$rD, (int_ppc_altivec_vprtybd v2i64:$vB)) 101 (set v1i128:$rD, (int_ppc_altivec_vprtybq v1i128:$vB)) 102 103- Vector (Bit) Permute (Right-indexed): 104 . vbpermd: Same as "vbpermq", use VX1_Int_Ty2: 105 VX1_Int_Ty2<1484, "vbpermd", int_ppc_altivec_vbpermd, v2i64, v2i64>; 106 107 . vpermr: use VA1a_Int_Ty3 108 VA1a_Int_Ty3<59, "vpermr", int_ppc_altivec_vpermr, v16i8, v16i8, v16i8>; 109 110- Vector Rotate Left Mask/Mask-Insert: vrlwnm vrlwmi vrldnm vrldmi 111 . 
Use intrinsic: 112 VX1_Int_Ty<389, "vrlwnm", int_ppc_altivec_vrlwnm, v4i32>; 113 VX1_Int_Ty<133, "vrlwmi", int_ppc_altivec_vrlwmi, v4i32>; 114 VX1_Int_Ty<453, "vrldnm", int_ppc_altivec_vrldnm, v2i64>; 115 VX1_Int_Ty<197, "vrldmi", int_ppc_altivec_vrldmi, v2i64>; 116 117- Vector Shift Left/Right: vslv vsrv 118 . Use intrinsic, don't map to llvm shl and lshr, because they have different 119 semantics, e.g. vslv: 120 121 do i = 0 to 15 122 sh ← VR[VRB].byte[i].bit[5:7] 123 VR[VRT].byte[i] ← src.byte[i:i+1].bit[sh:sh+7] 124 end 125 126 VR[VRT].byte[i] is composed of 2 bytes from src.byte[i:i+1] 127 128 . VX1_Int_Ty<1860, "vslv", int_ppc_altivec_vslv, v16i8>; 129 VX1_Int_Ty<1796, "vsrv", int_ppc_altivec_vsrv, v16i8>; 130 131- Vector Multiply-by-10 (& Write Carry) Unsigned Quadword: 132 vmul10uq vmul10cuq 133 . Use intrinsic: 134 VX1_Int_Ty<513, "vmul10uq", int_ppc_altivec_vmul10uq, v1i128>; 135 VX1_Int_Ty< 1, "vmul10cuq", int_ppc_altivec_vmul10cuq, v1i128>; 136 137- Vector Multiply-by-10 Extended (& Write Carry) Unsigned Quadword: 138 vmul10euq vmul10ecuq 139 . Use intrinsic: 140 VX1_Int_Ty<577, "vmul10euq", int_ppc_altivec_vmul10euq, v1i128>; 141 VX1_Int_Ty< 65, "vmul10ecuq", int_ppc_altivec_vmul10ecuq, v1i128>; 142 143- Decimal Convert From/to National/Zoned/Signed-QWord: 144 bcdcfn. bcdcfz. bcdctn. bcdctz. bcdcfsq. bcdctsq. 145 . Use instrinstics: 146 (set v1i128:$vD, (int_ppc_altivec_bcdcfno v1i128:$vB, i1:$PS)) 147 (set v1i128:$vD, (int_ppc_altivec_bcdcfzo v1i128:$vB, i1:$PS)) 148 (set v1i128:$vD, (int_ppc_altivec_bcdctno v1i128:$vB)) 149 (set v1i128:$vD, (int_ppc_altivec_bcdctzo v1i128:$vB, i1:$PS)) 150 (set v1i128:$vD, (int_ppc_altivec_bcdcfsqo v1i128:$vB, i1:$PS)) 151 (set v1i128:$vD, (int_ppc_altivec_bcdctsqo v1i128:$vB)) 152 153- Decimal Copy-Sign/Set-Sign: bcdcpsgn. bcdsetsgn. 154 . 
Use intrinsics: 155 (set v1i128:$vD, (int_ppc_altivec_bcdcpsgno v1i128:$vA, v1i128:$vB)) 156 (set v1i128:$vD, (int_ppc_altivec_bcdsetsgno v1i128:$vB, i1:$PS)) 157 158- Decimal Shift/Unsigned-Shift/Shift-and-Round: bcds. bcdus. bcdsr. 159 . Use intrinsics: 160 (set v1i128:$vD, (int_ppc_altivec_bcdso v1i128:$vA, v1i128:$vB, i1:$PS)) 161 (set v1i128:$vD, (int_ppc_altivec_bcduso v1i128:$vA, v1i128:$vB)) 162 (set v1i128:$vD, (int_ppc_altivec_bcdsro v1i128:$vA, v1i128:$vB, i1:$PS)) 163 164 . Note! Only 1 byte of their VA is accessed, i.e. VA.byte[7] 165 166- Decimal (Unsigned) Truncate: bcdtrunc. bcdutrunc. 167 . Use intrinsics: 168 (set v1i128:$vD, (int_ppc_altivec_bcdtrunco v1i128:$vA, v1i128:$vB, i1:$PS)) 169 (set v1i128:$vD, (int_ppc_altivec_bcdutrunco v1i128:$vA, v1i128:$vB)) 170 171 . Note! Only 2 bytes of their VA are accessed, i.e. VA.hword[3] (VA.bit[48:63]) 172 173VSX: 174- QP Copy Sign: xscpsgnqp 175 . Similar to xscpsgndp 176 . (set f128:$vT, (fcopysign f128:$vB, f128:$vA)) 177 178- QP Absolute/Negative-Absolute/Negate: xsabsqp xsnabsqp xsnegqp 179 . Similar to xsabsdp/xsnabsdp/xsnegdp 180 . (set f128:$vT, (fabs f128:$vB)) // xsabsqp 181 (set f128:$vT, (fneg (fabs f128:$vB))) // xsnabsqp 182 (set f128:$vT, (fneg f128:$vB)) // xsnegqp 183 184- QP Add/Divide/Multiply/Subtract/Square-Root: 185 xsaddqp xsdivqp xsmulqp xssubqp xssqrtqp 186 . Similar to xsadddp 187 . isCommutable = 1 188 (set f128:$vT, (fadd f128:$vA, f128:$vB)) // xsaddqp 189 (set f128:$vT, (fmul f128:$vA, f128:$vB)) // xsmulqp 190 191 . isCommutable = 0 192 (set f128:$vT, (fdiv f128:$vA, f128:$vB)) // xsdivqp 193 (set f128:$vT, (fsub f128:$vA, f128:$vB)) // xssubqp 194 (set f128:$vT, (fsqrt f128:$vB)) // xssqrtqp 195 196- Round to Odd of QP Add/Divide/Multiply/Subtract/Square-Root: 197 xsaddqpo xsdivqpo xsmulqpo xssubqpo xssqrtqpo 198 . Similar to xsrsqrtedp?? 
199 def XSRSQRTEDP : XX2Form<60, 74, 200 (outs vsfrc:$XT), (ins vsfrc:$XB), 201 "xsrsqrtedp $XT, $XB", IIC_VecFP, 202 [(set f64:$XT, (PPCfrsqrte f64:$XB))]>; 203 204 . Define DAG Node in PPCInstrInfo.td: 205 def PPCfaddrto: SDNode<"PPCISD::FADDRTO", SDTFPBinOp, []>; 206 def PPCfdivrto: SDNode<"PPCISD::FDIVRTO", SDTFPBinOp, []>; 207 def PPCfmulrto: SDNode<"PPCISD::FMULRTO", SDTFPBinOp, []>; 208 def PPCfsubrto: SDNode<"PPCISD::FSUBRTO", SDTFPBinOp, []>; 209 def PPCfsqrtrto: SDNode<"PPCISD::FSQRTRTO", SDTFPUnaryOp, []>; 210 211 DAG patterns of each instruction (PPCInstrVSX.td): 212 . isCommutable = 1 213 (set f128:$vT, (PPCfaddrto f128:$vA, f128:$vB)) // xsaddqpo 214 (set f128:$vT, (PPCfmulrto f128:$vA, f128:$vB)) // xsmulqpo 215 216 . isCommutable = 0 217 (set f128:$vT, (PPCfdivrto f128:$vA, f128:$vB)) // xsdivqpo 218 (set f128:$vT, (PPCfsubrto f128:$vA, f128:$vB)) // xssubqpo 219 (set f128:$vT, (PPCfsqrtrto f128:$vB)) // xssqrtqpo 220 221- QP (Negative) Multiply-{Add/Subtract}: xsmaddqp xsmsubqp xsnmaddqp xsnmsubqp 222 . Ref: xsmaddadp/xsmsubadp/xsnmaddadp/xsnmsubadp 223 224 . isCommutable = 1 225 // xsmaddqp 226 [(set f128:$vT, (fma f128:$vA, f128:$vB, f128:$vTi))]>, 227 RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, 228 AltVSXFMARel; 229 230 // xsmsubqp 231 [(set f128:$vT, (fma f128:$vA, f128:$vB, (fneg f128:$vTi)))]>, 232 RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, 233 AltVSXFMARel; 234 235 // xsnmaddqp 236 [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, f128:$vTi)))]>, 237 RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, 238 AltVSXFMARel; 239 240 // xsnmsubqp 241 [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, (fneg f128:$vTi))))]>, 242 RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, 243 AltVSXFMARel; 244 245- Round to Odd of QP (Negative) Multiply-{Add/Subtract}: 246 xsmaddqpo xsmsubqpo xsnmaddqpo xsnmsubqpo 247 . Similar to xsrsqrtedp?? 248 249 . 
Define DAG Node in PPCInstrInfo.td: 250 def PPCfmarto: SDNode<"PPCISD::FMARTO", SDTFPTernaryOp, []>; 251 252 It looks like we only need to define "PPCfmarto" for these instructions, 253 because according to PowerISA_V3.0, these instructions perform RTO on 254 fma's result: 255 xsmaddqp(o) 256 v ← bfp_MULTIPLY_ADD(src1, src3, src2) 257 rnd ← bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v) 258 result ← bfp_CONVERT_TO_BFP128(rnd) 259 260 xsmsubqp(o) 261 v ← bfp_MULTIPLY_ADD(src1, src3, bfp_NEGATE(src2)) 262 rnd ← bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v) 263 result ← bfp_CONVERT_TO_BFP128(rnd) 264 265 xsnmaddqp(o) 266 v ← bfp_MULTIPLY_ADD(src1,src3,src2) 267 rnd ← bfp_NEGATE(bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v)) 268 result ← bfp_CONVERT_TO_BFP128(rnd) 269 270 xsnmsubqp(o) 271 v ← bfp_MULTIPLY_ADD(src1, src3, bfp_NEGATE(src2)) 272 rnd ← bfp_NEGATE(bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v)) 273 result ← bfp_CONVERT_TO_BFP128(rnd) 274 275 DAG patterns of each instruction (PPCInstrVSX.td): 276 . isCommutable = 1 277 // xsmaddqpo 278 [(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, f128:$vTi))]>, 279 RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, 280 AltVSXFMARel; 281 282 // xsmsubqpo 283 [(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi)))]>, 284 RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, 285 AltVSXFMARel; 286 287 // xsnmaddqpo 288 [(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, f128:$vTi)))]>, 289 RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, 290 AltVSXFMARel; 291 292 // xsnmsubqpo 293 [(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi))))]>, 294 RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, 295 AltVSXFMARel; 296 297- QP Compare Ordered/Unordered: xscmpoqp xscmpuqp 298 . ref: XSCMPUDP 299 def XSCMPUDP : XX3Form_1<60, 35, 300 (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), 301 "xscmpudp $crD, $XA, $XB", IIC_FPCompare, []>; 302 303 . No SDAG, intrinsic, builtin are required?? 304 Or llvm fcmp order/unorder compare?? 
305 306- DP/QP Compare Exponents: xscmpexpdp xscmpexpqp 307 . No SDAG, intrinsic, builtin are required? 308 309- DP Compare ==, >=, >, !=: xscmpeqdp xscmpgedp xscmpgtdp xscmpnedp 310 . I checked existing instruction "XSCMPUDP". They are different in target 311 register. "XSCMPUDP" write to CR field, xscmp*dp write to VSX register 312 313 . Use intrinsic: 314 (set i128:$XT, (int_ppc_vsx_xscmpeqdp f64:$XA, f64:$XB)) 315 (set i128:$XT, (int_ppc_vsx_xscmpgedp f64:$XA, f64:$XB)) 316 (set i128:$XT, (int_ppc_vsx_xscmpgtdp f64:$XA, f64:$XB)) 317 (set i128:$XT, (int_ppc_vsx_xscmpnedp f64:$XA, f64:$XB)) 318 319- Vector Compare Not Equal: xvcmpnedp xvcmpnedp. xvcmpnesp xvcmpnesp. 320 . Similar to xvcmpeqdp: 321 defm XVCMPEQDP : XX3Form_Rcr<60, 99, 322 "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare, 323 int_ppc_vsx_xvcmpeqdp, v2i64, v2f64>; 324 325 . So we should use "XX3Form_Rcr" to implement intrinsic 326 327- Convert DP -> QP: xscvdpqp 328 . Similar to XSCVDPSP: 329 def XSCVDPSP : XX2Form<60, 265, 330 (outs vsfrc:$XT), (ins vsfrc:$XB), 331 "xscvdpsp $XT, $XB", IIC_VecFP, []>; 332 . So, No SDAG, intrinsic, builtin are required?? 333 334- Round & Convert QP -> DP (dword[1] is set to zero): xscvqpdp xscvqpdpo 335 . Similar to XSCVDPSP 336 . No SDAG, intrinsic, builtin are required?? 337 338- Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero): 339 xscvqpsdz xscvqpswz xscvqpudz xscvqpuwz 340 . According to PowerISA_V3.0, these are similar to "XSCVDPSXDS", "XSCVDPSXWS", 341 "XSCVDPUXDS", "XSCVDPUXWS" 342 343 . DAG patterns: 344 (set f128:$XT, (PPCfctidz f128:$XB)) // xscvqpsdz 345 (set f128:$XT, (PPCfctiwz f128:$XB)) // xscvqpswz 346 (set f128:$XT, (PPCfctiduz f128:$XB)) // xscvqpudz 347 (set f128:$XT, (PPCfctiwuz f128:$XB)) // xscvqpuwz 348 349- Convert (Un)Signed DWord -> QP: xscvsdqp xscvudqp 350 . Similar to XSCVSXDSP 351 . 
(set f128:$XT, (PPCfcfids f64:$XB)) // xscvsdqp 352 (set f128:$XT, (PPCfcfidus f64:$XB)) // xscvudqp 353 354- (Round &) Convert DP <-> HP: xscvdphp xscvhpdp 355 . Similar to XSCVDPSP 356 . No SDAG, intrinsic, builtin are required?? 357 358- Vector HP -> SP: xvcvhpsp xvcvsphp 359 . Similar to XVCVDPSP: 360 def XVCVDPSP : XX2Form<60, 393, 361 (outs vsrc:$XT), (ins vsrc:$XB), 362 "xvcvdpsp $XT, $XB", IIC_VecFP, []>; 363 . No SDAG, intrinsic, builtin are required?? 364 365- Round to Quad-Precision Integer: xsrqpi xsrqpix 366 . These are a combination of "XSRDPI", "XSRDPIC", "XSRDPIM", .., because you 367 need to specify the rounding mode in the instruction 368 . Provide builtin? 369 (set f128:$vT, (int_ppc_vsx_xsrqpi f128:$vB)) 370 (set f128:$vT, (int_ppc_vsx_xsrqpix f128:$vB)) 371 372- Round Quad-Precision to Double-Extended Precision (fp80): xsrqpxp 373 . Provide builtin? 374 (set f128:$vT, (int_ppc_vsx_xsrqpxp f128:$vB)) 375 376Fixed Point Facility: 377 378- Exploit cmprb and cmpeqb (perhaps for something like 379 isalpha/isdigit/isupper/islower and isspace respectively). This can 380 perhaps be done through a builtin. 381 382- Provide testing for cnttz[dw] 383- Insert Exponent DP/QP: xsiexpdp xsiexpqp 384 . Use intrinsic? 385 . xsiexpdp: 386 // Note: rA and rB are unsigned integer values. 387 (set f128:$XT, (int_ppc_vsx_xsiexpdp i64:$rA, i64:$rB)) 388 389 . xsiexpqp: 390 (set f128:$vT, (int_ppc_vsx_xsiexpqp f128:$vA, f64:$vB)) 391 392- Extract Exponent/Significand DP/QP: xsxexpdp xsxsigdp xsxexpqp xsxsigqp 393 . Use intrinsic? 394 . (set i64:$rT, (int_ppc_vsx_xsxexpdp f64:$XB)) // xsxexpdp 395 (set i64:$rT, (int_ppc_vsx_xsxsigdp f64:$XB)) // xsxsigdp 396 (set f128:$vT, (int_ppc_vsx_xsxexpqp f128:$vB)) // xsxexpqp 397 (set f128:$vT, (int_ppc_vsx_xsxsigqp f128:$vB)) // xsxsigqp 398 399- Vector Insert Word: xxinsertw 400 - Useful for inserting f32/i32 elements into vectors (the element to be 401 inserted needs to be prepared) 402 . 
Note: llvm has insertelem in "Vector Operations" 403 ; yields <n x <ty>> 404 <result> = insertelement <n x <ty>> <val>, <ty> <elt>, <ty2> <idx> 405 406 But how to map to it?? 407 [(set v1f128:$XT, (insertelement v1f128:$XTi, f128:$XB, i4:$UIMM))]>, 408 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, 409 410 . Or use intrinsic? 411 (set v1f128:$XT, (int_ppc_vsx_xxinsertw v1f128:$XTi, f128:$XB, i4:$UIMM)) 412 413- Vector Extract Unsigned Word: xxextractuw 414 - Not useful for extraction of f32 from v4f32 (the current pattern is better - 415 shift->convert) 416 - It is useful for (uint_to_fp (vector_extract v4i32, N)) 417 - Unfortunately, it can't be used for (sint_to_fp (vector_extract v4i32, N)) 418 . Note: llvm has extractelement in "Vector Operations" 419 ; yields <ty> 420 <result> = extractelement <n x <ty>> <val>, <ty2> <idx> 421 422 How to map to it?? 423 [(set f128:$XT, (extractelement v1f128:$XB, i4:$UIMM))] 424 425 . Or use intrinsic? 426 (set f128:$XT, (int_ppc_vsx_xxextractuw v1f128:$XB, i4:$UIMM)) 427 428- Vector Insert Exponent DP/SP: xviexpdp xviexpsp 429 . Use intrinsic 430 (set v2f64:$XT, (int_ppc_vsx_xviexpdp v2f64:$XA, v2f64:$XB)) 431 (set v4f32:$XT, (int_ppc_vsx_xviexpsp v4f32:$XA, v4f32:$XB)) 432 433- Vector Extract Exponent/Significand DP/SP: xvxexpdp xvxexpsp xvxsigdp xvxsigsp 434 . Use intrinsic 435 (set v2f64:$XT, (int_ppc_vsx_xvxexpdp v2f64:$XB)) 436 (set v4f32:$XT, (int_ppc_vsx_xvxexpsp v4f32:$XB)) 437 (set v2f64:$XT, (int_ppc_vsx_xvxsigdp v2f64:$XB)) 438 (set v4f32:$XT, (int_ppc_vsx_xvxsigsp v4f32:$XB)) 439 440- Test Data Class SP/DP/QP: xststdcsp xststdcdp xststdcqp 441 . No SDAG, intrinsic, builtin are required? 442 Because it seems that we have no way to map BF field? 443 444 Instruction Form: [PO T XO B XO BX TX] 445 Asm: xststd* BF,XB,DCMX 446 447 BF is an index to CR register field. 448 449- Vector Test Data Class SP/DP: xvtstdcsp xvtstdcdp 450 . 
Use intrinsic 451 (set v4f32:$XT, (int_ppc_vsx_xvtstdcsp v4f32:$XB, i7:$DCMX)) 452 (set v2f64:$XT, (int_ppc_vsx_xvtstdcdp v2f64:$XB, i7:$DCMX)) 453 454- Maximum/Minimum Type-C/Type-J DP: xsmaxcdp xsmaxjdp xsmincdp xsminjdp 455 . PowerISA_V3.0: 456 "xsmaxcdp can be used to implement the C/C++/Java conditional operation 457 (x>y)?x:y for single-precision and double-precision arguments." 458 459 Note! c type and j type have different behavior when: 460 1. Either input is NaN 461 2. Both inputs are +-Infinity, +-Zero 462 463 . ctype maps to llvm fmaxnum/fminnum 464 jtype uses intrinsic 465 466 . xsmaxcdp xsmincdp 467 (set f64:$XT, (fmaxnum f64:$XA, f64:$XB)) 468 (set f64:$XT, (fminnum f64:$XA, f64:$XB)) 469 470 . xsmaxjdp xsminjdp 471 (set f64:$XT, (int_ppc_vsx_xsmaxjdp f64:$XA, f64:$XB)) 472 (set f64:$XT, (int_ppc_vsx_xsminjdp f64:$XA, f64:$XB)) 473 474- Vector Byte-Reverse H/W/D/Q Word: xxbrh xxbrw xxbrd xxbrq 475 . Use intrinsic 476 (set v8i16:$XT, (int_ppc_vsx_xxbrh v8i16:$XB)) 477 (set v4i32:$XT, (int_ppc_vsx_xxbrw v4i32:$XB)) 478 (set v2i64:$XT, (int_ppc_vsx_xxbrd v2i64:$XB)) 479 (set v1i128:$XT, (int_ppc_vsx_xxbrq v1i128:$XB)) 480 481- Vector Permute: xxperm xxpermr 482 . I have checked "PPCxxswapd" in PPCInstrVSX.td, but they are different 483 . Use intrinsic 484 (set v16i8:$XT, (int_ppc_vsx_xxperm v16i8:$XA, v16i8:$XB)) 485 (set v16i8:$XT, (int_ppc_vsx_xxpermr v16i8:$XA, v16i8:$XB)) 486 487- Vector Splat Immediate Byte: xxspltib 488 . Similar to XXSPLTW: 489 def XXSPLTW : XX2Form_2<60, 164, 490 (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM), 491 "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>; 492 493 . No SDAG, intrinsic, builtin are required? 494 495- Load/Store Vector: lxv stxv 496 . Likely has an SDAG match: 497 (set v?:$XT, (load ix16addr:$src)) 498 (set v?:$XT, (store ix16addr:$dst)) 499 500 . Need to define ix16addr in PPCInstrInfo.td 501 ix16addr: 16-byte aligned, see "def memrix16" in PPCInstrInfo.td 502 503- Load/Store Vector Indexed: lxvx stxvx 504 . 
Has likely SDAG match: 505 (set v?:$XT, (load xoaddr:$src)) 506 (set v?:$XT, (store xoaddr:$dst)) 507 508- Load/Store DWord: lxsd stxsd 509 . Similar to lxsdx/stxsdx: 510 def LXSDX : XX1Form<31, 588, 511 (outs vsfrc:$XT), (ins memrr:$src), 512 "lxsdx $XT, $src", IIC_LdStLFD, 513 [(set f64:$XT, (load xoaddr:$src))]>; 514 515 . (set f64:$XT, (load iaddrX4:$src)) 516 (set f64:$XT, (store iaddrX4:$dst)) 517 518- Load/Store SP, with conversion from/to DP: lxssp stxssp 519 . Similar to lxsspx/stxsspx: 520 def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src), 521 "lxsspx $XT, $src", IIC_LdStLFD, 522 [(set f32:$XT, (load xoaddr:$src))]>; 523 524 . (set f32:$XT, (load iaddrX4:$src)) 525 (set f32:$XT, (store iaddrX4:$dst)) 526 527- Load as Integer Byte/Halfword & Zero Indexed: lxsibzx lxsihzx 528 . Similar to lxsiwzx: 529 def LXSIWZX : XX1Form<31, 12, (outs vsfrc:$XT), (ins memrr:$src), 530 "lxsiwzx $XT, $src", IIC_LdStLFD, 531 [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>; 532 533 . (set f64:$XT, (PPClfiwzx xoaddr:$src)) 534 535- Store as Integer Byte/Halfword Indexed: stxsibx stxsihx 536 . Similar to stxsiwx: 537 def STXSIWX : XX1Form<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst), 538 "stxsiwx $XT, $dst", IIC_LdStSTFD, 539 [(PPCstfiwx f64:$XT, xoaddr:$dst)]>; 540 541 . (PPCstfiwx f64:$XT, xoaddr:$dst) 542 543- Load Vector Halfword*8/Byte*16 Indexed: lxvh8x lxvb16x 544 . Similar to lxvd2x/lxvw4x: 545 def LXVD2X : XX1Form<31, 844, 546 (outs vsrc:$XT), (ins memrr:$src), 547 "lxvd2x $XT, $src", IIC_LdStLFD, 548 [(set v2f64:$XT, (int_ppc_vsx_lxvd2x xoaddr:$src))]>; 549 550 . (set v8i16:$XT, (int_ppc_vsx_lxvh8x xoaddr:$src)) 551 (set v16i8:$XT, (int_ppc_vsx_lxvb16x xoaddr:$src)) 552 553- Store Vector Halfword*8/Byte*16 Indexed: stxvh8x stxvb16x 554 . Similar to stxvd2x/stxvw4x: 555 def STXVD2X : XX1Form<31, 972, 556 (outs), (ins vsrc:$XT, memrr:$dst), 557 "stxvd2x $XT, $dst", IIC_LdStSTFD, 558 [(store v2f64:$XT, xoaddr:$dst)]>; 559 560 . 
(store v8i16:$XT, xoaddr:$dst) 561 (store v16i8:$XT, xoaddr:$dst) 562 563- Load/Store Vector (Left-justified) with Length: lxvl lxvll stxvl stxvll 564 . Likely needs an intrinsic 565 . (set v?:$XT, (int_ppc_vsx_lxvl xoaddr:$src)) 566 (set v?:$XT, (int_ppc_vsx_lxvll xoaddr:$src)) 567 568 . (int_ppc_vsx_stxvl xoaddr:$dst) 569 (int_ppc_vsx_stxvll xoaddr:$dst) 570 571- Load Vector Word & Splat Indexed: lxvwsx 572 . Likely needs an intrinsic 573 . (set v?:$XT, (int_ppc_vsx_lxvwsx xoaddr:$src)) 574 575Atomic operations (l[dw]at, st[dw]at): 576- Provide custom lowering for common atomic operations to use these 577 instructions with the correct Function Code 578- Ensure the operands are in the correct registers (i.e. RT+1, RT+2) 579- Provide builtins since not all FCs necessarily have an existing LLVM 580 atomic operation 581 582Move to CR from XER Extended (mcrxrx): 583- Is there a use for this in LLVM? 584 585Fixed Point Facility: 586 587- Copy-Paste Facility: copy copy_first cp_abort paste paste. paste_last 588 . Use intrinsics: 589 (int_ppc_copy_first i32:$rA, i32:$rB) 590 (int_ppc_copy i32:$rA, i32:$rB) 591 592 (int_ppc_paste i32:$rA, i32:$rB) 593 (int_ppc_paste_last i32:$rA, i32:$rB) 594 595 (int_ppc_cp_abort) 596 597- Message Synchronize: msgsync 598- SLB*: slbieg slbsync 599- stop 600 . No intrinsics 601