1//===- PPCInstrVSX.td - The PowerPC VSX Extension --*- tablegen -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file describes the VSX extension to the PowerPC instruction set. 10// 11//===----------------------------------------------------------------------===// 12 13// *********************************** NOTE *********************************** 14// ** For POWER8 Little Endian, the VSX swap optimization relies on knowing ** 15// ** which VMX and VSX instructions are lane-sensitive and which are not. ** 16// ** A lane-sensitive instruction relies, implicitly or explicitly, on ** 17// ** whether lanes are numbered from left to right. An instruction like ** 18// ** VADDFP is not lane-sensitive, because each lane of the result vector ** 19// ** relies only on the corresponding lane of the source vectors. However, ** 20// ** an instruction like VMULESB is lane-sensitive, because "even" and ** 21// ** "odd" lanes are different for big-endian and little-endian numbering. ** 22// ** ** 23// ** When adding new VMX and VSX instructions, please consider whether they ** 24// ** are lane-sensitive. If so, they must be added to a switch statement ** 25// ** in PPCVSXSwapRemoval::gatherVectorInstructions(). 
// **
// ****************************************************************************

// Asm-parser operand classes and the matching register operands for the VSX
// register classes. Every operand class below names isVSRegNumber as its
// PredicateMethod.
def PPCRegVSRCAsmOperand : AsmOperandClass {
  let Name = "RegVSRC";
  let PredicateMethod = "isVSRegNumber";
}
def vsrc : RegisterOperand<VSRC> {
  let ParserMatchClass = PPCRegVSRCAsmOperand;
}

def PPCRegVSFRCAsmOperand : AsmOperandClass {
  let Name = "RegVSFRC";
  let PredicateMethod = "isVSRegNumber";
}
def vsfrc : RegisterOperand<VSFRC> {
  let ParserMatchClass = PPCRegVSFRCAsmOperand;
}

def PPCRegVSSRCAsmOperand : AsmOperandClass {
  let Name = "RegVSSRC";
  let PredicateMethod = "isVSRegNumber";
}
def vssrc : RegisterOperand<VSSRC> {
  let ParserMatchClass = PPCRegVSSRCAsmOperand;
}

def PPCRegSPILLTOVSRRCAsmOperand : AsmOperandClass {
  let Name = "RegSPILLTOVSRRC";
  let PredicateMethod = "isVSRegNumber";
}

def spilltovsrrc : RegisterOperand<SPILLTOVSRRC> {
  let ParserMatchClass = PPCRegSPILLTOVSRRCAsmOperand;
}

// Type profiles for the custom VSX SelectionDAG nodes.
// One v4f32 result, one pointer operand.
def SDT_PPCldvsxlh : SDTypeProfile<1, 1, [SDTCisVT<0, v4f32>, SDTCisPtrTy<1>]>;

// One v2f64 result; operands are a v4f32 value and a pointer-typed value.
def SDT_PPCfpexth : SDTypeProfile<1, 2, [SDTCisVT<0, v2f64>,
                                         SDTCisVT<1, v4f32>,
                                         SDTCisPtrTy<2>]>;

// One vector result of any vector type, one pointer operand.
def SDT_PPCldsplat : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisPtrTy<1>]>;

// Little-endian-specific nodes.
def SDT_PPClxvd2x : SDTypeProfile<1, 1, [SDTCisVT<0, v2f64>, SDTCisPtrTy<1>]>;
def SDT_PPCstxvd2x : SDTypeProfile<0, 2, [SDTCisVT<0, v2f64>, SDTCisPtrTy<1>]>;
def SDT_PPCxxswapd : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>]>;
def SDTVecConv : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                      SDTCisPtrTy<2>]>;
def SDTVabsd : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                    SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def SDT_PPCld_vec_be : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisPtrTy<1>]>;
def SDT_PPCst_vec_be : SDTypeProfile<0, 2, [SDTCisVec<0>, SDTCisPtrTy<1>]>;

// SelectionDAG nodes built on the profiles above. The chained load nodes also
// carry a memory operand; the store nodes are chained and marked as stores.
def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
                       [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x,
                        [SDNPHasChain, SDNPMayStore]>;
def PPCld_vec_be : SDNode<"PPCISD::LOAD_VEC_BE", SDT_PPCld_vec_be,
                          [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCst_vec_be : SDNode<"PPCISD::STORE_VEC_BE", SDT_PPCst_vec_be,
                          [SDNPHasChain, SDNPMayStore]>;
def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>;
def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>;
def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>;
def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>;
def PPCsvec2fp : SDNode<"PPCISD::SINT_VEC_TO_FP", SDTVecConv, []>;
def PPCuvec2fp : SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>;
def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>;
def PPCvabsd : SDNode<"PPCISD::VABSD", SDTVabsd, []>;

def PPCfpexth : SDNode<"PPCISD::FP_EXTEND_HALF", SDT_PPCfpexth, []>;
def PPCldvsxlh : SDNode<"PPCISD::LD_VSX_LH", SDT_PPCldvsxlh,
                        [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCldsplat : SDNode<"PPCISD::LD_SPLAT", SDT_PPCldsplat,
                        [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;

// Defines a vector compare instruction together with its record form.
//   opcode, xo  - encoding fields forwarded to XX3Form_Rc.
//   asmbase     - mnemonic; also used as the BaseName of both records.
//   asmstr      - operand string appended to the mnemonic.
//   itin        - itinerary class.
//   Int         - intrinsic matched by the non-record form.
//   OutTy, InTy - result and operand value types of that intrinsic pattern.
// The record form ("<asmbase>.") additionally defines CR6 and is selected
// from PPCvcmp_o with xo as its last operand.
multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
                       string asmstr, InstrItinClass itin, Intrinsic Int,
                       ValueType OutTy, ValueType InTy> {
  let BaseName = asmbase in {
    def NAME : XX3Form_Rc<opcode, xo,
                          (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
                          !strconcat(asmbase, " ", asmstr), itin,
                          [(set OutTy:$XT, (Int InTy:$XA, InTy:$XB))]>;
    let Defs = [CR6] in {
      def _rec : XX3Form_Rc<opcode, xo,
                            (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
                            !strconcat(asmbase, ". ", asmstr), itin,
                            [(set InTy:$XT,
                                  (InTy (PPCvcmp_o InTy:$XA, InTy:$XB, xo)))]>,
                 isRecordForm;
    }
  }
}

// Instruction form with a single input register for instructions such as
// XXPERMDI. It exists because giving an instruction several chained operands
// (such as loads) performs every one of the chained operations instead of
// coalescing them into one register, even when they read the same memory
// location. This form simply forces the instruction to use the same register
// for both inputs.
// For example, an output DAG such as this:
//   (XXPERMDI (LXSIBZX xoaddr:$src), (LXSIBZX xoaddr:$src), 0)
// would result in two load instructions being emitted and used as separate
// inputs to the XXPERMDI instruction.
class XX3Form_2s<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
                 InstrItinClass itin, list<dag> pattern>
  : XX3Form_2<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
  // Encode the second source field from the first one.
  let XB = XA;
}

// Subtarget predicates used by the VSX definitions.
def HasVSX : Predicate<"PPCSubTarget->hasVSX()">;
def IsLittleEndian : Predicate<"PPCSubTarget->isLittleEndian()">;
def IsBigEndian : Predicate<"!PPCSubTarget->isLittleEndian()">;
def HasOnlySwappingMemOps : Predicate<"!PPCSubTarget->hasP9Vector()">;

let Predicates = [HasVSX] in {
let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
let hasSideEffects = 0 in { // VSX instructions don't have side effects.
155 156 // Load indexed instructions 157 let mayLoad = 1, mayStore = 0 in { 158 let CodeSize = 3 in 159 def LXSDX : XX1Form_memOp<31, 588, 160 (outs vsfrc:$XT), (ins memrr:$src), 161 "lxsdx $XT, $src", IIC_LdStLFD, 162 []>; 163 164 // Pseudo instruction XFLOADf64 will be expanded to LXSDX or LFDX later 165 let CodeSize = 3 in 166 def XFLOADf64 : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src), 167 "#XFLOADf64", 168 [(set f64:$XT, (load xoaddr:$src))]>; 169 170 let Predicates = [HasVSX, HasOnlySwappingMemOps] in 171 def LXVD2X : XX1Form_memOp<31, 844, 172 (outs vsrc:$XT), (ins memrr:$src), 173 "lxvd2x $XT, $src", IIC_LdStLFD, 174 [(set v2f64:$XT, (int_ppc_vsx_lxvd2x xoaddr:$src))]>; 175 176 def LXVDSX : XX1Form_memOp<31, 332, 177 (outs vsrc:$XT), (ins memrr:$src), 178 "lxvdsx $XT, $src", IIC_LdStLFD, []>; 179 180 let Predicates = [HasVSX, HasOnlySwappingMemOps] in 181 def LXVW4X : XX1Form_memOp<31, 780, 182 (outs vsrc:$XT), (ins memrr:$src), 183 "lxvw4x $XT, $src", IIC_LdStLFD, 184 []>; 185 } // mayLoad 186 187 // Store indexed instructions 188 let mayStore = 1, mayLoad = 0 in { 189 let CodeSize = 3 in 190 def STXSDX : XX1Form_memOp<31, 716, 191 (outs), (ins vsfrc:$XT, memrr:$dst), 192 "stxsdx $XT, $dst", IIC_LdStSTFD, 193 []>; 194 195 // Pseudo instruction XFSTOREf64 will be expanded to STXSDX or STFDX later 196 let CodeSize = 3 in 197 def XFSTOREf64 : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst), 198 "#XFSTOREf64", 199 [(store f64:$XT, xoaddr:$dst)]>; 200 201 let Predicates = [HasVSX, HasOnlySwappingMemOps] in { 202 // The behaviour of this instruction is endianness-specific so we provide no 203 // pattern to match it without considering endianness. 
204 def STXVD2X : XX1Form_memOp<31, 972, 205 (outs), (ins vsrc:$XT, memrr:$dst), 206 "stxvd2x $XT, $dst", IIC_LdStSTFD, 207 []>; 208 209 def STXVW4X : XX1Form_memOp<31, 908, 210 (outs), (ins vsrc:$XT, memrr:$dst), 211 "stxvw4x $XT, $dst", IIC_LdStSTFD, 212 []>; 213 } 214 } // mayStore 215 216 let Uses = [RM] in { 217 // Add/Mul Instructions 218 let isCommutable = 1 in { 219 def XSADDDP : XX3Form<60, 32, 220 (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), 221 "xsadddp $XT, $XA, $XB", IIC_VecFP, 222 [(set f64:$XT, (fadd f64:$XA, f64:$XB))]>; 223 def XSMULDP : XX3Form<60, 48, 224 (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), 225 "xsmuldp $XT, $XA, $XB", IIC_VecFP, 226 [(set f64:$XT, (fmul f64:$XA, f64:$XB))]>; 227 228 def XVADDDP : XX3Form<60, 96, 229 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), 230 "xvadddp $XT, $XA, $XB", IIC_VecFP, 231 [(set v2f64:$XT, (fadd v2f64:$XA, v2f64:$XB))]>; 232 233 def XVADDSP : XX3Form<60, 64, 234 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), 235 "xvaddsp $XT, $XA, $XB", IIC_VecFP, 236 [(set v4f32:$XT, (fadd v4f32:$XA, v4f32:$XB))]>; 237 238 def XVMULDP : XX3Form<60, 112, 239 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), 240 "xvmuldp $XT, $XA, $XB", IIC_VecFP, 241 [(set v2f64:$XT, (fmul v2f64:$XA, v2f64:$XB))]>; 242 243 def XVMULSP : XX3Form<60, 80, 244 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), 245 "xvmulsp $XT, $XA, $XB", IIC_VecFP, 246 [(set v4f32:$XT, (fmul v4f32:$XA, v4f32:$XB))]>; 247 } 248 249 // Subtract Instructions 250 def XSSUBDP : XX3Form<60, 40, 251 (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), 252 "xssubdp $XT, $XA, $XB", IIC_VecFP, 253 [(set f64:$XT, (fsub f64:$XA, f64:$XB))]>; 254 255 def XVSUBDP : XX3Form<60, 104, 256 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), 257 "xvsubdp $XT, $XA, $XB", IIC_VecFP, 258 [(set v2f64:$XT, (fsub v2f64:$XA, v2f64:$XB))]>; 259 def XVSUBSP : XX3Form<60, 72, 260 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), 261 "xvsubsp $XT, $XA, $XB", IIC_VecFP, 262 [(set v4f32:$XT, (fsub v4f32:$XA, v4f32:$XB))]>; 263 264 
// FMA Instructions 265 let BaseName = "XSMADDADP" in { 266 let isCommutable = 1 in 267 def XSMADDADP : XX3Form<60, 33, 268 (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), 269 "xsmaddadp $XT, $XA, $XB", IIC_VecFP, 270 [(set f64:$XT, (fma f64:$XA, f64:$XB, f64:$XTi))]>, 271 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, 272 AltVSXFMARel; 273 let IsVSXFMAAlt = 1 in 274 def XSMADDMDP : XX3Form<60, 41, 275 (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), 276 "xsmaddmdp $XT, $XA, $XB", IIC_VecFP, []>, 277 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, 278 AltVSXFMARel; 279 } 280 281 let BaseName = "XSMSUBADP" in { 282 let isCommutable = 1 in 283 def XSMSUBADP : XX3Form<60, 49, 284 (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), 285 "xsmsubadp $XT, $XA, $XB", IIC_VecFP, 286 [(set f64:$XT, (fma f64:$XA, f64:$XB, (fneg f64:$XTi)))]>, 287 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, 288 AltVSXFMARel; 289 let IsVSXFMAAlt = 1 in 290 def XSMSUBMDP : XX3Form<60, 57, 291 (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), 292 "xsmsubmdp $XT, $XA, $XB", IIC_VecFP, []>, 293 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, 294 AltVSXFMARel; 295 } 296 297 let BaseName = "XSNMADDADP" in { 298 let isCommutable = 1 in 299 def XSNMADDADP : XX3Form<60, 161, 300 (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), 301 "xsnmaddadp $XT, $XA, $XB", IIC_VecFP, 302 [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, f64:$XTi)))]>, 303 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, 304 AltVSXFMARel; 305 let IsVSXFMAAlt = 1 in 306 def XSNMADDMDP : XX3Form<60, 169, 307 (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), 308 "xsnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>, 309 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, 310 AltVSXFMARel; 311 } 312 313 let BaseName = "XSNMSUBADP" in { 314 let isCommutable = 1 in 315 def XSNMSUBADP : XX3Form<60, 177, 316 (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), 317 "xsnmsubadp $XT, $XA, $XB", IIC_VecFP, 318 [(set 
f64:$XT, (fneg (fma f64:$XA, f64:$XB, (fneg f64:$XTi))))]>, 319 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, 320 AltVSXFMARel; 321 let IsVSXFMAAlt = 1 in 322 def XSNMSUBMDP : XX3Form<60, 185, 323 (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), 324 "xsnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>, 325 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, 326 AltVSXFMARel; 327 } 328 329 let BaseName = "XVMADDADP" in { 330 let isCommutable = 1 in 331 def XVMADDADP : XX3Form<60, 97, 332 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), 333 "xvmaddadp $XT, $XA, $XB", IIC_VecFP, 334 [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi))]>, 335 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, 336 AltVSXFMARel; 337 let IsVSXFMAAlt = 1 in 338 def XVMADDMDP : XX3Form<60, 105, 339 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), 340 "xvmaddmdp $XT, $XA, $XB", IIC_VecFP, []>, 341 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, 342 AltVSXFMARel; 343 } 344 345 let BaseName = "XVMADDASP" in { 346 let isCommutable = 1 in 347 def XVMADDASP : XX3Form<60, 65, 348 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), 349 "xvmaddasp $XT, $XA, $XB", IIC_VecFP, 350 [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi))]>, 351 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, 352 AltVSXFMARel; 353 let IsVSXFMAAlt = 1 in 354 def XVMADDMSP : XX3Form<60, 73, 355 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), 356 "xvmaddmsp $XT, $XA, $XB", IIC_VecFP, []>, 357 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, 358 AltVSXFMARel; 359 } 360 361 let BaseName = "XVMSUBADP" in { 362 let isCommutable = 1 in 363 def XVMSUBADP : XX3Form<60, 113, 364 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), 365 "xvmsubadp $XT, $XA, $XB", IIC_VecFP, 366 [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi)))]>, 367 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, 368 AltVSXFMARel; 369 let IsVSXFMAAlt = 1 in 370 def XVMSUBMDP : XX3Form<60, 121, 371 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, 
vsrc:$XB), 372 "xvmsubmdp $XT, $XA, $XB", IIC_VecFP, []>, 373 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, 374 AltVSXFMARel; 375 } 376 377 let BaseName = "XVMSUBASP" in { 378 let isCommutable = 1 in 379 def XVMSUBASP : XX3Form<60, 81, 380 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), 381 "xvmsubasp $XT, $XA, $XB", IIC_VecFP, 382 [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi)))]>, 383 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, 384 AltVSXFMARel; 385 let IsVSXFMAAlt = 1 in 386 def XVMSUBMSP : XX3Form<60, 89, 387 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), 388 "xvmsubmsp $XT, $XA, $XB", IIC_VecFP, []>, 389 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, 390 AltVSXFMARel; 391 } 392 393 let BaseName = "XVNMADDADP" in { 394 let isCommutable = 1 in 395 def XVNMADDADP : XX3Form<60, 225, 396 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), 397 "xvnmaddadp $XT, $XA, $XB", IIC_VecFP, 398 [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi)))]>, 399 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, 400 AltVSXFMARel; 401 let IsVSXFMAAlt = 1 in 402 def XVNMADDMDP : XX3Form<60, 233, 403 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), 404 "xvnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>, 405 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, 406 AltVSXFMARel; 407 } 408 409 let BaseName = "XVNMADDASP" in { 410 let isCommutable = 1 in 411 def XVNMADDASP : XX3Form<60, 193, 412 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), 413 "xvnmaddasp $XT, $XA, $XB", IIC_VecFP, 414 [(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi)))]>, 415 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, 416 AltVSXFMARel; 417 let IsVSXFMAAlt = 1 in 418 def XVNMADDMSP : XX3Form<60, 201, 419 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), 420 "xvnmaddmsp $XT, $XA, $XB", IIC_VecFP, []>, 421 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, 422 AltVSXFMARel; 423 } 424 425 let BaseName = "XVNMSUBADP" in { 426 let isCommutable = 1 in 427 def XVNMSUBADP : 
XX3Form<60, 241, 428 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), 429 "xvnmsubadp $XT, $XA, $XB", IIC_VecFP, 430 [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi))))]>, 431 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, 432 AltVSXFMARel; 433 let IsVSXFMAAlt = 1 in 434 def XVNMSUBMDP : XX3Form<60, 249, 435 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), 436 "xvnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>, 437 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, 438 AltVSXFMARel; 439 } 440 441 let BaseName = "XVNMSUBASP" in { 442 let isCommutable = 1 in 443 def XVNMSUBASP : XX3Form<60, 209, 444 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), 445 "xvnmsubasp $XT, $XA, $XB", IIC_VecFP, 446 [(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi))))]>, 447 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, 448 AltVSXFMARel; 449 let IsVSXFMAAlt = 1 in 450 def XVNMSUBMSP : XX3Form<60, 217, 451 (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), 452 "xvnmsubmsp $XT, $XA, $XB", IIC_VecFP, []>, 453 RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, 454 AltVSXFMARel; 455 } 456 457 // Division Instructions 458 def XSDIVDP : XX3Form<60, 56, 459 (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), 460 "xsdivdp $XT, $XA, $XB", IIC_FPDivD, 461 [(set f64:$XT, (fdiv f64:$XA, f64:$XB))]>; 462 def XSSQRTDP : XX2Form<60, 75, 463 (outs vsfrc:$XT), (ins vsfrc:$XB), 464 "xssqrtdp $XT, $XB", IIC_FPSqrtD, 465 [(set f64:$XT, (fsqrt f64:$XB))]>; 466 467 def XSREDP : XX2Form<60, 90, 468 (outs vsfrc:$XT), (ins vsfrc:$XB), 469 "xsredp $XT, $XB", IIC_VecFP, 470 [(set f64:$XT, (PPCfre f64:$XB))]>; 471 def XSRSQRTEDP : XX2Form<60, 74, 472 (outs vsfrc:$XT), (ins vsfrc:$XB), 473 "xsrsqrtedp $XT, $XB", IIC_VecFP, 474 [(set f64:$XT, (PPCfrsqrte f64:$XB))]>; 475 476 def XSTDIVDP : XX3Form_1<60, 61, 477 (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), 478 "xstdivdp $crD, $XA, $XB", IIC_FPCompare, []>; 479 def XSTSQRTDP : XX2Form_1<60, 106, 480 (outs crrc:$crD), (ins vsfrc:$XB), 481 
"xstsqrtdp $crD, $XB", IIC_FPCompare, []>; 482 483 def XVDIVDP : XX3Form<60, 120, 484 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), 485 "xvdivdp $XT, $XA, $XB", IIC_FPDivD, 486 [(set v2f64:$XT, (fdiv v2f64:$XA, v2f64:$XB))]>; 487 def XVDIVSP : XX3Form<60, 88, 488 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), 489 "xvdivsp $XT, $XA, $XB", IIC_FPDivS, 490 [(set v4f32:$XT, (fdiv v4f32:$XA, v4f32:$XB))]>; 491 492 def XVSQRTDP : XX2Form<60, 203, 493 (outs vsrc:$XT), (ins vsrc:$XB), 494 "xvsqrtdp $XT, $XB", IIC_FPSqrtD, 495 [(set v2f64:$XT, (fsqrt v2f64:$XB))]>; 496 def XVSQRTSP : XX2Form<60, 139, 497 (outs vsrc:$XT), (ins vsrc:$XB), 498 "xvsqrtsp $XT, $XB", IIC_FPSqrtS, 499 [(set v4f32:$XT, (fsqrt v4f32:$XB))]>; 500 501 def XVTDIVDP : XX3Form_1<60, 125, 502 (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB), 503 "xvtdivdp $crD, $XA, $XB", IIC_FPCompare, []>; 504 def XVTDIVSP : XX3Form_1<60, 93, 505 (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB), 506 "xvtdivsp $crD, $XA, $XB", IIC_FPCompare, []>; 507 508 def XVTSQRTDP : XX2Form_1<60, 234, 509 (outs crrc:$crD), (ins vsrc:$XB), 510 "xvtsqrtdp $crD, $XB", IIC_FPCompare, []>; 511 def XVTSQRTSP : XX2Form_1<60, 170, 512 (outs crrc:$crD), (ins vsrc:$XB), 513 "xvtsqrtsp $crD, $XB", IIC_FPCompare, []>; 514 515 def XVREDP : XX2Form<60, 218, 516 (outs vsrc:$XT), (ins vsrc:$XB), 517 "xvredp $XT, $XB", IIC_VecFP, 518 [(set v2f64:$XT, (PPCfre v2f64:$XB))]>; 519 def XVRESP : XX2Form<60, 154, 520 (outs vsrc:$XT), (ins vsrc:$XB), 521 "xvresp $XT, $XB", IIC_VecFP, 522 [(set v4f32:$XT, (PPCfre v4f32:$XB))]>; 523 524 def XVRSQRTEDP : XX2Form<60, 202, 525 (outs vsrc:$XT), (ins vsrc:$XB), 526 "xvrsqrtedp $XT, $XB", IIC_VecFP, 527 [(set v2f64:$XT, (PPCfrsqrte v2f64:$XB))]>; 528 def XVRSQRTESP : XX2Form<60, 138, 529 (outs vsrc:$XT), (ins vsrc:$XB), 530 "xvrsqrtesp $XT, $XB", IIC_VecFP, 531 [(set v4f32:$XT, (PPCfrsqrte v4f32:$XB))]>; 532 533 // Compare Instructions 534 def XSCMPODP : XX3Form_1<60, 43, 535 (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), 536 "xscmpodp 
$crD, $XA, $XB", IIC_FPCompare, []>; 537 def XSCMPUDP : XX3Form_1<60, 35, 538 (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), 539 "xscmpudp $crD, $XA, $XB", IIC_FPCompare, []>; 540 541 defm XVCMPEQDP : XX3Form_Rcr<60, 99, 542 "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare, 543 int_ppc_vsx_xvcmpeqdp, v2i64, v2f64>; 544 defm XVCMPEQSP : XX3Form_Rcr<60, 67, 545 "xvcmpeqsp", "$XT, $XA, $XB", IIC_VecFPCompare, 546 int_ppc_vsx_xvcmpeqsp, v4i32, v4f32>; 547 defm XVCMPGEDP : XX3Form_Rcr<60, 115, 548 "xvcmpgedp", "$XT, $XA, $XB", IIC_VecFPCompare, 549 int_ppc_vsx_xvcmpgedp, v2i64, v2f64>; 550 defm XVCMPGESP : XX3Form_Rcr<60, 83, 551 "xvcmpgesp", "$XT, $XA, $XB", IIC_VecFPCompare, 552 int_ppc_vsx_xvcmpgesp, v4i32, v4f32>; 553 defm XVCMPGTDP : XX3Form_Rcr<60, 107, 554 "xvcmpgtdp", "$XT, $XA, $XB", IIC_VecFPCompare, 555 int_ppc_vsx_xvcmpgtdp, v2i64, v2f64>; 556 defm XVCMPGTSP : XX3Form_Rcr<60, 75, 557 "xvcmpgtsp", "$XT, $XA, $XB", IIC_VecFPCompare, 558 int_ppc_vsx_xvcmpgtsp, v4i32, v4f32>; 559 560 // Move Instructions 561 def XSABSDP : XX2Form<60, 345, 562 (outs vsfrc:$XT), (ins vsfrc:$XB), 563 "xsabsdp $XT, $XB", IIC_VecFP, 564 [(set f64:$XT, (fabs f64:$XB))]>; 565 def XSNABSDP : XX2Form<60, 361, 566 (outs vsfrc:$XT), (ins vsfrc:$XB), 567 "xsnabsdp $XT, $XB", IIC_VecFP, 568 [(set f64:$XT, (fneg (fabs f64:$XB)))]>; 569 def XSNEGDP : XX2Form<60, 377, 570 (outs vsfrc:$XT), (ins vsfrc:$XB), 571 "xsnegdp $XT, $XB", IIC_VecFP, 572 [(set f64:$XT, (fneg f64:$XB))]>; 573 def XSCPSGNDP : XX3Form<60, 176, 574 (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB), 575 "xscpsgndp $XT, $XA, $XB", IIC_VecFP, 576 [(set f64:$XT, (fcopysign f64:$XB, f64:$XA))]>; 577 578 def XVABSDP : XX2Form<60, 473, 579 (outs vsrc:$XT), (ins vsrc:$XB), 580 "xvabsdp $XT, $XB", IIC_VecFP, 581 [(set v2f64:$XT, (fabs v2f64:$XB))]>; 582 583 def XVABSSP : XX2Form<60, 409, 584 (outs vsrc:$XT), (ins vsrc:$XB), 585 "xvabssp $XT, $XB", IIC_VecFP, 586 [(set v4f32:$XT, (fabs v4f32:$XB))]>; 587 588 def XVCPSGNDP : XX3Form<60, 240, 
589 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), 590 "xvcpsgndp $XT, $XA, $XB", IIC_VecFP, 591 [(set v2f64:$XT, (fcopysign v2f64:$XB, v2f64:$XA))]>; 592 def XVCPSGNSP : XX3Form<60, 208, 593 (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), 594 "xvcpsgnsp $XT, $XA, $XB", IIC_VecFP, 595 [(set v4f32:$XT, (fcopysign v4f32:$XB, v4f32:$XA))]>; 596 597 def XVNABSDP : XX2Form<60, 489, 598 (outs vsrc:$XT), (ins vsrc:$XB), 599 "xvnabsdp $XT, $XB", IIC_VecFP, 600 [(set v2f64:$XT, (fneg (fabs v2f64:$XB)))]>; 601 def XVNABSSP : XX2Form<60, 425, 602 (outs vsrc:$XT), (ins vsrc:$XB), 603 "xvnabssp $XT, $XB", IIC_VecFP, 604 [(set v4f32:$XT, (fneg (fabs v4f32:$XB)))]>; 605 606 def XVNEGDP : XX2Form<60, 505, 607 (outs vsrc:$XT), (ins vsrc:$XB), 608 "xvnegdp $XT, $XB", IIC_VecFP, 609 [(set v2f64:$XT, (fneg v2f64:$XB))]>; 610 def XVNEGSP : XX2Form<60, 441, 611 (outs vsrc:$XT), (ins vsrc:$XB), 612 "xvnegsp $XT, $XB", IIC_VecFP, 613 [(set v4f32:$XT, (fneg v4f32:$XB))]>; 614 615 // Conversion Instructions 616 def XSCVDPSP : XX2Form<60, 265, 617 (outs vsfrc:$XT), (ins vsfrc:$XB), 618 "xscvdpsp $XT, $XB", IIC_VecFP, []>; 619 def XSCVDPSXDS : XX2Form<60, 344, 620 (outs vsfrc:$XT), (ins vsfrc:$XB), 621 "xscvdpsxds $XT, $XB", IIC_VecFP, 622 [(set f64:$XT, (PPCfctidz f64:$XB))]>; 623 let isCodeGenOnly = 1 in 624 def XSCVDPSXDSs : XX2Form<60, 344, 625 (outs vssrc:$XT), (ins vssrc:$XB), 626 "xscvdpsxds $XT, $XB", IIC_VecFP, 627 [(set f32:$XT, (PPCfctidz f32:$XB))]>; 628 def XSCVDPSXWS : XX2Form<60, 88, 629 (outs vsfrc:$XT), (ins vsfrc:$XB), 630 "xscvdpsxws $XT, $XB", IIC_VecFP, 631 [(set f64:$XT, (PPCfctiwz f64:$XB))]>; 632 let isCodeGenOnly = 1 in 633 def XSCVDPSXWSs : XX2Form<60, 88, 634 (outs vssrc:$XT), (ins vssrc:$XB), 635 "xscvdpsxws $XT, $XB", IIC_VecFP, 636 [(set f32:$XT, (PPCfctiwz f32:$XB))]>; 637 def XSCVDPUXDS : XX2Form<60, 328, 638 (outs vsfrc:$XT), (ins vsfrc:$XB), 639 "xscvdpuxds $XT, $XB", IIC_VecFP, 640 [(set f64:$XT, (PPCfctiduz f64:$XB))]>; 641 let isCodeGenOnly = 1 in 642 def 
XSCVDPUXDSs : XX2Form<60, 328, 643 (outs vssrc:$XT), (ins vssrc:$XB), 644 "xscvdpuxds $XT, $XB", IIC_VecFP, 645 [(set f32:$XT, (PPCfctiduz f32:$XB))]>; 646 def XSCVDPUXWS : XX2Form<60, 72, 647 (outs vsfrc:$XT), (ins vsfrc:$XB), 648 "xscvdpuxws $XT, $XB", IIC_VecFP, 649 [(set f64:$XT, (PPCfctiwuz f64:$XB))]>; 650 let isCodeGenOnly = 1 in 651 def XSCVDPUXWSs : XX2Form<60, 72, 652 (outs vssrc:$XT), (ins vssrc:$XB), 653 "xscvdpuxws $XT, $XB", IIC_VecFP, 654 [(set f32:$XT, (PPCfctiwuz f32:$XB))]>; 655 def XSCVSPDP : XX2Form<60, 329, 656 (outs vsfrc:$XT), (ins vsfrc:$XB), 657 "xscvspdp $XT, $XB", IIC_VecFP, []>; 658 def XSCVSXDDP : XX2Form<60, 376, 659 (outs vsfrc:$XT), (ins vsfrc:$XB), 660 "xscvsxddp $XT, $XB", IIC_VecFP, 661 [(set f64:$XT, (PPCfcfid f64:$XB))]>; 662 def XSCVUXDDP : XX2Form<60, 360, 663 (outs vsfrc:$XT), (ins vsfrc:$XB), 664 "xscvuxddp $XT, $XB", IIC_VecFP, 665 [(set f64:$XT, (PPCfcfidu f64:$XB))]>; 666 667 def XVCVDPSP : XX2Form<60, 393, 668 (outs vsrc:$XT), (ins vsrc:$XB), 669 "xvcvdpsp $XT, $XB", IIC_VecFP, 670 [(set v4f32:$XT, (int_ppc_vsx_xvcvdpsp v2f64:$XB))]>; 671 def XVCVDPSXDS : XX2Form<60, 472, 672 (outs vsrc:$XT), (ins vsrc:$XB), 673 "xvcvdpsxds $XT, $XB", IIC_VecFP, 674 [(set v2i64:$XT, (fp_to_sint v2f64:$XB))]>; 675 def XVCVDPSXWS : XX2Form<60, 216, 676 (outs vsrc:$XT), (ins vsrc:$XB), 677 "xvcvdpsxws $XT, $XB", IIC_VecFP, 678 [(set v4i32:$XT, (int_ppc_vsx_xvcvdpsxws v2f64:$XB))]>; 679 def XVCVDPUXDS : XX2Form<60, 456, 680 (outs vsrc:$XT), (ins vsrc:$XB), 681 "xvcvdpuxds $XT, $XB", IIC_VecFP, 682 [(set v2i64:$XT, (fp_to_uint v2f64:$XB))]>; 683 def XVCVDPUXWS : XX2Form<60, 200, 684 (outs vsrc:$XT), (ins vsrc:$XB), 685 "xvcvdpuxws $XT, $XB", IIC_VecFP, 686 [(set v4i32:$XT, (int_ppc_vsx_xvcvdpuxws v2f64:$XB))]>; 687 688 def XVCVSPDP : XX2Form<60, 457, 689 (outs vsrc:$XT), (ins vsrc:$XB), 690 "xvcvspdp $XT, $XB", IIC_VecFP, 691 [(set v2f64:$XT, (int_ppc_vsx_xvcvspdp v4f32:$XB))]>; 692 def XVCVSPSXDS : XX2Form<60, 408, 693 (outs vsrc:$XT), (ins 
vsrc:$XB), 694 "xvcvspsxds $XT, $XB", IIC_VecFP, []>; 695 def XVCVSPSXWS : XX2Form<60, 152, 696 (outs vsrc:$XT), (ins vsrc:$XB), 697 "xvcvspsxws $XT, $XB", IIC_VecFP, 698 [(set v4i32:$XT, (fp_to_sint v4f32:$XB))]>; 699 def XVCVSPUXDS : XX2Form<60, 392, 700 (outs vsrc:$XT), (ins vsrc:$XB), 701 "xvcvspuxds $XT, $XB", IIC_VecFP, []>; 702 def XVCVSPUXWS : XX2Form<60, 136, 703 (outs vsrc:$XT), (ins vsrc:$XB), 704 "xvcvspuxws $XT, $XB", IIC_VecFP, 705 [(set v4i32:$XT, (fp_to_uint v4f32:$XB))]>; 706 def XVCVSXDDP : XX2Form<60, 504, 707 (outs vsrc:$XT), (ins vsrc:$XB), 708 "xvcvsxddp $XT, $XB", IIC_VecFP, 709 [(set v2f64:$XT, (sint_to_fp v2i64:$XB))]>; 710 def XVCVSXDSP : XX2Form<60, 440, 711 (outs vsrc:$XT), (ins vsrc:$XB), 712 "xvcvsxdsp $XT, $XB", IIC_VecFP, 713 [(set v4f32:$XT, (int_ppc_vsx_xvcvsxdsp v2i64:$XB))]>; 714 def XVCVSXWDP : XX2Form<60, 248, 715 (outs vsrc:$XT), (ins vsrc:$XB), 716 "xvcvsxwdp $XT, $XB", IIC_VecFP, 717 [(set v2f64:$XT, (int_ppc_vsx_xvcvsxwdp v4i32:$XB))]>; 718 def XVCVSXWSP : XX2Form<60, 184, 719 (outs vsrc:$XT), (ins vsrc:$XB), 720 "xvcvsxwsp $XT, $XB", IIC_VecFP, 721 [(set v4f32:$XT, (sint_to_fp v4i32:$XB))]>; 722 def XVCVUXDDP : XX2Form<60, 488, 723 (outs vsrc:$XT), (ins vsrc:$XB), 724 "xvcvuxddp $XT, $XB", IIC_VecFP, 725 [(set v2f64:$XT, (uint_to_fp v2i64:$XB))]>; 726 def XVCVUXDSP : XX2Form<60, 424, 727 (outs vsrc:$XT), (ins vsrc:$XB), 728 "xvcvuxdsp $XT, $XB", IIC_VecFP, 729 [(set v4f32:$XT, (int_ppc_vsx_xvcvuxdsp v2i64:$XB))]>; 730 def XVCVUXWDP : XX2Form<60, 232, 731 (outs vsrc:$XT), (ins vsrc:$XB), 732 "xvcvuxwdp $XT, $XB", IIC_VecFP, 733 [(set v2f64:$XT, (int_ppc_vsx_xvcvuxwdp v4i32:$XB))]>; 734 def XVCVUXWSP : XX2Form<60, 168, 735 (outs vsrc:$XT), (ins vsrc:$XB), 736 "xvcvuxwsp $XT, $XB", IIC_VecFP, 737 [(set v4f32:$XT, (uint_to_fp v4i32:$XB))]>; 738 739 // Rounding Instructions 740 def XSRDPI : XX2Form<60, 73, 741 (outs vsfrc:$XT), (ins vsfrc:$XB), 742 "xsrdpi $XT, $XB", IIC_VecFP, 743 [(set f64:$XT, (fround f64:$XB))]>; 744 def 
XSRDPIC : XX2Form<60, 107, 745 (outs vsfrc:$XT), (ins vsfrc:$XB), 746 "xsrdpic $XT, $XB", IIC_VecFP, 747 [(set f64:$XT, (fnearbyint f64:$XB))]>; 748 def XSRDPIM : XX2Form<60, 121, 749 (outs vsfrc:$XT), (ins vsfrc:$XB), 750 "xsrdpim $XT, $XB", IIC_VecFP, 751 [(set f64:$XT, (ffloor f64:$XB))]>; 752 def XSRDPIP : XX2Form<60, 105, 753 (outs vsfrc:$XT), (ins vsfrc:$XB), 754 "xsrdpip $XT, $XB", IIC_VecFP, 755 [(set f64:$XT, (fceil f64:$XB))]>; 756 def XSRDPIZ : XX2Form<60, 89, 757 (outs vsfrc:$XT), (ins vsfrc:$XB), 758 "xsrdpiz $XT, $XB", IIC_VecFP, 759 [(set f64:$XT, (ftrunc f64:$XB))]>; 760 761 def XVRDPI : XX2Form<60, 201, 762 (outs vsrc:$XT), (ins vsrc:$XB), 763 "xvrdpi $XT, $XB", IIC_VecFP, 764 [(set v2f64:$XT, (fround v2f64:$XB))]>; 765 def XVRDPIC : XX2Form<60, 235, 766 (outs vsrc:$XT), (ins vsrc:$XB), 767 "xvrdpic $XT, $XB", IIC_VecFP, 768 [(set v2f64:$XT, (fnearbyint v2f64:$XB))]>; 769 def XVRDPIM : XX2Form<60, 249, 770 (outs vsrc:$XT), (ins vsrc:$XB), 771 "xvrdpim $XT, $XB", IIC_VecFP, 772 [(set v2f64:$XT, (ffloor v2f64:$XB))]>; 773 def XVRDPIP : XX2Form<60, 233, 774 (outs vsrc:$XT), (ins vsrc:$XB), 775 "xvrdpip $XT, $XB", IIC_VecFP, 776 [(set v2f64:$XT, (fceil v2f64:$XB))]>; 777 def XVRDPIZ : XX2Form<60, 217, 778 (outs vsrc:$XT), (ins vsrc:$XB), 779 "xvrdpiz $XT, $XB", IIC_VecFP, 780 [(set v2f64:$XT, (ftrunc v2f64:$XB))]>; 781 782 def XVRSPI : XX2Form<60, 137, 783 (outs vsrc:$XT), (ins vsrc:$XB), 784 "xvrspi $XT, $XB", IIC_VecFP, 785 [(set v4f32:$XT, (fround v4f32:$XB))]>; 786 def XVRSPIC : XX2Form<60, 171, 787 (outs vsrc:$XT), (ins vsrc:$XB), 788 "xvrspic $XT, $XB", IIC_VecFP, 789 [(set v4f32:$XT, (fnearbyint v4f32:$XB))]>; 790 def XVRSPIM : XX2Form<60, 185, 791 (outs vsrc:$XT), (ins vsrc:$XB), 792 "xvrspim $XT, $XB", IIC_VecFP, 793 [(set v4f32:$XT, (ffloor v4f32:$XB))]>; 794 def XVRSPIP : XX2Form<60, 169, 795 (outs vsrc:$XT), (ins vsrc:$XB), 796 "xvrspip $XT, $XB", IIC_VecFP, 797 [(set v4f32:$XT, (fceil v4f32:$XB))]>; 798 def XVRSPIZ : XX2Form<60, 153, 799 
(outs vsrc:$XT), (ins vsrc:$XB),
                      "xvrspiz $XT, $XB", IIC_VecFP,
                      [(set v4f32:$XT, (ftrunc v4f32:$XB))]>;

  // Max/Min Instructions
  // All six definitions below are marked commutable and carry a pattern that
  // matches only the corresponding int_ppc_vsx_* intrinsic. The scalar forms
  // (XS*) use vsfrc operands, the vector forms (XV*) use vsrc operands.
  let isCommutable = 1 in {
  def XSMAXDP : XX3Form<60, 160,
                        (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
                        "xsmaxdp $XT, $XA, $XB", IIC_VecFP,
                        [(set vsfrc:$XT,
                              (int_ppc_vsx_xsmaxdp vsfrc:$XA, vsfrc:$XB))]>;
  def XSMINDP : XX3Form<60, 168,
                        (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
                        "xsmindp $XT, $XA, $XB", IIC_VecFP,
                        [(set vsfrc:$XT,
                              (int_ppc_vsx_xsmindp vsfrc:$XA, vsfrc:$XB))]>;

  def XVMAXDP : XX3Form<60, 224,
                        (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
                        "xvmaxdp $XT, $XA, $XB", IIC_VecFP,
                        [(set vsrc:$XT,
                              (int_ppc_vsx_xvmaxdp vsrc:$XA, vsrc:$XB))]>;
  def XVMINDP : XX3Form<60, 232,
                        (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
                        "xvmindp $XT, $XA, $XB", IIC_VecFP,
                        [(set vsrc:$XT,
                              (int_ppc_vsx_xvmindp vsrc:$XA, vsrc:$XB))]>;

  def XVMAXSP : XX3Form<60, 192,
                        (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
                        "xvmaxsp $XT, $XA, $XB", IIC_VecFP,
                        [(set vsrc:$XT,
                              (int_ppc_vsx_xvmaxsp vsrc:$XA, vsrc:$XB))]>;
  def XVMINSP : XX3Form<60, 200,
                        (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
                        "xvminsp $XT, $XA, $XB", IIC_VecFP,
                        [(set vsrc:$XT,
                              (int_ppc_vsx_xvminsp vsrc:$XA, vsrc:$XB))]>;
  } // isCommutable
} // Uses = [RM]

  // Logical Instructions
  // The patterns are written on v4i32. XXLAND, XXLNOR, XXLOR and XXLXOR are
  // commutable; XXLANDC is not, because only $XB is complemented.
  let isCommutable = 1 in
  def XXLAND : XX3Form<60, 130,
                       (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
                       "xxland $XT, $XA, $XB", IIC_VecGeneral,
                       [(set v4i32:$XT, (and v4i32:$XA, v4i32:$XB))]>;
  def XXLANDC : XX3Form<60, 138,
                        (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
                        "xxlandc $XT, $XA, $XB", IIC_VecGeneral,
                        [(set v4i32:$XT, (and v4i32:$XA,
                                              (vnot_ppc v4i32:$XB)))]>;
  let isCommutable = 1 in {
  def XXLNOR : XX3Form<60, 162,
                       (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
                       "xxlnor $XT, $XA, $XB", IIC_VecGeneral,
                       [(set v4i32:$XT, (vnot_ppc (or v4i32:$XA,
                                                   v4i32:$XB)))]>;
  def XXLOR : XX3Form<60, 146,
                      (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
                      "xxlor $XT, $XA, $XB", IIC_VecGeneral,
                      [(set v4i32:$XT, (or v4i32:$XA, v4i32:$XB))]>;
  // Codegen-only variant of XXLOR on vsfrc operands: same opcode fields and
  // asm string, no selection pattern.
  let isCodeGenOnly = 1 in
  def XXLORf: XX3Form<60, 146,
                      (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
                      "xxlor $XT, $XA, $XB", IIC_VecGeneral, []>;
  def XXLXOR : XX3Form<60, 154,
                       (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
                       "xxlxor $XT, $XA, $XB", IIC_VecGeneral,
                       [(set v4i32:$XT, (xor v4i32:$XA, v4i32:$XB))]>;
  } // isCommutable

  // Zero idioms: xxlxor of a register with itself, taking no inputs. They are
  // selected for an all-zeros v4i32 and for fpimm0 of type f64 / f32, and are
  // flagged as rematerializable move-immediates that are as cheap as a move.
  let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
      isReMaterializable = 1 in {
    def XXLXORz : XX3Form_SameOp<60, 154, (outs vsrc:$XT), (ins),
                       "xxlxor $XT, $XT, $XT", IIC_VecGeneral,
                       [(set v4i32:$XT, (v4i32 immAllZerosV))]>;
    def XXLXORdpz : XX3Form_SameOp<60, 154,
                         (outs vsfrc:$XT), (ins),
                         "xxlxor $XT, $XT, $XT", IIC_VecGeneral,
                         [(set f64:$XT, (fpimm0))]>;
    def XXLXORspz : XX3Form_SameOp<60, 154,
                          (outs vssrc:$XT), (ins),
                          "xxlxor $XT, $XT, $XT", IIC_VecGeneral,
                          [(set f32:$XT, (fpimm0))]>;
  }

  // Permutation Instructions
  // XXMRGHW, XXMRGLW and XXSEL have empty pattern lists here; they are only
  // reached through explicit Pat<> records.
  def XXMRGHW : XX3Form<60, 18,
                       (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
                       "xxmrghw $XT, $XA, $XB", IIC_VecPerm, []>;
  def XXMRGLW : XX3Form<60, 50,
                       (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
                       "xxmrglw $XT, $XA, $XB", IIC_VecPerm, []>;

  def XXPERMDI : XX3Form_2<60, 10,
                       (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$DM),
                       "xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm,
                       [(set v2i64:$XT, (PPCxxpermdi v2i64:$XA, v2i64:$XB,
                         imm32SExt16:$DM))]>;
  // Codegen-only form with a single vsfrc source that is printed in both the
  // $XA and $XB positions of the asm string.
  let isCodeGenOnly = 1 in
  def XXPERMDIs : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$DM),
                             "xxpermdi $XT, $XA, $XA, $DM", IIC_VecPerm, []>;
  def XXSEL : XX4Form<60, 3,
                      (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC),
                      "xxsel $XT, $XA, $XB, $XC", IIC_VecPerm, []>;

  def XXSLDWI : XX3Form_2<60, 2,
                       (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$SHW),
                       "xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm,
                       [(set v4i32:$XT, (PPCvecshl v4i32:$XA, v4i32:$XB,
                                                   imm32SExt16:$SHW))]>;

  // Codegen-only single-source (vsfrc) form of XXSLDWI, printed with $XA in
  // both source positions.
  let isCodeGenOnly = 1 in
  def XXSLDWIs : XX3Form_2s<60, 2,
                       (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$SHW),
                       "xxsldwi $XT, $XA, $XA, $SHW", IIC_VecPerm, []>;

  def XXSPLTW : XX2Form_2<60, 164,
                       (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM),
                       "xxspltw $XT, $XB, $UIM", IIC_VecPerm,
                       [(set v4i32:$XT,
                             (PPCxxsplt v4i32:$XB, imm32SExt16:$UIM))]>;
  // Codegen-only form of XXSPLTW whose source operand is a vsfrc register.
  let isCodeGenOnly = 1 in
  def XXSPLTWs : XX2Form_2<60, 164,
                       (outs vsrc:$XT), (ins vsfrc:$XB, u2imm:$UIM),
                       "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;

} // hasSideEffects

// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
// instruction selection into a branch sequence.
// The SELECT_CC_* pseudos take a crrc condition plus a $BROPC immediate and
// have no pattern; the SELECT_* pseudos take a crbitrc condition and match
// the generic `select` node on an i1 condition.
let PPC970_Single = 1 in {

  def SELECT_CC_VSRC: PPCCustomInserterPseudo<(outs vsrc:$dst),
                             (ins crrc:$cond, vsrc:$T, vsrc:$F, i32imm:$BROPC),
                             "#SELECT_CC_VSRC",
                             []>;
  def SELECT_VSRC: PPCCustomInserterPseudo<(outs vsrc:$dst),
                          (ins crbitrc:$cond, vsrc:$T, vsrc:$F),
                          "#SELECT_VSRC",
                          [(set v2f64:$dst,
                                (select i1:$cond, v2f64:$T, v2f64:$F))]>;
  def SELECT_CC_VSFRC: PPCCustomInserterPseudo<(outs f8rc:$dst),
                              (ins crrc:$cond, f8rc:$T, f8rc:$F,
                               i32imm:$BROPC), "#SELECT_CC_VSFRC",
                              []>;
  def SELECT_VSFRC: PPCCustomInserterPseudo<(outs f8rc:$dst),
                           (ins crbitrc:$cond, f8rc:$T, f8rc:$F),
                           "#SELECT_VSFRC",
                           [(set f64:$dst,
                                 (select i1:$cond, f64:$T, f64:$F))]>;
  def SELECT_CC_VSSRC: PPCCustomInserterPseudo<(outs f4rc:$dst),
                              (ins crrc:$cond, f4rc:$T, f4rc:$F,
                               i32imm:$BROPC), "#SELECT_CC_VSSRC",
                              []>;
  def SELECT_VSSRC: PPCCustomInserterPseudo<(outs f4rc:$dst),
                           (ins crbitrc:$cond, f4rc:$T, f4rc:$F),
                           "#SELECT_VSSRC",
                           [(set f32:$dst,
                                 (select i1:$cond, f32:$T, f32:$F))]>;
}
} // AddedComplexity

// Assembler aliases. xvmovdp / xvmovsp map to XVCPSGNDP / XVCPSGNSP with the
// source register repeated.
def : InstAlias<"xvmovdp $XT, $XB",
                (XVCPSGNDP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
def : InstAlias<"xvmovsp $XT, $XB",
                (XVCPSGNSP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;

// Aliases that map to XXPERMDI with a fixed $DM immediate:
//   xxspltd ..., 0 -> DM 0    xxspltd ..., 1 -> DM 3
//   xxmrghd        -> DM 0    xxmrgld        -> DM 3
//   xxswapd        -> DM 2
def : InstAlias<"xxspltd $XT, $XB, 0",
                (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 0)>;
def : InstAlias<"xxspltd $XT, $XB, 1",
                (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 3)>;
def : InstAlias<"xxmrghd $XT, $XA, $XB",
                (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 0)>;
def : InstAlias<"xxmrgld $XT, $XA, $XB",
                (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 3)>;
def : InstAlias<"xxswapd $XT, $XB",
                (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>;
// The same aliases for the single-source XXPERMDIs form.
def : InstAlias<"xxspltd $XT, $XB, 0",
                (XXPERMDIs vsrc:$XT, vsfrc:$XB, 0)>;
def : InstAlias<"xxspltd $XT, $XB, 1",
                (XXPERMDIs vsrc:$XT, vsfrc:$XB, 3)>;
def : InstAlias<"xxswapd $XT, $XB",
                (XXPERMDIs vsrc:$XT, vsfrc:$XB, 2)>;

let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.

// A bare vector NOT is XXLNOR of the value with itself.
def : Pat<(v4i32 (vnot_ppc v4i32:$A)),
          (v4i32 (XXLNOR $A, $A))>;
// (~C & A) | (B & C) is matched to XXSEL with $C as the third operand.
def : Pat<(v4i32 (or (and (vnot_ppc v4i32:$C), v4i32:$A),
                     (and v4i32:$B, v4i32:$C))),
          (v4i32 (XXSEL $A, $B, $C))>;

// Big-endian: element 0 is read directly through sub_64; element 1 is read
// after XXPERMDI $S, $S, 2.
let Predicates = [IsBigEndian] in {
def : Pat<(v2f64 (scalar_to_vector f64:$A)),
          (v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>;

def : Pat<(f64 (extractelt v2f64:$S, 0)),
          (f64 (EXTRACT_SUBREG $S, sub_64))>;
def : Pat<(f64 (extractelt v2f64:$S, 1)),
          (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
}

// Little-endian: the roles of elements 0 and 1 are mirrored relative to the
// big-endian patterns, and scalar_to_vector goes through XXPERMDI with
// immediate 0 on the widened scalar.
let Predicates = [IsLittleEndian] in {
def : Pat<(v2f64 (scalar_to_vector f64:$A)),
          (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), $A, sub_64),
                           (SUBREG_TO_REG (i64 1), $A, sub_64), 0))>;

def : Pat<(f64 (extractelt v2f64:$S, 0)),
          (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
def : Pat<(f64 (extractelt v2f64:$S, 1)),
          (f64 (EXTRACT_SUBREG $S, sub_64))>;
}

// Additional fnmsub patterns: -a*b + c == -(a*b - c)
// Either multiplicand may carry the fneg; the addend $C is passed as the
// first operand of the instruction.
def : Pat<(fma (fneg f64:$A), f64:$B, f64:$C),
          (XSNMSUBADP $C, $A, $B)>;
def : Pat<(fma f64:$A, (fneg f64:$B), f64:$C),
          (XSNMSUBADP $C, $A, $B)>;

def : Pat<(fma (fneg v2f64:$A), v2f64:$B, v2f64:$C),
          (XVNMSUBADP $C, $A, $B)>;
def : Pat<(fma v2f64:$A, (fneg v2f64:$B), v2f64:$C),
          (XVNMSUBADP $C, $A, $B)>;

def : Pat<(fma (fneg v4f32:$A), v4f32:$B, v4f32:$C),
          (XVNMSUBASP $C, $A, $B)>;
def : Pat<(fma v4f32:$A, (fneg v4f32:$B), v4f32:$C),
          (XVNMSUBASP $C, $A, $B)>;

// Vector bitconverts are matched to COPY_TO_REGCLASS only; the destination
// class (VSRC or VRRC) is spelled out per pattern below.
def : Pat<(v2f64 (bitconvert v4f32:$A)),
          (COPY_TO_REGCLASS $A, VSRC)>;
def : Pat<(v2f64 (bitconvert v4i32:$A)),
          (COPY_TO_REGCLASS $A, VSRC)>;
def : Pat<(v2f64 (bitconvert v8i16:$A)),
          (COPY_TO_REGCLASS $A, VSRC)>;
def : Pat<(v2f64 (bitconvert v16i8:$A)),
          (COPY_TO_REGCLASS $A, VSRC)>;

def : Pat<(v4f32 (bitconvert v2f64:$A)),
          (COPY_TO_REGCLASS $A, VRRC)>;
def : Pat<(v4i32 (bitconvert v2f64:$A)),
          (COPY_TO_REGCLASS $A, VRRC)>;
def : Pat<(v8i16 (bitconvert v2f64:$A)),
          (COPY_TO_REGCLASS $A, VRRC)>;
def : Pat<(v16i8 (bitconvert v2f64:$A)),
          (COPY_TO_REGCLASS $A, VRRC)>;

def : Pat<(v2i64 (bitconvert v4f32:$A)),
          (COPY_TO_REGCLASS $A, VSRC)>;
def : Pat<(v2i64 (bitconvert v4i32:$A)),
          (COPY_TO_REGCLASS $A, VSRC)>;
def : Pat<(v2i64 (bitconvert v8i16:$A)),
          (COPY_TO_REGCLASS $A, VSRC)>;
def : Pat<(v2i64 (bitconvert v16i8:$A)),
          (COPY_TO_REGCLASS $A, VSRC)>;

def : Pat<(v4f32 (bitconvert v2i64:$A)),
          (COPY_TO_REGCLASS $A, VRRC)>;
def : Pat<(v4i32 (bitconvert v2i64:$A)),
          (COPY_TO_REGCLASS $A, VRRC)>;
def : Pat<(v8i16 (bitconvert v2i64:$A)),
          (COPY_TO_REGCLASS $A, VRRC)>;
def : Pat<(v16i8 (bitconvert v2i64:$A)),
          (COPY_TO_REGCLASS $A, VRRC)>;

def : Pat<(v2f64 (bitconvert v2i64:$A)),
          (COPY_TO_REGCLASS $A, VRRC)>;
def : Pat<(v2i64 (bitconvert v2f64:$A)),
          (COPY_TO_REGCLASS $A, VRRC)>;

def : Pat<(v2f64 (bitconvert v1i128:$A)),
          (COPY_TO_REGCLASS $A, VRRC)>;
def : Pat<(v1i128 (bitconvert v2f64:$A)),
          (COPY_TO_REGCLASS $A, VRRC)>;

def : Pat<(v2i64 (bitconvert f128:$A)),
          (COPY_TO_REGCLASS $A, VRRC)>;
def : Pat<(v4i32 (bitconvert f128:$A)),
          (COPY_TO_REGCLASS $A, VRRC)>;
def : Pat<(v8i16 (bitconvert f128:$A)),
          (COPY_TO_REGCLASS $A, VRRC)>;
def : Pat<(v16i8 (bitconvert f128:$A)),
          (COPY_TO_REGCLASS $A, VRRC)>;

// Word-to-double conversions: the immediate operand of the PPC*vec2fp /
// PPCfpexth node chooses between XXMRGHW (0) and XXMRGLW (1) of the source
// with itself, and the merged value feeds the conversion instruction.
def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 0)),
          (v2f64 (XVCVSXWDP (v2i64 (XXMRGHW $C, $C))))>;
def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 1)),
          (v2f64 (XVCVSXWDP (v2i64 (XXMRGLW $C, $C))))>;

def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 0)),
          (v2f64 (XVCVUXWDP (v2i64 (XXMRGHW $C, $C))))>;
def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 1)),
          (v2f64 (XVCVUXWDP (v2i64 (XXMRGLW $C, $C))))>;

def : Pat<(v2f64 (PPCfpexth v4f32:$C, 0)), (XVCVSPDP (XXMRGHW $C, $C))>;
def : Pat<(v2f64 (PPCfpexth v4f32:$C, 1)), (XVCVSPDP (XXMRGLW $C, $C))>;

// Loads.
let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
  def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>;

  // Stores.
  // Both the stxvd2x intrinsic and the PPCstxvd2x node select STXVD2X.
  def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
            (STXVD2X $rS, xoaddr:$dst)>;
  def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
}

// Load vector big endian order
// On little-endian targets the PPCld_vec_be / PPCst_vec_be nodes select the
// doubleword forms (LXVD2X / STXVD2X) for v2f64 and v2i64, and the word forms
// (LXVW4X / STXVW4X) for v4f32 and v4i32.
let Predicates = [IsLittleEndian, HasVSX] in {
  def : Pat<(v2f64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
  def : Pat<(PPCst_vec_be v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
  def : Pat<(v4f32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
  def : Pat<(PPCst_vec_be v4f32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>;
  def : Pat<(v2i64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
  def : Pat<(PPCst_vec_be v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
  def : Pat<(v4i32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
  def : Pat<(PPCst_vec_be v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>;
}

// Plain vector load/store nodes and the lxvw4x / stxvw4x intrinsics are
// selected directly, but only for big-endian targets that also satisfy
// HasOnlySwappingMemOps.
let Predicates = [IsBigEndian, HasVSX, HasOnlySwappingMemOps] in {
  def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
  def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
  def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>;
  def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVW4X xoaddr:$src)>;
  def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
  def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
  def : Pat<(store v4i32:$XT, xoaddr:$dst), (STXVW4X $XT, xoaddr:$dst)>;
  def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
            (STXVW4X $rS, xoaddr:$dst)>;
}

// Permutes.
// A doubleword swap of any of these vector types, and the chainless
// PPCswapNoChain node, is XXPERMDI of the source with itself and immediate 2.
def : Pat<(v2f64 (PPCxxswapd v2f64:$src)), (XXPERMDI $src, $src, 2)>;
def : Pat<(v2i64 (PPCxxswapd v2i64:$src)), (XXPERMDI $src, $src, 2)>;
def : Pat<(v4f32 (PPCxxswapd v4f32:$src)), (XXPERMDI $src, $src, 2)>;
def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>;
def : Pat<(v2f64 (PPCswapNoChain v2f64:$src)), (XXPERMDI $src, $src, 2)>;

// PPCvecshl XT, XA, XA, 2 can be selected to both XXSLDWI XT,XA,XA,2 and
// XXSWAPD XT,XA (i.e. XXPERMDI XT,XA,XA,2), the latter one is more profitable.
def : Pat<(v4i32 (PPCvecshl v4i32:$src, v4i32:$src, 2)), (XXPERMDI $src, $src, 2)>;

// Selects.
// selectcc with i1 operands: the comparison is folded into one CR-bit logical
// instruction whose result is the condition of the SELECT_* pseudo.
//   SETLT, SETUGT -> CRANDC $lhs, $rhs     SETULT, SETGT -> CRANDC $rhs, $lhs
//   SETLE, SETUGE -> CRORC  $lhs, $rhs     SETULE, SETGE -> CRORC  $rhs, $lhs
//   SETEQ         -> CREQV  $lhs, $rhs     SETNE         -> CRXOR  $lhs, $rhs
def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)),
          (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULT)),
          (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLE)),
          (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>;
def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULE)),
          (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>;
def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETEQ)),
          (SELECT_VSRC (CREQV $lhs, $rhs), $tval, $fval)>;
def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGE)),
          (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>;
def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGE)),
          (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>;
def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGT)),
          (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGT)),
          (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETNE)),
          (SELECT_VSRC (CRXOR $lhs, $rhs), $tval, $fval)>;

// The same ten condition codes for f64, using SELECT_VSFRC.
def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)),
          (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULT)),
          (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)),
          (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>;
def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULE)),
          (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>;
def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)),
          (SELECT_VSFRC (CREQV $lhs, $rhs), $tval, $fval)>;
def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)),
          (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>;
def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGE)),
          (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>;
def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)),
          (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)),
          (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)),
          (SELECT_VSFRC (CRXOR $lhs, $rhs), $tval, $fval)>;

// Divides.
def : Pat<(int_ppc_vsx_xvdivsp v4f32:$A, v4f32:$B),
          (XVDIVSP $A, $B)>;
def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B),
          (XVDIVDP $A, $B)>;

// Reciprocal estimate
def : Pat<(int_ppc_vsx_xvresp v4f32:$A),
          (XVRESP $A)>;
def : Pat<(int_ppc_vsx_xvredp v2f64:$A),
          (XVREDP $A)>;

// Recip. square root estimate
def : Pat<(int_ppc_vsx_xvrsqrtesp v4f32:$A),
          (XVRSQRTESP $A)>;
def : Pat<(int_ppc_vsx_xvrsqrtedp v2f64:$A),
          (XVRSQRTEDP $A)>;

// Vector selection
// Operand order is reversed between the node and the instruction: the
// vselect operands ($vA, $vB, $vC) are passed to XXSEL as ($vC, $vB, $vA).
// The v16i8 and v8i16 forms copy operands to VSRC and the result to VRRC.
def : Pat<(v16i8 (vselect v16i8:$vA, v16i8:$vB, v16i8:$vC)),
          (COPY_TO_REGCLASS
                 (XXSEL (COPY_TO_REGCLASS $vC, VSRC),
                        (COPY_TO_REGCLASS $vB, VSRC),
                        (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
def : Pat<(v8i16 (vselect v8i16:$vA, v8i16:$vB, v8i16:$vC)),
          (COPY_TO_REGCLASS
                 (XXSEL (COPY_TO_REGCLASS $vC, VSRC),
                        (COPY_TO_REGCLASS $vB, VSRC),
                        (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
def : Pat<(vselect v4i32:$vA, v4i32:$vB, v4i32:$vC),
          (XXSEL $vC, $vB, $vA)>;
def : Pat<(vselect v2i64:$vA, v2i64:$vB, v2i64:$vC),
          (XXSEL $vC, $vB, $vA)>;
def : Pat<(vselect v4i32:$vA, v4f32:$vB, v4f32:$vC),
          (XXSEL $vC, $vB, $vA)>;
def : Pat<(vselect v2i64:$vA, v2f64:$vB, v2f64:$vC),
          (XXSEL $vC, $vB, $vA)>;

// Generic vector fmaxnum / fminnum nodes select the XV{MAX,MIN}{SP,DP}
// instructions.
def : Pat<(v4f32 (fmaxnum v4f32:$src1, v4f32:$src2)),
          (v4f32 (XVMAXSP $src1, $src2))>;
def : Pat<(v4f32 (fminnum v4f32:$src1, v4f32:$src2)),
          (v4f32 (XVMINSP $src1, $src2))>;
def : Pat<(v2f64 (fmaxnum v2f64:$src1, v2f64:$src2)),
          (v2f64 (XVMAXDP $src1, $src2))>;
def : Pat<(v2f64 (fminnum v2f64:$src1, v2f64:$src2)),
          (v2f64 (XVMINDP $src1, $src2))>;

// Integer-to-f64 conversion of a v2i64 element that was extracted and moved
// back with PPCmtvsra: the conversion instruction is applied to the vector
// register itself. Little-endian: element 0 is first swapped with
// XXPERMDI $S, $S, 2; element 1 is used after register-class copies only.
let Predicates = [IsLittleEndian] in {
def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
          (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
          (f64 (XSCVSXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
          (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
          (f64 (XSCVUXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
} // IsLittleEndian

// Big-endian: element 0 is used directly; element 1 needs the XXPERMDI swap.
let Predicates = [IsBigEndian] in {
def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
          (f64 (XSCVSXDDP (COPY_TO_REGCLASS $S, VSFRC)))>;
def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
          (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
          (f64 (XSCVUXDDP (COPY_TO_REGCLASS $S, VSFRC)))>;
def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
          (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
} // IsBigEndian

} // AddedComplexity
} // HasVSX

// Shared output dags for the f32 min/max patterns: both operands are copied
// to VSFRC, XSMINDP / XSMAXDP is applied, and the result is copied to VSSRC.
def FpMinMax {
  dag F32Min = (COPY_TO_REGCLASS (XSMINDP (COPY_TO_REGCLASS $A, VSFRC),
                                          (COPY_TO_REGCLASS $B, VSFRC)),
                                 VSSRC);
  dag F32Max = (COPY_TO_REGCLASS (XSMAXDP (COPY_TO_REGCLASS $A, VSFRC),
                                          (COPY_TO_REGCLASS $B, VSFRC)),
                                 VSSRC);
}

// fminnum_ieee / fmaxnum_ieee selection. Each operation has four patterns:
// plain operands, fcanonicalize on either operand, and fcanonicalize on both.
// All four select the same output, so the fcanonicalize nodes produce no
// instruction of their own.
let AddedComplexity = 400, Predicates = [HasVSX] in {
  // f32 Min.
  def : Pat<(f32 (fminnum_ieee f32:$A, f32:$B)),
            (f32 FpMinMax.F32Min)>;
  def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), f32:$B)),
            (f32 FpMinMax.F32Min)>;
  def : Pat<(f32 (fminnum_ieee f32:$A, (fcanonicalize f32:$B))),
            (f32 FpMinMax.F32Min)>;
  def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
            (f32 FpMinMax.F32Min)>;
  // f32 Max.
  def : Pat<(f32 (fmaxnum_ieee f32:$A, f32:$B)),
            (f32 FpMinMax.F32Max)>;
  def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), f32:$B)),
            (f32 FpMinMax.F32Max)>;
  def : Pat<(f32 (fmaxnum_ieee f32:$A, (fcanonicalize f32:$B))),
            (f32 FpMinMax.F32Max)>;
  def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
            (f32 FpMinMax.F32Max)>;

  // f64 Min.
  // f64 fminnum_ieee selects XSMINDP directly, with or without fcanonicalize
  // on either operand.
  def : Pat<(f64 (fminnum_ieee f64:$A, f64:$B)),
            (f64 (XSMINDP $A, $B))>;
  def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), f64:$B)),
            (f64 (XSMINDP $A, $B))>;
  def : Pat<(f64 (fminnum_ieee f64:$A, (fcanonicalize f64:$B))),
            (f64 (XSMINDP $A, $B))>;
  def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
            (f64 (XSMINDP $A, $B))>;
  // f64 Max.
  def : Pat<(f64 (fmaxnum_ieee f64:$A, f64:$B)),
            (f64 (XSMAXDP $A, $B))>;
  def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), f64:$B)),
            (f64 (XSMAXDP $A, $B))>;
  def : Pat<(f64 (fmaxnum_ieee f64:$A, (fcanonicalize f64:$B))),
            (f64 (XSMAXDP $A, $B))>;
  def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
            (f64 (XSMAXDP $A, $B))>;
}

// Named input dags for scalar integer loads from an xoaddr address.
// Prefixes: L = extload, ZEL = zero-extending load, SEL = sign-extending
// load; an i64 suffix marks the 64-bit result variant. The i8 sign-extending
// forms are written as sext_inreg of an extloadi8 rather than a sextloadi8.
def ScalarLoads {
  dag Li8 = (i32 (extloadi8 xoaddr:$src));
  dag ZELi8 = (i32 (zextloadi8 xoaddr:$src));
  dag ZELi8i64 = (i64 (zextloadi8 xoaddr:$src));
  dag SELi8 = (i32 (sext_inreg (extloadi8 xoaddr:$src), i8));
  dag SELi8i64 = (i64 (sext_inreg (extloadi8 xoaddr:$src), i8));

  dag Li16 = (i32 (extloadi16 xoaddr:$src));
  dag ZELi16 = (i32 (zextloadi16 xoaddr:$src));
  dag ZELi16i64 = (i64 (zextloadi16 xoaddr:$src));
  dag SELi16 = (i32 (sextloadi16 xoaddr:$src));
  dag SELi16i64 = (i64 (sextloadi16 xoaddr:$src));

  dag Li32 = (i32 (load xoaddr:$src));
}

// Named input dags for converting one v2i64 element to f32.
// Naming: El<N> is the element index, U / S selects PPCfcfidus / PPCfcfids,
// and the trailing S1 / S2 names the source vector ($S1 or $S2).
// BVU / BVS build a v4f32 from the four unsigned / signed element dags.
def DWToSPExtractConv {
  dag El0US1 = (f32 (PPCfcfidus
                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0))))));
  dag El1US1 = (f32 (PPCfcfidus
                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1))))));
  dag El0US2 = (f32 (PPCfcfidus
                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0))))));
  dag El1US2 = (f32 (PPCfcfidus
                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1))))));
  dag El0SS1 = (f32 (PPCfcfids
                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0))))));
  dag El1SS1 = (f32 (PPCfcfids
                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1))))));
  dag El0SS2 = (f32 (PPCfcfids
                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0))))));
  dag El1SS2 = (f32 (PPCfcfids
                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1))))));
  dag BVU = (v4f32 (build_vector El0US1, El1US1, El0US2, El1US2));
  dag BVS = (v4f32 (build_vector El0SS1, El1SS1, El0SS2, El1SS2));
}

// The following VSX instructions were introduced in Power ISA 2.07
/* FIXME: if the operands are v2i64, these patterns will not match.
   we should define new patterns or otherwise match the same patterns
   when the elements are larger than i32.
*/
// Subtarget feature predicates; each wraps the C++ condition given in its
// string.
def HasP8Vector : Predicate<"PPCSubTarget->hasP8Vector()">;
def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">;
def NoP9Vector : Predicate<"!PPCSubTarget->hasP9Vector()">;
let Predicates = [HasP8Vector] in {
let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
  // Complemented-result logical operations, with patterns written on v4i32.
  let isCommutable = 1 in {
    def XXLEQV : XX3Form<60, 186,
                         (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
                         "xxleqv $XT, $XA, $XB", IIC_VecGeneral,
                         [(set v4i32:$XT, (vnot_ppc (xor v4i32:$XA, v4i32:$XB)))]>;
    def XXLNAND : XX3Form<60, 178,
                          (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
                          "xxlnand $XT, $XA, $XB", IIC_VecGeneral,
                          [(set v4i32:$XT, (vnot_ppc (and v4i32:$XA,
                                                    v4i32:$XB)))]>;
  } // isCommutable

  // The xxleqv intrinsic selects the same instruction as the DAG pattern.
  def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B),
            (XXLEQV $A, $B)>;

  // All-ones idiom: xxleqv of a register with itself, taking no inputs.
  // Selected for an all-ones v16i8 bitconverted to v4i32.
  let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
      isReMaterializable = 1 in {
    def XXLEQVOnes : XX3Form_SameOp<60, 186, (outs vsrc:$XT), (ins),
                         "xxleqv $XT, $XT, $XT", IIC_VecGeneral,
                         [(set v4i32:$XT, (bitconvert (v16i8 immAllOnesV)))]>;
  }

  // OR with complement of $XB; not commutable because only $XB is inverted.
  def XXLORC : XX3Form<60, 170,
                       (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
                       "xxlorc $XT, $XA, $XB", IIC_VecGeneral,
                       [(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>;

// VSX scalar loads introduced in ISA 2.07
  // The real load instructions carry no patterns; selection goes through the
  // pseudo instructions that follow them.
  let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
    let CodeSize = 3 in
    def LXSSPX : XX1Form_memOp<31, 524, (outs vssrc:$XT), (ins memrr:$src),
                         "lxsspx $XT, $src", IIC_LdStLFD, []>;
    def LXSIWAX : XX1Form_memOp<31, 76, (outs vsfrc:$XT), (ins memrr:$src),
                          "lxsiwax $XT, $src", IIC_LdStLFD, []>;
    def LXSIWZX : XX1Form_memOp<31, 12, (outs vsfrc:$XT), (ins memrr:$src),
                          "lxsiwzx $XT, $src", IIC_LdStLFD, []>;

    // Pseudo instruction XFLOADf32 will be expanded to LXSSPX or LFSX later
    let CodeSize = 3 in
    def XFLOADf32  : PseudoXFormMemOp<(outs vssrc:$XT), (ins memrr:$src),
                            "#XFLOADf32",
                            [(set f32:$XT, (load xoaddr:$src))]>;
    // Pseudo instruction LIWAX will be expanded to LXSIWAX or LFIWAX later
    def LIWAX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
                       "#LIWAX",
                       [(set f64:$XT, (PPClfiwax xoaddr:$src))]>;
    // Pseudo instruction LIWZX will be expanded to LXSIWZX or LFIWZX later
    def LIWZX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
                       "#LIWZX",
                       [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>;
  } // mayLoad

  // VSX scalar stores introduced in ISA 2.07
  // As with the loads, only the pseudo instructions carry patterns.
  let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in {
    let CodeSize = 3 in
    def STXSSPX : XX1Form_memOp<31, 652, (outs), (ins vssrc:$XT, memrr:$dst),
                          "stxsspx $XT, $dst", IIC_LdStSTFD, []>;
    def STXSIWX : XX1Form_memOp<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst),
                          "stxsiwx $XT, $dst", IIC_LdStSTFD, []>;

    // Pseudo instruction XFSTOREf32 will be expanded to STXSSPX or STFSX later
    let CodeSize = 3 in
    def XFSTOREf32 : PseudoXFormMemOp<(outs), (ins vssrc:$XT, memrr:$dst),
                            "#XFSTOREf32",
                            [(store f32:$XT, xoaddr:$dst)]>;
    // Pseudo instruction STIWX will be expanded to STXSIWX or STFIWX later
    def STIWX : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst),
                       "#STIWX",
                      [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
  } // mayStore

  // An f32 load extended to f64 is the XFLOADf32 pseudo plus a copy to VSFRC;
  // rounding that extended value back to f32 is just the load. A plain
  // fpextend from f32 to f64 is a register-class copy only.
  def : Pat<(f64 (extloadf32 xoaddr:$src)),
            (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$src), VSFRC)>;
  def : Pat<(f32 (fpround (f64 (extloadf32 xoaddr:$src)))),
            (f32 (XFLOADf32 xoaddr:$src))>;
  def : Pat<(f64 (fpextend f32:$src)),
            (COPY_TO_REGCLASS $src, VSFRC)>;

  // f32 selectcc on i1 operands: the comparison is folded into one CR-bit
  // logical instruction that feeds SELECT_VSSRC.
  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)),
            (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)),
            (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)),
            (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>;
  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULE)),
            (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>;
  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)),
            (SELECT_VSSRC (CREQV $lhs, $rhs), $tval, $fval)>;
  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)),
            (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>;
  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGE)),
            (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>;
  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)),
            (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGT)),
            (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
            (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>;

  // VSX Elementary Scalar FP arithmetic (SP)
  // Only add and multiply are marked commutable.
  let isCommutable = 1 in {
    def XSADDSP : XX3Form<60, 0,
                          (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
                          "xsaddsp $XT, $XA, $XB", IIC_VecFP,
                          [(set f32:$XT, (fadd f32:$XA, f32:$XB))]>;
    def XSMULSP : XX3Form<60, 16,
                          (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
                          "xsmulsp $XT, $XA, $XB", IIC_VecFP,
                          [(set f32:$XT, (fmul f32:$XA, f32:$XB))]>;
  } // isCommutable
  def XSSUBSP : XX3Form<60, 8,
                        (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
                        "xssubsp $XT, $XA, $XB", IIC_VecFP,
                        [(set f32:$XT, (fsub f32:$XA, f32:$XB))]>;
  def XSDIVSP : XX3Form<60, 24,
                        (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
                        "xsdivsp $XT, $XA, $XB", IIC_FPDivS,
                        [(set f32:$XT, (fdiv f32:$XA, f32:$XB))]>;
  def XSRESP : XX2Form<60, 26,
                        (outs vssrc:$XT), (ins vssrc:$XB),
                        "xsresp $XT, $XB", IIC_VecFP,
                        [(set f32:$XT, (PPCfre f32:$XB))]>;
  // XSRSP takes a vsfrc source and produces a vssrc result; no pattern here.
  def XSRSP : XX2Form<60, 281,
                        (outs vssrc:$XT), (ins vsfrc:$XB),
                        "xsrsp $XT, $XB", IIC_VecFP, []>;
  def XSSQRTSP : XX2Form<60, 11,
                        (outs vssrc:$XT), (ins vssrc:$XB),
                        "xssqrtsp $XT, $XB", IIC_FPSqrtS,
                        [(set f32:$XT, (fsqrt f32:$XB))]>;
  def XSRSQRTESP : XX2Form<60, 10,
                           (outs vssrc:$XT), (ins vssrc:$XB),
                           "xsrsqrtesp $XT, $XB", IIC_VecFP,
                           [(set f32:$XT, (PPCfrsqrte f32:$XB))]>;

  // FMA Instructions
  // Each BaseName group pairs an A-form, which is commutable and carries the
  // selection pattern, with an M-form marked IsVSXFMAAlt that has no pattern.
  // In every form the extra input $XTi is tied to the output $XT and is not
  // encoded separately.
  let BaseName = "XSMADDASP" in {
  let isCommutable = 1 in
  def XSMADDASP : XX3Form<60, 1,
                          (outs vssrc:$XT),
                          (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
                          "xsmaddasp $XT, $XA, $XB", IIC_VecFP,
                          [(set f32:$XT, (fma f32:$XA, f32:$XB, f32:$XTi))]>,
                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                          AltVSXFMARel;
  let IsVSXFMAAlt = 1 in
  def XSMADDMSP : XX3Form<60, 9,
                          (outs vssrc:$XT),
                          (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
                          "xsmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                          AltVSXFMARel;
  }

  let BaseName = "XSMSUBASP" in {
  let isCommutable = 1 in
  def XSMSUBASP : XX3Form<60, 17,
                          (outs vssrc:$XT),
                          (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
                          "xsmsubasp $XT, $XA, $XB", IIC_VecFP,
                          [(set f32:$XT, (fma f32:$XA, f32:$XB,
                                              (fneg f32:$XTi)))]>,
                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                          AltVSXFMARel;
  let IsVSXFMAAlt = 1 in
  def XSMSUBMSP : XX3Form<60, 25,
                          (outs vssrc:$XT),
                          (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
                          "xsmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                          AltVSXFMARel;
  }

  let BaseName = "XSNMADDASP" in {
  let isCommutable = 1 in
  def XSNMADDASP : XX3Form<60, 129,
                          (outs vssrc:$XT),
                          (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
                          "xsnmaddasp $XT, $XA, $XB", IIC_VecFP,
                          [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB,
                                                    f32:$XTi)))]>,
                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                          AltVSXFMARel;
  let IsVSXFMAAlt = 1 in
  def XSNMADDMSP : XX3Form<60, 137,
                          (outs vssrc:$XT),
                          (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
                          "xsnmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                          AltVSXFMARel;
  }

  let BaseName = "XSNMSUBASP" in {
  let isCommutable = 1 in
  def XSNMSUBASP : XX3Form<60, 145,
                          (outs vssrc:$XT),
                          (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
                          "xsnmsubasp $XT, $XA, $XB", IIC_VecFP,
                          [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB,
                                                    (fneg f32:$XTi))))]>,
                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                          AltVSXFMARel;
  let IsVSXFMAAlt = 1 in
  def XSNMSUBMSP : XX3Form<60, 153,
                          (outs vssrc:$XT),
                          (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
                          "xsnmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                          AltVSXFMARel;
  }

  // Additional xsnmsubasp patterns: -a*b + c == -(a*b - c)
  // Either multiplicand may carry the fneg; the addend $C is passed as the
  // tied first operand.
  def : Pat<(fma (fneg f32:$A), f32:$B, f32:$C),
            (XSNMSUBASP $C, $A, $B)>;
  def : Pat<(fma f32:$A, (fneg f32:$B), f32:$C),
            (XSNMSUBASP $C, $A, $B)>;

  // Single Precision Conversions (FP <-> INT)
  // Both take a vsfrc source typed f64 in the pattern and produce f32.
  def XSCVSXDSP : XX2Form<60, 312,
                      (outs vssrc:$XT), (ins vsfrc:$XB),
                      "xscvsxdsp $XT, $XB", IIC_VecFP,
                      [(set f32:$XT, (PPCfcfids f64:$XB))]>;
  def XSCVUXDSP : XX2Form<60, 296,
                      (outs vssrc:$XT), (ins vsfrc:$XB),
                      "xscvuxdsp $XT, $XB", IIC_VecFP,
                      [(set f32:$XT, (PPCfcfidus f64:$XB))]>;

  // Conversions between vector and scalar single precision
  def XSCVDPSPN : XX2Form<60, 267, (outs vsrc:$XT), (ins vssrc:$XB),
                          "xscvdpspn $XT, $XB", IIC_VecFP, []>;
  def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB),
                          "xscvspdpn $XT, $XB", IIC_VecFP, []>;

  // Element-of-v2i64 to f32 conversions, matched through the named
  // DWToSPExtractConv dags. Little-endian: element 0 is first swapped with
  // XXPERMDI $S1, $S1, 2; element 1 is used after register-class copies only.
  let Predicates = [IsLittleEndian] in {
  def : Pat<DWToSPExtractConv.El0SS1,
            (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
  def : Pat<DWToSPExtractConv.El1SS1,
            (f32 (XSCVSXDSP (COPY_TO_REGCLASS
                              (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>;
  def : Pat<DWToSPExtractConv.El0US1,
            (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
  def : Pat<DWToSPExtractConv.El1US1,
            (f32 (XSCVUXDSP (COPY_TO_REGCLASS
                              (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>;
  }

  // Big-endian: element 0 is used directly; element 1 needs the XXPERMDI swap.
  let Predicates = [IsBigEndian] in {
  def : Pat<DWToSPExtractConv.El0SS1,
            (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>;
  def : Pat<DWToSPExtractConv.El1SS1,
            (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
  def : Pat<DWToSPExtractConv.El0US1,
            (f32 (XSCVUXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>;
  def : Pat<DWToSPExtractConv.El1US1,
            (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
  }

  // Instructions for converting float to i64 feeding a store.
let Predicates = [NoP9Vector] in {
    // Signed conversion, 8-byte store.
    def : Pat<(PPCstore_scal_int_from_vsr
                (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 8),
              (STXSDX (XSCVDPSXDS f64:$src), xoaddr:$dst)>;
    // Unsigned conversion, 8-byte store.
    def : Pat<(PPCstore_scal_int_from_vsr
                (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 8),
              (STXSDX (XSCVDPUXDS f64:$src), xoaddr:$dst)>;
  }

  // Instructions for converting float to i32 feeding a store.
  // Same shape as the i64 case, but with a 4-byte store via STIWX.
  def : Pat<(PPCstore_scal_int_from_vsr
              (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 4),
            (STIWX (XSCVDPSXWS f64:$src), xoaddr:$dst)>;
  def : Pat<(PPCstore_scal_int_from_vsr
              (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 4),
            (STIWX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;

  // v2i64 min/max: operands are copied to VRRC for the VMAX*/VMIN* forms.
  def : Pat<(v2i64 (smax v2i64:$src1, v2i64:$src2)),
            (v2i64 (VMAXSD (COPY_TO_REGCLASS $src1, VRRC),
                           (COPY_TO_REGCLASS $src2, VRRC)))>;
  def : Pat<(v2i64 (umax v2i64:$src1, v2i64:$src2)),
            (v2i64 (VMAXUD (COPY_TO_REGCLASS $src1, VRRC),
                           (COPY_TO_REGCLASS $src2, VRRC)))>;
  def : Pat<(v2i64 (smin v2i64:$src1, v2i64:$src2)),
            (v2i64 (VMINSD (COPY_TO_REGCLASS $src1, VRRC),
                           (COPY_TO_REGCLASS $src2, VRRC)))>;
  def : Pat<(v2i64 (umin v2i64:$src1, v2i64:$src2)),
            (v2i64 (VMINUD (COPY_TO_REGCLASS $src1, VRRC),
                           (COPY_TO_REGCLASS $src2, VRRC)))>;
} // AddedComplexity = 400
} // HasP8Vector

let AddedComplexity = 400 in {
let Predicates = [HasDirectMove] in {
  // VSX direct move instructions
  //
  // Each move comes in two flavours that share opcode and assembly string:
  //  - MxVSR*: VSFRC operand, carries the selection pattern.
  //  - MxVR*:  VSRC operand, isCodeGenOnly, no pattern.

  // VSR -> GPR, doubleword (64-bit mode only).
  def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT),
                              "mfvsrd $rA, $XT", IIC_VecGeneral,
                              [(set i64:$rA, (PPCmfvsr f64:$XT))]>,
               Requires<[In64BitMode]>;
  let isCodeGenOnly = 1 in {
    def MFVRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsrc:$XT),
                               "mfvsrd $rA, $XT", IIC_VecGeneral,
                               []>,
                Requires<[In64BitMode]>;
  }

  // VSR -> GPR, word.
  def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT),
                               "mfvsrwz $rA, $XT", IIC_VecGeneral,
                               [(set i32:$rA, (PPCmfvsr f64:$XT))]>;
  let isCodeGenOnly = 1 in {
    def MFVRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsrc:$XT),
                                "mfvsrwz $rA, $XT", IIC_VecGeneral,
                                []>;
  }

  // GPR -> VSR, doubleword (64-bit mode only).
  def MTVSRD : XX1_RS6_RD5_XO<31, 179, (outs vsfrc:$XT), (ins g8rc:$rA),
                              "mtvsrd $XT, $rA", IIC_VecGeneral,
                              [(set f64:$XT, (PPCmtvsra i64:$rA))]>,
               Requires<[In64BitMode]>;
  let isCodeGenOnly = 1 in {
    def MTVRD : XX1_RS6_RD5_XO<31, 179, (outs vsrc:$XT), (ins g8rc:$rA),
                               "mtvsrd $XT, $rA", IIC_VecGeneral,
                               []>,
                Requires<[In64BitMode]>;
  }

  // GPR -> VSR, word (selected for PPCmtvsra).
  def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$rA),
                               "mtvsrwa $XT, $rA", IIC_VecGeneral,
                               [(set f64:$XT, (PPCmtvsra i32:$rA))]>;
  let isCodeGenOnly = 1 in {
    def MTVRWA : XX1_RS6_RD5_XO<31, 211, (outs vsrc:$XT), (ins gprc:$rA),
                                "mtvsrwa $XT, $rA", IIC_VecGeneral,
                                []>;
  }

  // GPR -> VSR, word (selected for PPCmtvsrz).
  def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$rA),
                               "mtvsrwz $XT, $rA", IIC_VecGeneral,
                               [(set f64:$XT, (PPCmtvsrz i32:$rA))]>;
  let isCodeGenOnly = 1 in {
    def MTVRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsrc:$XT), (ins gprc:$rA),
                                "mtvsrwz $XT, $rA", IIC_VecGeneral,
                                []>;
  }
} // HasDirectMove

// Direct moves that additionally require ISA 3.0. None has a selection
// pattern attached here.
let Predicates = [IsISA3_0, HasDirectMove] in {
  def MTVSRWS: XX1_RS6_RD5_XO<31, 403, (outs vsrc:$XT), (ins gprc:$rA),
                              "mtvsrws $XT, $rA", IIC_VecGeneral, []>;

  def MTVSRDD: XX1Form<31, 435, (outs vsrc:$XT),
                       (ins g8rc_nox0:$rA, g8rc:$rB),
                       "mtvsrdd $XT, $rA, $rB", IIC_VecGeneral,
                       []>, Requires<[In64BitMode]>;

  def MFVSRLD: XX1_RS6_RD5_XO<31, 307, (outs g8rc:$rA), (ins vsrc:$XT),
                              "mfvsrld $rA, $XT", IIC_VecGeneral,
                              []>, Requires<[In64BitMode]>;

} // IsISA3_0, HasDirectMove
} // AddedComplexity = 400

// We want to parse this from asm, but we don't want to emit this as it would
// be emitted with a VSX reg. So leave Emit = 0 here.
def : InstAlias<"mfvrd $rA, $XT", (MFVRD g8rc:$rA, vrrc:$XT), 0>;
def : InstAlias<"mffprd $rA, $src", (MFVSRD g8rc:$rA, f8rc:$src)>;
def : InstAlias<"mtvrd $XT, $rA", (MTVRD vrrc:$XT, g8rc:$rA), 0>;
def : InstAlias<"mtfprd $dst, $rA", (MTVSRD f8rc:$dst, g8rc:$rA)>;
def : InstAlias<"mfvrwz $rA, $XT", (MFVRWZ gprc:$rA, vrrc:$XT), 0>;
def : InstAlias<"mffprwz $rA, $src", (MFVSRWZ gprc:$rA, f8rc:$src)>;
def : InstAlias<"mtvrwa $XT, $rA", (MTVRWA vrrc:$XT, gprc:$rA), 0>;
def : InstAlias<"mtfprwa $dst, $rA", (MTVSRWA f8rc:$dst, gprc:$rA)>;
def : InstAlias<"mtvrwz $XT, $rA", (MTVRWZ vrrc:$XT, gprc:$rA), 0>;
def : InstAlias<"mtfprwz $dst, $rA", (MTVSRWZ f8rc:$dst, gprc:$rA)>;

// Direct moves of various widths from GPRs into VSRs. Each move lines the
// value up into element 0 (both BE and LE). For BE, entities smaller than a
// doubleword are shifted left and then moved. For LE, they are moved and then
// swapped so that they land in the least significant element of the VSR.
def MovesToVSR {
  // BE: rotate the value to the top of the doubleword, then move it.
  dag BE_BYTE_0 =
    (MTVSRD (RLDICR (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 56, 7));
  dag BE_HALF_0 =
    (MTVSRD (RLDICR (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 48, 15));
  dag BE_WORD_0 =
    (MTVSRD (RLDICR (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 32, 31));
  dag BE_DWORD_0 = (MTVSRD $A);

  // LE: move first, then swap doublewords with XXPERMDI.
  dag LE_MTVSRW = (MTVSRD (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32));
  dag LE_WORD_1 =
    (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), LE_MTVSRW, sub_64));
  dag LE_WORD_0 = (XXPERMDI LE_WORD_1, LE_WORD_1, 2);
  dag LE_DWORD_1 =
    (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), BE_DWORD_0, sub_64));
  dag LE_DWORD_0 = (XXPERMDI LE_DWORD_1, LE_DWORD_1, 2);
}

// Patterns for extracting elements out of vectors. Integer elements are
// extracted using direct move operations. Patterns for extracting elements
// whose indices are not available at compile time are also provided with
// various _VARIABLE_ patterns.
// The numbering for the DAGs is for LE, but when used on BE, the correct
// LE element can just be used (i.e. LE_BYTE_2 == BE_BYTE_13).
def VectorExtractions {
  // Doubleword extraction
  dag LE_DWORD_0 =
    (MFVSRD (EXTRACT_SUBREG (XXPERMDI (COPY_TO_REGCLASS $S, VSRC),
                                      (COPY_TO_REGCLASS $S, VSRC), 2),
                            sub_64));
  dag LE_DWORD_1 =
    (MFVSRD (EXTRACT_SUBREG (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64));

  // Word extraction
  dag LE_WORD_0 = (MFVSRWZ (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64));
  dag LE_WORD_1 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 1), sub_64));
  dag LE_WORD_2 =
    (MFVSRWZ (EXTRACT_SUBREG (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64));
  dag LE_WORD_3 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 3), sub_64));

  // Halfword extraction
  // Elements 0-3 come from LE_DWORD_0, elements 4-7 from LE_DWORD_1; RLDICL
  // rotates the wanted halfword to the low end and clears the upper 48 bits.
  dag LE_HALF_0 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 0, 48), sub_32));
  dag LE_HALF_1 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 48, 48), sub_32));
  dag LE_HALF_2 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 32, 48), sub_32));
  dag LE_HALF_3 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 16, 48), sub_32));
  dag LE_HALF_4 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 0, 48), sub_32));
  dag LE_HALF_5 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 48, 48), sub_32));
  dag LE_HALF_6 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 32, 48), sub_32));
  dag LE_HALF_7 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 16, 48), sub_32));

  // Byte extraction
  // Elements 0-7 come from LE_DWORD_0, elements 8-15 from LE_DWORD_1; RLDICL
  // rotates the wanted byte to the low end and clears the upper 56 bits.
  dag LE_BYTE_0 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 0, 56), sub_32));
  dag LE_BYTE_1 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 56, 56), sub_32));
  dag LE_BYTE_2 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 48, 56), sub_32));
  dag LE_BYTE_3 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 40, 56), sub_32));
  dag LE_BYTE_4 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 32, 56), sub_32));
  dag LE_BYTE_5 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 24, 56), sub_32));
  dag LE_BYTE_6 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 16, 56), sub_32));
  dag LE_BYTE_7 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 8, 56), sub_32));
  dag LE_BYTE_8 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 0, 56), sub_32));
  dag LE_BYTE_9 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 56, 56), sub_32));
  dag LE_BYTE_10 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 48, 56), sub_32));
  dag LE_BYTE_11 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 40, 56), sub_32));
  dag LE_BYTE_12 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 32, 56), sub_32));
  dag LE_BYTE_13 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 24, 56), sub_32));
  dag LE_BYTE_14 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 16, 56), sub_32));
  dag LE_BYTE_15 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 8, 56), sub_32));

  // Variable element number (BE and LE patterns must be specified separately)
  // This is a rather involved process.
  //
  // Conceptually, this is how the move is accomplished:
  // 1. Identify which doubleword contains the element
  // 2. Shift in the VMX register so that the correct doubleword is correctly
  //    lined up for the MFVSRD
  // 3. Perform the move so that the element (along with some extra stuff)
  //    is in the GPR
  // 4. Right shift within the GPR so that the element is right-justified
  //
  // Of course, the index is an element number which has a different meaning
  // on LE/BE so the patterns have to be specified separately.
  //
  // Note: The final result will be the element right-justified with high
  //       order bits being arbitrarily defined (namely, whatever was in the
  //       vector register to the left of the value originally).

  // LE variable byte
  // Step 1:
  // - For elements 0-7, we shift left by 8 bytes since they're on the right
  // - For elements 8-15, we need not shift (shift left by zero bytes)
  // This is accomplished by inverting the bits of the index and AND-ing
  // with 0x8 (i.e. clearing all bits of the index and inverting bit 60).
  dag LE_VBYTE_PERM_VEC = (v16i8 (LVSL ZERO8, (ANDC8 (LI8 8), $Idx)));

  // Step 2:
  // - Now that we set up the shift amount, we shift in the VMX register
  dag LE_VBYTE_PERMUTE = (v16i8 (VPERM $S, $S, LE_VBYTE_PERM_VEC));

  // Step 3:
  // - The doubleword containing our element is moved to a GPR
  dag LE_MV_VBYTE =
    (MFVSRD (EXTRACT_SUBREG (v2i64 (COPY_TO_REGCLASS LE_VBYTE_PERMUTE, VSRC)),
                            sub_64));

  // Step 4:
  // - Truncate the element number to the range 0-7 (8-15 are symmetrical
  //   and out of range values are truncated accordingly)
  // - Multiply by 8 as we need to shift right by the number of bits, not bytes
  // - Shift right in the GPR by the calculated value
  dag LE_VBYTE_SHIFT =
    (EXTRACT_SUBREG (RLDICR (AND8 (LI8 7), $Idx), 3, 60), sub_32);
  dag LE_VARIABLE_BYTE =
    (EXTRACT_SUBREG (SRD LE_MV_VBYTE, LE_VBYTE_SHIFT), sub_32);

  // LE variable halfword
  // Step 1:
  // - For elements 0-3, we shift left by 8 since they're on the right
  // - For elements 4-7, we need not shift (shift left by zero bytes)
  // Similarly to the byte pattern, we invert the bits of the index, but we
  // AND with 0x4 (i.e. clear all bits of the index and invert bit 61).
  // Of course, the shift is still by 8 bytes, so we must multiply by 2.
  dag LE_VHALF_PERM_VEC =
    (v16i8 (LVSL ZERO8, (RLDICR (ANDC8 (LI8 4), $Idx), 1, 62)));

  // Step 2:
  // - Now that we set up the shift amount, we shift in the VMX register
  dag LE_VHALF_PERMUTE = (v16i8 (VPERM $S, $S, LE_VHALF_PERM_VEC));

  // Step 3:
  // - The doubleword containing our element is moved to a GPR
  dag LE_MV_VHALF =
    (MFVSRD (EXTRACT_SUBREG (v2i64 (COPY_TO_REGCLASS LE_VHALF_PERMUTE, VSRC)),
                            sub_64));

  // Step 4:
  // - Truncate the element number to the range 0-3 (4-7 are symmetrical
  //   and out of range values are truncated accordingly)
  // - Multiply by 16 as we need to shift right by the number of bits
  // - Shift right in the GPR by the calculated value
  dag LE_VHALF_SHIFT =
    (EXTRACT_SUBREG (RLDICR (AND8 (LI8 3), $Idx), 4, 59), sub_32);
  dag LE_VARIABLE_HALF =
    (EXTRACT_SUBREG (SRD LE_MV_VHALF, LE_VHALF_SHIFT), sub_32);

  // LE variable word
  // Step 1:
  // - For elements 0-1, we shift left by 8 since they're on the right
  // - For elements 2-3, we need not shift
  dag LE_VWORD_PERM_VEC =
    (v16i8 (LVSL ZERO8, (RLDICR (ANDC8 (LI8 2), $Idx), 2, 61)));

  // Step 2:
  // - Now that we set up the shift amount, we shift in the VMX register
  dag LE_VWORD_PERMUTE = (v16i8 (VPERM $S, $S, LE_VWORD_PERM_VEC));

  // Step 3:
  // - The doubleword containing our element is moved to a GPR
  dag LE_MV_VWORD =
    (MFVSRD (EXTRACT_SUBREG (v2i64 (COPY_TO_REGCLASS LE_VWORD_PERMUTE, VSRC)),
                            sub_64));

  // Step 4:
  // - Truncate the element number to the range 0-1 (2-3 are symmetrical
  //   and out of range values are truncated accordingly)
  // - Multiply by 32 as we need to shift right by the number of bits
  // - Shift right in the GPR by the calculated value
  dag LE_VWORD_SHIFT =
    (EXTRACT_SUBREG (RLDICR (AND8 (LI8 1), $Idx), 5, 58), sub_32);
  dag LE_VARIABLE_WORD =
    (EXTRACT_SUBREG (SRD LE_MV_VWORD, LE_VWORD_SHIFT), sub_32);

  // LE variable doubleword
  // Step 1:
  // - For element 0, we shift left by 8 since it's on the right
  // - For element 1, we need not shift
  dag LE_VDWORD_PERM_VEC =
    (v16i8 (LVSL ZERO8, (RLDICR (ANDC8 (LI8 1), $Idx), 3, 60)));

  // Step 2:
  // - Now that we set up the shift amount, we shift in the VMX register
  dag LE_VDWORD_PERMUTE = (v16i8 (VPERM $S, $S, LE_VDWORD_PERM_VEC));

  // Step 3:
  // - The doubleword containing our element is moved to a GPR
  // - Step 4 is not needed for the doubleword as the value is 64-bits
  dag LE_VARIABLE_DWORD =
    (MFVSRD (EXTRACT_SUBREG (v2i64 (COPY_TO_REGCLASS LE_VDWORD_PERMUTE, VSRC)),
                            sub_64));

  // LE variable float
  // - Shift the vector to line up the desired element to BE Word 0
  // - Convert 32-bit float to a 64-bit single precision float
  dag LE_VFLOAT_PERM_VEC =
    (v16i8 (LVSL ZERO8, (RLDICR (XOR8 (LI8 3), $Idx), 2, 61)));
  dag LE_VFLOAT_PERMUTE = (VPERM $S, $S, LE_VFLOAT_PERM_VEC);
  dag LE_VARIABLE_FLOAT = (XSCVSPDPN LE_VFLOAT_PERMUTE);

  // LE variable double
  // Same as the LE doubleword except there is no move.
  dag LE_VDOUBLE_PERMUTE =
    (v16i8 (VPERM (v16i8 (COPY_TO_REGCLASS $S, VRRC)),
                  (v16i8 (COPY_TO_REGCLASS $S, VRRC)),
                  LE_VDWORD_PERM_VEC));
  dag LE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS LE_VDOUBLE_PERMUTE, VSRC);

  // BE variable byte
  // The algorithm here is the same as the LE variable byte except:
  // - The shift in the VMX register is by 0/8 for opposite element numbers so
  //   we simply AND the element number with 0x8
  // - The order of elements after the move to GPR is reversed, so we invert
  //   the bits of the index prior to truncating to the range 0-7
  dag BE_VBYTE_PERM_VEC = (v16i8 (LVSL ZERO8, (ANDI8_rec $Idx, 8)));
  dag BE_VBYTE_PERMUTE = (v16i8 (VPERM $S, $S, BE_VBYTE_PERM_VEC));
  dag BE_MV_VBYTE =
    (MFVSRD (EXTRACT_SUBREG (v2i64 (COPY_TO_REGCLASS BE_VBYTE_PERMUTE, VSRC)),
                            sub_64));
  dag BE_VBYTE_SHIFT =
    (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 7), $Idx), 3, 60), sub_32);
  dag BE_VARIABLE_BYTE =
    (EXTRACT_SUBREG (SRD BE_MV_VBYTE, BE_VBYTE_SHIFT), sub_32);

  // BE variable halfword
  // The algorithm here is the same as the LE variable halfword except:
  // - The shift in the VMX register is by 0/8 for opposite element numbers so
  //   we simply AND the element number with 0x4 and multiply by 2
  // - The order of elements after the move to GPR is reversed, so we invert
  //   the bits of the index prior to truncating to the range 0-3
  dag BE_VHALF_PERM_VEC =
    (v16i8 (LVSL ZERO8, (RLDICR (ANDI8_rec $Idx, 4), 1, 62)));
  dag BE_VHALF_PERMUTE = (v16i8 (VPERM $S, $S, BE_VHALF_PERM_VEC));
  dag BE_MV_VHALF =
    (MFVSRD (EXTRACT_SUBREG (v2i64 (COPY_TO_REGCLASS BE_VHALF_PERMUTE, VSRC)),
                            sub_64));
  dag BE_VHALF_SHIFT =
    (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 3), $Idx), 4, 59), sub_32);
  dag BE_VARIABLE_HALF =
    (EXTRACT_SUBREG (SRD BE_MV_VHALF, BE_VHALF_SHIFT), sub_32);

  // BE variable word
  // The algorithm is the same as the LE variable word except:
  // - The shift in the VMX register happens for opposite element numbers
  // - The order of elements after the move to GPR is reversed, so we invert
  //   the bits of the index prior to truncating to the range 0-1
  dag BE_VWORD_PERM_VEC =
    (v16i8 (LVSL ZERO8, (RLDICR (ANDI8_rec $Idx, 2), 2, 61)));
  dag BE_VWORD_PERMUTE = (v16i8 (VPERM $S, $S, BE_VWORD_PERM_VEC));
  dag BE_MV_VWORD =
    (MFVSRD (EXTRACT_SUBREG (v2i64 (COPY_TO_REGCLASS BE_VWORD_PERMUTE, VSRC)),
                            sub_64));
  dag BE_VWORD_SHIFT =
    (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 1), $Idx), 5, 58), sub_32);
  dag BE_VARIABLE_WORD =
    (EXTRACT_SUBREG (SRD BE_MV_VWORD, BE_VWORD_SHIFT), sub_32);

  // BE variable doubleword
  // Same as the LE doubleword except we shift in the VMX register for
  // opposite element indices.
  dag BE_VDWORD_PERM_VEC =
    (v16i8 (LVSL ZERO8, (RLDICR (ANDI8_rec $Idx, 1), 3, 60)));
  dag BE_VDWORD_PERMUTE = (v16i8 (VPERM $S, $S, BE_VDWORD_PERM_VEC));
  dag BE_VARIABLE_DWORD =
    (MFVSRD (EXTRACT_SUBREG (v2i64 (COPY_TO_REGCLASS BE_VDWORD_PERMUTE, VSRC)),
                            sub_64));

  // BE variable float
  // - Shift the vector to line up the desired element to BE Word 0
  // - Convert 32-bit float to a 64-bit single precision float
  dag BE_VFLOAT_PERM_VEC = (v16i8 (LVSL ZERO8, (RLDICR $Idx, 2, 61)));
  dag BE_VFLOAT_PERMUTE = (VPERM $S, $S, BE_VFLOAT_PERM_VEC);
  dag BE_VARIABLE_FLOAT = (XSCVSPDPN BE_VFLOAT_PERMUTE);

  // BE variable double
  // Same as the BE doubleword except there is no move.
  dag BE_VDOUBLE_PERMUTE =
    (v16i8 (VPERM (v16i8 (COPY_TO_REGCLASS $S, VRRC)),
                  (v16i8 (COPY_TO_REGCLASS $S, VRRC)),
                  BE_VDWORD_PERM_VEC));
  dag BE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS BE_VDOUBLE_PERMUTE, VSRC);
}

// True when the subtarget reports no P9 Altivec support.
def NoP9Altivec : Predicate<"!PPCSubTarget->hasP9Altivec()">;

let AddedComplexity = 400 in {
// v4f32 scalar <-> vector conversions (BE)
let Predicates = [IsBigEndian, HasP8Vector] in {
  def : Pat<(v4f32 (scalar_to_vector f32:$A)),
            (v4f32 (XSCVDPSPN $A))>;
  // Element 0 is converted directly; the others are rotated into place first.
  def : Pat<(f32 (vector_extract v4f32:$S, 0)),
            (f32 (XSCVSPDPN $S))>;
  def : Pat<(f32 (vector_extract v4f32:$S, 1)),
            (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>;
  def : Pat<(f32 (vector_extract v4f32:$S, 2)),
            (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>;
  def : Pat<(f32 (vector_extract v4f32:$S, 3)),
            (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
  def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)),
            (f32 VectorExtractions.BE_VARIABLE_FLOAT)>;
} // IsBigEndian, HasP8Vector

// Variable index vector_extract for v2f64 does not require P8Vector
let Predicates = [IsBigEndian, HasVSX] in {
  def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
            (f64 VectorExtractions.BE_VARIABLE_DOUBLE)>;
}

let Predicates = [IsBigEndian, HasDirectMove] in {
  // Integer scalar -> vector conversions (BE), one per element width
  def : Pat<(v16i8 (scalar_to_vector i32:$A)),
            (v16i8 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_BYTE_0, sub_64))>;
  def : Pat<(v8i16 (scalar_to_vector i32:$A)),
            (v8i16 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_HALF_0, sub_64))>;
  def : Pat<(v4i32 (scalar_to_vector i32:$A)),
            (v4i32 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_WORD_0, sub_64))>;
  def : Pat<(v2i64 (scalar_to_vector i64:$A)),
            (v2i64 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_DWORD_0, sub_64))>;

  // v2i64 vector -> scalar conversions (BE); BE element i is LE element 1-i
  def : Pat<(i64 (vector_extract v2i64:$S, 0)),
            (i64 VectorExtractions.LE_DWORD_1)>;
def : Pat<(i64 (vector_extract v2i64:$S, 1)),
            (i64 VectorExtractions.LE_DWORD_0)>;
  def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)),
            (i64 VectorExtractions.BE_VARIABLE_DWORD)>;
} // IsBigEndian, HasDirectMove

// Integer element extraction on big endian without P9 Altivec.
// The constant-index DAGs in VectorExtractions are numbered for LE, so BE
// element i of an N-element vector uses LE element N-1-i.
let Predicates = [IsBigEndian, HasDirectMove, NoP9Altivec] in {
  // v16i8 vector -> scalar conversions (BE)
  def : Pat<(i32 (vector_extract v16i8:$S, 0)),
            (i32 VectorExtractions.LE_BYTE_15)>;
  def : Pat<(i32 (vector_extract v16i8:$S, 1)),
            (i32 VectorExtractions.LE_BYTE_14)>;
  def : Pat<(i32 (vector_extract v16i8:$S, 2)),
            (i32 VectorExtractions.LE_BYTE_13)>;
  def : Pat<(i32 (vector_extract v16i8:$S, 3)),
            (i32 VectorExtractions.LE_BYTE_12)>;
  def : Pat<(i32 (vector_extract v16i8:$S, 4)),
            (i32 VectorExtractions.LE_BYTE_11)>;
  def : Pat<(i32 (vector_extract v16i8:$S, 5)),
            (i32 VectorExtractions.LE_BYTE_10)>;
  def : Pat<(i32 (vector_extract v16i8:$S, 6)),
            (i32 VectorExtractions.LE_BYTE_9)>;
  def : Pat<(i32 (vector_extract v16i8:$S, 7)),
            (i32 VectorExtractions.LE_BYTE_8)>;
  def : Pat<(i32 (vector_extract v16i8:$S, 8)),
            (i32 VectorExtractions.LE_BYTE_7)>;
  def : Pat<(i32 (vector_extract v16i8:$S, 9)),
            (i32 VectorExtractions.LE_BYTE_6)>;
  def : Pat<(i32 (vector_extract v16i8:$S, 10)),
            (i32 VectorExtractions.LE_BYTE_5)>;
  def : Pat<(i32 (vector_extract v16i8:$S, 11)),
            (i32 VectorExtractions.LE_BYTE_4)>;
  def : Pat<(i32 (vector_extract v16i8:$S, 12)),
            (i32 VectorExtractions.LE_BYTE_3)>;
  def : Pat<(i32 (vector_extract v16i8:$S, 13)),
            (i32 VectorExtractions.LE_BYTE_2)>;
  def : Pat<(i32 (vector_extract v16i8:$S, 14)),
            (i32 VectorExtractions.LE_BYTE_1)>;
  def : Pat<(i32 (vector_extract v16i8:$S, 15)),
            (i32 VectorExtractions.LE_BYTE_0)>;
  def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
            (i32 VectorExtractions.BE_VARIABLE_BYTE)>;

  // v8i16 scalar <-> vector conversions (BE)
  def : Pat<(i32 (vector_extract v8i16:$S, 0)),
            (i32 VectorExtractions.LE_HALF_7)>;
  def : Pat<(i32 (vector_extract v8i16:$S, 1)),
            (i32 VectorExtractions.LE_HALF_6)>;
  def : Pat<(i32 (vector_extract v8i16:$S, 2)),
            (i32 VectorExtractions.LE_HALF_5)>;
  def : Pat<(i32 (vector_extract v8i16:$S, 3)),
            (i32 VectorExtractions.LE_HALF_4)>;
  def : Pat<(i32 (vector_extract v8i16:$S, 4)),
            (i32 VectorExtractions.LE_HALF_3)>;
  def : Pat<(i32 (vector_extract v8i16:$S, 5)),
            (i32 VectorExtractions.LE_HALF_2)>;
  def : Pat<(i32 (vector_extract v8i16:$S, 6)),
            (i32 VectorExtractions.LE_HALF_1)>;
  def : Pat<(i32 (vector_extract v8i16:$S, 7)),
            (i32 VectorExtractions.LE_HALF_0)>;
  def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
            (i32 VectorExtractions.BE_VARIABLE_HALF)>;

  // v4i32 scalar <-> vector conversions (BE)
  def : Pat<(i32 (vector_extract v4i32:$S, 0)),
            (i32 VectorExtractions.LE_WORD_3)>;
  def : Pat<(i32 (vector_extract v4i32:$S, 1)),
            (i32 VectorExtractions.LE_WORD_2)>;
  def : Pat<(i32 (vector_extract v4i32:$S, 2)),
            (i32 VectorExtractions.LE_WORD_1)>;
  def : Pat<(i32 (vector_extract v4i32:$S, 3)),
            (i32 VectorExtractions.LE_WORD_0)>;
  def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
            (i32 VectorExtractions.BE_VARIABLE_WORD)>;
} // IsBigEndian, HasDirectMove, NoP9Altivec

// v4f32 scalar <-> vector conversions (LE)
let Predicates = [IsLittleEndian, HasP8Vector] in {
  def : Pat<(v4f32 (scalar_to_vector f32:$A)),
            (v4f32 (XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 1))>;
  // Element 3 is converted directly; the others are rotated into place first.
  def : Pat<(f32 (vector_extract v4f32:$S, 0)),
            (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
  def : Pat<(f32 (vector_extract v4f32:$S, 1)),
            (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>;
  def : Pat<(f32 (vector_extract v4f32:$S, 2)),
            (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>;
  def : Pat<(f32 (vector_extract v4f32:$S, 3)),
            (f32 (XSCVSPDPN $S))>;
  def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)),
            (f32 VectorExtractions.LE_VARIABLE_FLOAT)>;
} // IsLittleEndian, HasP8Vector

// Variable index vector_extract for v2f64 does not require P8Vector
let Predicates = [IsLittleEndian, HasVSX] in
  def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
            (f64 VectorExtractions.LE_VARIABLE_DOUBLE)>;

// The *_be load/store intrinsics map straight onto LXVD2X/LXVW4X and
// STXVD2X/STXVW4X.
def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst),
            (STXVD2X $rS, xoaddr:$dst)>;
def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst),
            (STXVW4X $rS, xoaddr:$dst)>;
def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;

// Variable index unsigned vector_extract on Power9
//
// The VEXTU*RX patterns below take a byte offset in a GPR. Halfword indices
// are therefore scaled by 2 and word indices by 4: RLWINM8 does this for
// variable indices, and constant indices use a pre-scaled LI8 immediate.
let Predicates = [HasP9Altivec, IsLittleEndian] in {
  def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))),
            (VEXTUBRX $Idx, $S)>;

  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))),
            (VEXTUHRX (RLWINM8 $Idx, 1, 28, 30), $S)>;
  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))),
            (VEXTUHRX (LI8 0), $S)>;
  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))),
            (VEXTUHRX (LI8 2), $S)>;
  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))),
            (VEXTUHRX (LI8 4), $S)>;
  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))),
            (VEXTUHRX (LI8 6), $S)>;
  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))),
            (VEXTUHRX (LI8 8), $S)>;
  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))),
            (VEXTUHRX (LI8 10), $S)>;
  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))),
            (VEXTUHRX (LI8 12), $S)>;
  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))),
            (VEXTUHRX (LI8 14), $S)>;

  def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
            (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S)>;
  def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))),
            (VEXTUWRX (LI8 0), $S)>;
  def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))),
            (VEXTUWRX (LI8 4), $S)>;
  // For extracting LE word 2, MFVSRWZ is better than VEXTUWRX
  def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))),
            (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
            (i32 VectorExtractions.LE_WORD_2), sub_32)>;
  def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))),
            (VEXTUWRX (LI8 12), $S)>;

  def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
            (EXTSW (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S))>;
  def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))),
            (EXTSW (VEXTUWRX (LI8 0), $S))>;
  def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))),
            (EXTSW (VEXTUWRX (LI8 4), $S))>;
  // For extracting LE word 2, MFVSRWZ is better than VEXTUWRX
  def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))),
            (EXTSW (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
            (i32 VectorExtractions.LE_WORD_2), sub_32))>;
  def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))),
            (EXTSW (VEXTUWRX (LI8 12), $S))>;

  def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
            (i32 (EXTRACT_SUBREG (VEXTUBRX $Idx, $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 0)),
            (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 0), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 1)),
            (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 1), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 2)),
            (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 2), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 3)),
            (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 3), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 4)),
            (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 4), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 5)),
            (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 5), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 6)),
            (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 6), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 7)),
            (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 7), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 8)),
            (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 8), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 9)),
            (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 9), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 10)),
            (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 10), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 11)),
            (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 11), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 12)),
            (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 12), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 13)),
            (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 13), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 14)),
            (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 14), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 15)),
            (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 15), $S), sub_32))>;

  def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
            (i32 (EXTRACT_SUBREG (VEXTUHRX
            (RLWINM8 $Idx, 1, 28, 30), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v8i16:$S, 0)),
            (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 0), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v8i16:$S, 1)),
            (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 2), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v8i16:$S, 2)),
            (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 4), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v8i16:$S, 3)),
            (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 6), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v8i16:$S, 4)),
            (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 8), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v8i16:$S, 5)),
            (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 10), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v8i16:$S, 6)),
            (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 12), $S), sub_32))>;
  // Halfword 7 lives at byte offset 14. This pattern must match index 7;
  // matching index 6 again would pair element 6 with the wrong offset and
  // leave element 7 without a constant-index pattern.
  def : Pat<(i32 (vector_extract v8i16:$S, 7)),
            (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 14), $S), sub_32))>;

  def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
            (i32 (EXTRACT_SUBREG (VEXTUWRX
            (RLWINM8 $Idx, 2, 28, 29), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v4i32:$S, 0)),
            (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 0), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v4i32:$S, 1)),
            (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 4), $S), sub_32))>;
  // For extracting LE word 2, MFVSRWZ is better than VEXTUWRX
  def : Pat<(i32 (vector_extract v4i32:$S, 2)),
            (i32 VectorExtractions.LE_WORD_2)>;
  def : Pat<(i32 (vector_extract v4i32:$S, 3)),
            (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 12), $S), sub_32))>;
} // HasP9Altivec, IsLittleEndian

// Big-endian counterpart of the Power9 patterns, using the VEXTU*LX forms
// with the same byte-offset scaling of the element index.
let Predicates = [HasP9Altivec, IsBigEndian] in {
  def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))),
            (VEXTUBLX $Idx, $S)>;

  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))),
            (VEXTUHLX (RLWINM8 $Idx, 1, 28, 30), $S)>;
  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))),
            (VEXTUHLX (LI8 0), $S)>;
  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))),
            (VEXTUHLX (LI8 2), $S)>;
  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))),
            (VEXTUHLX (LI8 4), $S)>;
  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))),
            (VEXTUHLX (LI8 6), $S)>;
  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))),
            (VEXTUHLX (LI8 8), $S)>;
  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))),
            (VEXTUHLX (LI8 10), $S)>;
  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))),
            (VEXTUHLX (LI8 12), $S)>;
  def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))),
            (VEXTUHLX (LI8 14), $S)>;

  def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
            (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S)>;
  def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))),
            (VEXTUWLX (LI8 0), $S)>;

  // For extracting BE word 1, MFVSRWZ is better than VEXTUWLX
  def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))),
            (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
            (i32 VectorExtractions.LE_WORD_2), sub_32)>;
  def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))),
            (VEXTUWLX (LI8 8), $S)>;
  def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))),
            (VEXTUWLX (LI8 12), $S)>;

  def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
            (EXTSW (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S))>;
  def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))),
            (EXTSW (VEXTUWLX (LI8 0), $S))>;
  // For extracting BE word 1, MFVSRWZ is better than VEXTUWLX
  def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))),
            (EXTSW (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
            (i32 VectorExtractions.LE_WORD_2), sub_32))>;
  def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))),
            (EXTSW (VEXTUWLX (LI8 8), $S))>;
  def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))),
            (EXTSW (VEXTUWLX (LI8 12), $S))>;

  def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
            (i32 (EXTRACT_SUBREG (VEXTUBLX $Idx, $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 0)),
            (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 0), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 1)),
            (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 1), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 2)),
            (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 2), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 3)),
            (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 3), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 4)),
            (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 4), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 5)),
            (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 5), $S), sub_32))>;
  def : Pat<(i32 (vector_extract v16i8:$S, 6)),
            (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 6),
$S), sub_32))>; 2366 def : Pat<(i32 (vector_extract v16i8:$S, 7)), 2367 (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 7), $S), sub_32))>; 2368 def : Pat<(i32 (vector_extract v16i8:$S, 8)), 2369 (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 8), $S), sub_32))>; 2370 def : Pat<(i32 (vector_extract v16i8:$S, 9)), 2371 (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 9), $S), sub_32))>; 2372 def : Pat<(i32 (vector_extract v16i8:$S, 10)), 2373 (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 10), $S), sub_32))>; 2374 def : Pat<(i32 (vector_extract v16i8:$S, 11)), 2375 (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 11), $S), sub_32))>; 2376 def : Pat<(i32 (vector_extract v16i8:$S, 12)), 2377 (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 12), $S), sub_32))>; 2378 def : Pat<(i32 (vector_extract v16i8:$S, 13)), 2379 (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 13), $S), sub_32))>; 2380 def : Pat<(i32 (vector_extract v16i8:$S, 14)), 2381 (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 14), $S), sub_32))>; 2382 def : Pat<(i32 (vector_extract v16i8:$S, 15)), 2383 (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 15), $S), sub_32))>; 2384 2385 def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), 2386 (i32 (EXTRACT_SUBREG (VEXTUHLX 2387 (RLWINM8 $Idx, 1, 28, 30), $S), sub_32))>; 2388 def : Pat<(i32 (vector_extract v8i16:$S, 0)), 2389 (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 0), $S), sub_32))>; 2390 def : Pat<(i32 (vector_extract v8i16:$S, 1)), 2391 (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 2), $S), sub_32))>; 2392 def : Pat<(i32 (vector_extract v8i16:$S, 2)), 2393 (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 4), $S), sub_32))>; 2394 def : Pat<(i32 (vector_extract v8i16:$S, 3)), 2395 (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 6), $S), sub_32))>; 2396 def : Pat<(i32 (vector_extract v8i16:$S, 4)), 2397 (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 8), $S), sub_32))>; 2398 def : Pat<(i32 (vector_extract v8i16:$S, 5)), 2399 (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 10), $S), sub_32))>; 2400 def : Pat<(i32 (vector_extract v8i16:$S, 6)), 2401 (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 12), $S), sub_32))>; 2402 def : Pat<(i32 
(vector_extract v8i16:$S, 6)), 2403 (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 14), $S), sub_32))>; 2404 2405 def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), 2406 (i32 (EXTRACT_SUBREG (VEXTUWLX 2407 (RLWINM8 $Idx, 2, 28, 29), $S), sub_32))>; 2408 def : Pat<(i32 (vector_extract v4i32:$S, 0)), 2409 (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 0), $S), sub_32))>; 2410 // For extracting BE word 1, MFVSRWZ is better than VEXTUWLX 2411 def : Pat<(i32 (vector_extract v4i32:$S, 1)), 2412 (i32 VectorExtractions.LE_WORD_2)>; 2413 def : Pat<(i32 (vector_extract v4i32:$S, 2)), 2414 (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 8), $S), sub_32))>; 2415 def : Pat<(i32 (vector_extract v4i32:$S, 3)), 2416 (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 12), $S), sub_32))>; 2417} 2418 2419let Predicates = [IsLittleEndian, HasDirectMove] in { 2420 // v16i8 scalar <-> vector conversions (LE) 2421 def : Pat<(v16i8 (scalar_to_vector i32:$A)), 2422 (v16i8 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>; 2423 def : Pat<(v8i16 (scalar_to_vector i32:$A)), 2424 (v8i16 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>; 2425 def : Pat<(v4i32 (scalar_to_vector i32:$A)), 2426 (v4i32 MovesToVSR.LE_WORD_0)>; 2427 def : Pat<(v2i64 (scalar_to_vector i64:$A)), 2428 (v2i64 MovesToVSR.LE_DWORD_0)>; 2429 // v2i64 scalar <-> vector conversions (LE) 2430 def : Pat<(i64 (vector_extract v2i64:$S, 0)), 2431 (i64 VectorExtractions.LE_DWORD_0)>; 2432 def : Pat<(i64 (vector_extract v2i64:$S, 1)), 2433 (i64 VectorExtractions.LE_DWORD_1)>; 2434 def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)), 2435 (i64 VectorExtractions.LE_VARIABLE_DWORD)>; 2436} // IsLittleEndian, HasDirectMove 2437 2438let Predicates = [IsLittleEndian, HasDirectMove, NoP9Altivec] in { 2439 def : Pat<(i32 (vector_extract v16i8:$S, 0)), 2440 (i32 VectorExtractions.LE_BYTE_0)>; 2441 def : Pat<(i32 (vector_extract v16i8:$S, 1)), 2442 (i32 VectorExtractions.LE_BYTE_1)>; 2443 def : Pat<(i32 (vector_extract v16i8:$S, 2)), 2444 (i32 VectorExtractions.LE_BYTE_2)>; 2445 def : 
Pat<(i32 (vector_extract v16i8:$S, 3)), 2446 (i32 VectorExtractions.LE_BYTE_3)>; 2447 def : Pat<(i32 (vector_extract v16i8:$S, 4)), 2448 (i32 VectorExtractions.LE_BYTE_4)>; 2449 def : Pat<(i32 (vector_extract v16i8:$S, 5)), 2450 (i32 VectorExtractions.LE_BYTE_5)>; 2451 def : Pat<(i32 (vector_extract v16i8:$S, 6)), 2452 (i32 VectorExtractions.LE_BYTE_6)>; 2453 def : Pat<(i32 (vector_extract v16i8:$S, 7)), 2454 (i32 VectorExtractions.LE_BYTE_7)>; 2455 def : Pat<(i32 (vector_extract v16i8:$S, 8)), 2456 (i32 VectorExtractions.LE_BYTE_8)>; 2457 def : Pat<(i32 (vector_extract v16i8:$S, 9)), 2458 (i32 VectorExtractions.LE_BYTE_9)>; 2459 def : Pat<(i32 (vector_extract v16i8:$S, 10)), 2460 (i32 VectorExtractions.LE_BYTE_10)>; 2461 def : Pat<(i32 (vector_extract v16i8:$S, 11)), 2462 (i32 VectorExtractions.LE_BYTE_11)>; 2463 def : Pat<(i32 (vector_extract v16i8:$S, 12)), 2464 (i32 VectorExtractions.LE_BYTE_12)>; 2465 def : Pat<(i32 (vector_extract v16i8:$S, 13)), 2466 (i32 VectorExtractions.LE_BYTE_13)>; 2467 def : Pat<(i32 (vector_extract v16i8:$S, 14)), 2468 (i32 VectorExtractions.LE_BYTE_14)>; 2469 def : Pat<(i32 (vector_extract v16i8:$S, 15)), 2470 (i32 VectorExtractions.LE_BYTE_15)>; 2471 def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), 2472 (i32 VectorExtractions.LE_VARIABLE_BYTE)>; 2473 2474 // v8i16 scalar <-> vector conversions (LE) 2475 def : Pat<(i32 (vector_extract v8i16:$S, 0)), 2476 (i32 VectorExtractions.LE_HALF_0)>; 2477 def : Pat<(i32 (vector_extract v8i16:$S, 1)), 2478 (i32 VectorExtractions.LE_HALF_1)>; 2479 def : Pat<(i32 (vector_extract v8i16:$S, 2)), 2480 (i32 VectorExtractions.LE_HALF_2)>; 2481 def : Pat<(i32 (vector_extract v8i16:$S, 3)), 2482 (i32 VectorExtractions.LE_HALF_3)>; 2483 def : Pat<(i32 (vector_extract v8i16:$S, 4)), 2484 (i32 VectorExtractions.LE_HALF_4)>; 2485 def : Pat<(i32 (vector_extract v8i16:$S, 5)), 2486 (i32 VectorExtractions.LE_HALF_5)>; 2487 def : Pat<(i32 (vector_extract v8i16:$S, 6)), 2488 (i32 
VectorExtractions.LE_HALF_6)>; 2489 def : Pat<(i32 (vector_extract v8i16:$S, 7)), 2490 (i32 VectorExtractions.LE_HALF_7)>; 2491 def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), 2492 (i32 VectorExtractions.LE_VARIABLE_HALF)>; 2493 2494 // v4i32 scalar <-> vector conversions (LE) 2495 def : Pat<(i32 (vector_extract v4i32:$S, 0)), 2496 (i32 VectorExtractions.LE_WORD_0)>; 2497 def : Pat<(i32 (vector_extract v4i32:$S, 1)), 2498 (i32 VectorExtractions.LE_WORD_1)>; 2499 def : Pat<(i32 (vector_extract v4i32:$S, 2)), 2500 (i32 VectorExtractions.LE_WORD_2)>; 2501 def : Pat<(i32 (vector_extract v4i32:$S, 3)), 2502 (i32 VectorExtractions.LE_WORD_3)>; 2503 def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), 2504 (i32 VectorExtractions.LE_VARIABLE_WORD)>; 2505} // IsLittleEndian, HasDirectMove, NoP9Altivec 2506 2507let Predicates = [HasDirectMove, HasVSX] in { 2508// bitconvert f32 -> i32 2509// (convert to 32-bit fp single, shift right 1 word, move to GPR) 2510def : Pat<(i32 (bitconvert f32:$S)), 2511 (i32 (MFVSRWZ (EXTRACT_SUBREG 2512 (XXSLDWI (XSCVDPSPN $S), (XSCVDPSPN $S), 3), 2513 sub_64)))>; 2514// bitconvert i32 -> f32 2515// (move to FPR, shift left 1 word, convert to 64-bit fp single) 2516def : Pat<(f32 (bitconvert i32:$A)), 2517 (f32 (XSCVSPDPN 2518 (XXSLDWI MovesToVSR.LE_WORD_1, MovesToVSR.LE_WORD_1, 1)))>; 2519 2520// bitconvert f64 -> i64 2521// (move to GPR, nothing else needed) 2522def : Pat<(i64 (bitconvert f64:$S)), 2523 (i64 (MFVSRD $S))>; 2524 2525// bitconvert i64 -> f64 2526// (move to FPR, nothing else needed) 2527def : Pat<(f64 (bitconvert i64:$S)), 2528 (f64 (MTVSRD $S))>; 2529 2530// Rounding to integer. 
// lrint/llrint select FCTID directly; lround/llround first apply XSRDPI and
// then FCTID. In every case the result is moved to a GPR with MFVSRD. f32
// inputs are first copied into the register class the instruction takes.
def : Pat<(i64 (lrint f64:$S)),
          (i64 (MFVSRD (FCTID $S)))>;
def : Pat<(i64 (lrint f32:$S)),
          (i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>;
def : Pat<(i64 (llrint f64:$S)),
          (i64 (MFVSRD (FCTID $S)))>;
def : Pat<(i64 (llrint f32:$S)),
          (i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>;
def : Pat<(i64 (lround f64:$S)),
          (i64 (MFVSRD (FCTID (XSRDPI $S))))>;
def : Pat<(i64 (lround f32:$S)),
          (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
def : Pat<(i64 (llround f64:$S)),
          (i64 (MFVSRD (FCTID (XSRDPI $S))))>;
def : Pat<(i64 (llround f32:$S)),
          (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
}

let Predicates = [HasVSX] in {
// Rounding for single precision.
// Each f32 rounding node is selected to the double-precision XSRDPI* variant:
// the input is copied to VSFRC and the result is copied back to VSSRC.
def : Pat<(f32 (fround f32:$S)),
          (f32 (COPY_TO_REGCLASS (XSRDPI
          (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
def : Pat<(f32 (fnearbyint f32:$S)),
          (f32 (COPY_TO_REGCLASS (XSRDPIC
          (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
def : Pat<(f32 (ffloor f32:$S)),
          (f32 (COPY_TO_REGCLASS (XSRDPIM
          (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
def : Pat<(f32 (fceil f32:$S)),
          (f32 (COPY_TO_REGCLASS (XSRDPIP
          (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
def : Pat<(f32 (ftrunc f32:$S)),
          (f32 (COPY_TO_REGCLASS (XSRDPIZ
          (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
}

// Materialize a zero-vector of long long
def : Pat<(v2i64 immAllZerosV),
          (v2i64 (XXLXORz))>;
}

// Reusable dags that place a scalar ($B) into a vector register.
// NOTE(review): the names suggest the value ends up in big-endian word 1;
// confirm against the users of these dags.
def AlignValues {
  dag F32_TO_BE_WORD1 = (v4f32 (XXSLDWI (XSCVDPSPN $B), (XSCVDPSPN $B), 3));
  dag I32_TO_BE_WORD1 = (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC);
}

// The following VSX instructions were introduced in Power ISA 3.0
def HasP9Vector : Predicate<"PPCSubTarget->hasP9Vector()">;
let AddedComplexity = 400, Predicates = [HasP9Vector] in {

  // Instruction-format helper classes. Each bracketed comment lists the
  // encoding fields of the format; the class fixes the operand lists, the
  // assembly string and the itinerary so the defs only supply opcodes, the
  // mnemonic and a selection pattern.

  // [PO VRT XO VRB XO /]
  class X_VT5_XO5_VB5<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
                      list<dag> pattern>
    : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$vT), (ins vrrc:$vB),
                    !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;

  // [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /]
  // NOTE(review): isRecordForm is presumably what sets the RO bit here;
  // confirm against the definition of isRecordForm.
  class X_VT5_XO5_VB5_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
                         list<dag> pattern>
    : X_VT5_XO5_VB5<opcode, xo2, xo, opc, pattern>, isRecordForm;

  // [PO VRT XO VRB XO /], but only the left 64 bits (or fewer) of VRB are
  // used, so the operand class of VRB is a parameter (vbtype).
  class X_VT5_XO5_VB5_TyVB<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
                           RegisterOperand vbtype, list<dag> pattern>
    : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$vT), (ins vbtype:$vB),
                    !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;

  // [PO VRT XO VRB XO /], with the result in a vfrc operand instead of vrrc.
  class X_VT5_XO5_VB5_VSFR<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
                      list<dag> pattern>
    : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vfrc:$vT), (ins vrrc:$vB),
                    !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;

  // [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /]
  class X_VT5_XO5_VB5_VSFR_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
                         list<dag> pattern>
    : X_VT5_XO5_VB5_VSFR<opcode, xo2, xo, opc, pattern>, isRecordForm;

  // [PO T XO B XO BX /]
  // GPR (g8rc) result, vsfrc source.
  class XX2_RT5_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
                        list<dag> pattern>
    : XX2_RD5_XO5_RS6<opcode, xo2, xo, (outs g8rc:$rT), (ins vsfrc:$XB),
                      !strconcat(opc, " $rT, $XB"), IIC_VecFP, pattern>;

  // [PO T XO B XO BX TX]
  // Source and destination share one operand class (vtype).
  class XX2_XT6_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
                        RegisterOperand vtype, list<dag> pattern>
    : XX2_RD6_XO5_RS6<opcode, xo2, xo, (outs vtype:$XT), (ins vtype:$XB),
                      !strconcat(opc, " $XT, $XB"), IIC_VecFP, pattern>;

  // [PO T A B XO AX BX TX]; source and destination registers may use
  // different operand classes (xty, aty, bty).
  class XX3_XT5_XA5_XB5<bits<6> opcode, bits<8> xo, string opc,
                  RegisterOperand xty, RegisterOperand aty, RegisterOperand bty,
                  InstrItinClass itin, list<dag> pattern>
    : XX3Form<opcode, xo, (outs xty:$XT), (ins aty:$XA, bty:$XB),
              !strconcat(opc, " $XT, $XA, $XB"), itin, pattern>;

  // [PO VRT VRA VRB XO /]
  class X_VT5_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
                      list<dag> pattern>
    : XForm_1<opcode, xo, (outs vrrc:$vT), (ins vrrc:$vA, vrrc:$vB),
              !strconcat(opc, " $vT, $vA, $vB"), IIC_VecFP, pattern>;

  // [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /]
  class X_VT5_VA5_VB5_Ro<bits<6> opcode, bits<10> xo, string opc,
                         list<dag> pattern>
    : X_VT5_VA5_VB5<opcode, xo, opc, pattern>, isRecordForm;

  // [PO VRT VRA VRB XO /]
  // FMA form: $vTi is an extra input tied to the output $vT and is not
  // encoded separately.
  class X_VT5_VA5_VB5_FMA<bits<6> opcode, bits<10> xo, string opc,
                          list<dag> pattern>
    : XForm_1<opcode, xo, (outs vrrc:$vT), (ins vrrc:$vTi, vrrc:$vA, vrrc:$vB),
              !strconcat(opc, " $vT, $vA, $vB"), IIC_VecFP, pattern>,
              RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">;

  // [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /]
  class X_VT5_VA5_VB5_FMA_Ro<bits<6> opcode, bits<10> xo, string opc,
                          list<dag> pattern>
    : X_VT5_VA5_VB5_FMA<opcode, xo, opc, pattern>, isRecordForm;

  //===--------------------------------------------------------------------===//
  // Quad-Precision Scalar Move Instructions:

  // Copy Sign
  // Note the operand order: the pattern is (fcopysign $vB, $vA).
  def XSCPSGNQP : X_VT5_VA5_VB5<63, 100, "xscpsgnqp",
                                [(set f128:$vT,
                                      (fcopysign f128:$vB, f128:$vA))]>;

  // Absolute/Negative-Absolute/Negate
  def XSABSQP : X_VT5_XO5_VB5<63, 0, 804, "xsabsqp",
                              [(set f128:$vT, (fabs f128:$vB))]>;
  def XSNABSQP : X_VT5_XO5_VB5<63, 8, 804, "xsnabsqp",
                               [(set f128:$vT, (fneg (fabs f128:$vB)))]>;
  def XSNEGQP : X_VT5_XO5_VB5<63, 16, 804, "xsnegqp",
                              [(set f128:$vT, (fneg f128:$vB))]>;

  //===--------------------------------------------------------------------===//
  // Quad-Precision Scalar Floating-Point Arithmetic Instructions:

  // Add/Divide/Multiply/Subtract
  // Only add and multiply are marked commutable.
  let isCommutable = 1 in {
  def XSADDQP : X_VT5_VA5_VB5 <63, 4, "xsaddqp",
                               [(set f128:$vT, (fadd f128:$vA, f128:$vB))]>;
  def XSMULQP : X_VT5_VA5_VB5 <63, 36, "xsmulqp",
                               [(set f128:$vT, (fmul f128:$vA, f128:$vB))]>;
  }
  def XSSUBQP : X_VT5_VA5_VB5 <63, 516, "xssubqp" ,
                               [(set f128:$vT, (fsub f128:$vA, f128:$vB))]>;
  def XSDIVQP : X_VT5_VA5_VB5 <63, 548, "xsdivqp",
                               [(set f128:$vT, (fdiv f128:$vA, f128:$vB))]>;
  // Square-Root
  def XSSQRTQP : X_VT5_XO5_VB5 <63, 27, 804, "xssqrtqp",
                                [(set f128:$vT, (fsqrt f128:$vB))]>;
  // (Negative) Multiply-{Add/Subtract}
  // $vTi is the accumulator input; it is tied to the result $vT by the FMA
  // format class.
  def XSMADDQP : X_VT5_VA5_VB5_FMA <63, 388, "xsmaddqp",
                                    [(set f128:$vT,
                                          (fma f128:$vA, f128:$vB,
                                               f128:$vTi))]>;
  def XSMSUBQP : X_VT5_VA5_VB5_FMA <63, 420, "xsmsubqp" ,
                                    [(set f128:$vT,
                                          (fma f128:$vA, f128:$vB,
                                               (fneg f128:$vTi)))]>;
  def XSNMADDQP : X_VT5_VA5_VB5_FMA <63, 452, "xsnmaddqp",
                                     [(set f128:$vT,
                                           (fneg (fma f128:$vA, f128:$vB,
                                                      f128:$vTi)))]>;
  def XSNMSUBQP : X_VT5_VA5_VB5_FMA <63, 484, "xsnmsubqp",
                                     [(set f128:$vT,
                                           (fneg (fma f128:$vA, f128:$vB,
                                                      (fneg f128:$vTi))))]>;

  // Round-to-odd variants. They reuse the opcode/xo values of the
  // instructions above via the *_Ro format classes and are selected from the
  // int_ppc_*_round_to_odd intrinsics.
  let isCommutable = 1 in {
  def XSADDQPO : X_VT5_VA5_VB5_Ro<63, 4, "xsaddqpo",
                                  [(set f128:$vT,
                                        (int_ppc_addf128_round_to_odd
                                         f128:$vA, f128:$vB))]>;
  def XSMULQPO : X_VT5_VA5_VB5_Ro<63, 36, "xsmulqpo",
                                  [(set f128:$vT,
                                        (int_ppc_mulf128_round_to_odd
                                         f128:$vA, f128:$vB))]>;
  }
  def XSSUBQPO : X_VT5_VA5_VB5_Ro<63, 516, "xssubqpo",
                                  [(set f128:$vT,
                                        (int_ppc_subf128_round_to_odd
                                         f128:$vA, f128:$vB))]>;
  def XSDIVQPO : X_VT5_VA5_VB5_Ro<63, 548, "xsdivqpo",
                                  [(set f128:$vT,
                                        (int_ppc_divf128_round_to_odd
                                         f128:$vA, f128:$vB))]>;
  def XSSQRTQPO : X_VT5_XO5_VB5_Ro<63, 27, 804, "xssqrtqpo",
                                   [(set f128:$vT,
                                         (int_ppc_sqrtf128_round_to_odd f128:$vB))]>;


  def XSMADDQPO : X_VT5_VA5_VB5_FMA_Ro<63, 388, "xsmaddqpo",
                                       [(set f128:$vT,
                                             (int_ppc_fmaf128_round_to_odd
                                              f128:$vA,f128:$vB,f128:$vTi))]>;

  def XSMSUBQPO : X_VT5_VA5_VB5_FMA_Ro<63, 420, "xsmsubqpo" ,
                                       [(set f128:$vT,
                                             (int_ppc_fmaf128_round_to_odd
                                              f128:$vA, f128:$vB, (fneg f128:$vTi)))]>;
  def XSNMADDQPO: X_VT5_VA5_VB5_FMA_Ro<63, 452, "xsnmaddqpo",
                                       [(set f128:$vT,
                                             (fneg (int_ppc_fmaf128_round_to_odd
                                                    f128:$vA, f128:$vB, f128:$vTi)))]>;
  def XSNMSUBQPO: X_VT5_VA5_VB5_FMA_Ro<63, 484, "xsnmsubqpo",
                                       [(set f128:$vT,
                                             (fneg (int_ppc_fmaf128_round_to_odd
                                                    f128:$vA, f128:$vB, (fneg f128:$vTi))))]>;

  // Additional fnmsub patterns: -a*b + c == -(a*b - c)
  // The accumulator $C is passed first because it is the tied $vTi operand.
  def : Pat<(fma (fneg f128:$A), f128:$B, f128:$C), (XSNMSUBQP $C, $A, $B)>;
  def : Pat<(fma f128:$A, (fneg f128:$B), f128:$C), (XSNMSUBQP $C, $A, $B)>;

  //===--------------------------------------------------------------------===//
  // Quad/Double-Precision Compare Instructions:

  // [PO BF // VRA VRB XO /]
  // Compare format: writes a condition-register field (crrc) from two vrrc
  // inputs. The pattern is assigned in the body rather than passed to
  // XForm_17.
  class X_BF3_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
                      list<dag> pattern>
    : XForm_17<opcode, xo, (outs crrc:$crD), (ins vrrc:$VA, vrrc:$VB),
               !strconcat(opc, " $crD, $VA, $VB"), IIC_FPCompare> {
    let Pattern = pattern;
  }

  // QP Compare Ordered/Unordered
  def XSCMPOQP : X_BF3_VA5_VB5<63, 132, "xscmpoqp", []>;
  def XSCMPUQP : X_BF3_VA5_VB5<63, 644, "xscmpuqp", []>;

  // DP/QP Compare Exponents
  def XSCMPEXPDP : XX3Form_1<60, 59,
                             (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
                             "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>;
  def XSCMPEXPQP : X_BF3_VA5_VB5<63, 164, "xscmpexpqp", []>;

  // DP Compare ==, >=, >, !=
  // Use vsrc for XT, because the entire register of XT is set.
  // XT.dword[1] = 0x0000_0000_0000_0000
  // No selection patterns are attached to these three compares.
  def XSCMPEQDP : XX3_XT5_XA5_XB5<60, 3, "xscmpeqdp", vsrc, vsfrc, vsfrc,
                                  IIC_FPCompare, []>;
  def XSCMPGEDP : XX3_XT5_XA5_XB5<60, 19, "xscmpgedp", vsrc, vsfrc, vsfrc,
                                  IIC_FPCompare, []>;
  def XSCMPGTDP : XX3_XT5_XA5_XB5<60, 11, "xscmpgtdp", vsrc, vsfrc, vsfrc,
                                  IIC_FPCompare, []>;

  //===--------------------------------------------------------------------===//
  // Quad-Precision Floating-Point Conversion Instructions:

  // Convert DP -> QP
  // The source operand uses vfrc (passed as the vbtype parameter).
  def XSCVDPQP : X_VT5_XO5_VB5_TyVB<63, 22, 836, "xscvdpqp", vfrc,
                                    [(set f128:$vT, (fpextend f64:$vB))]>;

  // Round & Convert QP -> DP (dword[1] is set to zero)
  // Only the round-to-odd form carries a pattern (from the
  // int_ppc_truncf128_round_to_odd intrinsic).
  def XSCVQPDP : X_VT5_XO5_VB5_VSFR<63, 20, 836, "xscvqpdp" , []>;
  def XSCVQPDPO : X_VT5_XO5_VB5_VSFR_Ro<63, 20, 836, "xscvqpdpo",
                                        [(set f64:$vT,
                                        (int_ppc_truncf128_round_to_odd
                                        f128:$vB))]>;

  // Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero)
  def XSCVQPSDZ : X_VT5_XO5_VB5<63, 25, 836, "xscvqpsdz", []>;
  def XSCVQPSWZ : X_VT5_XO5_VB5<63, 9, 836, "xscvqpswz", []>;
  def XSCVQPUDZ : X_VT5_XO5_VB5<63, 17, 836, "xscvqpudz", []>;
  def XSCVQPUWZ : X_VT5_XO5_VB5<63, 1, 836, "xscvqpuwz", []>;

  // Convert (Un)Signed DWord -> QP.
  // The instructions carry no pattern of their own; the Pat defs that follow
  // select them for i64 sources held in a GPR (copied to VFRC) and for values
  // that already come from a VSR through PPCmfvsr.
  def XSCVSDQP : X_VT5_XO5_VB5_TyVB<63, 10, 836, "xscvsdqp", vfrc, []>;
  def : Pat<(f128 (sint_to_fp i64:$src)),
            (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
  def : Pat<(f128 (sint_to_fp (i64 (PPCmfvsr f64:$src)))),
            (f128 (XSCVSDQP $src))>;
  def : Pat<(f128 (sint_to_fp (i32 (PPCmfvsr f64:$src)))),
            (f128 (XSCVSDQP (VEXTSW2Ds $src)))>;

  def XSCVUDQP : X_VT5_XO5_VB5_TyVB<63, 2, 836, "xscvudqp", vfrc, []>;
  def : Pat<(f128 (uint_to_fp i64:$src)),
            (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
  def : Pat<(f128 (uint_to_fp (i64 (PPCmfvsr f64:$src)))),
            (f128 (XSCVUDQP $src))>;

  // Convert (Un)Signed Word -> QP.
  // Words are moved or loaded into a VSR with MTVSRWA/LIWAX (signed) or
  // MTVSRWZ/LIWZX (unsigned) and then converted with the doubleword
  // instructions.
  def : Pat<(f128 (sint_to_fp i32:$src)),
            (f128 (XSCVSDQP (MTVSRWA $src)))>;
  def : Pat<(f128 (sint_to_fp (i32 (load xoaddr:$src)))),
            (f128 (XSCVSDQP (LIWAX xoaddr:$src)))>;
  def : Pat<(f128 (uint_to_fp i32:$src)),
            (f128 (XSCVUDQP (MTVSRWZ $src)))>;
  def : Pat<(f128 (uint_to_fp (i32 (load xoaddr:$src)))),
            (f128 (XSCVUDQP (LIWZX xoaddr:$src)))>;

  //===--------------------------------------------------------------------===//
  // Round to Floating-Point Integer Instructions

  // (Round &) Convert DP <-> HP
  // Note! xscvdphp's src and dest registers both use the left 64 bits, so we
  // use vsfrc for both. xscvhpdp's src only uses the left 16 bits, but we
  // still use vsfrc for it.
  def XSCVDPHP : XX2_XT6_XO5_XB6<60, 17, 347, "xscvdphp", vsfrc, []>;
  def XSCVHPDP : XX2_XT6_XO5_XB6<60, 16, 347, "xscvhpdp", vsfrc, []>;

  // Vector HP <-> SP
  def XVCVHPSP : XX2_XT6_XO5_XB6<60, 24, 475, "xvcvhpsp", vsrc, []>;
  def XVCVSPHP : XX2_XT6_XO5_XB6<60, 25, 475, "xvcvsphp", vsrc,
                                 [(set v4f32:$XT,
                                     (int_ppc_vsx_xvcvsphp v4f32:$XB))]>;

  // Pattern for matching Vector HP -> Vector SP intrinsic. Defined as a
  // separate pattern so that it can convert the input register class from
  // VRRC(v8i16) to VSRC.
  def : Pat<(v4f32 (int_ppc_vsx_xvcvhpsp v8i16:$A)),
            (v4f32 (XVCVHPSP (COPY_TO_REGCLASS $A, VSRC)))>;

  // Z23-form helper with operands $r (u1imm), $vB and $rmc (u2imm). The `ex`
  // bit is stored in the RC field and is what distinguishes XSRQPI (0) from
  // XSRQPIX (1) below.
  class Z23_VT5_R1_VB5_RMC2_EX1<bits<6> opcode, bits<8> xo, bit ex, string opc,
                                list<dag> pattern>
    : Z23Form_8<opcode, xo,
                (outs vrrc:$vT), (ins u1imm:$r, vrrc:$vB, u2imm:$rmc),
                !strconcat(opc, " $r, $vT, $vB, $rmc"), IIC_VecFP, pattern> {
    let RC = ex;
  }

  // Round to Quad-Precision Integer [with Inexact]
  def XSRQPI : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 0, "xsrqpi" , []>;
  def XSRQPIX : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 1, "xsrqpix", []>;

  // In the patterns below the first immediate is $r and the last is $rmc.
  // Use current rounding mode
  def : Pat<(f128 (fnearbyint f128:$vB)), (f128 (XSRQPI 0, $vB, 3))>;
  // Round to nearest, ties away from zero
  def : Pat<(f128 (fround f128:$vB)), (f128 (XSRQPI 0, $vB, 0))>;
  // Round towards Zero
  def : Pat<(f128 (ftrunc f128:$vB)), (f128 (XSRQPI 1, $vB, 1))>;
  // Round towards +Inf
  def : Pat<(f128 (fceil f128:$vB)), (f128 (XSRQPI 1, $vB, 2))>;
  // Round towards -Inf
  def : Pat<(f128 (ffloor f128:$vB)), (f128 (XSRQPI 1, $vB, 3))>;

  // Use current rounding mode, [with Inexact]
  def : Pat<(f128 (frint f128:$vB)), (f128 (XSRQPIX 0, $vB, 3))>;

  // Round Quad-Precision to Double-Extended Precision (fp80)
  def XSRQPXP : Z23_VT5_R1_VB5_RMC2_EX1<63, 37, 0, "xsrqpxp", []>;

  //===--------------------------------------------------------------------===//
  // Insert/Extract Instructions

  // Insert Exponent DP/QP
  // XT NOTE: XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU
  def XSIEXPDP : XX1Form <60, 918, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB),
                          "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>;
  // vB NOTE: only vB.dword[0] is used, which is why the X_VT5_VA5_VB5 form
  //          is not used here.
  def XSIEXPQP : XForm_18<63, 868, (outs vrrc:$vT), (ins vrrc:$vA, vsfrc:$vB),
                          "xsiexpqp $vT, $vA, $vB", IIC_VecFP, []>;

  // The i64 exponent operand is moved into a VSR with MTVSRD first.
  def : Pat<(f128 (int_ppc_scalar_insert_exp_qp f128:$vA, i64:$vB)),
            (f128 (XSIEXPQP $vA, (MTVSRD $vB)))>;

  // Extract Exponent/Significand DP/QP
  def XSXEXPDP : XX2_RT5_XO5_XB6<60, 0, 347, "xsxexpdp", []>;
  def XSXSIGDP : XX2_RT5_XO5_XB6<60, 1, 347, "xsxsigdp", []>;

  def XSXEXPQP : X_VT5_XO5_VB5 <63, 2, 804, "xsxexpqp", []>;
  def XSXSIGQP : X_VT5_XO5_VB5 <63, 18, 804, "xsxsigqp", []>;

  // The QP exponent is produced in a vector register; its sub_64 part is
  // moved to a GPR with MFVSRD.
  def : Pat<(i64 (int_ppc_scalar_extract_expq f128:$vA)),
            (i64 (MFVSRD (EXTRACT_SUBREG
                           (v2i64 (XSXEXPQP $vA)), sub_64)))>;

  // Vector Insert Word
  // XB NOTE: Only XB.dword[1] is used, but we use vsrc on XB.
  // $XTi is the incoming value of the destination; it is tied to $XT and not
  // encoded.
  def XXINSERTW :
    XX2_RD6_UIM5_RS6<60, 181, (outs vsrc:$XT),
                     (ins vsrc:$XTi, vsrc:$XB, u4imm:$UIM),
                     "xxinsertw $XT, $XB, $UIM", IIC_VecFP,
                     [(set v4i32:$XT, (PPCvecinsert v4i32:$XTi, v4i32:$XB,
                                                    imm32SExt16:$UIM))]>,
                     RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;

  // Vector Extract Unsigned Word
  def XXEXTRACTUW : XX2_RD6_UIM5_RS6<60, 165,
                                     (outs vsfrc:$XT), (ins vsrc:$XB, u4imm:$UIMM),
                                     "xxextractuw $XT, $XB, $UIMM", IIC_VecFP, []>;

  // Vector Insert Exponent DP/SP
  def XVIEXPDP : XX3_XT5_XA5_XB5<60, 248, "xviexpdp", vsrc, vsrc, vsrc,
    IIC_VecFP, [(set v2f64: $XT,(int_ppc_vsx_xviexpdp v2i64:$XA, v2i64:$XB))]>;
  def XVIEXPSP : XX3_XT5_XA5_XB5<60, 216, "xviexpsp", vsrc, vsrc, vsrc,
    IIC_VecFP, [(set v4f32: $XT,(int_ppc_vsx_xviexpsp v4i32:$XA, v4i32:$XB))]>;

  // Vector Extract Exponent/Significand DP/SP
  def XVXEXPDP : XX2_XT6_XO5_XB6<60, 0, 475, "xvxexpdp", vsrc,
                                 [(set v2i64: $XT,
                                  (int_ppc_vsx_xvxexpdp v2f64:$XB))]>;
  def XVXEXPSP : XX2_XT6_XO5_XB6<60, 8, 475, "xvxexpsp", vsrc,
                                 [(set v4i32: $XT,
                                  (int_ppc_vsx_xvxexpsp v4f32:$XB))]>;
  def XVXSIGDP : XX2_XT6_XO5_XB6<60, 1, 475, "xvxsigdp", vsrc,
                                 [(set v2i64: $XT,
                                  (int_ppc_vsx_xvxsigdp v2f64:$XB))]>;
  def XVXSIGSP : XX2_XT6_XO5_XB6<60, 9, 475, "xvxsigsp", vsrc,
                                 [(set v4i32: $XT,
                                  (int_ppc_vsx_xvxsigsp v4f32:$XB))]>;

  // NOTE(review): this let repeats the AddedComplexity/Predicates values of
  // the HasP9Vector let that appears to enclose this section; it looks
  // redundant. Confirm before removing.
  let AddedComplexity = 400, Predicates = [HasP9Vector] in {
  // Extra patterns expanding to vector Extract Word/Insert Word
  def : Pat<(v4i32 (int_ppc_vsx_xxinsertw v4i32:$A, v2i64:$B, imm:$IMM)),
            (v4i32 (XXINSERTW $A, $B, imm:$IMM))>;
  def : Pat<(v2i64 (int_ppc_vsx_xxextractuw v2i64:$A, imm:$IMM)),
            (v2i64 (COPY_TO_REGCLASS (XXEXTRACTUW $A, imm:$IMM), VSRC))>;
  } // AddedComplexity = 400, HasP9Vector

  //===--------------------------------------------------------------------===//

  // Test Data Class SP/DP/QP
  // The operand list places $DCMX before the source register, while the
  // assembly string prints the register first.
  def XSTSTDCSP : XX2_BF3_DCMX7_RS6<60, 298,
                              (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB),
                              "xststdcsp $BF, $XB, $DCMX", IIC_VecFP, []>;
  def XSTSTDCDP : XX2_BF3_DCMX7_RS6<60, 362,
                              (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB),
                              "xststdcdp $BF, $XB, $DCMX", IIC_VecFP, []>;
  def XSTSTDCQP : X_BF3_DCMX7_RS5 <63, 708,
                              (outs crrc:$BF), (ins u7imm:$DCMX, vrrc:$vB),
                              "xststdcqp $BF, $vB, $DCMX", IIC_VecFP, []>;

  // Vector Test Data Class SP/DP
  def XVTSTDCSP : XX2_RD6_DCMX7_RS6<60, 13, 5,
                              (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
                              "xvtstdcsp $XT, $XB, $DCMX", IIC_VecFP,
                              [(set v4i32: $XT,
                               (int_ppc_vsx_xvtstdcsp v4f32:$XB, timm:$DCMX))]>;
  def XVTSTDCDP : XX2_RD6_DCMX7_RS6<60, 15, 5,
                              (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
                              "xvtstdcdp $XT, $XB, $DCMX", IIC_VecFP,
                              [(set v2i64: $XT,
                               (int_ppc_vsx_xvtstdcdp v2f64:$XB, timm:$DCMX))]>;

  //===--------------------------------------------------------------------===//

  // Maximum/Minimum Type-C/Type-J DP
  // The Type-C forms are selected from PPCxsmaxc/PPCxsminc; the Type-J forms
  // have no pattern and use vsrc for the result operand.
  def XSMAXCDP : XX3_XT5_XA5_XB5<60, 128, "xsmaxcdp", vsfrc, vsfrc, vsfrc,
                                 IIC_VecFP,
                                 [(set f64:$XT, (PPCxsmaxc f64:$XA, f64:$XB))]>;
  def XSMAXJDP : XX3_XT5_XA5_XB5<60, 144, "xsmaxjdp", vsrc, vsfrc, vsfrc,
                                 IIC_VecFP, []>;
  def XSMINCDP : XX3_XT5_XA5_XB5<60, 136, "xsmincdp", vsfrc, vsfrc, vsfrc,
                                 IIC_VecFP,
                                 [(set f64:$XT, (PPCxsminc f64:$XA, f64:$XB))]>;
  def XSMINJDP : XX3_XT5_XA5_XB5<60, 152, "xsminjdp", vsrc, vsfrc, vsfrc,
                                 IIC_VecFP, []>;

  //===--------------------------------------------------------------------===//

  // Vector Byte-Reverse H/W/D/Q Word
  // Only the word and doubleword forms match bswap directly; the halfword
  // and quadword forms are selected by the Pat defs below.
  def XXBRH : XX2_XT6_XO5_XB6<60, 7, 475, "xxbrh", vsrc, []>;
  def XXBRW : XX2_XT6_XO5_XB6<60, 15, 475, "xxbrw", vsrc,
                              [(set v4i32:$XT, (bswap v4i32:$XB))]>;
  def XXBRD : XX2_XT6_XO5_XB6<60, 23, 475, "xxbrd", vsrc,
                              [(set v2i64:$XT, (bswap v2i64:$XB))]>;
  def XXBRQ : XX2_XT6_XO5_XB6<60, 31, 475, "xxbrq", vsrc, []>;

  // Vector Reverse
  // The operand is copied to VSRC for the instruction and the result is
  // copied back to VRRC.
  def : Pat<(v8i16 (bswap v8i16 :$A)),
            (v8i16 (COPY_TO_REGCLASS (XXBRH (COPY_TO_REGCLASS $A, VSRC)), VRRC))>;
  def : Pat<(v1i128 (bswap v1i128 :$A)),
            (v1i128 (COPY_TO_REGCLASS (XXBRQ (COPY_TO_REGCLASS $A, VSRC)), VRRC))>;

  // Vector Permute
  def XXPERM : XX3_XT5_XA5_XB5<60, 26, "xxperm" , vsrc, vsrc, vsrc,
                               IIC_VecPerm, []>;
  def XXPERMR : XX3_XT5_XA5_XB5<60, 58, "xxpermr", vsrc, vsrc, vsrc,
                                IIC_VecPerm, []>;

  // Vector Splat Immediate Byte
  def XXSPLTIB : X_RD6_IMM8<60, 360, (outs vsrc:$XT), (ins u8imm:$IMM8),
                            "xxspltib $XT, $IMM8", IIC_VecPerm, []>;

  //===--------------------------------------------------------------------===//
  // Vector/Scalar Load/Store Instructions

  // When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
  // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
  // Load Vector
  def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src),
                            "lxv $XT, $src", IIC_LdStLFD, []>;
  // Load DWord
  def LXSD : DSForm_1<57, 2, (outs vfrc:$vD), (ins memrix:$src),
                      "lxsd $vD, $src", IIC_LdStLFD, []>;
  // Load SP from src, convert it to DP, and place in dword[0]
  def LXSSP : DSForm_1<57, 3, (outs vfrc:$vD), (ins memrix:$src),
                       "lxssp $vD, $src", IIC_LdStLFD, []>;

  // [PO T RA RB XO TX] almost equal to [PO S RA RB XO SX], but has different
  // "out" and "in" dag.
  // Helper for indexed loads: one register output ($XT) of class `vtype`,
  // one memrr input ($src), asm string "<opc> $XT, $src".
  class X_XT6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
                      RegisterOperand vtype, list<dag> pattern>
    : XX1Form_memOp<opcode, xo, (outs vtype:$XT), (ins memrr:$src),
                    !strconcat(opc, " $XT, $src"), IIC_LdStLFD, pattern>;

  // Load as Integer Byte/Halfword & Zero Indexed
  // (the trailing 1/2 operand of PPClxsizx distinguishes the two forms)
  def LXSIBZX : X_XT6_RA5_RB5<31, 781, "lxsibzx", vsfrc,
                              [(set f64:$XT, (PPClxsizx xoaddr:$src, 1))]>;
  def LXSIHZX : X_XT6_RA5_RB5<31, 813, "lxsihzx", vsfrc,
                              [(set f64:$XT, (PPClxsizx xoaddr:$src, 2))]>;

  // Load Vector Halfword*8/Byte*16 Indexed
  def LXVH8X : X_XT6_RA5_RB5<31, 812, "lxvh8x" , vsrc, []>;
  def LXVB16X : X_XT6_RA5_RB5<31, 876, "lxvb16x", vsrc, []>;

  // Load Vector Indexed
  def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc,
                           [(set v2f64:$XT, (load xaddrX16:$src))]>;

  // Load Vector (Left-justified) with Length
  def LXVL : XX1Form_memOp<31, 269, (outs vsrc:$XT),
                           (ins memr:$src, g8rc:$rB),
                           "lxvl $XT, $src, $rB", IIC_LdStLoad,
                           [(set v4i32:$XT,
                                 (int_ppc_vsx_lxvl addr:$src, i64:$rB))]>;
  def LXVLL : XX1Form_memOp<31,301, (outs vsrc:$XT),
                            (ins memr:$src, g8rc:$rB),
                            "lxvll $XT, $src, $rB", IIC_LdStLoad,
                            [(set v4i32:$XT,
                                  (int_ppc_vsx_lxvll addr:$src, i64:$rB))]>;

  // Load Vector Word & Splat Indexed
  def LXVWSX : X_XT6_RA5_RB5<31, 364, "lxvwsx" , vsrc, []>;
} // mayLoad

// When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
// PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in {
  // Store Vector
  def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst),
                             "stxv $XT, $dst", IIC_LdStSTFD, []>;
  // Store DWord
  def STXSD : DSForm_1<61, 2, (outs), (ins vfrc:$vS, memrix:$dst),
                       "stxsd $vS, $dst", IIC_LdStSTFD, []>;
  // Convert DP of dword[0] to SP, and Store to dst
  def STXSSP : DSForm_1<61, 3, (outs), (ins vfrc:$vS, memrix:$dst),
                        "stxssp $vS, $dst", IIC_LdStSTFD, []>;

  // [PO S RA RB XO SX]
  // Helper for indexed stores: no outputs, register input ($XT) of class
  // `vtype` plus a memrr input ($dst), asm string "<opc> $XT, $dst".
  class X_XS6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
                      RegisterOperand vtype, list<dag> pattern>
    : XX1Form_memOp<opcode, xo, (outs), (ins vtype:$XT, memrr:$dst),
                    !strconcat(opc, " $XT, $dst"), IIC_LdStSTFD, pattern>;

  // Store as Integer Byte/Halfword Indexed
  def STXSIBX : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsfrc,
                              [(PPCstxsix f64:$XT, xoaddr:$dst, 1)]>;
  def STXSIHX : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsfrc,
                              [(PPCstxsix f64:$XT, xoaddr:$dst, 2)]>;
  // Codegen-only twins of the two stores above (same opcode and mnemonic)
  // whose source operand is a vsrc register instead of vsfrc.
  let isCodeGenOnly = 1 in {
    def STXSIBXv : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsrc, []>;
    def STXSIHXv : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsrc, []>;
  }

  // Store Vector Halfword*8/Byte*16 Indexed
  def STXVH8X : X_XS6_RA5_RB5<31, 940, "stxvh8x" , vsrc, []>;
  def STXVB16X : X_XS6_RA5_RB5<31, 1004, "stxvb16x", vsrc, []>;

  // Store Vector Indexed
  def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc,
                            [(store v2f64:$XT, xaddrX16:$dst)]>;

  // Store Vector (Left-justified) with Length
  // NOTE(review): both defs below use the IIC_LdStLoad itinerary although they
  // are stores, unlike the IIC_LdStSTFD siblings above -- confirm intended.
  def STXVL : XX1Form_memOp<31, 397, (outs),
                            (ins vsrc:$XT, memr:$dst, g8rc:$rB),
                            "stxvl $XT, $dst, $rB", IIC_LdStLoad,
                            [(int_ppc_vsx_stxvl v4i32:$XT, addr:$dst,
                                                i64:$rB)]>;
  def STXVLL : XX1Form_memOp<31, 429, (outs),
                             (ins vsrc:$XT, memr:$dst, g8rc:$rB),
                             "stxvll $XT, $dst, $rB", IIC_LdStLoad,
                             [(int_ppc_vsx_stxvll v4i32:$XT, addr:$dst,
                                                  i64:$rB)]>;
} // mayStore

// Signed word element -> f32/f64. Each pair below is
// [vector element, XXSPLTW word index]; on little-endian the word index is
// (3 - element).
let Predicates = [IsLittleEndian] in {
  foreach Idx = [[0,3],[1,2],[2,1],[3,0]] in {
    def : Pat<(f32 (PPCfcfids
                     (f64 (PPCmtvsra
                            (i32 (extractelt v4i32:$A, !head(Idx))))))),
              (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, !head(!tail(Idx))))))>;
  }
  foreach Idx = [[0,3],[1,2],[2,1],[3,0]] in {
    def : Pat<(f64 (PPCfcfid
                     (f64 (PPCmtvsra
                            (i32 (extractelt v4i32:$A, !head(Idx))))))),
              (f64 (COPY_TO_REGCLASS
                     (XVCVSXWDP (XXSPLTW $A, !head(!tail(Idx)))), VSFRC))>;
  }
}

// Same conversions on big-endian: the splat index equals the element index.
let Predicates = [IsBigEndian] in {
  foreach Idx = 0-3 in {
    def : Pat<(f32 (PPCfcfids
                     (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, Idx)))))),
              (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, Idx))))>;
  }
  foreach Idx = 0-3 in {
    def : Pat<(f64 (PPCfcfid
                     (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, Idx)))))),
              (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, Idx)), VSFRC))>;
  }
}

// Alternate patterns for PPCmtvsrz where the output is v8i16 or v16i8 instead
// of f64
def : Pat<(v8i16 (PPCmtvsrz i32:$A)),
          (v8i16 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>;
def : Pat<(v16i8 (PPCmtvsrz i32:$A)),
          (v16i8 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>;

// Patterns for which instructions from ISA 3.0 are a better match
let Predicates = [IsLittleEndian, HasP9Vector] in {
  // Each pair is [vector element, byte offset passed to XXEXTRACTUW or
  // XXINSERTW]; on little-endian the offset is 4 * (3 - element).
  foreach Idx = [[0,12],[1,8],[2,4],[3,0]] in {
    def : Pat<(f32 (PPCfcfidus
                     (f64 (PPCmtvsrz
                            (i32 (extractelt v4i32:$A, !head(Idx))))))),
              (f32 (XSCVUXDSP (XXEXTRACTUW $A, !head(!tail(Idx)))))>;
  }
  foreach Idx = [[0,12],[1,8],[2,4],[3,0]] in {
    def : Pat<(f64 (PPCfcfidu
                     (f64 (PPCmtvsrz
                            (i32 (extractelt v4i32:$A, !head(Idx))))))),
              (f64 (XSCVUXDDP (XXEXTRACTUW $A, !head(!tail(Idx)))))>;
  }
  foreach Idx = [[0,12],[1,8],[2,4],[3,0]] in {
    def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, !head(Idx))),
              (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1,
                                !head(!tail(Idx))))>;
  }
  foreach Idx = [[0,12],[1,8],[2,4],[3,0]] in {
    def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, !head(Idx))),
              (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1,
                                !head(!tail(Idx))))>;
  }

  // PPCld_vec_be / PPCst_vec_be on halfword and byte vectors map to the
  // LXVH8X/STXVH8X and LXVB16X/STXVB16X instructions.
  def : Pat<(v8i16 (PPCld_vec_be xoaddr:$src)),
            (COPY_TO_REGCLASS (LXVH8X xoaddr:$src), VRRC)>;
  def : Pat<(PPCst_vec_be v8i16:$rS, xoaddr:$dst),
            (STXVH8X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;

  def : Pat<(v16i8 (PPCld_vec_be xoaddr:$src)),
            (COPY_TO_REGCLASS (LXVB16X xoaddr:$src), VRRC)>;
  def : Pat<(PPCst_vec_be v16i8:$rS, xoaddr:$dst),
            (STXVB16X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;
} // IsLittleEndian, HasP9Vector

let Predicates = [IsBigEndian, HasP9Vector] in {
  // On big-endian the byte offset is simply 4 * element (Idx << 2).
  foreach Idx = 0-3 in {
    def : Pat<(f32 (PPCfcfidus
                     (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, Idx)))))),
              (f32 (XSCVUXDSP (XXEXTRACTUW $A, !shl(Idx, 2))))>;
  }
  foreach Idx = 0-3 in {
    def : Pat<(f64 (PPCfcfidu
                     (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, Idx)))))),
              (f64 (XSCVUXDDP (XXEXTRACTUW $A, !shl(Idx, 2))))>;
  }
  foreach Idx = 0-3 in {
    def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, Idx)),
              (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1,
                                !shl(Idx, 2)))>;
  }
  foreach Idx = 0-3 in {
    def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, Idx)),
              (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1,
                                !shl(Idx, 2)))>;
  }
} // IsBigEndian, HasP9Vector

// D-Form Load/Store
def : Pat<(v4i32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
def : Pat<(v4f32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
def : Pat<(v2i64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
def : Pat<(v2f64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
// f128 results are copied to VRRC after the load.
def : Pat<(f128 (quadwOffsetLoad iaddrX16:$src)),
          (COPY_TO_REGCLASS (LXV memrix16:$src), VRRC)>;
def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iaddrX16:$src)), (LXV memrix16:$src)>;
def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iaddrX16:$src)), (LXV memrix16:$src)>;

def : Pat<(quadwOffsetStore v4f32:$rS, iaddrX16:$dst),
          (STXV $rS, memrix16:$dst)>;
def : Pat<(quadwOffsetStore v4i32:$rS, iaddrX16:$dst),
          (STXV $rS, memrix16:$dst)>;
def : Pat<(quadwOffsetStore v2f64:$rS, iaddrX16:$dst),
          (STXV $rS, memrix16:$dst)>;
// f128 sources are copied to VSRC before the store.
def : Pat<(quadwOffsetStore f128:$rS, iaddrX16:$dst),
          (STXV (COPY_TO_REGCLASS $rS, VSRC), memrix16:$dst)>;
def : Pat<(quadwOffsetStore v2i64:$rS, iaddrX16:$dst),
          (STXV $rS, memrix16:$dst)>;
def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iaddrX16:$dst),
          (STXV $rS, memrix16:$dst)>;
def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iaddrX16:$dst),
          (STXV $rS, memrix16:$dst)>;

// Indexed (xoaddr) counterparts of the patterns above, selecting LXVX/STXVX.
def : Pat<(v2f64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
def : Pat<(v2i64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
def : Pat<(v4f32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
def : Pat<(v4i32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVX xoaddr:$src)>;
def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>;
def : Pat<(f128 (nonQuadwOffsetLoad xoaddr:$src)),
          (COPY_TO_REGCLASS (LXVX xoaddr:$src), VRRC)>;
def : Pat<(nonQuadwOffsetStore f128:$rS, xoaddr:$dst),
          (STXVX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;
def : Pat<(nonQuadwOffsetStore v2f64:$rS, xoaddr:$dst),
          (STXVX $rS, xoaddr:$dst)>;
def : Pat<(nonQuadwOffsetStore v2i64:$rS, xoaddr:$dst),
          (STXVX $rS, xoaddr:$dst)>;
def : Pat<(nonQuadwOffsetStore v4f32:$rS, xoaddr:$dst),
          (STXVX $rS, xoaddr:$dst)>;
def : Pat<(nonQuadwOffsetStore v4i32:$rS, xoaddr:$dst),
          (STXVX $rS, xoaddr:$dst)>;
def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
          (STXVX $rS, xoaddr:$dst)>;
def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
          (STXVX $rS, xoaddr:$dst)>;

let AddedComplexity = 400 in {
  // LIWAX - This instruction is used for sign extending i32 -> i64.
  // LIWZX - This instruction will be emitted for i32, f32, and when
  //         zero-extending i32 to i64 (zext i32 -> i64).
  let Predicates = [IsLittleEndian] in {
    // Little-endian: the loaded value goes through XXPERMDIs with selector 2.
    def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))),
              (v2i64 (XXPERMDIs
                       (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC), 2))>;

    def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))),
              (v2i64 (XXPERMDIs
                       (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>;

    def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
              (v4i32 (XXPERMDIs
                       (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>;

    def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
              (v4f32 (XXPERMDIs
                       (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>;
  }

  let Predicates = [IsBigEndian] in {
    // Big-endian: the v2i64 forms need only a register-class copy; the
    // 32-bit forms additionally go through XXSLDWIs with shift 1.
    def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))),
              (v2i64 (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC))>;

    def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))),
              (v2i64 (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC))>;

    def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
              (v4i32 (XXSLDWIs
                       (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>;

    def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
              (v4f32 (XXSLDWIs
                       (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>;
  }

}

// Build vectors from i8 loads
def : Pat<(v16i8 (scalar_to_vector ScalarLoads.Li8)),
          (v16i8 (VSPLTBs 7, (LXSIBZX xoaddr:$src)))>;
def : Pat<(v8i16 (scalar_to_vector ScalarLoads.ZELi8)),
          (v8i16 (VSPLTHs 3, (LXSIBZX xoaddr:$src)))>;
// i8 loads widened to word / doubleword vectors. Every pattern here loads
// with LXSIBZX; the sign-extending forms add VEXTSB2Ws / VEXTSB2Ds.
def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi8)),
          (v4i32 (XXSPLTWs (LXSIBZX xoaddr:$src), 1))>;
def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi8i64)),
          (v2i64 (XXPERMDIs (LXSIBZX xoaddr:$src), 0))>;
def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi8)),
          (v4i32 (XXSPLTWs (VEXTSB2Ws (LXSIBZX xoaddr:$src)), 1))>;
def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi8i64)),
          (v2i64 (XXPERMDIs (VEXTSB2Ds (LXSIBZX xoaddr:$src)), 0))>;

// Build vectors from i16 loads
def : Pat<(v8i16 (scalar_to_vector ScalarLoads.Li16)),
          (v8i16 (VSPLTHs 3, (LXSIHZX xoaddr:$src)))>;
def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi16)),
          (v4i32 (XXSPLTWs (LXSIHZX xoaddr:$src), 1))>;
def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi16i64)),
          (v2i64 (XXPERMDIs (LXSIHZX xoaddr:$src), 0))>;
def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi16)),
          (v4i32 (XXSPLTWs (VEXTSH2Ws (LXSIHZX xoaddr:$src)), 1))>;
def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)),
          (v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>;

// Truncating stores of a single vector element. In the tables below each
// pair is [vector element, VSLDOI shift amount]: the vector is rotated with
// VSLDOI before STXSIBXv/STXSIHXv stores it. The one element per group that
// needs no rotation gets its own pattern, kept at its original position.
let Predicates = [IsBigEndian, HasP9Vector] in {
  // Scalar stores of i8
  foreach Idx = [[0,9],[1,10],[2,11],[3,12],[4,13],[5,14],[6,15]] in {
    def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, !head(Idx))),
                            xoaddr:$dst),
              (STXSIBXv (COPY_TO_REGCLASS
                          (v16i8 (VSLDOI $S, $S, !head(!tail(Idx)))), VSRC),
                        xoaddr:$dst)>;
  }
  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst),
            (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
  foreach Idx = [[8,1],[9,2],[10,3],[11,4],[12,5],[13,6],[14,7],[15,8]] in {
    def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, !head(Idx))),
                            xoaddr:$dst),
              (STXSIBXv (COPY_TO_REGCLASS
                          (v16i8 (VSLDOI $S, $S, !head(!tail(Idx)))), VSRC),
                        xoaddr:$dst)>;
  }

  // Scalar stores of i16
  foreach Idx = [[0,10],[1,12],[2,14]] in {
    def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, !head(Idx))),
                             xoaddr:$dst),
              (STXSIHXv (COPY_TO_REGCLASS
                          (v16i8 (VSLDOI $S, $S, !head(!tail(Idx)))), VSRC),
                        xoaddr:$dst)>;
  }
  def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst),
            (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
  foreach Idx = [[4,2],[5,4],[6,6],[7,8]] in {
    def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, !head(Idx))),
                             xoaddr:$dst),
              (STXSIHXv (COPY_TO_REGCLASS
                          (v16i8 (VSLDOI $S, $S, !head(!tail(Idx)))), VSRC),
                        xoaddr:$dst)>;
  }
} // IsBigEndian, HasP9Vector

let Predicates = [IsLittleEndian, HasP9Vector] in {
  // Scalar stores of i8
  foreach Idx = [[0,8],[1,7],[2,6],[3,5],[4,4],[5,3],[6,2],[7,1]] in {
    def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, !head(Idx))),
                            xoaddr:$dst),
              (STXSIBXv (COPY_TO_REGCLASS
                          (v16i8 (VSLDOI $S, $S, !head(!tail(Idx)))), VSRC),
                        xoaddr:$dst)>;
  }
  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst),
            (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
  foreach Idx = [[9,15],[10,14],[11,13],[12,12],[13,11],[14,10],[15,9]] in {
    def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, !head(Idx))),
                            xoaddr:$dst),
              (STXSIBXv (COPY_TO_REGCLASS
                          (v16i8 (VSLDOI $S, $S, !head(!tail(Idx)))), VSRC),
                        xoaddr:$dst)>;
  }

  // Scalar stores of i16
  foreach Idx = [[0,8],[1,6],[2,4],[3,2]] in {
    def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, !head(Idx))),
                             xoaddr:$dst),
              (STXSIHXv (COPY_TO_REGCLASS
                          (v16i8 (VSLDOI $S, $S, !head(!tail(Idx)))), VSRC),
                        xoaddr:$dst)>;
  }
  def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst),
            (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
  foreach Idx = [[5,14],[6,12],[7,10]] in {
    def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, !head(Idx))),
                             xoaddr:$dst),
              (STXSIHXv (COPY_TO_REGCLASS
                          (v16i8 (VSLDOI $S, $S, !head(!tail(Idx)))), VSRC),
                        xoaddr:$dst)>;
  }
} // IsLittleEndian, HasP9Vector


// Vector sign extensions
// (the second PPCVexts operand, 1 or 2, selects VEXTSB2Ds or VEXTSH2Ds)
def : Pat<(f64 (PPCVexts f64:$A, 1)),
          (f64 (COPY_TO_REGCLASS (VEXTSB2Ds $A), VSFRC))>;
def : Pat<(f64 (PPCVexts f64:$A, 2)),
          (f64 (COPY_TO_REGCLASS (VEXTSH2Ds $A), VSFRC))>;

// Post-RA-expanded pseudos for f32/f64 loads and stores through an iaddrX4
// address.
def DFLOADf32 : PPCPostRAExpPseudo<(outs vssrc:$XT), (ins memrix:$src),
                                   "#DFLOADf32",
                                   [(set f32:$XT, (load iaddrX4:$src))]>;
def DFLOADf64 : PPCPostRAExpPseudo<(outs vsfrc:$XT), (ins memrix:$src),
                                   "#DFLOADf64",
                                   [(set f64:$XT, (load iaddrX4:$src))]>;
def DFSTOREf32 : PPCPostRAExpPseudo<(outs), (ins vssrc:$XT, memrix:$dst),
                                    "#DFSTOREf32",
                                    [(store f32:$XT, iaddrX4:$dst)]>;
def DFSTOREf64 : PPCPostRAExpPseudo<(outs), (ins vsfrc:$XT, memrix:$dst),
                                    "#DFSTOREf64",
                                    [(store f64:$XT, iaddrX4:$dst)]>;

def : Pat<(f64 (extloadf32 iaddrX4:$src)),
          (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$src), VSFRC)>;
def : Pat<(f32 (fpround (f64 (extloadf32 iaddrX4:$src)))),
          (f32 (DFLOADf32 iaddrX4:$src))>;

def : Pat<(v4f32 (PPCldvsxlh xaddr:$src)),
          (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC)>;
def : Pat<(v4f32 (PPCldvsxlh iaddrX4:$src)),
          (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC)>;

let AddedComplexity = 400 in {
  // The following pseudoinstructions are used to ensure the utilization
  // of all 64 VSX registers.
  let Predicates = [IsLittleEndian, HasP9Vector] in {
    // Loads: the loaded doubleword goes through XXPERMDIs with selector 2.
    def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))),
              (v2i64 (XXPERMDIs
                       (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC), 2))>;
    def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))),
              (v2i64 (XXPERMDIs
                       (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC), 2))>;

    def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))),
              (v2f64 (XXPERMDIs
                       (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC), 2))>;
    def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))),
              (v2f64 (XXPERMDIs
                       (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC), 2))>;
    // Stores: element 0 is permuted (XXPERMDI ..., 2) before sub_64 is
    // extracted; element 1 is taken from sub_64 directly.
    def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src),
              (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
                                          sub_64), xaddrX4:$src)>;
    def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src),
              (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
                                          sub_64), xaddrX4:$src)>;
    def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src),
              (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
    def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src),
              (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
    def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$src),
              (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
                                          sub_64), iaddrX4:$src)>;
    def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$src),
              (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
                          iaddrX4:$src)>;
    def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$src),
              (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
    def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$src),
              (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
  } // IsLittleEndian, HasP9Vector

  let Predicates = [IsBigEndian, HasP9Vector] in {
    // Loads: only a register-class copy is needed.
    def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))),
              (v2i64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>;
    def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))),
              (v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>;

    def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))),
              (v2f64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>;
    def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))),
              (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>;
    // Stores: here element 1 is the one that is permuted first; element 0 is
    // taken from sub_64 directly.
    def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src),
              (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
                                          sub_64), xaddrX4:$src)>;
    def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src),
              (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
                                          sub_64), xaddrX4:$src)>;
    def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src),
              (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
    def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src),
              (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
    def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$src),
              (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
                                          sub_64), iaddrX4:$src)>;
    def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$src),
              (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
                                          sub_64), iaddrX4:$src)>;
    def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$src),
              (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
    def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$src),
              (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
  } // IsBigEndian, HasP9Vector
}

let Predicates = [IsBigEndian, HasP9Vector] in {

  // (Un)Signed DWord vector extract -> QP
  def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))),
            (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
  def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 1)))),
            (f128 (XSCVSDQP
                    (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
  def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 0)))),
            (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
  def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 1)))),
            (f128 (XSCVUDQP
                    (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;

  // (Un)Signed Word vector extract -> QP
  // Element 1 is sign-extended in place; the other elements are splatted
  // with VSPLTW first.
  def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, 1)))),
            (f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSW2D $src), sub_64)))>;
  foreach Idx = [0,2,3] in {
    def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, Idx)))),
              (f128 (XSCVSDQP (EXTRACT_SUBREG
                                (VEXTSW2D (VSPLTW Idx, $src)), sub_64)))>;
  }
  foreach Idx = 0-3 in {
    def : Pat<(f128 (uint_to_fp (i32 (extractelt v4i32:$src, Idx)))),
              (f128 (XSCVUDQP (XXEXTRACTUW $src, !shl(Idx, 2))))>;
  }

  // (Un)Signed HWord vector extract -> QP
  // VEXTRACTUH is given 2 * element (Idx + Idx).
  foreach Idx = 0-7 in {
    def : Pat<(f128 (sint_to_fp
                      (i32 (sext_inreg
                             (vector_extract v8i16:$src, Idx), i16)))),
              (f128 (XSCVSDQP (EXTRACT_SUBREG
                                (VEXTSH2D (VEXTRACTUH !add(Idx, Idx), $src)),
                                sub_64)))>;
    // The SDAG adds the `and` since an `i16` is being extracted as an `i32`.
    def : Pat<(f128 (uint_to_fp
                      (and (i32 (vector_extract v8i16:$src, Idx)), 65535))),
              (f128 (XSCVUDQP (EXTRACT_SUBREG
                                (VEXTRACTUH !add(Idx, Idx), $src), sub_64)))>;
  }

  // (Un)Signed Byte vector extract -> QP
  foreach Idx = 0-15 in {
    def : Pat<(f128 (sint_to_fp
                      (i32 (sext_inreg (vector_extract v16i8:$src, Idx),
                                       i8)))),
              (f128 (XSCVSDQP (EXTRACT_SUBREG
                                (VEXTSB2D (VEXTRACTUB Idx, $src)), sub_64)))>;
    def : Pat<(f128 (uint_to_fp
                      (and (i32 (vector_extract v16i8:$src, Idx)), 255))),
              (f128 (XSCVUDQP
                      (EXTRACT_SUBREG (VEXTRACTUB Idx, $src), sub_64)))>;
  }

  // Unsigned int in VSX register -> QP
  def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))),
            (f128 (XSCVUDQP
                    (XXEXTRACTUW (SUBREG_TO_REG (i64 1), $src, sub_64), 4)))>;
} // IsBigEndian, HasP9Vector

let Predicates = [IsLittleEndian, HasP9Vector] in {

  // (Un)Signed DWord vector extract -> QP
  def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))),
            (f128 (XSCVSDQP
                    (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
  def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 1)))),
            (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
  def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 0)))),
            (f128 (XSCVUDQP
                    (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
  def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 1)))),
            (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;

  // (Un)Signed Word vector extract -> QP
  // Pairs are [vector element, VSPLTW index]; element 2 needs no splat and
  // is handled by the stand-alone pattern that follows the loop.
  foreach Idx = [[0,3],[1,2],[3,0]] in {
    def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, !head(Idx))))),
              (f128 (XSCVSDQP (EXTRACT_SUBREG
                                (VEXTSW2D (VSPLTW !head(!tail(Idx)), $src)),
                                sub_64)))>;
  }
  def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, 2)))),
            (f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSW2D $src), sub_64)))>;

  // Pairs are [vector element, XXEXTRACTUW byte offset].
  foreach Idx = [[0,12],[1,8],[2,4],[3,0]] in {
    def : Pat<(f128 (uint_to_fp (i32 (extractelt v4i32:$src, !head(Idx))))),
              (f128 (XSCVUDQP (XXEXTRACTUW $src, !head(!tail(Idx)))))>;
  }

  // (Un)Signed HWord vector extract -> QP
  // The Nested foreach lists identifies the vector element and corresponding
  // register byte location.
  foreach Idx = [[0,14],[1,12],[2,10],[3,8],[4,6],[5,4],[6,2],[7,0]] in {
    def : Pat<(f128 (sint_to_fp
                      (i32 (sext_inreg
                             (vector_extract v8i16:$src, !head(Idx)), i16)))),
              (f128 (XSCVSDQP
                      (EXTRACT_SUBREG (VEXTSH2D
                                        (VEXTRACTUH !head(!tail(Idx)), $src)),
                                      sub_64)))>;
    def : Pat<(f128 (uint_to_fp
                      (and (i32 (vector_extract v8i16:$src, !head(Idx))),
                           65535))),
              (f128 (XSCVUDQP (EXTRACT_SUBREG
                                (VEXTRACTUH !head(!tail(Idx)), $src), sub_64)))>;
  }

  // (Un)Signed Byte vector extract -> QP
  foreach Idx = [[0,15],[1,14],[2,13],[3,12],[4,11],[5,10],[6,9],[7,8],[8,7],
                 [9,6],[10,5],[11,4],[12,3],[13,2],[14,1],[15,0]] in {
    def : Pat<(f128 (sint_to_fp
                      (i32 (sext_inreg
                             (vector_extract v16i8:$src, !head(Idx)), i8)))),
              (f128 (XSCVSDQP
                      (EXTRACT_SUBREG
                        (VEXTSB2D (VEXTRACTUB !head(!tail(Idx)), $src)),
                        sub_64)))>;
    def : Pat<(f128 (uint_to_fp
                      (and (i32 (vector_extract v16i8:$src, !head(Idx))),
                           255))),
              (f128 (XSCVUDQP
                      (EXTRACT_SUBREG
                        (VEXTRACTUB !head(!tail(Idx)), $src), sub_64)))>;
  }

  // Unsigned int in VSX register -> QP
  def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))),
            (f128 (XSCVUDQP
                    (XXEXTRACTUW (SUBREG_TO_REG (i64 1), $src, sub_64), 8)))>;
} // IsLittleEndian, HasP9Vector

// Convert (Un)Signed DWord in memory -> QP
def : Pat<(f128 (sint_to_fp (i64 (load xaddrX4:$src)))),
          (f128 (XSCVSDQP (LXSDX xaddrX4:$src)))>;
def : Pat<(f128 (sint_to_fp (i64 (load iaddrX4:$src)))),
          (f128 (XSCVSDQP (LXSD iaddrX4:$src)))>;
def : Pat<(f128 (uint_to_fp (i64 (load xaddrX4:$src)))),
          (f128 (XSCVUDQP (LXSDX xaddrX4:$src)))>;
def : Pat<(f128 (uint_to_fp (i64 (load iaddrX4:$src)))),
          (f128 (XSCVUDQP (LXSD iaddrX4:$src)))>;

// Convert Unsigned HWord in memory -> QP
// The result binds $src as xoaddr, like every other ScalarLoads.*Li16
// pattern that selects LXSIHZX.
def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi16)),
          (f128 (XSCVUDQP (LXSIHZX xoaddr:$src)))>;

// Convert Unsigned Byte in memory -> QP
def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi8)),
          (f128 (XSCVUDQP (LXSIBZX xoaddr:$src)))>;

// Truncate & Convert QP -> (Un)Signed (D)Word.
def : Pat<(i64 (fp_to_sint f128:$src)), (i64 (MFVRD (XSCVQPSDZ $src)))>;
def : Pat<(i64 (fp_to_uint f128:$src)), (i64 (MFVRD (XSCVQPUDZ $src)))>;
def : Pat<(i32 (fp_to_sint f128:$src)),
          (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC)))>;
def : Pat<(i32 (fp_to_uint f128:$src)),
          (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC)))>;

// Instructions for store(fptosi).
// The 8-byte version is repeated here due to availability of D-Form STXSD.
// store(fptosi) patterns: the value converted by PPCcv_fp_to_sint_in_vsr is
// stored by PPCstore_scal_int_from_vsr without leaving the VSR. The trailing
// integer operand of the store node (8, 4, 2, 1) selects the store
// instruction: STXSDX/STXSD, STXSIWX, STXSIHX, STXSIBX respectively
// (presumably the store width in bytes -- confirm against the node's
// definition).
//
// f128 sources: convert with XSCVQPSDZ (8) or XSCVQPSWZ (4/2/1), then copy
// the result to VFRC for the store.
  def : Pat<(PPCstore_scal_int_from_vsr
              (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xaddrX4:$dst, 8),
            (STXSDX (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC),
                    xaddrX4:$dst)>;
  def : Pat<(PPCstore_scal_int_from_vsr
              (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), iaddrX4:$dst, 8),
            (STXSD (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC),
                   iaddrX4:$dst)>;
  def : Pat<(PPCstore_scal_int_from_vsr
              (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 4),
            (STXSIWX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>;
  def : Pat<(PPCstore_scal_int_from_vsr
              (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 2),
            (STXSIHX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>;
  def : Pat<(PPCstore_scal_int_from_vsr
              (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 1),
            (STXSIBX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>;
  // f64 sources: convert with XSCVDPSXDS (8) or XSCVDPSXWS (2/1).
  // NOTE(review): there is no f64 pattern for operand value 4 in this group;
  // presumably it is provided elsewhere in the file -- confirm.
  def : Pat<(PPCstore_scal_int_from_vsr
              (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xaddrX4:$dst, 8),
            (STXSDX (XSCVDPSXDS f64:$src), xaddrX4:$dst)>;
  def : Pat<(PPCstore_scal_int_from_vsr
              (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), iaddrX4:$dst, 8),
            (STXSD (XSCVDPSXDS f64:$src), iaddrX4:$dst)>;
  def : Pat<(PPCstore_scal_int_from_vsr
              (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 2),
            (STXSIHX (XSCVDPSXWS f64:$src), xoaddr:$dst)>;
  def : Pat<(PPCstore_scal_int_from_vsr
              (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 1),
            (STXSIBX (XSCVDPSXWS f64:$src), xoaddr:$dst)>;

  // Instructions for store(fptoui).
// store(fptoui) patterns: unsigned counterparts of the store(fptosi) group.
// The value converted by PPCcv_fp_to_uint_in_vsr is stored directly from the
// VSR; the trailing integer operand (8, 4, 2, 1) selects STXSDX/STXSD,
// STXSIWX, STXSIHX or STXSIBX.
//
// f128 sources: convert with XSCVQPUDZ (8) or XSCVQPUWZ (4/2/1), then copy
// the result to VFRC for the store.
  def : Pat<(PPCstore_scal_int_from_vsr
              (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xaddrX4:$dst, 8),
            (STXSDX (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC),
                    xaddrX4:$dst)>;
  def : Pat<(PPCstore_scal_int_from_vsr
              (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), iaddrX4:$dst, 8),
            (STXSD (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC),
                   iaddrX4:$dst)>;
  def : Pat<(PPCstore_scal_int_from_vsr
              (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 4),
            (STXSIWX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>;
  def : Pat<(PPCstore_scal_int_from_vsr
              (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 2),
            (STXSIHX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>;
  def : Pat<(PPCstore_scal_int_from_vsr
              (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 1),
            (STXSIBX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>;
  // f64 sources: convert with XSCVDPUXDS (8) or XSCVDPUXWS (2/1).
  def : Pat<(PPCstore_scal_int_from_vsr
              (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xaddrX4:$dst, 8),
            (STXSDX (XSCVDPUXDS f64:$src), xaddrX4:$dst)>;
  def : Pat<(PPCstore_scal_int_from_vsr
              (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), iaddrX4:$dst, 8),
            (STXSD (XSCVDPUXDS f64:$src), iaddrX4:$dst)>;
  def : Pat<(PPCstore_scal_int_from_vsr
              (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 2),
            (STXSIHX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
  def : Pat<(PPCstore_scal_int_from_vsr
              (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 1),
            (STXSIBX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;

  // Round & Convert QP -> DP/SP
  // The f32 case goes through XSCVQPDPO and then XSRSP.
  def : Pat<(f64 (fpround f128:$src)), (f64 (XSCVQPDP $src))>;
  def : Pat<(f32 (fpround f128:$src)), (f32 (XSRSP (XSCVQPDPO $src)))>;

  // Convert SP -> QP
  def : Pat<(f128 (fpextend f32:$src)),
            (f128 (XSCVDPQP (COPY_TO_REGCLASS $src, VFRC)))>;

  // f32 PPCxsmaxc/PPCxsminc are selected to the DP instructions
  // XSMAXCDP/XSMINCDP, with operands and result copied to VSSRC.
  def : Pat<(f32 (PPCxsmaxc f32:$XA, f32:$XB)),
            (f32 (COPY_TO_REGCLASS (XSMAXCDP (COPY_TO_REGCLASS $XA, VSSRC),
                                             (COPY_TO_REGCLASS $XB, VSSRC)),
                                   VSSRC))>;
  def : Pat<(f32 (PPCxsminc f32:$XA, f32:$XB)),
            (f32 (COPY_TO_REGCLASS (XSMINCDP (COPY_TO_REGCLASS $XA, VSSRC),
                                             (COPY_TO_REGCLASS $XB, VSSRC)),
                                   VSSRC))>;

} // end HasP9Vector, AddedComplexity

// Build an f128 from two i64 halves with MTVSRDD. The result dag is the same
// for both endiannesses; only the order in which the PPCbuild_fp128 operands
// are named ($rB, $rA vs. $rA, $rB) differs.
let AddedComplexity = 400 in {
  let Predicates = [IsISA3_0, HasP9Vector, HasDirectMove, IsBigEndian] in {
    def : Pat<(f128 (PPCbuild_fp128 i64:$rB, i64:$rA)),
              (f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>;
  }
  let Predicates = [IsISA3_0, HasP9Vector, HasDirectMove, IsLittleEndian] in {
    def : Pat<(f128 (PPCbuild_fp128 i64:$rA, i64:$rB)),
              (f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>;
  }
}

// Pseudo instructions operating on the spilltovsrrc operand class. They carry
// no selection pattern ([]) and are marked mayStore / mayLoad with
// hasSideEffects = 0. The *X variants take a memrr address, the others a
// memrix address.
let Predicates = [HasP9Vector], hasSideEffects = 0 in {
  let mayStore = 1 in {
    def SPILLTOVSR_STX : PseudoXFormMemOp<(outs),
                                          (ins spilltovsrrc:$XT, memrr:$dst),
                                          "#SPILLTOVSR_STX", []>;
    def SPILLTOVSR_ST : PPCPostRAExpPseudo<(outs), (ins spilltovsrrc:$XT, memrix:$dst),
                              "#SPILLTOVSR_ST", []>;
  }
  let mayLoad = 1 in {
    def SPILLTOVSR_LDX : PseudoXFormMemOp<(outs spilltovsrrc:$XT),
                                          (ins memrr:$src),
                                          "#SPILLTOVSR_LDX", []>;
    def SPILLTOVSR_LD : PPCPostRAExpPseudo<(outs spilltovsrrc:$XT), (ins memrix:$src),
                              "#SPILLTOVSR_LD", []>;

  }
}
// Integer extend helper dags 32 -> 64
// Each dag inserts $A..$D at sub_32 of an i64 IMPLICIT_DEF, so the remaining
// bits are undefined.
def AnyExts {
  dag A = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32);
  dag B = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $B, sub_32);
  dag C = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $C, sub_32);
  dag D = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $D, sub_32);
}

// Source dags: element 0/1 of v2f64 $A/$B rounded to f32.
def DblToFlt {
  dag A0 = (f32 (fpround (f64 (extractelt v2f64:$A, 0))));
  dag A1 = (f32 (fpround (f64 (extractelt v2f64:$A, 1))));
  dag B0 = (f32 (fpround (f64 (extractelt v2f64:$B, 0))));
  dag B1 = (f32 (fpround (f64 (extractelt v2f64:$B, 1))));
}

// Source dags: element 0/1 of v2f64 $A/$B converted with PPCfctiwz (suffix S)
// or PPCfctiwuz (suffix U) and moved out as i32 with PPCmfvsr.
def ExtDbl {
  dag A0S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$A, 0))))));
  dag A1S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$A, 1))))));
  dag B0S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$B, 0))))));
  dag B1S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$B, 1))))));
  dag A0U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$A, 0))))));
  dag A1U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$A, 1))))));
  dag B0U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$B, 0))))));
  dag B1U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$B, 1))))));
}

// Source dags: sign-extended byte elements of v16i8 $A as i32. The LE_ dags
// use elements 0, 4, 8, 12; the BE_ dags use elements 3, 7, 11, 15.
def ByteToWord {
  dag LE_A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8));
  dag LE_A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8));
  dag LE_A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 8)), i8));
  dag LE_A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 12)), i8));
  dag BE_A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 3)), i8));
  dag BE_A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 7)), i8));
  dag BE_A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 11)), i8));
  dag BE_A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 15)), i8));
}

// Source dags: sign-extended byte elements of v16i8 $A as i64 (anyext to i64,
// then sext_inreg from i8). LE_ uses elements 0, 8; BE_ uses elements 7, 15.
def ByteToDWord {
  dag LE_A0 = (i64 (sext_inreg
              (i64 (anyext (i32 (vector_extract v16i8:$A, 0)))), i8));
  dag LE_A1 = (i64 (sext_inreg
              (i64 (anyext (i32 (vector_extract v16i8:$A, 8)))), i8));
  dag BE_A0 = (i64 (sext_inreg
              (i64 (anyext (i32 (vector_extract v16i8:$A, 7)))), i8));
  dag BE_A1 = (i64 (sext_inreg
              (i64 (anyext (i32 (vector_extract v16i8:$A, 15)))), i8));
}

// Source dags: sign-extended halfword elements of v8i16 $A as i32. LE_ uses
// elements 0, 2, 4, 6; BE_ uses elements 1, 3, 5, 7.
def HWordToWord {
  dag LE_A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 0)), i16));
  dag LE_A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 2)), i16));
  dag LE_A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 4)), i16));
  dag LE_A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 6)), i16));
  dag BE_A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 1)), i16));
  dag BE_A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 3)), i16));
  dag BE_A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 5)), i16));
  dag BE_A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 7)), i16));
}

// Source dags: sign-extended halfword elements of v8i16 $A as i64. LE_ uses
// elements 0, 4; BE_ uses elements 3, 7.
def HWordToDWord {
  dag LE_A0 = (i64 (sext_inreg
              (i64 (anyext (i32 (vector_extract v8i16:$A, 0)))), i16));
  dag LE_A1 = (i64 (sext_inreg
              (i64 (anyext (i32 (vector_extract v8i16:$A, 4)))), i16));
  dag BE_A0 = (i64 (sext_inreg
              (i64 (anyext (i32 (vector_extract v8i16:$A, 3)))), i16));
  dag BE_A1 = (i64 (sext_inreg
              (i64 (anyext (i32 (vector_extract v8i16:$A, 7)))), i16));
}

// Source dags: sign-extended word elements of v4i32 $A as i64. LE_ uses
// elements 0, 2; BE_ uses elements 1, 3.
def WordToDWord {
  dag LE_A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 0))));
  dag LE_A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 2))));
  dag BE_A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 1))));
  dag BE_A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 3))));
}

// Source dags for scalar FP -> integer conversions whose result is moved out
// with PPCmfvsr. Naming: <src>To[U]<Int|Long>[Load][P9]; the "Load" variants
// take the FP value from memory at xoaddr:$A, the "P9" variants from
// iaddrX4:$A.
def FltToIntLoad {
  dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (extloadf32 xoaddr:$A)))));
}
def FltToUIntLoad {
  dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (extloadf32 xoaddr:$A)))));
}
def FltToLongLoad {
  dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 xoaddr:$A)))));
}
def FltToLongLoadP9 {
  dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 iaddrX4:$A)))));
}
def FltToULongLoad {
  dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 xoaddr:$A)))));
}
def FltToULongLoadP9 {
  dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 iaddrX4:$A)))));
}
def FltToLong {
  dag A = (i64 (PPCmfvsr (f64 (PPCfctidz (fpextend f32:$A)))));
}
def FltToULong {
  dag A = (i64 (PPCmfvsr (f64 (PPCfctiduz (fpextend f32:$A)))));
}
def DblToInt {
  dag A = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$A))));
  dag B = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$B))));
  dag C = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$C))));
  dag D = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$D))));
}
def DblToUInt {
  dag A = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$A))));
  dag B = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$B))));
  dag C = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$C))));
  dag D = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$D))));
}
def DblToLong {
  dag A = (i64 (PPCmfvsr (f64 (PPCfctidz f64:$A))));
}
def DblToULong {
  dag A = (i64 (PPCmfvsr (f64 (PPCfctiduz f64:$A))));
}
def DblToIntLoad {
  dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load xoaddr:$A)))));
}
def DblToIntLoadP9 {
  dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load iaddrX4:$A)))));
}
def DblToUIntLoad {
  dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load xoaddr:$A)))));
}
def DblToUIntLoadP9 {
  dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load iaddrX4:$A)))));
}
def DblToLongLoad {
  dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (load xoaddr:$A)))));
}
def DblToULongLoad {
  dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (load xoaddr:$A)))));
}

// FP load dags (for f32 -> v4f32)
def LoadFP {
  dag A = (f32 (load xoaddr:$A));
  dag B = (f32 (load xoaddr:$B));
  dag C = (f32 (load xoaddr:$C));
  dag D = (f32 (load xoaddr:$D));
}

// FP merge dags (for f32 -> v4f32)
// LD32x: LIWZX load of operand x copied to VSRC.
// AC/BD and the *ToFlt dags: XXPERMDI of two operands followed by XVCVDPSP;
// the XXPERMDI immediate is 0 for the "h" dags and 3 for the "l" dags.
def MrgFP {
  dag LD32A = (COPY_TO_REGCLASS (LIWZX xoaddr:$A), VSRC);
  dag LD32B = (COPY_TO_REGCLASS (LIWZX xoaddr:$B), VSRC);
  dag LD32C = (COPY_TO_REGCLASS (LIWZX xoaddr:$C), VSRC);
  dag LD32D = (COPY_TO_REGCLASS (LIWZX xoaddr:$D), VSRC);
  dag AC = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $A, VSRC),
                               (COPY_TO_REGCLASS $C, VSRC), 0));
  dag BD = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $B, VSRC),
                               (COPY_TO_REGCLASS $D, VSRC), 0));
  dag ABhToFlt = (XVCVDPSP (XXPERMDI $A, $B, 0));
  dag ABlToFlt = (XVCVDPSP (XXPERMDI $A, $B, 3));
  dag BAhToFlt = (XVCVDPSP (XXPERMDI $B, $A, 0));
  dag BAlToFlt = (XVCVDPSP (XXPERMDI $B, $A, 3));
}

// Word-element merge dags - conversions from f64 to i32 merged into vectors.
// The CV* dags apply XVCVDPSXWS (suffix S) or XVCVDPUXWS (suffix U) to the
// merged v2f64 dag named in the rest of the identifier.
def MrgWords {
  // For big endian, we merge low and hi doublewords (A, B).
  dag A0B0 = (v2f64 (XXPERMDI v2f64:$A, v2f64:$B, 0));
  dag A1B1 = (v2f64 (XXPERMDI v2f64:$A, v2f64:$B, 3));
  dag CVA1B1S = (v4i32 (XVCVDPSXWS A1B1));
  dag CVA0B0S = (v4i32 (XVCVDPSXWS A0B0));
  dag CVA1B1U = (v4i32 (XVCVDPUXWS A1B1));
  dag CVA0B0U = (v4i32 (XVCVDPUXWS A0B0));

  // For little endian, we merge low and hi doublewords (B, A).
  dag B1A1 = (v2f64 (XXPERMDI v2f64:$B, v2f64:$A, 0));
  dag B0A0 = (v2f64 (XXPERMDI v2f64:$B, v2f64:$A, 3));
  dag CVB1A1S = (v4i32 (XVCVDPSXWS B1A1));
  dag CVB0A0S = (v4i32 (XVCVDPSXWS B0A0));
  dag CVB1A1U = (v4i32 (XVCVDPUXWS B1A1));
  dag CVB0A0U = (v4i32 (XVCVDPUXWS B0A0));

  // For big endian, we merge hi doublewords of (A, C) and (B, D), convert
  // then merge.
  dag AC = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$A, VSRC),
                            (COPY_TO_REGCLASS f64:$C, VSRC), 0));
  dag BD = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$B, VSRC),
                            (COPY_TO_REGCLASS f64:$D, VSRC), 0));
  dag CVACS = (v4i32 (XVCVDPSXWS AC));
  dag CVBDS = (v4i32 (XVCVDPSXWS BD));
  dag CVACU = (v4i32 (XVCVDPUXWS AC));
  dag CVBDU = (v4i32 (XVCVDPUXWS BD));

  // For little endian, we merge hi doublewords of (D, B) and (C, A), convert
  // then merge.
  dag DB = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$D, VSRC),
                            (COPY_TO_REGCLASS f64:$B, VSRC), 0));
  dag CA = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$C, VSRC),
                            (COPY_TO_REGCLASS f64:$A, VSRC), 0));
  dag CVDBS = (v4i32 (XVCVDPSXWS DB));
  dag CVCAS = (v4i32 (XVCVDPSXWS CA));
  dag CVDBU = (v4i32 (XVCVDPUXWS DB));
  dag CVCAU = (v4i32 (XVCVDPUXWS CA));
}

// Patterns for BUILD_VECTOR nodes.
// Selection patterns for build_vector / scalar_to_vector / PPCldsplat nodes.
// AddedComplexity = 400 applies to every pattern inside this scope.
let AddedComplexity = 400 in {

  let Predicates = [HasVSX] in {
    // Build vectors of floating point converted to i32.
    // A splat of one converted double: convert once, then replicate with
    // XXSPLTW (word index 1) or XXPERMDI (immediate 0).
    def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.A,
                                   DblToInt.A, DblToInt.A)),
              (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS $A), VSRC), 1))>;
    def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.A,
                                   DblToUInt.A, DblToUInt.A)),
              (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS $A), VSRC), 1))>;
    def : Pat<(v2i64 (build_vector DblToLong.A, DblToLong.A)),
              (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC),
                               (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC), 0))>;
    def : Pat<(v2i64 (build_vector DblToULong.A, DblToULong.A)),
              (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC),
                               (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), 0))>;
    // f32 loaded from memory, converted and splatted.
    def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)),
              (v4i32 (XXSPLTW (COPY_TO_REGCLASS
                                (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>;
    def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)),
              (v4i32 (XXSPLTW (COPY_TO_REGCLASS
                                (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>;
    // f32 splat: XSCVDPSPN followed by XXSPLTW of word 0.
    def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)),
              (v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>;
    // Load-and-splat of a doubleword maps directly onto LXVDSX.
    def : Pat<(v2f64 (PPCldsplat xoaddr:$A)),
              (v2f64 (LXVDSX xoaddr:$A))>;
    def : Pat<(v2i64 (PPCldsplat xoaddr:$A)),
              (v2i64 (LXVDSX xoaddr:$A))>;

    // Build vectors of floating point converted to i64.
// f32 -> i64 splats (convert once, replicate with XXPERMDIs) and
// load-and-splat of a double followed by a vector conversion. These patterns
// finish the [HasVSX] group, which is closed by the first "}" below.
    def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)),
              (v2i64 (XXPERMDIs
                       (COPY_TO_REGCLASS (XSCVDPSXDSs $A), VSFRC), 0))>;
    def : Pat<(v2i64 (build_vector FltToULong.A, FltToULong.A)),
              (v2i64 (XXPERMDIs
                       (COPY_TO_REGCLASS (XSCVDPUXDSs $A), VSFRC), 0))>;
    def : Pat<(v2i64 (scalar_to_vector DblToLongLoad.A)),
              (v2i64 (XVCVDPSXDS (LXVDSX xoaddr:$A)))>;
    def : Pat<(v2i64 (scalar_to_vector DblToULongLoad.A)),
              (v2i64 (XVCVDPUXDS (LXVDSX xoaddr:$A)))>;
  }

  let Predicates = [HasVSX, NoP9Vector] in {
    // Load-and-splat with fp-to-int conversion (using X-Form VSX/FP loads).
    def : Pat<(v4i32 (scalar_to_vector DblToIntLoad.A)),
              (v4i32 (XXSPLTW (COPY_TO_REGCLASS
                                (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), VSRC), 1))>;
    def : Pat<(v4i32 (scalar_to_vector DblToUIntLoad.A)),
              (v4i32 (XXSPLTW (COPY_TO_REGCLASS
                                (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), VSRC), 1))>;
    def : Pat<(v2i64 (scalar_to_vector FltToLongLoad.A)),
              (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
                                              (XFLOADf32 xoaddr:$A), VSFRC)), 0))>;
    def : Pat<(v2i64 (scalar_to_vector FltToULongLoad.A)),
              (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
                                              (XFLOADf32 xoaddr:$A), VSFRC)), 0))>;
  }

  let Predicates = [IsBigEndian, HasP8Vector] in {
    // DWToSPExtractConv.BVU/BVS are defined outside this chunk; here $S1 is
    // converted first and $S2 second (the little-endian group swaps them).
    def : Pat<DWToSPExtractConv.BVU,
              (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3),
                              (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3)))>;
    def : Pat<DWToSPExtractConv.BVS,
              (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3),
                              (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3)))>;
    // Element 1 can be stored with STIWX without any shift.
    def : Pat<(store (i32 (extractelt v4i32:$A, 1)), xoaddr:$src),
              (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
    def : Pat<(store (f32 (extractelt v4f32:$A, 1)), xoaddr:$src),
              (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;

    // Elements in a register on a BE system are in order <0, 1, 2, 3>.
    // The store instructions store the second word from the left.
    // So to align element zero, we need to modulo-left-shift by 3 words.
    // Similar logic applies for elements 2 and 3.
    // Each pair is [element index, XXSLDWI shift amount].
    foreach Idx = [ [0,3], [2,1], [3,2] ] in {
      def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
                (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
                                       sub_64), xoaddr:$src)>;
      def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
                (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
                                       sub_64), xoaddr:$src)>;
    }
  }

  let Predicates = [HasP8Vector, IsBigEndian, NoP9Vector] in {
    // Doubleword element stores: element 0 is stored as is, element 1 is
    // first moved with XXPERMDI $A, $A, 2.
    def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
              (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
    def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
              (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
    def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
              (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
                          xoaddr:$src)>;
    def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
              (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
                          xoaddr:$src)>;
  }

  // Big endian, available on all targets with VSX
  let Predicates = [IsBigEndian, HasVSX] in {
    def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
              (v2f64 (XXPERMDI
                        (COPY_TO_REGCLASS $A, VSRC),
                        (COPY_TO_REGCLASS $B, VSRC), 0))>;
    // Using VMRGEW to assemble the final vector would be a lower latency
    // solution. However, we choose to go with the slightly higher latency
    // XXPERMDI for 2 reasons:
    // 1. This is likely to occur in unrolled loops where regpressure is high,
    //    so we want to use the latter as it has access to all 64 VSX registers.
    // 2. Using Altivec instructions in this sequence would likely cause the
    //    allocation of Altivec registers even for the loads which in turn would
    //    force the use of LXSIWZX for the loads, adding a cycle of latency to
    //    each of the loads which would otherwise be able to use LFIWZX.
    def : Pat<(v4f32 (build_vector LoadFP.A, LoadFP.B, LoadFP.C, LoadFP.D)),
              (v4f32 (XXPERMDI (XXMRGHW MrgFP.LD32A, MrgFP.LD32B),
                               (XXMRGHW MrgFP.LD32C, MrgFP.LD32D), 3))>;
    def : Pat<(v4f32 (build_vector f32:$A, f32:$B, f32:$C, f32:$D)),
              (VMRGEW MrgFP.AC, MrgFP.BD)>;
    def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
                                   DblToFlt.B0, DblToFlt.B1)),
              (v4f32 (VMRGEW MrgFP.ABhToFlt, MrgFP.ABlToFlt))>;

    // Convert 4 doubles to a vector of ints.
    def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B,
                                   DblToInt.C, DblToInt.D)),
              (v4i32 (VMRGEW MrgWords.CVACS, MrgWords.CVBDS))>;
    def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B,
                                   DblToUInt.C, DblToUInt.D)),
              (v4i32 (VMRGEW MrgWords.CVACU, MrgWords.CVBDU))>;
    def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S,
                                   ExtDbl.B0S, ExtDbl.B1S)),
              (v4i32 (VMRGEW MrgWords.CVA0B0S, MrgWords.CVA1B1S))>;
    def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U,
                                   ExtDbl.B0U, ExtDbl.B1U)),
              (v4i32 (VMRGEW MrgWords.CVA0B0U, MrgWords.CVA1B1U))>;
  }

  let Predicates = [IsLittleEndian, HasP8Vector] in {
    // Same result shape as the big-endian group, with $S2 converted first
    // and $S1 second.
    def : Pat<DWToSPExtractConv.BVU,
              (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3),
                              (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3)))>;
    def : Pat<DWToSPExtractConv.BVS,
              (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3),
                              (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3)))>;
    // Element 2 can be stored with STIWX without any shift.
    def : Pat<(store (i32 (extractelt v4i32:$A, 2)), xoaddr:$src),
              (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
    def : Pat<(store (f32 (extractelt v4f32:$A, 2)), xoaddr:$src),
              (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;

    // Elements in a register on a LE system are in order <3, 2, 1, 0>.
    // The store instructions store the second word from the left.
    // So to align element 3, we need to modulo-left-shift by 3 words.
    // Similar logic applies for elements 0 and 1.
    // Each pair is [element index, XXSLDWI shift amount].
    foreach Idx = [ [0,2], [1,1], [3,3] ] in {
      def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
                (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
                                       sub_64), xoaddr:$src)>;
      def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
                (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
                                       sub_64), xoaddr:$src)>;
    }
  }

  let Predicates = [HasP8Vector, IsLittleEndian, NoP9Vector] in {
    // Doubleword element stores: element 1 is stored as is, element 0 is
    // first moved with XXPERMDI $A, $A, 2.
    def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
              (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
                          xoaddr:$src)>;
    def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
              (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
                          xoaddr:$src)>;
    def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
              (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
    def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
              (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
  }

  let Predicates = [IsLittleEndian, HasVSX] in {
  // Little endian, available on all targets with VSX
    def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
              (v2f64 (XXPERMDI
                        (COPY_TO_REGCLASS $B, VSRC),
                        (COPY_TO_REGCLASS $A, VSRC), 0))>;
    // Using VMRGEW to assemble the final vector would be a lower latency
    // solution. However, we choose to go with the slightly higher latency
    // XXPERMDI for 2 reasons:
    // 1. This is likely to occur in unrolled loops where regpressure is high,
    //    so we want to use the latter as it has access to all 64 VSX registers.
    // 2. Using Altivec instructions in this sequence would likely cause the
    //    allocation of Altivec registers even for the loads which in turn would
    //    force the use of LXSIWZX for the loads, adding a cycle of latency to
    //    each of the loads which would otherwise be able to use LFIWZX.
    def : Pat<(v4f32 (build_vector LoadFP.A, LoadFP.B, LoadFP.C, LoadFP.D)),
              (v4f32 (XXPERMDI (XXMRGHW MrgFP.LD32D, MrgFP.LD32C),
                               (XXMRGHW MrgFP.LD32B, MrgFP.LD32A), 3))>;
    // The operands are bound in reverse ($D, $C, $B, $A) so that the
    // big-endian MrgFP.AC / MrgFP.BD dags can be reused unchanged.
    def : Pat<(v4f32 (build_vector f32:$D, f32:$C, f32:$B, f32:$A)),
              (VMRGEW MrgFP.AC, MrgFP.BD)>;
    def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
                                   DblToFlt.B0, DblToFlt.B1)),
              (v4f32 (VMRGEW MrgFP.BAhToFlt, MrgFP.BAlToFlt))>;

    // Convert 4 doubles to a vector of ints.
    def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B,
                                   DblToInt.C, DblToInt.D)),
              (v4i32 (VMRGEW MrgWords.CVDBS, MrgWords.CVCAS))>;
    def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B,
                                   DblToUInt.C, DblToUInt.D)),
              (v4i32 (VMRGEW MrgWords.CVDBU, MrgWords.CVCAU))>;
    def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S,
                                   ExtDbl.B0S, ExtDbl.B1S)),
              (v4i32 (VMRGEW MrgWords.CVB1A1S, MrgWords.CVB0A0S))>;
    def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U,
                                   ExtDbl.B0U, ExtDbl.B1U)),
              (v4i32 (VMRGEW MrgWords.CVB1A1U, MrgWords.CVB0A0U))>;
  }

  let Predicates = [HasDirectMove] in {
    // Endianness-neutral constant splat on P8 and newer targets. The reason
    // for this pattern is that on targets with direct moves, we don't expand
    // BUILD_VECTOR nodes for v4i32.
    def : Pat<(v4i32 (build_vector immSExt5NonZero:$A, immSExt5NonZero:$A,
                                   immSExt5NonZero:$A, immSExt5NonZero:$A)),
              (v4i32 (VSPLTISW imm:$A))>;
  }

  let Predicates = [IsBigEndian, HasDirectMove, NoP9Vector] in {
    // Big endian integer vectors using direct moves.
// Big-endian integer build_vector via direct moves (body of the
// [IsBigEndian, HasDirectMove, NoP9Vector] group, closed by the first "}"
// below). Two i32 values are packed into one i64 with RLDIMI before MTVSRD.
    def : Pat<(v2i64 (build_vector i64:$A, i64:$B)),
              (v2i64 (XXPERMDI
                        (COPY_TO_REGCLASS (MTVSRD $A), VSRC),
                        (COPY_TO_REGCLASS (MTVSRD $B), VSRC), 0))>;
    def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
              (XXPERMDI
                (COPY_TO_REGCLASS
                  (MTVSRD (RLDIMI AnyExts.B, AnyExts.A, 32, 0)), VSRC),
                (COPY_TO_REGCLASS
                  (MTVSRD (RLDIMI AnyExts.D, AnyExts.C, 32, 0)), VSRC), 0)>;
    def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
              (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
  }

  let Predicates = [IsLittleEndian, HasDirectMove, NoP9Vector] in {
    // Little endian integer vectors using direct moves.
    // Same shape as the big-endian patterns with the operand order reversed
    // ($B before $A, and the C/D pair before the A/B pair). The splat pattern
    // is identical for both endiannesses.
    def : Pat<(v2i64 (build_vector i64:$A, i64:$B)),
              (v2i64 (XXPERMDI
                        (COPY_TO_REGCLASS (MTVSRD $B), VSRC),
                        (COPY_TO_REGCLASS (MTVSRD $A), VSRC), 0))>;
    def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
              (XXPERMDI
                (COPY_TO_REGCLASS
                  (MTVSRD (RLDIMI AnyExts.C, AnyExts.D, 32, 0)), VSRC),
                (COPY_TO_REGCLASS
                  (MTVSRD (RLDIMI AnyExts.A, AnyExts.B, 32, 0)), VSRC), 0)>;
    def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
              (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
  }

  let Predicates = [HasP8Vector] in {
    // An all-ones vector of any of these types is materialized with
    // XXLEQVOnes.
    def : Pat<(v1i128 (bitconvert (v16i8 immAllOnesV))),
              (v1i128 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
    def : Pat<(v2i64 (bitconvert (v16i8 immAllOnesV))),
              (v2i64 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
    def : Pat<(v8i16 (bitconvert (v16i8 immAllOnesV))),
              (v8i16 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
    def : Pat<(v16i8 (bitconvert (v16i8 immAllOnesV))),
              (v16i8 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
  }

  let Predicates = [HasP9Vector] in {
    // Endianness-neutral patterns for const splats with ISA 3.0 instructions.
// Body of the [HasP9Vector] group (closed by the first "}" below): splats and
// load-and-splat patterns using MTVSRWS, XXSPLTIB and LXVWSX.
    def : Pat<(v4i32 (scalar_to_vector i32:$A)),
              (v4i32 (MTVSRWS $A))>;
    def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
              (v4i32 (MTVSRWS $A))>;
    // Byte-immediate splat; all sixteen operands must be the same
    // immNonAllOneAnyExt8 value.
    def : Pat<(v16i8 (build_vector immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
                                   immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
                                   immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
                                   immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
                                   immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
                                   immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
                                   immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
                                   immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A)),
              (v16i8 (COPY_TO_REGCLASS (XXSPLTIB imm:$A), VSRC))>;
    // f32 in memory -> splatted i32: load-and-splat with LXVWSX, then a
    // vector conversion.
    def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)),
              (v4i32 (XVCVSPSXWS (LXVWSX xoaddr:$A)))>;
    def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)),
              (v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>;
    // iaddrX4 variants of the load/convert/splat patterns, using
    // DFLOADf64 / DFLOADf32.
    def : Pat<(v4i32 (scalar_to_vector DblToIntLoadP9.A)),
              (v4i32 (XXSPLTW (COPY_TO_REGCLASS
                                (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1))>;
    def : Pat<(v4i32 (scalar_to_vector DblToUIntLoadP9.A)),
              (v4i32 (XXSPLTW (COPY_TO_REGCLASS
                                (XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1))>;
    def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)),
              (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
                                              (DFLOADf32 iaddrX4:$A),
                                              VSFRC)), 0))>;
    def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)),
              (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
                                              (DFLOADf32 iaddrX4:$A),
                                              VSFRC)), 0))>;
    // Load-and-splat of a word maps directly onto LXVWSX.
    def : Pat<(v4f32 (PPCldsplat xoaddr:$A)),
              (v4f32 (LXVWSX xoaddr:$A))>;
    def : Pat<(v4i32 (PPCldsplat xoaddr:$A)),
              (v4i32 (LXVWSX xoaddr:$A))>;
  }

  let Predicates = [IsISA3_0, HasDirectMove, IsBigEndian] in {
    // Element 1 is read with MFVSRLD (the little-endian group uses element 0).
    def : Pat<(i64 (extractelt v2i64:$A, 1)),
              (i64 (MFVSRLD $A))>;
    // Better way to build integer vectors if we have MTVSRDD. Big endian.
// Big-endian MTVSRDD build_vector patterns (body of the
// [IsISA3_0, HasDirectMove, IsBigEndian] group, closed by the first "}"
// below). Pairs of i32 values are packed into an i64 with RLDIMI first.
    def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)),
              (v2i64 (MTVSRDD $rB, $rA))>;
    def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
              (MTVSRDD
                (RLDIMI AnyExts.B, AnyExts.A, 32, 0),
                (RLDIMI AnyExts.D, AnyExts.C, 32, 0))>;
  }

  let Predicates = [IsISA3_0, HasDirectMove, IsLittleEndian] in {
    // Element 0 is read with MFVSRLD (the big-endian group uses element 1).
    def : Pat<(i64 (extractelt v2i64:$A, 0)),
              (i64 (MFVSRLD $A))>;
    // Better way to build integer vectors if we have MTVSRDD. Little endian.
    def : Pat<(v2i64 (build_vector i64:$rA, i64:$rB)),
              (v2i64 (MTVSRDD $rB, $rA))>;
    def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
              (MTVSRDD
                (RLDIMI AnyExts.C, AnyExts.D, 32, 0),
                (RLDIMI AnyExts.A, AnyExts.B, 32, 0))>;
  }
  // P9 Altivec instructions that can be used to build vectors.
  // Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete
  // with complexities of existing build vector patterns in this file.
  // The LE and BE groups select the same instructions; they differ only in
  // which source elements (the LE_* vs. BE_* helper dags) are matched.
  let Predicates = [HasP9Altivec, IsLittleEndian] in {
    def : Pat<(v2i64 (build_vector WordToDWord.LE_A0, WordToDWord.LE_A1)),
              (v2i64 (VEXTSW2D $A))>;
    def : Pat<(v2i64 (build_vector HWordToDWord.LE_A0, HWordToDWord.LE_A1)),
              (v2i64 (VEXTSH2D $A))>;
    def : Pat<(v4i32 (build_vector HWordToWord.LE_A0, HWordToWord.LE_A1,
                      HWordToWord.LE_A2, HWordToWord.LE_A3)),
              (v4i32 (VEXTSH2W $A))>;
    def : Pat<(v4i32 (build_vector ByteToWord.LE_A0, ByteToWord.LE_A1,
                      ByteToWord.LE_A2, ByteToWord.LE_A3)),
              (v4i32 (VEXTSB2W $A))>;
    def : Pat<(v2i64 (build_vector ByteToDWord.LE_A0, ByteToDWord.LE_A1)),
              (v2i64 (VEXTSB2D $A))>;
  }

  let Predicates = [HasP9Altivec, IsBigEndian] in {
    def : Pat<(v2i64 (build_vector WordToDWord.BE_A0, WordToDWord.BE_A1)),
              (v2i64 (VEXTSW2D $A))>;
    def : Pat<(v2i64 (build_vector HWordToDWord.BE_A0, HWordToDWord.BE_A1)),
              (v2i64 (VEXTSH2D $A))>;
    def : Pat<(v4i32 (build_vector HWordToWord.BE_A0, HWordToWord.BE_A1,
                      HWordToWord.BE_A2, HWordToWord.BE_A3)),
              (v4i32 (VEXTSH2W $A))>;
    def : Pat<(v4i32 (build_vector ByteToWord.BE_A0, ByteToWord.BE_A1,
                      ByteToWord.BE_A2, ByteToWord.BE_A3)),
              (v4i32 (VEXTSB2W $A))>;
    def : Pat<(v2i64 (build_vector ByteToDWord.BE_A0, ByteToDWord.BE_A1)),
              (v2i64 (VEXTSB2D $A))>;
  }

  let Predicates = [HasP9Altivec] in {
    // PPCSExtVElems maps onto the VEXTS* instruction chosen by the source
    // and result vector types.
    def: Pat<(v2i64 (PPCSExtVElems v16i8:$A)),
             (v2i64 (VEXTSB2D $A))>;
    def: Pat<(v2i64 (PPCSExtVElems v8i16:$A)),
             (v2i64 (VEXTSH2D $A))>;
    def: Pat<(v2i64 (PPCSExtVElems v4i32:$A)),
             (v2i64 (VEXTSW2D $A))>;
    def: Pat<(v4i32 (PPCSExtVElems v16i8:$A)),
             (v4i32 (VEXTSB2W $A))>;
    def: Pat<(v4i32 (PPCSExtVElems v8i16:$A)),
             (v4i32 (VEXTSH2W $A))>;
  }
}

// These P9Altivec patterns are defined here because one of them selects the
// VSX instruction xvnegsp; keeping them in this file avoids a possibly
// undefined reference.
let Predicates = [HasP9Altivec] in {

  def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 0))),
            (v4i32 (VABSDUW $A, $B))>;

  def : Pat<(v8i16 (PPCvabsd v8i16:$A, v8i16:$B, (i32 0))),
            (v8i16 (VABSDUH $A, $B))>;

  def : Pat<(v16i8 (PPCvabsd v16i8:$A, v16i8:$B, (i32 0))),
            (v16i8 (VABSDUB $A, $B))>;

  // Per the PPCvabsd description, the last operand indicates whether to flip
  // the sign bit. When it is 1, both inputs go through XVNEGSP first.
  def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 1))),
            (v4i32 (VABSDUW (XVNEGSP $A), (XVNEGSP $B)))>;
}
