1 //===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a pattern matching instruction selector for PowerPC, 10 // converting from a legalized dag to a PPC dag. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "MCTargetDesc/PPCMCTargetDesc.h" 15 #include "MCTargetDesc/PPCPredicates.h" 16 #include "PPC.h" 17 #include "PPCISelLowering.h" 18 #include "PPCMachineFunctionInfo.h" 19 #include "PPCSubtarget.h" 20 #include "PPCTargetMachine.h" 21 #include "llvm/ADT/APInt.h" 22 #include "llvm/ADT/DenseMap.h" 23 #include "llvm/ADT/STLExtras.h" 24 #include "llvm/ADT/SmallPtrSet.h" 25 #include "llvm/ADT/SmallVector.h" 26 #include "llvm/ADT/Statistic.h" 27 #include "llvm/Analysis/BranchProbabilityInfo.h" 28 #include "llvm/CodeGen/FunctionLoweringInfo.h" 29 #include "llvm/CodeGen/ISDOpcodes.h" 30 #include "llvm/CodeGen/MachineBasicBlock.h" 31 #include "llvm/CodeGen/MachineFunction.h" 32 #include "llvm/CodeGen/MachineInstrBuilder.h" 33 #include "llvm/CodeGen/MachineRegisterInfo.h" 34 #include "llvm/CodeGen/SelectionDAG.h" 35 #include "llvm/CodeGen/SelectionDAGISel.h" 36 #include "llvm/CodeGen/SelectionDAGNodes.h" 37 #include "llvm/CodeGen/TargetInstrInfo.h" 38 #include "llvm/CodeGen/TargetRegisterInfo.h" 39 #include "llvm/CodeGen/ValueTypes.h" 40 #include "llvm/IR/BasicBlock.h" 41 #include "llvm/IR/DebugLoc.h" 42 #include "llvm/IR/Function.h" 43 #include "llvm/IR/GlobalValue.h" 44 #include "llvm/IR/InlineAsm.h" 45 #include "llvm/IR/InstrTypes.h" 46 #include "llvm/IR/Module.h" 47 #include "llvm/Support/Casting.h" 48 #include "llvm/Support/CodeGen.h" 49 #include 
"llvm/Support/CommandLine.h" 50 #include "llvm/Support/Compiler.h" 51 #include "llvm/Support/Debug.h" 52 #include "llvm/Support/ErrorHandling.h" 53 #include "llvm/Support/KnownBits.h" 54 #include "llvm/Support/MachineValueType.h" 55 #include "llvm/Support/MathExtras.h" 56 #include "llvm/Support/raw_ostream.h" 57 #include <algorithm> 58 #include <cassert> 59 #include <cstdint> 60 #include <iterator> 61 #include <limits> 62 #include <memory> 63 #include <new> 64 #include <tuple> 65 #include <utility> 66 67 using namespace llvm; 68 69 #define DEBUG_TYPE "ppc-codegen" 70 71 STATISTIC(NumSextSetcc, 72 "Number of (sext(setcc)) nodes expanded into GPR sequence."); 73 STATISTIC(NumZextSetcc, 74 "Number of (zext(setcc)) nodes expanded into GPR sequence."); 75 STATISTIC(SignExtensionsAdded, 76 "Number of sign extensions for compare inputs added."); 77 STATISTIC(ZeroExtensionsAdded, 78 "Number of zero extensions for compare inputs added."); 79 STATISTIC(NumLogicOpsOnComparison, 80 "Number of logical ops on i1 values calculated in GPR."); 81 STATISTIC(OmittedForNonExtendUses, 82 "Number of compares not eliminated as they have non-extending uses."); 83 STATISTIC(NumP9Setb, 84 "Number of compares lowered to setb."); 85 86 // FIXME: Remove this once the bug has been fixed! 
87 cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug", 88 cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden); 89 90 static cl::opt<bool> 91 UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true), 92 cl::desc("use aggressive ppc isel for bit permutations"), 93 cl::Hidden); 94 static cl::opt<bool> BPermRewriterNoMasking( 95 "ppc-bit-perm-rewriter-stress-rotates", 96 cl::desc("stress rotate selection in aggressive ppc isel for " 97 "bit permutations"), 98 cl::Hidden); 99 100 static cl::opt<bool> EnableBranchHint( 101 "ppc-use-branch-hint", cl::init(true), 102 cl::desc("Enable static hinting of branches on ppc"), 103 cl::Hidden); 104 105 static cl::opt<bool> EnableTLSOpt( 106 "ppc-tls-opt", cl::init(true), 107 cl::desc("Enable tls optimization peephole"), 108 cl::Hidden); 109 110 enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64, 111 ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32, 112 ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 }; 113 114 static cl::opt<ICmpInGPRType> CmpInGPR( 115 "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All), 116 cl::desc("Specify the types of comparisons to emit GPR-only code for."), 117 cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."), 118 clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."), 119 clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."), 120 clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."), 121 clEnumValN(ICGPR_NonExtIn, "nonextin", 122 "Only comparisons where inputs don't need [sz]ext."), 123 clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."), 124 clEnumValN(ICGPR_ZextI32, "zexti32", 125 "Only i32 comparisons with zext result."), 126 clEnumValN(ICGPR_ZextI64, "zexti64", 127 "Only i64 comparisons with zext result."), 128 clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."), 129 clEnumValN(ICGPR_SextI32, "sexti32", 130 "Only i32 comparisons with sext result."), 131 clEnumValN(ICGPR_SextI64, 
"sexti64", 132 "Only i64 comparisons with sext result."))); 133 namespace { 134 135 //===--------------------------------------------------------------------===// 136 /// PPCDAGToDAGISel - PPC specific code to select PPC machine 137 /// instructions for SelectionDAG operations. 138 /// 139 class PPCDAGToDAGISel : public SelectionDAGISel { 140 const PPCTargetMachine &TM; 141 const PPCSubtarget *PPCSubTarget; 142 const PPCTargetLowering *PPCLowering; 143 unsigned GlobalBaseReg; 144 145 public: 146 explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel) 147 : SelectionDAGISel(tm, OptLevel), TM(tm) {} 148 149 bool runOnMachineFunction(MachineFunction &MF) override { 150 // Make sure we re-emit a set of the global base reg if necessary 151 GlobalBaseReg = 0; 152 PPCSubTarget = &MF.getSubtarget<PPCSubtarget>(); 153 PPCLowering = PPCSubTarget->getTargetLowering(); 154 SelectionDAGISel::runOnMachineFunction(MF); 155 156 if (!PPCSubTarget->isSVR4ABI()) 157 InsertVRSaveCode(MF); 158 159 return true; 160 } 161 162 void PreprocessISelDAG() override; 163 void PostprocessISelDAG() override; 164 165 /// getI16Imm - Return a target constant with the specified value, of type 166 /// i16. 167 inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) { 168 return CurDAG->getTargetConstant(Imm, dl, MVT::i16); 169 } 170 171 /// getI32Imm - Return a target constant with the specified value, of type 172 /// i32. 173 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { 174 return CurDAG->getTargetConstant(Imm, dl, MVT::i32); 175 } 176 177 /// getI64Imm - Return a target constant with the specified value, of type 178 /// i64. 179 inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) { 180 return CurDAG->getTargetConstant(Imm, dl, MVT::i64); 181 } 182 183 /// getSmallIPtrImm - Return a target constant of pointer type. 
184 inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) { 185 return CurDAG->getTargetConstant( 186 Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout())); 187 } 188 189 /// isRotateAndMask - Returns true if Mask and Shift can be folded into a 190 /// rotate and mask opcode and mask operation. 191 static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask, 192 unsigned &SH, unsigned &MB, unsigned &ME); 193 194 /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC 195 /// base register. Return the virtual register that holds this value. 196 SDNode *getGlobalBaseReg(); 197 198 void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0); 199 200 // Select - Convert the specified operand from a target-independent to a 201 // target-specific node if it hasn't already been changed. 202 void Select(SDNode *N) override; 203 204 bool tryBitfieldInsert(SDNode *N); 205 bool tryBitPermutation(SDNode *N); 206 bool tryIntCompareInGPR(SDNode *N); 207 208 // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into 209 // an X-Form load instruction with the offset being a relocation coming from 210 // the PPCISD::ADD_TLS. 211 bool tryTLSXFormLoad(LoadSDNode *N); 212 // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into 213 // an X-Form store instruction with the offset being a relocation coming from 214 // the PPCISD::ADD_TLS. 215 bool tryTLSXFormStore(StoreSDNode *N); 216 /// SelectCC - Select a comparison of the specified values with the 217 /// specified condition code, returning the CR# of the expression. 218 SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, 219 const SDLoc &dl); 220 221 /// SelectAddrImmOffs - Return true if the operand is valid for a preinc 222 /// immediate field. Note that the operand at this point is already the 223 /// result of a prior SelectAddressRegImm call. 
224 bool SelectAddrImmOffs(SDValue N, SDValue &Out) const { 225 if (N.getOpcode() == ISD::TargetConstant || 226 N.getOpcode() == ISD::TargetGlobalAddress) { 227 Out = N; 228 return true; 229 } 230 231 return false; 232 } 233 234 /// SelectAddrIdx - Given the specified address, check to see if it can be 235 /// represented as an indexed [r+r] operation. 236 /// This is for xform instructions whose associated displacement form is D. 237 /// The last parameter \p 0 means associated D form has no requirment for 16 238 /// bit signed displacement. 239 /// Returns false if it can be represented by [r+imm], which are preferred. 240 bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) { 241 return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 0); 242 } 243 244 /// SelectAddrIdx4 - Given the specified address, check to see if it can be 245 /// represented as an indexed [r+r] operation. 246 /// This is for xform instructions whose associated displacement form is DS. 247 /// The last parameter \p 4 means associated DS form 16 bit signed 248 /// displacement must be a multiple of 4. 249 /// Returns false if it can be represented by [r+imm], which are preferred. 250 bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) { 251 return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 4); 252 } 253 254 /// SelectAddrIdx16 - Given the specified address, check to see if it can be 255 /// represented as an indexed [r+r] operation. 256 /// This is for xform instructions whose associated displacement form is DQ. 257 /// The last parameter \p 16 means associated DQ form 16 bit signed 258 /// displacement must be a multiple of 16. 259 /// Returns false if it can be represented by [r+imm], which are preferred. 
260 bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) { 261 return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 16); 262 } 263 264 /// SelectAddrIdxOnly - Given the specified address, force it to be 265 /// represented as an indexed [r+r] operation. 266 bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) { 267 return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG); 268 } 269 270 /// SelectAddrImm - Returns true if the address N can be represented by 271 /// a base register plus a signed 16-bit displacement [r+imm]. 272 /// The last parameter \p 0 means D form has no requirment for 16 bit signed 273 /// displacement. 274 bool SelectAddrImm(SDValue N, SDValue &Disp, 275 SDValue &Base) { 276 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0); 277 } 278 279 /// SelectAddrImmX4 - Returns true if the address N can be represented by 280 /// a base register plus a signed 16-bit displacement that is a multiple of 281 /// 4 (last parameter). Suitable for use by STD and friends. 282 bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) { 283 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4); 284 } 285 286 /// SelectAddrImmX16 - Returns true if the address N can be represented by 287 /// a base register plus a signed 16-bit displacement that is a multiple of 288 /// 16(last parameter). Suitable for use by STXV and friends. 289 bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) { 290 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16); 291 } 292 293 // Select an address into a single register. 294 bool SelectAddr(SDValue N, SDValue &Base) { 295 Base = N; 296 return true; 297 } 298 299 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 300 /// inline asm expressions. It is always correct to compute the value into 301 /// a register. 
The case of adding a (possibly relocatable) constant to a 302 /// register can be improved, but it is wrong to substitute Reg+Reg for 303 /// Reg in an asm, because the load or store opcode would have to change. 304 bool SelectInlineAsmMemoryOperand(const SDValue &Op, 305 unsigned ConstraintID, 306 std::vector<SDValue> &OutOps) override { 307 switch(ConstraintID) { 308 default: 309 errs() << "ConstraintID: " << ConstraintID << "\n"; 310 llvm_unreachable("Unexpected asm memory constraint"); 311 case InlineAsm::Constraint_es: 312 case InlineAsm::Constraint_i: 313 case InlineAsm::Constraint_m: 314 case InlineAsm::Constraint_o: 315 case InlineAsm::Constraint_Q: 316 case InlineAsm::Constraint_Z: 317 case InlineAsm::Constraint_Zy: 318 // We need to make sure that this one operand does not end up in r0 319 // (because we might end up lowering this as 0(%op)). 320 const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo(); 321 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1); 322 SDLoc dl(Op); 323 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32); 324 SDValue NewOp = 325 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, 326 dl, Op.getValueType(), 327 Op, RC), 0); 328 329 OutOps.push_back(NewOp); 330 return false; 331 } 332 return true; 333 } 334 335 void InsertVRSaveCode(MachineFunction &MF); 336 337 StringRef getPassName() const override { 338 return "PowerPC DAG->DAG Pattern Instruction Selection"; 339 } 340 341 // Include the pieces autogenerated from the target description. 
342 #include "PPCGenDAGISel.inc" 343 344 private: 345 bool trySETCC(SDNode *N); 346 347 void PeepholePPC64(); 348 void PeepholePPC64ZExt(); 349 void PeepholeCROps(); 350 351 SDValue combineToCMPB(SDNode *N); 352 void foldBoolExts(SDValue &Res, SDNode *&N); 353 354 bool AllUsersSelectZero(SDNode *N); 355 void SwapAllSelectUsers(SDNode *N); 356 357 bool isOffsetMultipleOf(SDNode *N, unsigned Val) const; 358 void transferMemOperands(SDNode *N, SDNode *Result); 359 }; 360 361 } // end anonymous namespace 362 363 /// InsertVRSaveCode - Once the entire function has been instruction selected, 364 /// all virtual registers are created and all machine instructions are built, 365 /// check to see if we need to save/restore VRSAVE. If so, do it. 366 void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) { 367 // Check to see if this function uses vector registers, which means we have to 368 // save and restore the VRSAVE register and update it with the regs we use. 369 // 370 // In this case, there will be virtual registers of vector type created 371 // by the scheduler. Detect them now. 372 bool HasVectorVReg = false; 373 for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) { 374 unsigned Reg = TargetRegisterInfo::index2VirtReg(i); 375 if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) { 376 HasVectorVReg = true; 377 break; 378 } 379 } 380 if (!HasVectorVReg) return; // nothing to do. 381 382 // If we have a vector register, we want to emit code into the entry and exit 383 // blocks to save and restore the VRSAVE register. We do this here (instead 384 // of marking all vector instructions as clobbering VRSAVE) for two reasons: 385 // 386 // 1. This (trivially) reduces the load on the register allocator, by not 387 // having to represent the live range of the VRSAVE register. 388 // 2. 
This (more significantly) allows us to create a temporary virtual 389 // register to hold the saved VRSAVE value, allowing this temporary to be 390 // register allocated, instead of forcing it to be spilled to the stack. 391 392 // Create two vregs - one to hold the VRSAVE register that is live-in to the 393 // function and one for the value after having bits or'd into it. 394 unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); 395 unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); 396 397 const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo(); 398 MachineBasicBlock &EntryBB = *Fn.begin(); 399 DebugLoc dl; 400 // Emit the following code into the entry block: 401 // InVRSAVE = MFVRSAVE 402 // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE 403 // MTVRSAVE UpdatedVRSAVE 404 MachineBasicBlock::iterator IP = EntryBB.begin(); // Insert Point 405 BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE); 406 BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE), 407 UpdatedVRSAVE).addReg(InVRSAVE); 408 BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE); 409 410 // Find all return blocks, outputting a restore in each epilog. 411 for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { 412 if (BB->isReturnBlock()) { 413 IP = BB->end(); --IP; 414 415 // Skip over all terminator instructions, which are part of the return 416 // sequence. 417 MachineBasicBlock::iterator I2 = IP; 418 while (I2 != BB->begin() && (--I2)->isTerminator()) 419 IP = I2; 420 421 // Emit: MTVRSAVE InVRSave 422 BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE); 423 } 424 } 425 } 426 427 /// getGlobalBaseReg - Output the instructions required to put the 428 /// base address to use for accessing globals into a register. 
429 /// 430 SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { 431 if (!GlobalBaseReg) { 432 const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo(); 433 // Insert the set of GlobalBaseReg into the first MBB of the function 434 MachineBasicBlock &FirstMBB = MF->front(); 435 MachineBasicBlock::iterator MBBI = FirstMBB.begin(); 436 const Module *M = MF->getFunction().getParent(); 437 DebugLoc dl; 438 439 if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) { 440 if (PPCSubTarget->isTargetELF()) { 441 GlobalBaseReg = PPC::R30; 442 if (!PPCSubTarget->isSecurePlt() && 443 M->getPICLevel() == PICLevel::SmallPIC) { 444 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR)); 445 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); 446 MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true); 447 } else { 448 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR)); 449 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); 450 unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); 451 BuildMI(FirstMBB, MBBI, dl, 452 TII.get(PPC::UpdateGBR), GlobalBaseReg) 453 .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg); 454 MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true); 455 } 456 } else { 457 GlobalBaseReg = 458 RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass); 459 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR)); 460 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); 461 } 462 } else { 463 // We must ensure that this sequence is dominated by the prologue. 464 // FIXME: This is a bit of a big hammer since we don't get the benefits 465 // of shrink-wrapping whenever we emit this instruction. Considering 466 // this is used in any function where we emit a jump table, this may be 467 // a significant limitation. We should consider inserting this in the 468 // block where it is used and then commoning this sequence up if it 469 // appears in multiple places. 
470 // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of 471 // MovePCtoLR8. 472 MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true); 473 GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass); 474 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8)); 475 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg); 476 } 477 } 478 return CurDAG->getRegister(GlobalBaseReg, 479 PPCLowering->getPointerTy(CurDAG->getDataLayout())) 480 .getNode(); 481 } 482 483 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant 484 /// operand. If so Imm will receive the 32-bit value. 485 static bool isInt32Immediate(SDNode *N, unsigned &Imm) { 486 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) { 487 Imm = cast<ConstantSDNode>(N)->getZExtValue(); 488 return true; 489 } 490 return false; 491 } 492 493 /// isInt64Immediate - This method tests to see if the node is a 64-bit constant 494 /// operand. If so Imm will receive the 64-bit value. 495 static bool isInt64Immediate(SDNode *N, uint64_t &Imm) { 496 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) { 497 Imm = cast<ConstantSDNode>(N)->getZExtValue(); 498 return true; 499 } 500 return false; 501 } 502 503 // isInt32Immediate - This method tests to see if a constant operand. 504 // If so Imm will receive the 32 bit value. 505 static bool isInt32Immediate(SDValue N, unsigned &Imm) { 506 return isInt32Immediate(N.getNode(), Imm); 507 } 508 509 /// isInt64Immediate - This method tests to see if the value is a 64-bit 510 /// constant operand. If so Imm will receive the 64-bit value. 
511 static bool isInt64Immediate(SDValue N, uint64_t &Imm) { 512 return isInt64Immediate(N.getNode(), Imm); 513 } 514 515 static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo, 516 const SDValue &DestMBB) { 517 assert(isa<BasicBlockSDNode>(DestMBB)); 518 519 if (!FuncInfo->BPI) return PPC::BR_NO_HINT; 520 521 const BasicBlock *BB = FuncInfo->MBB->getBasicBlock(); 522 const Instruction *BBTerm = BB->getTerminator(); 523 524 if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT; 525 526 const BasicBlock *TBB = BBTerm->getSuccessor(0); 527 const BasicBlock *FBB = BBTerm->getSuccessor(1); 528 529 auto TProb = FuncInfo->BPI->getEdgeProbability(BB, TBB); 530 auto FProb = FuncInfo->BPI->getEdgeProbability(BB, FBB); 531 532 // We only want to handle cases which are easy to predict at static time, e.g. 533 // C++ throw statement, that is very likely not taken, or calling never 534 // returned function, e.g. stdlib exit(). So we set Threshold to filter 535 // unwanted cases. 536 // 537 // Below is LLVM branch weight table, we only want to handle case 1, 2 538 // 539 // Case Taken:Nontaken Example 540 // 1. Unreachable 1048575:1 C++ throw, stdlib exit(), 541 // 2. Invoke-terminating 1:1048575 542 // 3. Coldblock 4:64 __builtin_expect 543 // 4. Loop Branch 124:4 For loop 544 // 5. 
PH/ZH/FPH 20:12 545 const uint32_t Threshold = 10000; 546 547 if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb)) 548 return PPC::BR_NO_HINT; 549 550 LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo->Fn->getName() 551 << "::" << BB->getName() << "'\n" 552 << " -> " << TBB->getName() << ": " << TProb << "\n" 553 << " -> " << FBB->getName() << ": " << FProb << "\n"); 554 555 const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB); 556 557 // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities, 558 // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock 559 if (BBDN->getBasicBlock()->getBasicBlock() != TBB) 560 std::swap(TProb, FProb); 561 562 return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT; 563 } 564 565 // isOpcWithIntImmediate - This method tests to see if the node is a specific 566 // opcode and that it has a immediate integer right operand. 567 // If so Imm will receive the 32 bit value. 568 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { 569 return N->getOpcode() == Opc 570 && isInt32Immediate(N->getOperand(1).getNode(), Imm); 571 } 572 573 void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) { 574 SDLoc dl(SN); 575 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 576 SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0)); 577 unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8; 578 if (SN->hasOneUse()) 579 CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI, 580 getSmallIPtrImm(Offset, dl)); 581 else 582 ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI, 583 getSmallIPtrImm(Offset, dl))); 584 } 585 586 bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask, 587 bool isShiftMask, unsigned &SH, 588 unsigned &MB, unsigned &ME) { 589 // Don't even go down this path for i64, since different logic will be 590 // necessary for rldicl/rldicr/rldimi. 
591 if (N->getValueType(0) != MVT::i32) 592 return false; 593 594 unsigned Shift = 32; 595 unsigned Indeterminant = ~0; // bit mask marking indeterminant results 596 unsigned Opcode = N->getOpcode(); 597 if (N->getNumOperands() != 2 || 598 !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31)) 599 return false; 600 601 if (Opcode == ISD::SHL) { 602 // apply shift left to mask if it comes first 603 if (isShiftMask) Mask = Mask << Shift; 604 // determine which bits are made indeterminant by shift 605 Indeterminant = ~(0xFFFFFFFFu << Shift); 606 } else if (Opcode == ISD::SRL) { 607 // apply shift right to mask if it comes first 608 if (isShiftMask) Mask = Mask >> Shift; 609 // determine which bits are made indeterminant by shift 610 Indeterminant = ~(0xFFFFFFFFu >> Shift); 611 // adjust for the left rotate 612 Shift = 32 - Shift; 613 } else if (Opcode == ISD::ROTL) { 614 Indeterminant = 0; 615 } else { 616 return false; 617 } 618 619 // if the mask doesn't intersect any Indeterminant bits 620 if (Mask && !(Mask & Indeterminant)) { 621 SH = Shift & 31; 622 // make sure the mask is still a mask (wrap arounds may not be) 623 return isRunOfOnes(Mask, MB, ME); 624 } 625 return false; 626 } 627 628 bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) { 629 SDValue Base = ST->getBasePtr(); 630 if (Base.getOpcode() != PPCISD::ADD_TLS) 631 return false; 632 SDValue Offset = ST->getOffset(); 633 if (!Offset.isUndef()) 634 return false; 635 636 SDLoc dl(ST); 637 EVT MemVT = ST->getMemoryVT(); 638 EVT RegVT = ST->getValue().getValueType(); 639 640 unsigned Opcode; 641 switch (MemVT.getSimpleVT().SimpleTy) { 642 default: 643 return false; 644 case MVT::i8: { 645 Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS; 646 break; 647 } 648 case MVT::i16: { 649 Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS; 650 break; 651 } 652 case MVT::i32: { 653 Opcode = (RegVT == MVT::i32) ? 
PPC::STWXTLS_32 : PPC::STWXTLS; 654 break; 655 } 656 case MVT::i64: { 657 Opcode = PPC::STDXTLS; 658 break; 659 } 660 } 661 SDValue Chain = ST->getChain(); 662 SDVTList VTs = ST->getVTList(); 663 SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1), 664 Chain}; 665 SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops); 666 transferMemOperands(ST, MN); 667 ReplaceNode(ST, MN); 668 return true; 669 } 670 671 bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) { 672 SDValue Base = LD->getBasePtr(); 673 if (Base.getOpcode() != PPCISD::ADD_TLS) 674 return false; 675 SDValue Offset = LD->getOffset(); 676 if (!Offset.isUndef()) 677 return false; 678 679 SDLoc dl(LD); 680 EVT MemVT = LD->getMemoryVT(); 681 EVT RegVT = LD->getValueType(0); 682 unsigned Opcode; 683 switch (MemVT.getSimpleVT().SimpleTy) { 684 default: 685 return false; 686 case MVT::i8: { 687 Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS; 688 break; 689 } 690 case MVT::i16: { 691 Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS; 692 break; 693 } 694 case MVT::i32: { 695 Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS; 696 break; 697 } 698 case MVT::i64: { 699 Opcode = PPC::LDXTLS; 700 break; 701 } 702 } 703 SDValue Chain = LD->getChain(); 704 SDVTList VTs = LD->getVTList(); 705 SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain}; 706 SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops); 707 transferMemOperands(LD, MN); 708 ReplaceNode(LD, MN); 709 return true; 710 } 711 712 /// Turn an or of two masked values into the rotate left word immediate then 713 /// mask insert (rlwimi) instruction. 
714 bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) { 715 SDValue Op0 = N->getOperand(0); 716 SDValue Op1 = N->getOperand(1); 717 SDLoc dl(N); 718 719 KnownBits LKnown = CurDAG->computeKnownBits(Op0); 720 KnownBits RKnown = CurDAG->computeKnownBits(Op1); 721 722 unsigned TargetMask = LKnown.Zero.getZExtValue(); 723 unsigned InsertMask = RKnown.Zero.getZExtValue(); 724 725 if ((TargetMask | InsertMask) == 0xFFFFFFFF) { 726 unsigned Op0Opc = Op0.getOpcode(); 727 unsigned Op1Opc = Op1.getOpcode(); 728 unsigned Value, SH = 0; 729 TargetMask = ~TargetMask; 730 InsertMask = ~InsertMask; 731 732 // If the LHS has a foldable shift and the RHS does not, then swap it to the 733 // RHS so that we can fold the shift into the insert. 734 if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) { 735 if (Op0.getOperand(0).getOpcode() == ISD::SHL || 736 Op0.getOperand(0).getOpcode() == ISD::SRL) { 737 if (Op1.getOperand(0).getOpcode() != ISD::SHL && 738 Op1.getOperand(0).getOpcode() != ISD::SRL) { 739 std::swap(Op0, Op1); 740 std::swap(Op0Opc, Op1Opc); 741 std::swap(TargetMask, InsertMask); 742 } 743 } 744 } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) { 745 if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL && 746 Op1.getOperand(0).getOpcode() != ISD::SRL) { 747 std::swap(Op0, Op1); 748 std::swap(Op0Opc, Op1Opc); 749 std::swap(TargetMask, InsertMask); 750 } 751 } 752 753 unsigned MB, ME; 754 if (isRunOfOnes(InsertMask, MB, ME)) { 755 if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) && 756 isInt32Immediate(Op1.getOperand(1), Value)) { 757 Op1 = Op1.getOperand(0); 758 SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value; 759 } 760 if (Op1Opc == ISD::AND) { 761 // The AND mask might not be a constant, and we need to make sure that 762 // if we're going to fold the masking with the insert, all bits not 763 // know to be zero in the mask are known to be one. 
764 KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1)); 765 bool CanFoldMask = InsertMask == MKnown.One.getZExtValue(); 766 767 unsigned SHOpc = Op1.getOperand(0).getOpcode(); 768 if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask && 769 isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) { 770 // Note that Value must be in range here (less than 32) because 771 // otherwise there would not be any bits set in InsertMask. 772 Op1 = Op1.getOperand(0).getOperand(0); 773 SH = (SHOpc == ISD::SHL) ? Value : 32 - Value; 774 } 775 } 776 777 SH &= 31; 778 SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl), 779 getI32Imm(ME, dl) }; 780 ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops)); 781 return true; 782 } 783 } 784 return false; 785 } 786 787 // Predict the number of instructions that would be generated by calling 788 // selectI64Imm(N). 789 static unsigned selectI64ImmInstrCountDirect(int64_t Imm) { 790 // Assume no remaining bits. 791 unsigned Remainder = 0; 792 // Assume no shift required. 793 unsigned Shift = 0; 794 795 // If it can't be represented as a 32 bit value. 796 if (!isInt<32>(Imm)) { 797 Shift = countTrailingZeros<uint64_t>(Imm); 798 int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift; 799 800 // If the shifted value fits 32 bits. 801 if (isInt<32>(ImmSh)) { 802 // Go with the shifted value. 803 Imm = ImmSh; 804 } else { 805 // Still stuck with a 64 bit value. 806 Remainder = Imm; 807 Shift = 32; 808 Imm >>= 32; 809 } 810 } 811 812 // Intermediate operand. 813 unsigned Result = 0; 814 815 // Handle first 32 bits. 816 unsigned Lo = Imm & 0xFFFF; 817 818 // Simple value. 819 if (isInt<16>(Imm)) { 820 // Just the Lo bits. 821 ++Result; 822 } else if (Lo) { 823 // Handle the Hi bits and Lo bits. 824 Result += 2; 825 } else { 826 // Just the Hi bits. 827 ++Result; 828 } 829 830 // If no shift, we're done. 
831 if (!Shift) return Result; 832 833 // If Hi word == Lo word, 834 // we can use rldimi to insert the Lo word into Hi word. 835 if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) { 836 ++Result; 837 return Result; 838 } 839 840 // Shift for next step if the upper 32-bits were not zero. 841 if (Imm) 842 ++Result; 843 844 // Add in the last bits as required. 845 if ((Remainder >> 16) & 0xFFFF) 846 ++Result; 847 if (Remainder & 0xFFFF) 848 ++Result; 849 850 return Result; 851 } 852 853 static uint64_t Rot64(uint64_t Imm, unsigned R) { 854 return (Imm << R) | (Imm >> (64 - R)); 855 } 856 857 static unsigned selectI64ImmInstrCount(int64_t Imm) { 858 unsigned Count = selectI64ImmInstrCountDirect(Imm); 859 860 // If the instruction count is 1 or 2, we do not need further analysis 861 // since rotate + load constant requires at least 2 instructions. 862 if (Count <= 2) 863 return Count; 864 865 for (unsigned r = 1; r < 63; ++r) { 866 uint64_t RImm = Rot64(Imm, r); 867 unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1; 868 Count = std::min(Count, RCount); 869 870 // See comments in selectI64Imm for an explanation of the logic below. 871 unsigned LS = findLastSet(RImm); 872 if (LS != r-1) 873 continue; 874 875 uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1)); 876 uint64_t RImmWithOnes = RImm | OnesMask; 877 878 RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1; 879 Count = std::min(Count, RCount); 880 } 881 882 return Count; 883 } 884 885 // Select a 64-bit constant. For cost-modeling purposes, selectI64ImmInstrCount 886 // (above) needs to be kept in sync with this function. 887 static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl, 888 int64_t Imm) { 889 // Assume no remaining bits. 890 unsigned Remainder = 0; 891 // Assume no shift required. 892 unsigned Shift = 0; 893 894 // If it can't be represented as a 32 bit value. 
895 if (!isInt<32>(Imm)) { 896 Shift = countTrailingZeros<uint64_t>(Imm); 897 int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift; 898 899 // If the shifted value fits 32 bits. 900 if (isInt<32>(ImmSh)) { 901 // Go with the shifted value. 902 Imm = ImmSh; 903 } else { 904 // Still stuck with a 64 bit value. 905 Remainder = Imm; 906 Shift = 32; 907 Imm >>= 32; 908 } 909 } 910 911 // Intermediate operand. 912 SDNode *Result; 913 914 // Handle first 32 bits. 915 unsigned Lo = Imm & 0xFFFF; 916 unsigned Hi = (Imm >> 16) & 0xFFFF; 917 918 auto getI32Imm = [CurDAG, dl](unsigned Imm) { 919 return CurDAG->getTargetConstant(Imm, dl, MVT::i32); 920 }; 921 922 // Simple value. 923 if (isInt<16>(Imm)) { 924 uint64_t SextImm = SignExtend64(Lo, 16); 925 SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64); 926 // Just the Lo bits. 927 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm); 928 } else if (Lo) { 929 // Handle the Hi bits. 930 unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8; 931 Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi)); 932 // And Lo bits. 933 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, 934 SDValue(Result, 0), getI32Imm(Lo)); 935 } else { 936 // Just the Hi bits. 937 Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi)); 938 } 939 940 // If no shift, we're done. 941 if (!Shift) return Result; 942 943 // If Hi word == Lo word, 944 // we can use rldimi to insert the Lo word into Hi word. 945 if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) { 946 SDValue Ops[] = 947 { SDValue(Result, 0), SDValue(Result, 0), getI32Imm(Shift), getI32Imm(0)}; 948 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops); 949 } 950 951 // Shift for next step if the upper 32-bits were not zero. 952 if (Imm) { 953 Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, 954 SDValue(Result, 0), 955 getI32Imm(Shift), 956 getI32Imm(63 - Shift)); 957 } 958 959 // Add in the last bits as required. 
960 if ((Hi = (Remainder >> 16) & 0xFFFF)) { 961 Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, 962 SDValue(Result, 0), getI32Imm(Hi)); 963 } 964 if ((Lo = Remainder & 0xFFFF)) { 965 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, 966 SDValue(Result, 0), getI32Imm(Lo)); 967 } 968 969 return Result; 970 } 971 972 static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, 973 int64_t Imm) { 974 unsigned Count = selectI64ImmInstrCountDirect(Imm); 975 976 // If the instruction count is 1 or 2, we do not need further analysis 977 // since rotate + load constant requires at least 2 instructions. 978 if (Count <= 2) 979 return selectI64ImmDirect(CurDAG, dl, Imm); 980 981 unsigned RMin = 0; 982 983 int64_t MatImm; 984 unsigned MaskEnd; 985 986 for (unsigned r = 1; r < 63; ++r) { 987 uint64_t RImm = Rot64(Imm, r); 988 unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1; 989 if (RCount < Count) { 990 Count = RCount; 991 RMin = r; 992 MatImm = RImm; 993 MaskEnd = 63; 994 } 995 996 // If the immediate to generate has many trailing zeros, it might be 997 // worthwhile to generate a rotated value with too many leading ones 998 // (because that's free with li/lis's sign-extension semantics), and then 999 // mask them off after rotation. 1000 1001 unsigned LS = findLastSet(RImm); 1002 // We're adding (63-LS) higher-order ones, and we expect to mask them off 1003 // after performing the inverse rotation by (64-r). 
So we need that: 1004 // 63-LS == 64-r => LS == r-1 1005 if (LS != r-1) 1006 continue; 1007 1008 uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1)); 1009 uint64_t RImmWithOnes = RImm | OnesMask; 1010 1011 RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1; 1012 if (RCount < Count) { 1013 Count = RCount; 1014 RMin = r; 1015 MatImm = RImmWithOnes; 1016 MaskEnd = LS; 1017 } 1018 } 1019 1020 if (!RMin) 1021 return selectI64ImmDirect(CurDAG, dl, Imm); 1022 1023 auto getI32Imm = [CurDAG, dl](unsigned Imm) { 1024 return CurDAG->getTargetConstant(Imm, dl, MVT::i32); 1025 }; 1026 1027 SDValue Val = SDValue(selectI64ImmDirect(CurDAG, dl, MatImm), 0); 1028 return CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Val, 1029 getI32Imm(64 - RMin), getI32Imm(MaskEnd)); 1030 } 1031 1032 static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) { 1033 unsigned MaxTruncation = 0; 1034 // Cannot use range-based for loop here as we need the actual use (i.e. we 1035 // need the operand number corresponding to the use). A range-based for 1036 // will unbox the use and provide an SDNode*. 1037 for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end(); 1038 Use != UseEnd; ++Use) { 1039 unsigned Opc = 1040 Use->isMachineOpcode() ? 
Use->getMachineOpcode() : Use->getOpcode(); 1041 switch (Opc) { 1042 default: return 0; 1043 case ISD::TRUNCATE: 1044 if (Use->isMachineOpcode()) 1045 return 0; 1046 MaxTruncation = 1047 std::max(MaxTruncation, Use->getValueType(0).getSizeInBits()); 1048 continue; 1049 case ISD::STORE: { 1050 if (Use->isMachineOpcode()) 1051 return 0; 1052 StoreSDNode *STN = cast<StoreSDNode>(*Use); 1053 unsigned MemVTSize = STN->getMemoryVT().getSizeInBits(); 1054 if (MemVTSize == 64 || Use.getOperandNo() != 0) 1055 return 0; 1056 MaxTruncation = std::max(MaxTruncation, MemVTSize); 1057 continue; 1058 } 1059 case PPC::STW8: 1060 case PPC::STWX8: 1061 case PPC::STWU8: 1062 case PPC::STWUX8: 1063 if (Use.getOperandNo() != 0) 1064 return 0; 1065 MaxTruncation = std::max(MaxTruncation, 32u); 1066 continue; 1067 case PPC::STH8: 1068 case PPC::STHX8: 1069 case PPC::STHU8: 1070 case PPC::STHUX8: 1071 if (Use.getOperandNo() != 0) 1072 return 0; 1073 MaxTruncation = std::max(MaxTruncation, 16u); 1074 continue; 1075 case PPC::STB8: 1076 case PPC::STBX8: 1077 case PPC::STBU8: 1078 case PPC::STBUX8: 1079 if (Use.getOperandNo() != 0) 1080 return 0; 1081 MaxTruncation = std::max(MaxTruncation, 8u); 1082 continue; 1083 } 1084 } 1085 return MaxTruncation; 1086 } 1087 1088 // Select a 64-bit constant. 1089 static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) { 1090 SDLoc dl(N); 1091 1092 // Get 64 bit value. 
1093 int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue(); 1094 if (unsigned MinSize = allUsesTruncate(CurDAG, N)) { 1095 uint64_t SextImm = SignExtend64(Imm, MinSize); 1096 SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64); 1097 if (isInt<16>(SextImm)) 1098 return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm); 1099 } 1100 return selectI64Imm(CurDAG, dl, Imm); 1101 } 1102 1103 namespace { 1104 1105 class BitPermutationSelector { 1106 struct ValueBit { 1107 SDValue V; 1108 1109 // The bit number in the value, using a convention where bit 0 is the 1110 // lowest-order bit. 1111 unsigned Idx; 1112 1113 // ConstZero means a bit we need to mask off. 1114 // Variable is a bit comes from an input variable. 1115 // VariableKnownToBeZero is also a bit comes from an input variable, 1116 // but it is known to be already zero. So we do not need to mask them. 1117 enum Kind { 1118 ConstZero, 1119 Variable, 1120 VariableKnownToBeZero 1121 } K; 1122 1123 ValueBit(SDValue V, unsigned I, Kind K = Variable) 1124 : V(V), Idx(I), K(K) {} 1125 ValueBit(Kind K = Variable) 1126 : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {} 1127 1128 bool isZero() const { 1129 return K == ConstZero || K == VariableKnownToBeZero; 1130 } 1131 1132 bool hasValue() const { 1133 return K == Variable || K == VariableKnownToBeZero; 1134 } 1135 1136 SDValue getValue() const { 1137 assert(hasValue() && "Cannot get the value of a constant bit"); 1138 return V; 1139 } 1140 1141 unsigned getValueBitIndex() const { 1142 assert(hasValue() && "Cannot get the value bit index of a constant bit"); 1143 return Idx; 1144 } 1145 }; 1146 1147 // A bit group has the same underlying value and the same rotate factor. 
1148 struct BitGroup { 1149 SDValue V; 1150 unsigned RLAmt; 1151 unsigned StartIdx, EndIdx; 1152 1153 // This rotation amount assumes that the lower 32 bits of the quantity are 1154 // replicated in the high 32 bits by the rotation operator (which is done 1155 // by rlwinm and friends in 64-bit mode). 1156 bool Repl32; 1157 // Did converting to Repl32 == true change the rotation factor? If it did, 1158 // it decreased it by 32. 1159 bool Repl32CR; 1160 // Was this group coalesced after setting Repl32 to true? 1161 bool Repl32Coalesced; 1162 1163 BitGroup(SDValue V, unsigned R, unsigned S, unsigned E) 1164 : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false), 1165 Repl32Coalesced(false) { 1166 LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R 1167 << " [" << S << ", " << E << "]\n"); 1168 } 1169 }; 1170 1171 // Information on each (Value, RLAmt) pair (like the number of groups 1172 // associated with each) used to choose the lowering method. 1173 struct ValueRotInfo { 1174 SDValue V; 1175 unsigned RLAmt = std::numeric_limits<unsigned>::max(); 1176 unsigned NumGroups = 0; 1177 unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max(); 1178 bool Repl32 = false; 1179 1180 ValueRotInfo() = default; 1181 1182 // For sorting (in reverse order) by NumGroups, and then by 1183 // FirstGroupStartIdx. 1184 bool operator < (const ValueRotInfo &Other) const { 1185 // We need to sort so that the non-Repl32 come first because, when we're 1186 // doing masking, the Repl32 bit groups might be subsumed into the 64-bit 1187 // masking operation. 
1188 if (Repl32 < Other.Repl32) 1189 return true; 1190 else if (Repl32 > Other.Repl32) 1191 return false; 1192 else if (NumGroups > Other.NumGroups) 1193 return true; 1194 else if (NumGroups < Other.NumGroups) 1195 return false; 1196 else if (RLAmt == 0 && Other.RLAmt != 0) 1197 return true; 1198 else if (RLAmt != 0 && Other.RLAmt == 0) 1199 return false; 1200 else if (FirstGroupStartIdx < Other.FirstGroupStartIdx) 1201 return true; 1202 return false; 1203 } 1204 }; 1205 1206 using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>; 1207 using ValueBitsMemoizer = 1208 DenseMap<SDValue, std::unique_ptr<ValueBitsMemoizedValue>>; 1209 ValueBitsMemoizer Memoizer; 1210 1211 // Return a pair of bool and a SmallVector pointer to a memoization entry. 1212 // The bool is true if something interesting was deduced, otherwise if we're 1213 // providing only a generic representation of V (or something else likewise 1214 // uninteresting for instruction selection) through the SmallVector. 1215 std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V, 1216 unsigned NumBits) { 1217 auto &ValueEntry = Memoizer[V]; 1218 if (ValueEntry) 1219 return std::make_pair(ValueEntry->first, &ValueEntry->second); 1220 ValueEntry.reset(new ValueBitsMemoizedValue()); 1221 bool &Interesting = ValueEntry->first; 1222 SmallVector<ValueBit, 64> &Bits = ValueEntry->second; 1223 Bits.resize(NumBits); 1224 1225 switch (V.getOpcode()) { 1226 default: break; 1227 case ISD::ROTL: 1228 if (isa<ConstantSDNode>(V.getOperand(1))) { 1229 unsigned RotAmt = V.getConstantOperandVal(1); 1230 1231 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; 1232 1233 for (unsigned i = 0; i < NumBits; ++i) 1234 Bits[i] = LHSBits[i < RotAmt ? 
i + (NumBits - RotAmt) : i - RotAmt]; 1235 1236 return std::make_pair(Interesting = true, &Bits); 1237 } 1238 break; 1239 case ISD::SHL: 1240 if (isa<ConstantSDNode>(V.getOperand(1))) { 1241 unsigned ShiftAmt = V.getConstantOperandVal(1); 1242 1243 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; 1244 1245 for (unsigned i = ShiftAmt; i < NumBits; ++i) 1246 Bits[i] = LHSBits[i - ShiftAmt]; 1247 1248 for (unsigned i = 0; i < ShiftAmt; ++i) 1249 Bits[i] = ValueBit(ValueBit::ConstZero); 1250 1251 return std::make_pair(Interesting = true, &Bits); 1252 } 1253 break; 1254 case ISD::SRL: 1255 if (isa<ConstantSDNode>(V.getOperand(1))) { 1256 unsigned ShiftAmt = V.getConstantOperandVal(1); 1257 1258 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; 1259 1260 for (unsigned i = 0; i < NumBits - ShiftAmt; ++i) 1261 Bits[i] = LHSBits[i + ShiftAmt]; 1262 1263 for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i) 1264 Bits[i] = ValueBit(ValueBit::ConstZero); 1265 1266 return std::make_pair(Interesting = true, &Bits); 1267 } 1268 break; 1269 case ISD::AND: 1270 if (isa<ConstantSDNode>(V.getOperand(1))) { 1271 uint64_t Mask = V.getConstantOperandVal(1); 1272 1273 const SmallVector<ValueBit, 64> *LHSBits; 1274 // Mark this as interesting, only if the LHS was also interesting. This 1275 // prevents the overall procedure from matching a single immediate 'and' 1276 // (which is non-optimal because such an and might be folded with other 1277 // things if we don't select it here). 1278 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits); 1279 1280 for (unsigned i = 0; i < NumBits; ++i) 1281 if (((Mask >> i) & 1) == 1) 1282 Bits[i] = (*LHSBits)[i]; 1283 else { 1284 // AND instruction masks this bit. If the input is already zero, 1285 // we have nothing to do here. Otherwise, make the bit ConstZero. 
1286 if ((*LHSBits)[i].isZero()) 1287 Bits[i] = (*LHSBits)[i]; 1288 else 1289 Bits[i] = ValueBit(ValueBit::ConstZero); 1290 } 1291 1292 return std::make_pair(Interesting, &Bits); 1293 } 1294 break; 1295 case ISD::OR: { 1296 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; 1297 const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second; 1298 1299 bool AllDisjoint = true; 1300 SDValue LastVal = SDValue(); 1301 unsigned LastIdx = 0; 1302 for (unsigned i = 0; i < NumBits; ++i) { 1303 if (LHSBits[i].isZero() && RHSBits[i].isZero()) { 1304 // If both inputs are known to be zero and one is ConstZero and 1305 // another is VariableKnownToBeZero, we can select whichever 1306 // we like. To minimize the number of bit groups, we select 1307 // VariableKnownToBeZero if this bit is the next bit of the same 1308 // input variable from the previous bit. Otherwise, we select 1309 // ConstZero. 1310 if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal && 1311 LHSBits[i].getValueBitIndex() == LastIdx + 1) 1312 Bits[i] = LHSBits[i]; 1313 else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal && 1314 RHSBits[i].getValueBitIndex() == LastIdx + 1) 1315 Bits[i] = RHSBits[i]; 1316 else 1317 Bits[i] = ValueBit(ValueBit::ConstZero); 1318 } 1319 else if (LHSBits[i].isZero()) 1320 Bits[i] = RHSBits[i]; 1321 else if (RHSBits[i].isZero()) 1322 Bits[i] = LHSBits[i]; 1323 else { 1324 AllDisjoint = false; 1325 break; 1326 } 1327 // We remember the value and bit index of this bit. 1328 if (Bits[i].hasValue()) { 1329 LastVal = Bits[i].getValue(); 1330 LastIdx = Bits[i].getValueBitIndex(); 1331 } 1332 else { 1333 if (LastVal) LastVal = SDValue(); 1334 LastIdx = 0; 1335 } 1336 } 1337 1338 if (!AllDisjoint) 1339 break; 1340 1341 return std::make_pair(Interesting = true, &Bits); 1342 } 1343 case ISD::ZERO_EXTEND: { 1344 // We support only the case with zero extension from i32 to i64 so far. 
1345 if (V.getValueType() != MVT::i64 || 1346 V.getOperand(0).getValueType() != MVT::i32) 1347 break; 1348 1349 const SmallVector<ValueBit, 64> *LHSBits; 1350 const unsigned NumOperandBits = 32; 1351 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), 1352 NumOperandBits); 1353 1354 for (unsigned i = 0; i < NumOperandBits; ++i) 1355 Bits[i] = (*LHSBits)[i]; 1356 1357 for (unsigned i = NumOperandBits; i < NumBits; ++i) 1358 Bits[i] = ValueBit(ValueBit::ConstZero); 1359 1360 return std::make_pair(Interesting, &Bits); 1361 } 1362 case ISD::TRUNCATE: { 1363 EVT FromType = V.getOperand(0).getValueType(); 1364 EVT ToType = V.getValueType(); 1365 // We support only the case with truncate from i64 to i32. 1366 if (FromType != MVT::i64 || ToType != MVT::i32) 1367 break; 1368 const unsigned NumAllBits = FromType.getSizeInBits(); 1369 SmallVector<ValueBit, 64> *InBits; 1370 std::tie(Interesting, InBits) = getValueBits(V.getOperand(0), 1371 NumAllBits); 1372 const unsigned NumValidBits = ToType.getSizeInBits(); 1373 1374 // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value. 1375 // So, we cannot include this truncate. 1376 bool UseUpper32bit = false; 1377 for (unsigned i = 0; i < NumValidBits; ++i) 1378 if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) { 1379 UseUpper32bit = true; 1380 break; 1381 } 1382 if (UseUpper32bit) 1383 break; 1384 1385 for (unsigned i = 0; i < NumValidBits; ++i) 1386 Bits[i] = (*InBits)[i]; 1387 1388 return std::make_pair(Interesting, &Bits); 1389 } 1390 case ISD::AssertZext: { 1391 // For AssertZext, we look through the operand and 1392 // mark the bits known to be zero. 
1393 const SmallVector<ValueBit, 64> *LHSBits; 1394 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), 1395 NumBits); 1396 1397 EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT(); 1398 const unsigned NumValidBits = FromType.getSizeInBits(); 1399 for (unsigned i = 0; i < NumValidBits; ++i) 1400 Bits[i] = (*LHSBits)[i]; 1401 1402 // These bits are known to be zero. 1403 for (unsigned i = NumValidBits; i < NumBits; ++i) 1404 Bits[i] = ValueBit((*LHSBits)[i].getValue(), 1405 (*LHSBits)[i].getValueBitIndex(), 1406 ValueBit::VariableKnownToBeZero); 1407 1408 return std::make_pair(Interesting, &Bits); 1409 } 1410 case ISD::LOAD: 1411 LoadSDNode *LD = cast<LoadSDNode>(V); 1412 if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) { 1413 EVT VT = LD->getMemoryVT(); 1414 const unsigned NumValidBits = VT.getSizeInBits(); 1415 1416 for (unsigned i = 0; i < NumValidBits; ++i) 1417 Bits[i] = ValueBit(V, i); 1418 1419 // These bits are known to be zero. 1420 for (unsigned i = NumValidBits; i < NumBits; ++i) 1421 Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero); 1422 1423 // Zero-extending load itself cannot be optimized. So, it is not 1424 // interesting by itself though it gives useful information. 1425 return std::make_pair(Interesting = false, &Bits); 1426 } 1427 break; 1428 } 1429 1430 for (unsigned i = 0; i < NumBits; ++i) 1431 Bits[i] = ValueBit(V, i); 1432 1433 return std::make_pair(Interesting = false, &Bits); 1434 } 1435 1436 // For each value (except the constant ones), compute the left-rotate amount 1437 // to get it from its original to final position. 
1438 void computeRotationAmounts() { 1439 NeedMask = false; 1440 RLAmt.resize(Bits.size()); 1441 for (unsigned i = 0; i < Bits.size(); ++i) 1442 if (Bits[i].hasValue()) { 1443 unsigned VBI = Bits[i].getValueBitIndex(); 1444 if (i >= VBI) 1445 RLAmt[i] = i - VBI; 1446 else 1447 RLAmt[i] = Bits.size() - (VBI - i); 1448 } else if (Bits[i].isZero()) { 1449 NeedMask = true; 1450 RLAmt[i] = UINT32_MAX; 1451 } else { 1452 llvm_unreachable("Unknown value bit type"); 1453 } 1454 } 1455 1456 // Collect groups of consecutive bits with the same underlying value and 1457 // rotation factor. If we're doing late masking, we ignore zeros, otherwise 1458 // they break up groups. 1459 void collectBitGroups(bool LateMask) { 1460 BitGroups.clear(); 1461 1462 unsigned LastRLAmt = RLAmt[0]; 1463 SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue(); 1464 unsigned LastGroupStartIdx = 0; 1465 bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue(); 1466 for (unsigned i = 1; i < Bits.size(); ++i) { 1467 unsigned ThisRLAmt = RLAmt[i]; 1468 SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue(); 1469 if (LateMask && !ThisValue) { 1470 ThisValue = LastValue; 1471 ThisRLAmt = LastRLAmt; 1472 // If we're doing late masking, then the first bit group always starts 1473 // at zero (even if the first bits were zero). 1474 if (BitGroups.empty()) 1475 LastGroupStartIdx = 0; 1476 } 1477 1478 // If this bit is known to be zero and the current group is a bit group 1479 // of zeros, we do not need to terminate the current bit group even the 1480 // Value or RLAmt does not match here. Instead, we terminate this group 1481 // when the first non-zero bit appears later. 1482 if (IsGroupOfZeros && Bits[i].isZero()) 1483 continue; 1484 1485 // If this bit has the same underlying value and the same rotate factor as 1486 // the last one, then they're part of the same group. 
1487 if (ThisRLAmt == LastRLAmt && ThisValue == LastValue) 1488 // We cannot continue the current group if this bits is not known to 1489 // be zero in a bit group of zeros. 1490 if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero())) 1491 continue; 1492 1493 if (LastValue.getNode()) 1494 BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx, 1495 i-1)); 1496 LastRLAmt = ThisRLAmt; 1497 LastValue = ThisValue; 1498 LastGroupStartIdx = i; 1499 IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue(); 1500 } 1501 if (LastValue.getNode()) 1502 BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx, 1503 Bits.size()-1)); 1504 1505 if (BitGroups.empty()) 1506 return; 1507 1508 // We might be able to combine the first and last groups. 1509 if (BitGroups.size() > 1) { 1510 // If the first and last groups are the same, then remove the first group 1511 // in favor of the last group, making the ending index of the last group 1512 // equal to the ending index of the to-be-removed first group. 1513 if (BitGroups[0].StartIdx == 0 && 1514 BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 && 1515 BitGroups[0].V == BitGroups[BitGroups.size()-1].V && 1516 BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) { 1517 LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n"); 1518 BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx; 1519 BitGroups.erase(BitGroups.begin()); 1520 } 1521 } 1522 } 1523 1524 // Take all (SDValue, RLAmt) pairs and sort them by the number of groups 1525 // associated with each. If the number of groups are same, we prefer a group 1526 // which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate 1527 // instruction. If there is a degeneracy, pick the one that occurs 1528 // first (in the final value). 1529 void collectValueRotInfo() { 1530 ValueRots.clear(); 1531 1532 for (auto &BG : BitGroups) { 1533 unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 
64 : 0); 1534 ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)]; 1535 VRI.V = BG.V; 1536 VRI.RLAmt = BG.RLAmt; 1537 VRI.Repl32 = BG.Repl32; 1538 VRI.NumGroups += 1; 1539 VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx); 1540 } 1541 1542 // Now that we've collected the various ValueRotInfo instances, we need to 1543 // sort them. 1544 ValueRotsVec.clear(); 1545 for (auto &I : ValueRots) { 1546 ValueRotsVec.push_back(I.second); 1547 } 1548 llvm::sort(ValueRotsVec); 1549 } 1550 1551 // In 64-bit mode, rlwinm and friends have a rotation operator that 1552 // replicates the low-order 32 bits into the high-order 32-bits. The mask 1553 // indices of these instructions can only be in the lower 32 bits, so they 1554 // can only represent some 64-bit bit groups. However, when they can be used, 1555 // the 32-bit replication can be used to represent, as a single bit group, 1556 // otherwise separate bit groups. We'll convert to replicated-32-bit bit 1557 // groups when possible. Returns true if any of the bit groups were 1558 // converted. 1559 void assignRepl32BitGroups() { 1560 // If we have bits like this: 1561 // 1562 // Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 1563 // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24 1564 // Groups: | RLAmt = 8 | RLAmt = 40 | 1565 // 1566 // But, making use of a 32-bit operation that replicates the low-order 32 1567 // bits into the high-order 32 bits, this can be one bit group with a RLAmt 1568 // of 8. 
1569 1570 auto IsAllLow32 = [this](BitGroup & BG) { 1571 if (BG.StartIdx <= BG.EndIdx) { 1572 for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) { 1573 if (!Bits[i].hasValue()) 1574 continue; 1575 if (Bits[i].getValueBitIndex() >= 32) 1576 return false; 1577 } 1578 } else { 1579 for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) { 1580 if (!Bits[i].hasValue()) 1581 continue; 1582 if (Bits[i].getValueBitIndex() >= 32) 1583 return false; 1584 } 1585 for (unsigned i = 0; i <= BG.EndIdx; ++i) { 1586 if (!Bits[i].hasValue()) 1587 continue; 1588 if (Bits[i].getValueBitIndex() >= 32) 1589 return false; 1590 } 1591 } 1592 1593 return true; 1594 }; 1595 1596 for (auto &BG : BitGroups) { 1597 // If this bit group has RLAmt of 0 and will not be merged with 1598 // another bit group, we don't benefit from Repl32. We don't mark 1599 // such group to give more freedom for later instruction selection. 1600 if (BG.RLAmt == 0) { 1601 auto PotentiallyMerged = [this](BitGroup & BG) { 1602 for (auto &BG2 : BitGroups) 1603 if (&BG != &BG2 && BG.V == BG2.V && 1604 (BG2.RLAmt == 0 || BG2.RLAmt == 32)) 1605 return true; 1606 return false; 1607 }; 1608 if (!PotentiallyMerged(BG)) 1609 continue; 1610 } 1611 if (BG.StartIdx < 32 && BG.EndIdx < 32) { 1612 if (IsAllLow32(BG)) { 1613 if (BG.RLAmt >= 32) { 1614 BG.RLAmt -= 32; 1615 BG.Repl32CR = true; 1616 } 1617 1618 BG.Repl32 = true; 1619 1620 LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for " 1621 << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " [" 1622 << BG.StartIdx << ", " << BG.EndIdx << "]\n"); 1623 } 1624 } 1625 } 1626 1627 // Now walk through the bit groups, consolidating where possible. 1628 for (auto I = BitGroups.begin(); I != BitGroups.end();) { 1629 // We might want to remove this bit group by merging it with the previous 1630 // group (which might be the ending group). 1631 auto IP = (I == BitGroups.begin()) ? 
1632 std::prev(BitGroups.end()) : std::prev(I); 1633 if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt && 1634 I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) { 1635 1636 LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for " 1637 << I->V.getNode() << " RLAmt = " << I->RLAmt << " [" 1638 << I->StartIdx << ", " << I->EndIdx 1639 << "] with group with range [" << IP->StartIdx << ", " 1640 << IP->EndIdx << "]\n"); 1641 1642 IP->EndIdx = I->EndIdx; 1643 IP->Repl32CR = IP->Repl32CR || I->Repl32CR; 1644 IP->Repl32Coalesced = true; 1645 I = BitGroups.erase(I); 1646 continue; 1647 } else { 1648 // There is a special case worth handling: If there is a single group 1649 // covering the entire upper 32 bits, and it can be merged with both 1650 // the next and previous groups (which might be the same group), then 1651 // do so. If it is the same group (so there will be only one group in 1652 // total), then we need to reverse the order of the range so that it 1653 // covers the entire 64 bits. 1654 if (I->StartIdx == 32 && I->EndIdx == 63) { 1655 assert(std::next(I) == BitGroups.end() && 1656 "bit group ends at index 63 but there is another?"); 1657 auto IN = BitGroups.begin(); 1658 1659 if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V && 1660 (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt && 1661 IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP && 1662 IsAllLow32(*I)) { 1663 1664 LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode() 1665 << " RLAmt = " << I->RLAmt << " [" << I->StartIdx 1666 << ", " << I->EndIdx 1667 << "] with 32-bit replicated groups with ranges [" 1668 << IP->StartIdx << ", " << IP->EndIdx << "] and [" 1669 << IN->StartIdx << ", " << IN->EndIdx << "]\n"); 1670 1671 if (IP == IN) { 1672 // There is only one other group; change it to cover the whole 1673 // range (backward, so that it can still be Repl32 but cover the 1674 // whole 64-bit range). 
1675 IP->StartIdx = 31; 1676 IP->EndIdx = 30; 1677 IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32; 1678 IP->Repl32Coalesced = true; 1679 I = BitGroups.erase(I); 1680 } else { 1681 // There are two separate groups, one before this group and one 1682 // after us (at the beginning). We're going to remove this group, 1683 // but also the group at the very beginning. 1684 IP->EndIdx = IN->EndIdx; 1685 IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32; 1686 IP->Repl32Coalesced = true; 1687 I = BitGroups.erase(I); 1688 BitGroups.erase(BitGroups.begin()); 1689 } 1690 1691 // This must be the last group in the vector (and we might have 1692 // just invalidated the iterator above), so break here. 1693 break; 1694 } 1695 } 1696 } 1697 1698 ++I; 1699 } 1700 } 1701 1702 SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { 1703 return CurDAG->getTargetConstant(Imm, dl, MVT::i32); 1704 } 1705 1706 uint64_t getZerosMask() { 1707 uint64_t Mask = 0; 1708 for (unsigned i = 0; i < Bits.size(); ++i) { 1709 if (Bits[i].hasValue()) 1710 continue; 1711 Mask |= (UINT64_C(1) << i); 1712 } 1713 1714 return ~Mask; 1715 } 1716 1717 // This method extends an input value to 64 bit if input is 32-bit integer. 1718 // While selecting instructions in BitPermutationSelector in 64-bit mode, 1719 // an input value can be a 32-bit integer if a ZERO_EXTEND node is included. 1720 // In such case, we extend it to 64 bit to be consistent with other values. 
SDValue ExtendToInt64(SDValue V, const SDLoc &dl) {
  // Already 64 bits wide: nothing to do.
  if (V.getValueSizeInBits() == 64)
    return V;

  // Otherwise the value must be 32 bits wide. Place it in the sub_32
  // subregister of an IMPLICIT_DEF i64, so the remaining bits are undefined.
  assert(V.getValueSizeInBits() == 32);
  SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
  SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
                                                 MVT::i64), 0);
  SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
                                                  MVT::i64, ImDef, V,
                                                  SubRegIdx), 0);
  return ExtVal;
}

// Counterpart of ExtendToInt64: returns V unchanged if it is already 32 bits
// wide; otherwise V must be 64 bits wide and its sub_32 subregister is
// extracted as an i32.
SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
  if (V.getValueSizeInBits() == 32)
    return V;

  assert(V.getValueSizeInBits() == 64);
  SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
  SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
                                                  MVT::i32, V, SubRegIdx), 0);
  return SubVal;
}

// Depending on the number of groups for a particular value, it might be
// better to rotate, mask explicitly (using andi/andis), and then or the
// result. Select this part of the result first.
//
// Res is an in/out accumulator: each selected part is OR'ed into it (or
// becomes it when Res is still empty). If InstCnt is non-null, the number of
// instructions accounted for each selected part is added to *InstCnt.
// Does nothing when BPermRewriterNoMasking is set.
void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
  if (BPermRewriterNoMasking)
    return;

  for (ValueRotInfo &VRI : ValueRotsVec) {
    // Collect every result bit that comes from VRI.V with rotation VRI.RLAmt.
    unsigned Mask = 0;
    for (unsigned i = 0; i < Bits.size(); ++i) {
      if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
        continue;
      if (RLAmt[i] != VRI.RLAmt)
        continue;
      Mask |= (1u << i);
    }

    // Compute the masks for andi/andis that would be necessary.
    unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
    assert((ANDIMask != 0 || ANDISMask != 0) &&
           "No set bits in mask for value bit groups");
    bool NeedsRotate = VRI.RLAmt != 0;

    // We're trying to minimize the number of instructions. If we have one
    // group, using one of andi/andis can break even.  If we have three
    // groups, we can use both andi and andis and break even (to use both
    // andi and andis we also need to or the results together). We need four
    // groups if we also need to rotate. To use andi/andis we need to do more
    // than break even because rotate-and-mask instructions tend to be easier
    // to schedule.

    // FIXME: We've biased here against using andi/andis, which is right for
    // POWER cores, but not optimal everywhere. For example, on the A2,
    // andi/andis have single-cycle latency whereas the rotate-and-mask
    // instructions take two cycles, and it would be better to bias toward
    // andi/andis in break-even cases.

    // Cost of the masking form: optional rotate, one instruction per
    // non-zero 16-bit half, an OR to join the halves when both are used, and
    // an OR to merge into an already-existing Res.
    unsigned NumAndInsts = (unsigned) NeedsRotate +
                           (unsigned) (ANDIMask != 0) +
                           (unsigned) (ANDISMask != 0) +
                           (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
                           (unsigned) (bool) Res;

    LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
                      << " RL: " << VRI.RLAmt << ":"
                      << "\n\t\t\tisel using masking: " << NumAndInsts
                      << " using rotates: " << VRI.NumGroups << "\n");

    // Only use masking when it is strictly cheaper than one instruction per
    // group.
    if (NumAndInsts >= VRI.NumGroups)
      continue;

    LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");

    if (InstCnt) *InstCnt += NumAndInsts;

    SDValue VRot;
    if (VRI.RLAmt) {
      // Full-width rotate: mask begin 0, mask end 31.
      SDValue Ops[] =
        { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
          getI32Imm(0, dl), getI32Imm(31, dl) };
      VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
                                            Ops), 0);
    } else {
      VRot = TruncateToInt32(VRI.V, dl);
    }

    SDValue ANDIVal, ANDISVal;
    if (ANDIMask != 0)
      ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
                          VRot, getI32Imm(ANDIMask, dl)), 0);
    if (ANDISMask != 0)
      ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
                           VRot, getI32Imm(ANDISMask, dl)), 0);

    // Join the two halves (at least one exists, per the assert above).
    SDValue TotalVal;
    if (!ANDIVal)
      TotalVal = ANDISVal;
    else if (!ANDISVal)
      TotalVal = ANDIVal;
    else
      TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
                           ANDIVal, ANDISVal), 0);

    if (!Res)
      Res = TotalVal;
    else
      Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
                      Res, TotalVal), 0);

    // Now, remove all groups with this underlying value and rotation
    // factor.
    eraseMatchingBitGroups([VRI](const BitGroup &BG) {
      return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
    });
  }
}

// Instruction selection for the 32-bit case.
//
// Returns the node computing the result. When LateMask is true, the bits
// that must be zero are cleared at the very end using getZerosMask(). If
// InstCnt is non-null it is reset to zero and then receives the number of
// instructions accounted for.
SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
  SDLoc dl(N);
  SDValue Res;

  if (InstCnt) *InstCnt = 0;

  // Take care of cases that should use andi/andis first.
  SelectAndParts32(dl, Res, InstCnt);

  // If we've not yet selected a 'starting' instruction, and we have no zeros
  // to fill in, select the (Value, RLAmt) with the highest priority (largest
  // number of groups), and start with this rotated value.
  if ((!NeedMask || LateMask) && !Res) {
    ValueRotInfo &VRI = ValueRotsVec[0];
    if (VRI.RLAmt) {
      if (InstCnt) *InstCnt += 1;
      SDValue Ops[] =
        { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
          getI32Imm(0, dl), getI32Imm(31, dl) };
      Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
                    0);
    } else {
      // No rotation needed: the value itself is the starting point and costs
      // no instruction.
      Res = TruncateToInt32(VRI.V, dl);
    }

    // Now, remove all groups with this underlying value and rotation factor.
    eraseMatchingBitGroups([VRI](const BitGroup &BG) {
      return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
    });
  }

  // Every remaining group costs exactly one rotate-and-mask(-insert).
  if (InstCnt) *InstCnt += BitGroups.size();

  // Insert the other groups (one at a time).
  for (auto &BG : BitGroups) {
    if (!Res) {
      // First group: plain rotate-and-mask. The mask operands are computed
      // as Bits.size() - Idx - 1, i.e. counted from the opposite end.
      SDValue Ops[] =
        { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
          getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
          getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
      Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
    } else {
      // Subsequent groups: rotate-and-mask-insert into the running result.
      SDValue Ops[] =
        { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
            getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
          getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
      Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
    }
  }

  if (LateMask) {
    // Clear the zero bits now, using the low 32 bits of the zeros mask split
    // into its two 16-bit halves.
    unsigned Mask = (unsigned) getZerosMask();

    unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
    assert((ANDIMask != 0 || ANDISMask != 0) &&
           "No set bits in zeros mask?");

    if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
                             (unsigned) (ANDISMask != 0) +
                             (unsigned) (ANDIMask != 0 && ANDISMask != 0);

    SDValue ANDIVal, ANDISVal;
    if (ANDIMask != 0)
      ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
                          Res, getI32Imm(ANDIMask, dl)), 0);
    if (ANDISMask != 0)
      ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
                           Res, getI32Imm(ANDISMask, dl)), 0);

    if (!ANDIVal)
      Res = ANDISVal;
    else if (!ANDISVal)
      Res = ANDIVal;
    else
      Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
                      ANDIVal, ANDISVal), 0);
  }

  return Res.getNode();
}

// Returns the instruction count (1 or 2) charged for a 64-bit rotate-and-mask
// with the given parameters, without creating any nodes. The case analysis
// mirrors SelectRotMask64 when IsIns is false and SelectRotMaskIns64 when
// IsIns is true.
unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
                              unsigned MaskStart, unsigned MaskEnd,
                              bool IsIns) {
  // In the notation used by the instructions, 'start' and 'end' are reversed
  // because bits are counted from high to low order.
  unsigned InstMaskStart = 64 - MaskEnd - 1,
           InstMaskEnd   = 64 - MaskStart - 1;

  if (Repl32)
    return 1;

  // The mask-to-edge forms are only considered for the non-insert case; the
  // InstMaskEnd == 63 - RLAmt form is considered for both.
  if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) ||
      InstMaskEnd == 63 - RLAmt)
    return 1;

  return 2;
}

// For 64-bit values, not all combinations of rotates and masks are
// available. Produce one if it is available.
//
// If no single instruction fits, two are emitted (see the end of the
// function). If InstCnt is non-null, *InstCnt is incremented once on entry and
// once more when the two-instruction path is taken.
SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
                        bool Repl32, unsigned MaskStart, unsigned MaskEnd,
                        unsigned *InstCnt = nullptr) {
  // In the notation used by the instructions, 'start' and 'end' are reversed
  // because bits are counted from high to low order.
  unsigned InstMaskStart = 64 - MaskEnd - 1,
           InstMaskEnd   = 64 - MaskStart - 1;

  if (InstCnt) *InstCnt += 1;

  if (Repl32) {
    // This rotation amount assumes that the lower 32 bits of the quantity
    // are replicated in the high 32 bits by the rotation operator (which is
    // done by rlwinm and friends).
    assert(InstMaskStart >= 32 && "Mask cannot start out of range");
    assert(InstMaskEnd   >= 32 && "Mask cannot end out of range");
    SDValue Ops[] =
      { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
        getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
    return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
                                          Ops), 0);
  }

  // Mask runs to the last instruction bit: only the mask start is passed.
  if (InstMaskEnd == 63) {
    SDValue Ops[] =
      { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
        getI32Imm(InstMaskStart, dl) };
    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
  }

  // Mask starts at the first instruction bit: only the mask end is passed.
  if (InstMaskStart == 0) {
    SDValue Ops[] =
      { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
        getI32Imm(InstMaskEnd, dl) };
    return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
  }

  // Mask end is tied to the rotation amount: only the mask start is passed.
  if (InstMaskEnd == 63 - RLAmt) {
    SDValue Ops[] =
      { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
        getI32Imm(InstMaskStart, dl) };
    return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
  }

  // We cannot do this with a single instruction, so we'll use two. The
  // problem is that we're not free to choose both a rotation amount and mask
  // start and end independently. We can choose an arbitrary mask start and
  // end, but then the rotation amount is fixed. Rotation, however, can be
  // inverted, and so by applying an "inverse" rotation first, we can get the
  // desired result.
  if (InstCnt) *InstCnt += 1;

  // The rotation mask for the second instruction must be MaskStart.
  unsigned RLAmt2 = MaskStart;
  // The first instruction must rotate V so that the overall rotation amount
  // is RLAmt.
  unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
  // The recursive calls do not forward InstCnt; both instructions were
  // already counted above.
  if (RLAmt1)
    V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
  return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
}

// For 64-bit values, not all combinations of rotates and masks are
// available. Produce a rotate-mask-and-insert if one is available.
//
// Base is the value being inserted into and V the value providing the bits.
// InstCnt is handled as in SelectRotMask64.
SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
                           unsigned RLAmt, bool Repl32, unsigned MaskStart,
                           unsigned MaskEnd, unsigned *InstCnt = nullptr) {
  // In the notation used by the instructions, 'start' and 'end' are reversed
  // because bits are counted from high to low order.
  unsigned InstMaskStart = 64 - MaskEnd - 1,
           InstMaskEnd   = 64 - MaskStart - 1;

  if (InstCnt) *InstCnt += 1;

  if (Repl32) {
    // This rotation amount assumes that the lower 32 bits of the quantity
    // are replicated in the high 32 bits by the rotation operator (which is
    // done by rlwinm and friends).
    assert(InstMaskStart >= 32 && "Mask cannot start out of range");
    assert(InstMaskEnd   >= 32 && "Mask cannot end out of range");
    SDValue Ops[] =
      { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
        getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
    return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
                                          Ops), 0);
  }

  // The only single-instruction 64-bit insert form handled here has its mask
  // end tied to the rotation amount.
  if (InstMaskEnd == 63 - RLAmt) {
    SDValue Ops[] =
      { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
        getI32Imm(InstMaskStart, dl) };
    return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
  }

  // We cannot do this with a single instruction, so we'll use two. The
  // problem is that we're not free to choose both a rotation amount and mask
  // start and end independently. We can choose an arbitrary mask start and
  // end, but then the rotation amount is fixed. Rotation, however, can be
  // inverted, and so by applying an "inverse" rotation first, we can get the
  // desired result.
  if (InstCnt) *InstCnt += 1;

  // The rotation mask for the second instruction must be MaskStart.
  unsigned RLAmt2 = MaskStart;
  // The first instruction must rotate V so that the overall rotation amount
  // is RLAmt.
  unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
  // Pre-rotate with a plain (non-insert) full-width rotate, then insert.
  if (RLAmt1)
    V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
  return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
}

// 64-bit counterpart of SelectAndParts32: for each (value, rotation) entry in
// ValueRotsVec, decide whether rotate + explicit AND is preferable to
// per-group rotate-and-mask instructions and, if so, emit it, OR it into Res
// and erase the bit groups it covers. If InstCnt is non-null, the number of
// instructions accounted for is added to *InstCnt.
void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
  if (BPermRewriterNoMasking)
    return;

  // The idea here is the same as in the 32-bit version, but with additional
  // complications from the fact that Repl32 might be true. Because we
  // aggressively convert bit groups to Repl32 form (which, for small
  // rotation factors, involves no other change), and then coalesce, it might
  // be the case that a single 64-bit masking operation could handle both
  // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
  // form allowed coalescing, then we must use a 32-bit rotation in order to
  // completely capture the new combined bit group.

  for (ValueRotInfo &VRI : ValueRotsVec) {
    uint64_t Mask = 0;

    // We need to add to the mask all bits from the associated bit groups.
    // If Repl32 is false, we need to add bits from bit groups that have
    // Repl32 true, but are trivially convertible to Repl32 false. Such a
    // group is trivially convertible if it overlaps only with the lower 32
    // bits, and the group has not been coalesced.
    auto MatchingBG = [VRI](const BitGroup &BG) {
      if (VRI.V != BG.V)
        return false;

      unsigned EffRLAmt = BG.RLAmt;
      if (!VRI.Repl32 && BG.Repl32) {
        if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
            !BG.Repl32Coalesced) {
          // Compare against the rotation amount the group would have in
          // non-Repl32 form.
          if (BG.Repl32CR)
            EffRLAmt += 32;
        } else {
          return false;
        }
      } else if (VRI.Repl32 != BG.Repl32) {
        return false;
      }

      return VRI.RLAmt == EffRLAmt;
    };

    for (auto &BG : BitGroups) {
      if (!MatchingBG(BG))
        continue;

      if (BG.StartIdx <= BG.EndIdx) {
        for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
          Mask |= (UINT64_C(1) << i);
      } else {
        // The group wraps around: it covers [StartIdx, Bits.size()) and
        // [0, EndIdx].
        for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
          Mask |= (UINT64_C(1) << i);
        for (unsigned i = 0; i <= BG.EndIdx; ++i)
          Mask |= (UINT64_C(1) << i);
      }
    }

    // We can use the 32-bit andi/andis technique if the mask does not
    // require any higher-order bits. This can save an instruction compared
    // to always using the general 64-bit technique.
    bool Use32BitInsts = isUInt<32>(Mask);
    // Compute the masks for andi/andis that would be necessary.
    unsigned ANDIMask = (Mask & UINT16_MAX),
             ANDISMask = (Mask >> 16) & UINT16_MAX;

    bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));

    // Cost of the masking form: optional rotate, an OR into an existing Res,
    // plus either the andi/andis sequence or constant materialization and
    // one AND.
    unsigned NumAndInsts = (unsigned) NeedsRotate +
                           (unsigned) (bool) Res;
    if (Use32BitInsts)
      NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
                     (unsigned) (ANDIMask != 0 && ANDISMask != 0);
    else
      NumAndInsts += selectI64ImmInstrCount(Mask) + /* and */ 1;

    // Cost of the rotate form: sum over the matching groups, where every
    // group after the first is counted as an insert. MoreBG records whether
    // any non-matching group exists.
    unsigned NumRLInsts = 0;
    bool FirstBG = true;
    bool MoreBG = false;
    for (auto &BG : BitGroups) {
      if (!MatchingBG(BG)) {
        MoreBG = true;
        continue;
      }
      NumRLInsts +=
        SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
                             !FirstBG);
      FirstBG = false;
    }

    LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
                      << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":")
                      << "\n\t\t\tisel using masking: " << NumAndInsts
                      << " using rotates: " << NumRLInsts << "\n");

    // When we'd use andi/andis, we bias toward using the rotates (andi only
    // has a record form, and is cracked on POWER cores). However, when using
    // general 64-bit constant formation, bias toward the constant form,
    // because that exposes more opportunities for CSE.
    if (NumAndInsts > NumRLInsts)
      continue;
    // When merging multiple bit groups, instruction or is used.
    // But when rotate is used, rldimi can insert the rotated value into any
    // register, so instruction or can be avoided.
    if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
      continue;

    LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");

    if (InstCnt) *InstCnt += NumAndInsts;

    SDValue VRot;
    // We actually need to generate a rotation if we have a non-zero rotation
    // factor or, in the Repl32 case, if we care about any of the
    // higher-order replicated bits. In the latter case, we generate a mask
    // backward so that it actually includes the entire 64 bits.
    if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
      VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
                             VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
    else
      VRot = VRI.V;

    SDValue TotalVal;
    if (Use32BitInsts) {
      assert((ANDIMask != 0 || ANDISMask != 0) &&
             "No set bits in mask when using 32-bit ands for 64-bit value");

      SDValue ANDIVal, ANDISVal;
      if (ANDIMask != 0)
        ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
                                                 ExtendToInt64(VRot, dl),
                                                 getI32Imm(ANDIMask, dl)),
                          0);
      if (ANDISMask != 0)
        ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
                                                  ExtendToInt64(VRot, dl),
                                                  getI32Imm(ANDISMask, dl)),
                           0);

      if (!ANDIVal)
        TotalVal = ANDISVal;
      else if (!ANDISVal)
        TotalVal = ANDIVal;
      else
        TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
                             ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
    } else {
      // General case: materialize the 64-bit mask and AND with it.
      TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
      TotalVal =
        SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
                                       ExtendToInt64(VRot, dl), TotalVal),
                0);
    }

    if (!Res)
      Res = TotalVal;
    else
      Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
                                           ExtendToInt64(Res, dl), TotalVal),
                    0);

    // Now, remove all groups with this underlying value and rotation
    // factor.
    eraseMatchingBitGroups(MatchingBG);
  }
}

// Instruction selection for the 64-bit case.
//
// Same contract as Select32: returns the node computing the result, applies
// the zeros mask at the end when LateMask is true, and (if InstCnt is
// non-null) resets *InstCnt and accumulates the instruction count into it.
SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
  SDLoc dl(N);
  SDValue Res;

  if (InstCnt) *InstCnt = 0;

  // Take care of cases that should use andi/andis first.
  SelectAndParts64(dl, Res, InstCnt);

  // If we've not yet selected a 'starting' instruction, and we have no zeros
  // to fill in, select the (Value, RLAmt) with the highest priority (largest
  // number of groups), and start with this rotated value.
  if ((!NeedMask || LateMask) && !Res) {
    // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
    // groups will come first, and so the VRI representing the largest number
    // of groups might not be first (it might be the first Repl32 groups).
    unsigned MaxGroupsIdx = 0;
    if (!ValueRotsVec[0].Repl32) {
      // Only the first Repl32 entry is compared against entry 0.
      for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
        if (ValueRotsVec[i].Repl32) {
          if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
            MaxGroupsIdx = i;
          break;
        }
    }

    ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
    bool NeedsRotate = false;
    if (VRI.RLAmt) {
      NeedsRotate = true;
    } else if (VRI.Repl32) {
      for (auto &BG : BitGroups) {
        if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
            BG.Repl32 != VRI.Repl32)
          continue;

        // We don't need a rotate if the bit group is confined to the lower
        // 32 bits.
        // NOTE(review): the strict '<' treats a single-bit group
        // (StartIdx == EndIdx) as needing a rotate, whereas MatchingBG in
        // SelectAndParts64 uses '<=' -- confirm this is intended.
        if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
          continue;

        NeedsRotate = true;
        break;
      }
    }

    if (NeedsRotate)
      Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
                            VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
                            InstCnt);
    else
      Res = VRI.V;

    // Now, remove all groups with this underlying value and rotation factor.
    if (Res)
      eraseMatchingBitGroups([VRI](const BitGroup &BG) {
        return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
               BG.Repl32 == VRI.Repl32;
      });
  }

  // Because 64-bit rotates are more flexible than inserts, we might have a
  // preference regarding which one we do first (to save one instruction).
  if (!Res)
    for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
      if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
                              false) <
          SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
                              true)) {
        // Move the first group that is cheaper as a plain rotate than as an
        // insert to the front. The loop exits right after, so the iterators
        // invalidated by erase/insert are not used again.
        if (I != BitGroups.begin()) {
          BitGroup BG = *I;
          BitGroups.erase(I);
          BitGroups.insert(BitGroups.begin(), BG);
        }

        break;
      }
    }

  // Insert the other groups (one at a time).
  for (auto &BG : BitGroups) {
    if (!Res)
      Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
                            BG.EndIdx, InstCnt);
    else
      Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
                               BG.StartIdx, BG.EndIdx, InstCnt);
  }

  if (LateMask) {
    uint64_t Mask = getZerosMask();

    // We can use the 32-bit andi/andis technique if the mask does not
    // require any higher-order bits. This can save an instruction compared
    // to always using the general 64-bit technique.
    bool Use32BitInsts = isUInt<32>(Mask);
    // Compute the masks for andi/andis that would be necessary.
    unsigned ANDIMask = (Mask & UINT16_MAX),
             ANDISMask = (Mask >> 16) & UINT16_MAX;

    if (Use32BitInsts) {
      assert((ANDIMask != 0 || ANDISMask != 0) &&
             "No set bits in mask when using 32-bit ands for 64-bit value");

      if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
                               (unsigned) (ANDISMask != 0) +
                               (unsigned) (ANDIMask != 0 && ANDISMask != 0);

      SDValue ANDIVal, ANDISVal;
      if (ANDIMask != 0)
        ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
                            ExtendToInt64(Res, dl), getI32Imm(ANDIMask, dl)), 0);
      if (ANDISMask != 0)
        ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
                             ExtendToInt64(Res, dl), getI32Imm(ANDISMask, dl)), 0);

      if (!ANDIVal)
        Res = ANDISVal;
      else if (!ANDISVal)
        Res = ANDIVal;
      else
        Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
                        ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
    } else {
      if (InstCnt) *InstCnt += selectI64ImmInstrCount(Mask) + /* and */ 1;

      SDValue MaskVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
      Res =
        SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
                                       ExtendToInt64(Res, dl), MaskVal), 0);
    }
  }

  return Res.getNode();
}

// Runs one selection attempt for N with the given masking strategy:
// rebuilds BitGroups (and ValueRotsVec), then dispatches to Select32 or
// Select64 based on Bits.size(). Returns nullptr if there are no bit groups.
SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
  // Fill in BitGroups.
  collectBitGroups(LateMask);
  if (BitGroups.empty())
    return nullptr;

  // For 64-bit values, figure out when we can use 32-bit instructions.
  if (Bits.size() == 64)
    assignRepl32BitGroups();

  // Fill in ValueRotsVec.
  collectValueRotInfo();

  if (Bits.size() == 32) {
    return Select32(N, LateMask, InstCnt);
  } else {
    assert(Bits.size() == 64 && "Not 64 bits here?");
    return Select64(N, LateMask, InstCnt);
  }

  // NOTE(review): not reached -- both branches above return.
  return nullptr;
}

// Removes every bit group for which F returns true.
void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
  BitGroups.erase(remove_if(BitGroups, F), BitGroups.end());
}

// One entry per bit of the value being selected; Select() dispatches on
// Bits.size() being 32 or 64.
SmallVector<ValueBit, 64> Bits;

// NeedMask and RLAmt are filled in by computeRotationAmounts() (see the
// public Select). RLAmt is indexed like Bits.
bool NeedMask;
SmallVector<unsigned, 64> RLAmt;

// Rebuilt by collectBitGroups() on every selection attempt and consumed
// (erased from) as groups are selected.
SmallVector<BitGroup, 16> BitGroups;

// ValueRotsVec is filled in by collectValueRotInfo(); the selectors treat
// its first entry as the highest-priority (value, rotation) pair.
DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
SmallVector<ValueRotInfo, 16> ValueRotsVec;

SelectionDAG *CurDAG;

public:
BitPermutationSelector(SelectionDAG *DAG)
  : CurDAG(DAG) {}

// Here we try to match complex bit permutations into a set of
// rotate-and-shift/shift/and/or instructions, using a set of heuristics
// known to produce optimal code for common cases (like i32 byte swapping).
//
// Returns the selected node, or nullptr if the value bits of N could not be
// determined or no bit groups were found.
SDNode *Select(SDNode *N) {
  Memoizer.clear();
  auto Result =
      getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
  if (!Result.first)
    return nullptr;
  Bits = std::move(*Result.second);

  LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"
                       " selection for:    ");
  LLVM_DEBUG(N->dump(CurDAG));

  // Fill in RLAmt and set NeedMask.
  computeRotationAmounts();

  // Without zeros to fill in there is only one strategy to try.
  if (!NeedMask)
    return Select(N, false);

  // We currently have two techniques for handling results with zeros: early
  // masking (the default) and late masking. Late masking is sometimes more
  // efficient, but because the structure of the bit groups is different, it
  // is hard to tell without generating both and comparing the results. With
  // late masking, we ignore zeros in the resulting value when inserting each
  // set of bit groups, and then mask in the zeros at the end. With early
  // masking, we only insert the non-zero parts of the result at every step.

  unsigned InstCnt = 0, InstCntLateMask = 0;
  LLVM_DEBUG(dbgs() << "\tEarly masking:\n");
  SDNode *RN = Select(N, false, &InstCnt);
  LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");

  LLVM_DEBUG(dbgs() << "\tLate masking:\n");
  SDNode *RNLM = Select(N, true, &InstCntLateMask);
  LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask
                    << " instructions\n");

  // Ties go to early masking.
  if (InstCnt <= InstCntLateMask) {
    LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n");
    return RN;
  }

  LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n");
  return RNLM;
}
};

// Computes the results of integer comparisons, and of logical operations on
// them, in GPRs. Only usable on 64-bit targets (asserted in the
// constructor).
class IntegerCompareEliminator {
  SelectionDAG *CurDAG;
  PPCDAGToDAGISel *S;
  // Conversion type for interpreting results of a 32-bit instruction as
  // a 64-bit value or vice versa.
  enum ExtOrTruncConversion { Ext, Trunc };

  // Modifiers to guide how an ISD::SETCC node's result is to be computed
  // in a GPR.
  // ZExtOrig - use the original condition code, zero-extend value
  // ZExtInvert - invert the condition code, zero-extend value
  // SExtOrig - use the original condition code, sign-extend value
  // SExtInvert - invert the condition code, sign-extend value
  enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };

  // Comparisons against zero to emit GPR code sequences for. Each of these
  // sequences may need to be emitted for two or more equivalent patterns.
  // For example (a >= 0) == (a > -1). The direction of the comparison (</>)
  // matters as well as the extension type: sext (-1/0), zext (1/0).
  // GEZExt - (zext (LHS >= 0))
  // GESExt - (sext (LHS >= 0))
  // LEZExt - (zext (LHS <= 0))
  // LESExt - (sext (LHS <= 0))
  enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };

  SDNode *tryEXTEND(SDNode *N);
  SDNode *tryLogicOpOfCompares(SDNode *N);
  SDValue computeLogicOpInGPR(SDValue LogicOp);
  SDValue signExtendInputIfNeeded(SDValue Input);
  SDValue zeroExtendInputIfNeeded(SDValue Input);
  SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
  SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
                                        ZeroCompare CmpTy);
  SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                              int64_t RHSValue, SDLoc dl);
  SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                              int64_t RHSValue, SDLoc dl);
  SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                              int64_t RHSValue, SDLoc dl);
  SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                              int64_t RHSValue, SDLoc dl);
  SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);

public:
  IntegerCompareEliminator(SelectionDAG *DAG,
                           PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {
    assert(CurDAG->getTargetLoweringInfo()
           .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&
           "Only expecting to use this on 64 bit targets.");
  }
  // Entry point: dispatches on the opcode of N, subject to the CmpInGPR
  // setting. Returns the replacement node, or nullptr if nothing was done.
  SDNode *Select(SDNode *N) {
    if (CmpInGPR == ICGPR_None)
      return nullptr;
    switch (N->getOpcode()) {
    default: break;
    case ISD::ZERO_EXTEND:
      if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 ||
          CmpInGPR == ICGPR_SextI64)
        return nullptr;
      // NOTE(review): because of this fallthrough, a ZERO_EXTEND is also
      // rejected for the Zext* settings checked below -- confirm this is
      // intended.
      LLVM_FALLTHROUGH;
    case ISD::SIGN_EXTEND:
      if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 ||
          CmpInGPR == ICGPR_ZextI64)
        return nullptr;
      return tryEXTEND(N);
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      return tryLogicOpOfCompares(N);
    }
    return nullptr;
  }
};

// Returns true for the ISD opcodes AND, OR and XOR.
static bool isLogicOp(unsigned Opc) {
  return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR;
}
// The obvious case for wanting to keep the value in a GPR. Namely, the
// result of the comparison is actually needed in a GPR.
//
// N must be a ZERO_EXTEND or SIGN_EXTEND. Returns the node producing the
// extended value in a GPR, or nullptr if the operand is neither a SETCC nor
// (for ZERO_EXTEND) a logic operation on i1, or if no GPR sequence could be
// produced for it.
SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {
  assert((N->getOpcode() == ISD::ZERO_EXTEND ||
          N->getOpcode() == ISD::SIGN_EXTEND) &&
         "Expecting a zero/sign extend node!");
  SDValue WideRes;
  // If we are zero-extending the result of a logical operation on i1
  // values, we can keep the values in GPRs.
  if (isLogicOp(N->getOperand(0).getOpcode()) &&
      N->getOperand(0).getValueType() == MVT::i1 &&
      N->getOpcode() == ISD::ZERO_EXTEND)
    WideRes = computeLogicOpInGPR(N->getOperand(0));
  else if (N->getOperand(0).getOpcode() != ISD::SETCC)
    return nullptr;
  else
    WideRes =
      getSETCCInGPR(N->getOperand(0),
                    N->getOpcode() == ISD::SIGN_EXTEND ?
                    SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);

  if (!WideRes)
    return nullptr;

  SDLoc dl(N);
  bool Input32Bit = WideRes.getValueType() == MVT::i32;
  bool Output32Bit = N->getValueType(0) == MVT::i32;

  // Exactly one of the two statistics is bumped per successful expansion.
  NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0;
  NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1;

  // Adjust the width of the computed value if it differs from the width the
  // extend node is expected to produce.
  SDValue ConvOp = WideRes;
  if (Input32Bit != Output32Bit)
    ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext :
                           ExtOrTruncConversion::Trunc);
  return ConvOp.getNode();
}

// Attempt to perform logical operations on the results of comparisons while
// keeping the values in GPRs. Without doing so, these would end up being
// lowered to CR-logical operations which suffer from significant latency and
// low ILP.
2570 SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) { 2571 if (N->getValueType(0) != MVT::i1) 2572 return nullptr; 2573 assert(isLogicOp(N->getOpcode()) && 2574 "Expected a logic operation on setcc results."); 2575 SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0)); 2576 if (!LoweredLogical) 2577 return nullptr; 2578 2579 SDLoc dl(N); 2580 bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8; 2581 unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt; 2582 SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); 2583 SDValue LHS = LoweredLogical.getOperand(0); 2584 SDValue RHS = LoweredLogical.getOperand(1); 2585 SDValue WideOp; 2586 SDValue OpToConvToRecForm; 2587 2588 // Look through any 32-bit to 64-bit implicit extend nodes to find the 2589 // opcode that is input to the XORI. 2590 if (IsBitwiseNegate && 2591 LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG) 2592 OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1); 2593 else if (IsBitwiseNegate) 2594 // If the input to the XORI isn't an extension, that's what we're after. 2595 OpToConvToRecForm = LoweredLogical.getOperand(0); 2596 else 2597 // If this is not an XORI, it is a reg-reg logical op and we can convert 2598 // it to record-form. 2599 OpToConvToRecForm = LoweredLogical; 2600 2601 // Get the record-form version of the node we're looking to use to get the 2602 // CR result from. 2603 uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode(); 2604 int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc); 2605 2606 // Convert the right node to record-form. This is either the logical we're 2607 // looking at or it is the input node to the negation (if we're looking at 2608 // a bitwise negation). 2609 if (NewOpc != -1 && IsBitwiseNegate) { 2610 // The input to the XORI has a record-form. Use it. 
2611 assert(LoweredLogical.getConstantOperandVal(1) == 1 && 2612 "Expected a PPC::XORI8 only for bitwise negation."); 2613 // Emit the record-form instruction. 2614 std::vector<SDValue> Ops; 2615 for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++) 2616 Ops.push_back(OpToConvToRecForm.getOperand(i)); 2617 2618 WideOp = 2619 SDValue(CurDAG->getMachineNode(NewOpc, dl, 2620 OpToConvToRecForm.getValueType(), 2621 MVT::Glue, Ops), 0); 2622 } else { 2623 assert((NewOpc != -1 || !IsBitwiseNegate) && 2624 "No record form available for AND8/OR8/XOR8?"); 2625 WideOp = 2626 SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDIo8 : NewOpc, dl, 2627 MVT::i64, MVT::Glue, LHS, RHS), 0); 2628 } 2629 2630 // Select this node to a single bit from CR0 set by the record-form node 2631 // just created. For bitwise negation, use the EQ bit which is the equivalent 2632 // of negating the result (i.e. it is a bit set when the result of the 2633 // operation is zero). 2634 SDValue SRIdxVal = 2635 CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32); 2636 SDValue CRBit = 2637 SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, 2638 MVT::i1, CR0Reg, SRIdxVal, 2639 WideOp.getValue(1)), 0); 2640 return CRBit.getNode(); 2641 } 2642 2643 // Lower a logical operation on i1 values into a GPR sequence if possible. 2644 // The result can be kept in a GPR if requested. 2645 // Three types of inputs can be handled: 2646 // - SETCC 2647 // - TRUNCATE 2648 // - Logical operation (AND/OR/XOR) 2649 // There is also a special case that is handled (namely a complement operation 2650 // achieved with xor %a, -1). 
2651 SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) { 2652 assert(isLogicOp(LogicOp.getOpcode()) && 2653 "Can only handle logic operations here."); 2654 assert(LogicOp.getValueType() == MVT::i1 && 2655 "Can only handle logic operations on i1 values here."); 2656 SDLoc dl(LogicOp); 2657 SDValue LHS, RHS; 2658 2659 // Special case: xor %a, -1 2660 bool IsBitwiseNegation = isBitwiseNot(LogicOp); 2661 2662 // Produces a GPR sequence for each operand of the binary logic operation. 2663 // For SETCC, it produces the respective comparison, for TRUNCATE it truncates 2664 // the value in a GPR and for logic operations, it will recursively produce 2665 // a GPR sequence for the operation. 2666 auto getLogicOperand = [&] (SDValue Operand) -> SDValue { 2667 unsigned OperandOpcode = Operand.getOpcode(); 2668 if (OperandOpcode == ISD::SETCC) 2669 return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig); 2670 else if (OperandOpcode == ISD::TRUNCATE) { 2671 SDValue InputOp = Operand.getOperand(0); 2672 EVT InVT = InputOp.getValueType(); 2673 return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 : 2674 PPC::RLDICL, dl, InVT, InputOp, 2675 S->getI64Imm(0, dl), 2676 S->getI64Imm(63, dl)), 0); 2677 } else if (isLogicOp(OperandOpcode)) 2678 return computeLogicOpInGPR(Operand); 2679 return SDValue(); 2680 }; 2681 LHS = getLogicOperand(LogicOp.getOperand(0)); 2682 RHS = getLogicOperand(LogicOp.getOperand(1)); 2683 2684 // If a GPR sequence can't be produced for the LHS we can't proceed. 2685 // Not producing a GPR sequence for the RHS is only a problem if this isn't 2686 // a bitwise negation operation. 2687 if (!LHS || (!RHS && !IsBitwiseNegation)) 2688 return SDValue(); 2689 2690 NumLogicOpsOnComparison++; 2691 2692 // We will use the inputs as 64-bit values. 
2693 if (LHS.getValueType() == MVT::i32) 2694 LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext); 2695 if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32) 2696 RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext); 2697 2698 unsigned NewOpc; 2699 switch (LogicOp.getOpcode()) { 2700 default: llvm_unreachable("Unknown logic operation."); 2701 case ISD::AND: NewOpc = PPC::AND8; break; 2702 case ISD::OR: NewOpc = PPC::OR8; break; 2703 case ISD::XOR: NewOpc = PPC::XOR8; break; 2704 } 2705 2706 if (IsBitwiseNegation) { 2707 RHS = S->getI64Imm(1, dl); 2708 NewOpc = PPC::XORI8; 2709 } 2710 2711 return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0); 2712 2713 } 2714 2715 /// If the value isn't guaranteed to be sign-extended to 64-bits, extend it. 2716 /// Otherwise just reinterpret it as a 64-bit value. 2717 /// Useful when emitting comparison code for 32-bit values without using 2718 /// the compare instruction (which only considers the lower 32-bits). 2719 SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) { 2720 assert(Input.getValueType() == MVT::i32 && 2721 "Can only sign-extend 32-bit values here."); 2722 unsigned Opc = Input.getOpcode(); 2723 2724 // The value was sign extended and then truncated to 32-bits. No need to 2725 // sign extend it again. 2726 if (Opc == ISD::TRUNCATE && 2727 (Input.getOperand(0).getOpcode() == ISD::AssertSext || 2728 Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND)) 2729 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); 2730 2731 LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input); 2732 // The input is a sign-extending load. All ppc sign-extending loads 2733 // sign-extend to the full 64-bits. 2734 if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD) 2735 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); 2736 2737 ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input); 2738 // We don't sign-extend constants. 
2739 if (InputConst) 2740 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); 2741 2742 SDLoc dl(Input); 2743 SignExtensionsAdded++; 2744 return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl, 2745 MVT::i64, Input), 0); 2746 } 2747 2748 /// If the value isn't guaranteed to be zero-extended to 64-bits, extend it. 2749 /// Otherwise just reinterpret it as a 64-bit value. 2750 /// Useful when emitting comparison code for 32-bit values without using 2751 /// the compare instruction (which only considers the lower 32-bits). 2752 SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) { 2753 assert(Input.getValueType() == MVT::i32 && 2754 "Can only zero-extend 32-bit values here."); 2755 unsigned Opc = Input.getOpcode(); 2756 2757 // The only condition under which we can omit the actual extend instruction: 2758 // - The value is a positive constant 2759 // - The value comes from a load that isn't a sign-extending load 2760 // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext. 2761 bool IsTruncateOfZExt = Opc == ISD::TRUNCATE && 2762 (Input.getOperand(0).getOpcode() == ISD::AssertZext || 2763 Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND); 2764 if (IsTruncateOfZExt) 2765 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); 2766 2767 ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input); 2768 if (InputConst && InputConst->getSExtValue() >= 0) 2769 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); 2770 2771 LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input); 2772 // The input is a load that doesn't sign-extend (it will be zero-extended). 2773 if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD) 2774 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); 2775 2776 // None of the above, need to zero-extend. 
2777 SDLoc dl(Input); 2778 ZeroExtensionsAdded++; 2779 return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input, 2780 S->getI64Imm(0, dl), 2781 S->getI64Imm(32, dl)), 0); 2782 } 2783 2784 // Handle a 32-bit value in a 64-bit register and vice-versa. These are of 2785 // course not actual zero/sign extensions that will generate machine code, 2786 // they're just a way to reinterpret a 32 bit value in a register as a 2787 // 64 bit value and vice-versa. 2788 SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes, 2789 ExtOrTruncConversion Conv) { 2790 SDLoc dl(NatWidthRes); 2791 2792 // For reinterpreting 32-bit values as 64 bit values, we generate 2793 // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1> 2794 if (Conv == ExtOrTruncConversion::Ext) { 2795 SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0); 2796 SDValue SubRegIdx = 2797 CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); 2798 return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64, 2799 ImDef, NatWidthRes, SubRegIdx), 0); 2800 } 2801 2802 assert(Conv == ExtOrTruncConversion::Trunc && 2803 "Unknown convertion between 32 and 64 bit values."); 2804 // For reinterpreting 64-bit values as 32-bit values, we just need to 2805 // EXTRACT_SUBREG (i.e. extract the low word). 2806 SDValue SubRegIdx = 2807 CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); 2808 return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32, 2809 NatWidthRes, SubRegIdx), 0); 2810 } 2811 2812 // Produce a GPR sequence for compound comparisons (<=, >=) against zero. 2813 // Handle both zero-extensions and sign-extensions. 2814 SDValue 2815 IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl, 2816 ZeroCompare CmpTy) { 2817 EVT InVT = LHS.getValueType(); 2818 bool Is32Bit = InVT == MVT::i32; 2819 SDValue ToExtend; 2820 2821 // Produce the value that needs to be either zero or sign extended. 
2822 switch (CmpTy) { 2823 case ZeroCompare::GEZExt: 2824 case ZeroCompare::GESExt: 2825 ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8, 2826 dl, InVT, LHS, LHS), 0); 2827 break; 2828 case ZeroCompare::LEZExt: 2829 case ZeroCompare::LESExt: { 2830 if (Is32Bit) { 2831 // Upper 32 bits cannot be undefined for this sequence. 2832 LHS = signExtendInputIfNeeded(LHS); 2833 SDValue Neg = 2834 SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0); 2835 ToExtend = 2836 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, 2837 Neg, S->getI64Imm(1, dl), 2838 S->getI64Imm(63, dl)), 0); 2839 } else { 2840 SDValue Addi = 2841 SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS, 2842 S->getI64Imm(~0ULL, dl)), 0); 2843 ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, 2844 Addi, LHS), 0); 2845 } 2846 break; 2847 } 2848 } 2849 2850 // For 64-bit sequences, the extensions are the same for the GE/LE cases. 2851 if (!Is32Bit && 2852 (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt)) 2853 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, 2854 ToExtend, S->getI64Imm(1, dl), 2855 S->getI64Imm(63, dl)), 0); 2856 if (!Is32Bit && 2857 (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt)) 2858 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend, 2859 S->getI64Imm(63, dl)), 0); 2860 2861 assert(Is32Bit && "Should have handled the 32-bit sequences above."); 2862 // For 32-bit sequences, the extensions differ between GE/LE cases. 
2863 switch (CmpTy) { 2864 case ZeroCompare::GEZExt: { 2865 SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl), 2866 S->getI32Imm(31, dl) }; 2867 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, 2868 ShiftOps), 0); 2869 } 2870 case ZeroCompare::GESExt: 2871 return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend, 2872 S->getI32Imm(31, dl)), 0); 2873 case ZeroCompare::LEZExt: 2874 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend, 2875 S->getI32Imm(1, dl)), 0); 2876 case ZeroCompare::LESExt: 2877 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend, 2878 S->getI32Imm(-1, dl)), 0); 2879 } 2880 2881 // The above case covers all the enumerators so it can't have a default clause 2882 // to avoid compiler warnings. 2883 llvm_unreachable("Unknown zero-comparison type."); 2884 } 2885 2886 /// Produces a zero-extended result of comparing two 32-bit values according to 2887 /// the passed condition code. 2888 SDValue 2889 IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS, 2890 ISD::CondCode CC, 2891 int64_t RHSValue, SDLoc dl) { 2892 if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 || 2893 CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Sext) 2894 return SDValue(); 2895 bool IsRHSZero = RHSValue == 0; 2896 bool IsRHSOne = RHSValue == 1; 2897 bool IsRHSNegOne = RHSValue == -1LL; 2898 switch (CC) { 2899 default: return SDValue(); 2900 case ISD::SETEQ: { 2901 // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5) 2902 // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5) 2903 SDValue Xor = IsRHSZero ? 
LHS : 2904 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); 2905 SDValue Clz = 2906 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); 2907 SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), 2908 S->getI32Imm(31, dl) }; 2909 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, 2910 ShiftOps), 0); 2911 } 2912 case ISD::SETNE: { 2913 // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1) 2914 // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1) 2915 SDValue Xor = IsRHSZero ? LHS : 2916 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); 2917 SDValue Clz = 2918 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); 2919 SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), 2920 S->getI32Imm(31, dl) }; 2921 SDValue Shift = 2922 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); 2923 return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, 2924 S->getI32Imm(1, dl)), 0); 2925 } 2926 case ISD::SETGE: { 2927 // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1) 2928 // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31) 2929 if(IsRHSZero) 2930 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); 2931 2932 // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a) 2933 // by swapping inputs and falling through. 
2934 std::swap(LHS, RHS); 2935 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); 2936 IsRHSZero = RHSConst && RHSConst->isNullValue(); 2937 LLVM_FALLTHROUGH; 2938 } 2939 case ISD::SETLE: { 2940 if (CmpInGPR == ICGPR_NonExtIn) 2941 return SDValue(); 2942 // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1) 2943 // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1) 2944 if(IsRHSZero) { 2945 if (CmpInGPR == ICGPR_NonExtIn) 2946 return SDValue(); 2947 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); 2948 } 2949 2950 // The upper 32-bits of the register can't be undefined for this sequence. 2951 LHS = signExtendInputIfNeeded(LHS); 2952 RHS = signExtendInputIfNeeded(RHS); 2953 SDValue Sub = 2954 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0); 2955 SDValue Shift = 2956 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub, 2957 S->getI64Imm(1, dl), S->getI64Imm(63, dl)), 2958 0); 2959 return 2960 SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, 2961 MVT::i64, Shift, S->getI32Imm(1, dl)), 0); 2962 } 2963 case ISD::SETGT: { 2964 // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63) 2965 // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31) 2966 // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63) 2967 // Handle SETLT -1 (which is equivalent to SETGE 0). 2968 if (IsRHSNegOne) 2969 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); 2970 2971 if (IsRHSZero) { 2972 if (CmpInGPR == ICGPR_NonExtIn) 2973 return SDValue(); 2974 // The upper 32-bits of the register can't be undefined for this sequence. 2975 LHS = signExtendInputIfNeeded(LHS); 2976 RHS = signExtendInputIfNeeded(RHS); 2977 SDValue Neg = 2978 SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0); 2979 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, 2980 Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0); 2981 } 2982 // Not a special case (i.e. RHS == 0 or RHS == -1). 
Handle (%a > %b) as 2983 // (%b < %a) by swapping inputs and falling through. 2984 std::swap(LHS, RHS); 2985 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); 2986 IsRHSZero = RHSConst && RHSConst->isNullValue(); 2987 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; 2988 LLVM_FALLTHROUGH; 2989 } 2990 case ISD::SETLT: { 2991 // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63) 2992 // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1) 2993 // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31) 2994 // Handle SETLT 1 (which is equivalent to SETLE 0). 2995 if (IsRHSOne) { 2996 if (CmpInGPR == ICGPR_NonExtIn) 2997 return SDValue(); 2998 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); 2999 } 3000 3001 if (IsRHSZero) { 3002 SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl), 3003 S->getI32Imm(31, dl) }; 3004 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, 3005 ShiftOps), 0); 3006 } 3007 3008 if (CmpInGPR == ICGPR_NonExtIn) 3009 return SDValue(); 3010 // The upper 32-bits of the register can't be undefined for this sequence. 3011 LHS = signExtendInputIfNeeded(LHS); 3012 RHS = signExtendInputIfNeeded(RHS); 3013 SDValue SUBFNode = 3014 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); 3015 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, 3016 SUBFNode, S->getI64Imm(1, dl), 3017 S->getI64Imm(63, dl)), 0); 3018 } 3019 case ISD::SETUGE: 3020 // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1) 3021 // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1) 3022 std::swap(LHS, RHS); 3023 LLVM_FALLTHROUGH; 3024 case ISD::SETULE: { 3025 if (CmpInGPR == ICGPR_NonExtIn) 3026 return SDValue(); 3027 // The upper 32-bits of the register can't be undefined for this sequence. 
3028 LHS = zeroExtendInputIfNeeded(LHS); 3029 RHS = zeroExtendInputIfNeeded(RHS); 3030 SDValue Subtract = 3031 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0); 3032 SDValue SrdiNode = 3033 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, 3034 Subtract, S->getI64Imm(1, dl), 3035 S->getI64Imm(63, dl)), 0); 3036 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode, 3037 S->getI32Imm(1, dl)), 0); 3038 } 3039 case ISD::SETUGT: 3040 // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63) 3041 // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63) 3042 std::swap(LHS, RHS); 3043 LLVM_FALLTHROUGH; 3044 case ISD::SETULT: { 3045 if (CmpInGPR == ICGPR_NonExtIn) 3046 return SDValue(); 3047 // The upper 32-bits of the register can't be undefined for this sequence. 3048 LHS = zeroExtendInputIfNeeded(LHS); 3049 RHS = zeroExtendInputIfNeeded(RHS); 3050 SDValue Subtract = 3051 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); 3052 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, 3053 Subtract, S->getI64Imm(1, dl), 3054 S->getI64Imm(63, dl)), 0); 3055 } 3056 } 3057 } 3058 3059 /// Produces a sign-extended result of comparing two 32-bit values according to 3060 /// the passed condition code. 3061 SDValue 3062 IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS, 3063 ISD::CondCode CC, 3064 int64_t RHSValue, SDLoc dl) { 3065 if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 || 3066 CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Zext) 3067 return SDValue(); 3068 bool IsRHSZero = RHSValue == 0; 3069 bool IsRHSOne = RHSValue == 1; 3070 bool IsRHSNegOne = RHSValue == -1LL; 3071 3072 switch (CC) { 3073 default: return SDValue(); 3074 case ISD::SETEQ: { 3075 // (sext (setcc %a, %b, seteq)) -> 3076 // (ashr (shl (ctlz (xor %a, %b)), 58), 63) 3077 // (sext (setcc %a, 0, seteq)) -> 3078 // (ashr (shl (ctlz %a), 58), 63) 3079 SDValue CountInput = IsRHSZero ? 
LHS : 3080 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); 3081 SDValue Cntlzw = 3082 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0); 3083 SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl), 3084 S->getI32Imm(5, dl), S->getI32Imm(31, dl) }; 3085 SDValue Slwi = 3086 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0); 3087 return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0); 3088 } 3089 case ISD::SETNE: { 3090 // Bitwise xor the operands, count leading zeros, shift right by 5 bits and 3091 // flip the bit, finally take 2's complement. 3092 // (sext (setcc %a, %b, setne)) -> 3093 // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1)) 3094 // Same as above, but the first xor is not needed. 3095 // (sext (setcc %a, 0, setne)) -> 3096 // (neg (xor (lshr (ctlz %a), 5), 1)) 3097 SDValue Xor = IsRHSZero ? LHS : 3098 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); 3099 SDValue Clz = 3100 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); 3101 SDValue ShiftOps[] = 3102 { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) }; 3103 SDValue Shift = 3104 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); 3105 SDValue Xori = 3106 SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, 3107 S->getI32Imm(1, dl)), 0); 3108 return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0); 3109 } 3110 case ISD::SETGE: { 3111 // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1) 3112 // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31) 3113 if (IsRHSZero) 3114 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); 3115 3116 // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a) 3117 // by swapping inputs and falling through. 
3118 std::swap(LHS, RHS); 3119 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); 3120 IsRHSZero = RHSConst && RHSConst->isNullValue(); 3121 LLVM_FALLTHROUGH; 3122 } 3123 case ISD::SETLE: { 3124 if (CmpInGPR == ICGPR_NonExtIn) 3125 return SDValue(); 3126 // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %b, %a), 63), -1) 3127 // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1) 3128 if (IsRHSZero) 3129 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); 3130 3131 // The upper 32-bits of the register can't be undefined for this sequence. 3132 LHS = signExtendInputIfNeeded(LHS); 3133 RHS = signExtendInputIfNeeded(RHS); 3134 SDValue SUBFNode = 3135 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue, 3136 LHS, RHS), 0); 3137 SDValue Srdi = 3138 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, 3139 SUBFNode, S->getI64Imm(1, dl), 3140 S->getI64Imm(63, dl)), 0); 3141 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi, 3142 S->getI32Imm(-1, dl)), 0); 3143 } 3144 case ISD::SETGT: { 3145 // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63) 3146 // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31) 3147 // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63) 3148 if (IsRHSNegOne) 3149 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); 3150 if (IsRHSZero) { 3151 if (CmpInGPR == ICGPR_NonExtIn) 3152 return SDValue(); 3153 // The upper 32-bits of the register can't be undefined for this sequence. 3154 LHS = signExtendInputIfNeeded(LHS); 3155 RHS = signExtendInputIfNeeded(RHS); 3156 SDValue Neg = 3157 SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0); 3158 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg, 3159 S->getI64Imm(63, dl)), 0); 3160 } 3161 // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as 3162 // (%b < %a) by swapping inputs and falling through. 
3163 std::swap(LHS, RHS); 3164 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); 3165 IsRHSZero = RHSConst && RHSConst->isNullValue(); 3166 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; 3167 LLVM_FALLTHROUGH; 3168 } 3169 case ISD::SETLT: { 3170 // (sext (setcc %a, %b, setgt)) -> (ashr (sub %a, %b), 63) 3171 // (sext (setcc %a, 1, setgt)) -> (add (lshr (- %a), 63), -1) 3172 // (sext (setcc %a, 0, setgt)) -> (ashr %a, 31) 3173 if (IsRHSOne) { 3174 if (CmpInGPR == ICGPR_NonExtIn) 3175 return SDValue(); 3176 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); 3177 } 3178 if (IsRHSZero) 3179 return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS, 3180 S->getI32Imm(31, dl)), 0); 3181 3182 if (CmpInGPR == ICGPR_NonExtIn) 3183 return SDValue(); 3184 // The upper 32-bits of the register can't be undefined for this sequence. 3185 LHS = signExtendInputIfNeeded(LHS); 3186 RHS = signExtendInputIfNeeded(RHS); 3187 SDValue SUBFNode = 3188 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); 3189 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, 3190 SUBFNode, S->getI64Imm(63, dl)), 0); 3191 } 3192 case ISD::SETUGE: 3193 // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1) 3194 // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1) 3195 std::swap(LHS, RHS); 3196 LLVM_FALLTHROUGH; 3197 case ISD::SETULE: { 3198 if (CmpInGPR == ICGPR_NonExtIn) 3199 return SDValue(); 3200 // The upper 32-bits of the register can't be undefined for this sequence. 
3201 LHS = zeroExtendInputIfNeeded(LHS); 3202 RHS = zeroExtendInputIfNeeded(RHS); 3203 SDValue Subtract = 3204 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0); 3205 SDValue Shift = 3206 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract, 3207 S->getI32Imm(1, dl), S->getI32Imm(63,dl)), 3208 0); 3209 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift, 3210 S->getI32Imm(-1, dl)), 0); 3211 } 3212 case ISD::SETUGT: 3213 // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63) 3214 // (sext (setcc %a, %b, setugt)) -> (ashr (sub %a, %b), 63) 3215 std::swap(LHS, RHS); 3216 LLVM_FALLTHROUGH; 3217 case ISD::SETULT: { 3218 if (CmpInGPR == ICGPR_NonExtIn) 3219 return SDValue(); 3220 // The upper 32-bits of the register can't be undefined for this sequence. 3221 LHS = zeroExtendInputIfNeeded(LHS); 3222 RHS = zeroExtendInputIfNeeded(RHS); 3223 SDValue Subtract = 3224 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); 3225 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, 3226 Subtract, S->getI64Imm(63, dl)), 0); 3227 } 3228 } 3229 } 3230 3231 /// Produces a zero-extended result of comparing two 64-bit values according to 3232 /// the passed condition code. 3233 SDValue 3234 IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS, 3235 ISD::CondCode CC, 3236 int64_t RHSValue, SDLoc dl) { 3237 if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 || 3238 CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Sext) 3239 return SDValue(); 3240 bool IsRHSZero = RHSValue == 0; 3241 bool IsRHSOne = RHSValue == 1; 3242 bool IsRHSNegOne = RHSValue == -1LL; 3243 switch (CC) { 3244 default: return SDValue(); 3245 case ISD::SETEQ: { 3246 // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6) 3247 // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6) 3248 SDValue Xor = IsRHSZero ? 
LHS : 3249 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); 3250 SDValue Clz = 3251 SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0); 3252 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz, 3253 S->getI64Imm(58, dl), 3254 S->getI64Imm(63, dl)), 0); 3255 } 3256 case ISD::SETNE: { 3257 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1) 3258 // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA) 3259 // {addcz.reg, addcz.CA} = (addcarry %a, -1) 3260 // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA) 3261 SDValue Xor = IsRHSZero ? LHS : 3262 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); 3263 SDValue AC = 3264 SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue, 3265 Xor, S->getI32Imm(~0U, dl)), 0); 3266 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC, 3267 Xor, AC.getValue(1)), 0); 3268 } 3269 case ISD::SETGE: { 3270 // {subc.reg, subc.CA} = (subcarry %a, %b) 3271 // (zext (setcc %a, %b, setge)) -> 3272 // (adde (lshr %b, 63), (ashr %a, 63), subc.CA) 3273 // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63) 3274 if (IsRHSZero) 3275 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); 3276 std::swap(LHS, RHS); 3277 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); 3278 IsRHSZero = RHSConst && RHSConst->isNullValue(); 3279 LLVM_FALLTHROUGH; 3280 } 3281 case ISD::SETLE: { 3282 // {subc.reg, subc.CA} = (subcarry %b, %a) 3283 // (zext (setcc %a, %b, setge)) -> 3284 // (adde (lshr %a, 63), (ashr %b, 63), subc.CA) 3285 // (zext (setcc %a, 0, setge)) -> (lshr (or %a, (add %a, -1)), 63) 3286 if (IsRHSZero) 3287 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); 3288 SDValue ShiftL = 3289 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS, 3290 S->getI64Imm(1, dl), 3291 S->getI64Imm(63, dl)), 0); 3292 SDValue ShiftR = 3293 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, 
MVT::i64, RHS, 3294 S->getI64Imm(63, dl)), 0); 3295 SDValue SubtractCarry = 3296 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, 3297 LHS, RHS), 1); 3298 return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue, 3299 ShiftR, ShiftL, SubtractCarry), 0); 3300 } 3301 case ISD::SETGT: { 3302 // {subc.reg, subc.CA} = (subcarry %b, %a) 3303 // (zext (setcc %a, %b, setgt)) -> 3304 // (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1) 3305 // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63) 3306 if (IsRHSNegOne) 3307 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); 3308 if (IsRHSZero) { 3309 SDValue Addi = 3310 SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS, 3311 S->getI64Imm(~0ULL, dl)), 0); 3312 SDValue Nor = 3313 SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0); 3314 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor, 3315 S->getI64Imm(1, dl), 3316 S->getI64Imm(63, dl)), 0); 3317 } 3318 std::swap(LHS, RHS); 3319 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); 3320 IsRHSZero = RHSConst && RHSConst->isNullValue(); 3321 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; 3322 LLVM_FALLTHROUGH; 3323 } 3324 case ISD::SETLT: { 3325 // {subc.reg, subc.CA} = (subcarry %a, %b) 3326 // (zext (setcc %a, %b, setlt)) -> 3327 // (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1) 3328 // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63) 3329 if (IsRHSOne) 3330 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); 3331 if (IsRHSZero) 3332 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS, 3333 S->getI64Imm(1, dl), 3334 S->getI64Imm(63, dl)), 0); 3335 SDValue SRADINode = 3336 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, 3337 LHS, S->getI64Imm(63, dl)), 0); 3338 SDValue SRDINode = 3339 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, 3340 RHS, S->getI64Imm(1, dl), 3341 S->getI64Imm(63, dl)), 0); 
3342 SDValue SUBFC8Carry = 3343 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, 3344 RHS, LHS), 1); 3345 SDValue ADDE8Node = 3346 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue, 3347 SRDINode, SRADINode, SUBFC8Carry), 0); 3348 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, 3349 ADDE8Node, S->getI64Imm(1, dl)), 0); 3350 } 3351 case ISD::SETUGE: 3352 // {subc.reg, subc.CA} = (subcarry %a, %b) 3353 // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1) 3354 std::swap(LHS, RHS); 3355 LLVM_FALLTHROUGH; 3356 case ISD::SETULE: { 3357 // {subc.reg, subc.CA} = (subcarry %b, %a) 3358 // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1) 3359 SDValue SUBFC8Carry = 3360 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, 3361 LHS, RHS), 1); 3362 SDValue SUBFE8Node = 3363 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, 3364 LHS, LHS, SUBFC8Carry), 0); 3365 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, 3366 SUBFE8Node, S->getI64Imm(1, dl)), 0); 3367 } 3368 case ISD::SETUGT: 3369 // {subc.reg, subc.CA} = (subcarry %b, %a) 3370 // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA) 3371 std::swap(LHS, RHS); 3372 LLVM_FALLTHROUGH; 3373 case ISD::SETULT: { 3374 // {subc.reg, subc.CA} = (subcarry %a, %b) 3375 // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA) 3376 SDValue SubtractCarry = 3377 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, 3378 RHS, LHS), 1); 3379 SDValue ExtSub = 3380 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, 3381 LHS, LHS, SubtractCarry), 0); 3382 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, 3383 ExtSub), 0); 3384 } 3385 } 3386 } 3387 3388 /// Produces a sign-extended result of comparing two 64-bit values according to 3389 /// the passed condition code. 
3390 SDValue 3391 IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS, 3392 ISD::CondCode CC, 3393 int64_t RHSValue, SDLoc dl) { 3394 if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 || 3395 CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Zext) 3396 return SDValue(); 3397 bool IsRHSZero = RHSValue == 0; 3398 bool IsRHSOne = RHSValue == 1; 3399 bool IsRHSNegOne = RHSValue == -1LL; 3400 switch (CC) { 3401 default: return SDValue(); 3402 case ISD::SETEQ: { 3403 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1) 3404 // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA) 3405 // {addcz.reg, addcz.CA} = (addcarry %a, -1) 3406 // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA) 3407 SDValue AddInput = IsRHSZero ? LHS : 3408 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); 3409 SDValue Addic = 3410 SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue, 3411 AddInput, S->getI32Imm(~0U, dl)), 0); 3412 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic, 3413 Addic, Addic.getValue(1)), 0); 3414 } 3415 case ISD::SETNE: { 3416 // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b)) 3417 // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA) 3418 // {subfcz.reg, subfcz.CA} = (subcarry 0, %a) 3419 // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA) 3420 SDValue Xor = IsRHSZero ? 
LHS : 3421 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); 3422 SDValue SC = 3423 SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue, 3424 Xor, S->getI32Imm(0, dl)), 0); 3425 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC, 3426 SC, SC.getValue(1)), 0); 3427 } 3428 case ISD::SETGE: { 3429 // {subc.reg, subc.CA} = (subcarry %a, %b) 3430 // (zext (setcc %a, %b, setge)) -> 3431 // (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA)) 3432 // (zext (setcc %a, 0, setge)) -> (~ (ashr %a, 63)) 3433 if (IsRHSZero) 3434 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); 3435 std::swap(LHS, RHS); 3436 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); 3437 IsRHSZero = RHSConst && RHSConst->isNullValue(); 3438 LLVM_FALLTHROUGH; 3439 } 3440 case ISD::SETLE: { 3441 // {subc.reg, subc.CA} = (subcarry %b, %a) 3442 // (zext (setcc %a, %b, setge)) -> 3443 // (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA)) 3444 // (zext (setcc %a, 0, setge)) -> (ashr (or %a, (add %a, -1)), 63) 3445 if (IsRHSZero) 3446 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); 3447 SDValue ShiftR = 3448 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS, 3449 S->getI64Imm(63, dl)), 0); 3450 SDValue ShiftL = 3451 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS, 3452 S->getI64Imm(1, dl), 3453 S->getI64Imm(63, dl)), 0); 3454 SDValue SubtractCarry = 3455 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, 3456 LHS, RHS), 1); 3457 SDValue Adde = 3458 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue, 3459 ShiftR, ShiftL, SubtractCarry), 0); 3460 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0); 3461 } 3462 case ISD::SETGT: { 3463 // {subc.reg, subc.CA} = (subcarry %b, %a) 3464 // (zext (setcc %a, %b, setgt)) -> 3465 // -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1) 3466 // (zext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, 
-1), %a), 63) 3467 if (IsRHSNegOne) 3468 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); 3469 if (IsRHSZero) { 3470 SDValue Add = 3471 SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS, 3472 S->getI64Imm(-1, dl)), 0); 3473 SDValue Nor = 3474 SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0); 3475 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor, 3476 S->getI64Imm(63, dl)), 0); 3477 } 3478 std::swap(LHS, RHS); 3479 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); 3480 IsRHSZero = RHSConst && RHSConst->isNullValue(); 3481 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; 3482 LLVM_FALLTHROUGH; 3483 } 3484 case ISD::SETLT: { 3485 // {subc.reg, subc.CA} = (subcarry %a, %b) 3486 // (zext (setcc %a, %b, setlt)) -> 3487 // -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1) 3488 // (zext (setcc %a, 0, setlt)) -> (ashr %a, 63) 3489 if (IsRHSOne) 3490 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); 3491 if (IsRHSZero) { 3492 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS, 3493 S->getI64Imm(63, dl)), 0); 3494 } 3495 SDValue SRADINode = 3496 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, 3497 LHS, S->getI64Imm(63, dl)), 0); 3498 SDValue SRDINode = 3499 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, 3500 RHS, S->getI64Imm(1, dl), 3501 S->getI64Imm(63, dl)), 0); 3502 SDValue SUBFC8Carry = 3503 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, 3504 RHS, LHS), 1); 3505 SDValue ADDE8Node = 3506 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, 3507 SRDINode, SRADINode, SUBFC8Carry), 0); 3508 SDValue XORI8Node = 3509 SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, 3510 ADDE8Node, S->getI64Imm(1, dl)), 0); 3511 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, 3512 XORI8Node), 0); 3513 } 3514 case ISD::SETUGE: 3515 // {subc.reg, subc.CA} = (subcarry %a, %b) 3516 // (sext (setcc %a, %b, 
setuge)) -> ~(sube %b, %b, subc.CA) 3517 std::swap(LHS, RHS); 3518 LLVM_FALLTHROUGH; 3519 case ISD::SETULE: { 3520 // {subc.reg, subc.CA} = (subcarry %b, %a) 3521 // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA) 3522 SDValue SubtractCarry = 3523 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, 3524 LHS, RHS), 1); 3525 SDValue ExtSub = 3526 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS, 3527 LHS, SubtractCarry), 0); 3528 return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, 3529 ExtSub, ExtSub), 0); 3530 } 3531 case ISD::SETUGT: 3532 // {subc.reg, subc.CA} = (subcarry %b, %a) 3533 // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA) 3534 std::swap(LHS, RHS); 3535 LLVM_FALLTHROUGH; 3536 case ISD::SETULT: { 3537 // {subc.reg, subc.CA} = (subcarry %a, %b) 3538 // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA) 3539 SDValue SubCarry = 3540 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, 3541 RHS, LHS), 1); 3542 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, 3543 LHS, LHS, SubCarry), 0); 3544 } 3545 } 3546 } 3547 3548 /// Do all uses of this SDValue need the result in a GPR? 3549 /// This is meant to be used on values that have type i1 since 3550 /// it is somewhat meaningless to ask if values of other types 3551 /// should be kept in GPR's. 3552 static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) { 3553 assert(Compare.getOpcode() == ISD::SETCC && 3554 "An ISD::SETCC node required here."); 3555 3556 // For values that have a single use, the caller should obviously already have 3557 // checked if that use is an extending use. We check the other uses here. 3558 if (Compare.hasOneUse()) 3559 return true; 3560 // We want the value in a GPR if it is being extended, used for a select, or 3561 // used in logical operations. 
3562 for (auto CompareUse : Compare.getNode()->uses()) 3563 if (CompareUse->getOpcode() != ISD::SIGN_EXTEND && 3564 CompareUse->getOpcode() != ISD::ZERO_EXTEND && 3565 CompareUse->getOpcode() != ISD::SELECT && 3566 !isLogicOp(CompareUse->getOpcode())) { 3567 OmittedForNonExtendUses++; 3568 return false; 3569 } 3570 return true; 3571 } 3572 3573 /// Returns an equivalent of a SETCC node but with the result the same width as 3574 /// the inputs. This can also be used for SELECT_CC if either the true or false 3575 /// values is a power of two while the other is zero. 3576 SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare, 3577 SetccInGPROpts ConvOpts) { 3578 assert((Compare.getOpcode() == ISD::SETCC || 3579 Compare.getOpcode() == ISD::SELECT_CC) && 3580 "An ISD::SETCC node required here."); 3581 3582 // Don't convert this comparison to a GPR sequence because there are uses 3583 // of the i1 result (i.e. uses that require the result in the CR). 3584 if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG)) 3585 return SDValue(); 3586 3587 SDValue LHS = Compare.getOperand(0); 3588 SDValue RHS = Compare.getOperand(1); 3589 3590 // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC. 3591 int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2; 3592 ISD::CondCode CC = 3593 cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get(); 3594 EVT InputVT = LHS.getValueType(); 3595 if (InputVT != MVT::i32 && InputVT != MVT::i64) 3596 return SDValue(); 3597 3598 if (ConvOpts == SetccInGPROpts::ZExtInvert || 3599 ConvOpts == SetccInGPROpts::SExtInvert) 3600 CC = ISD::getSetCCInverse(CC, true); 3601 3602 bool Inputs32Bit = InputVT == MVT::i32; 3603 3604 SDLoc dl(Compare); 3605 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); 3606 int64_t RHSValue = RHSConst ? 
RHSConst->getSExtValue() : INT64_MAX; 3607 bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig || 3608 ConvOpts == SetccInGPROpts::SExtInvert; 3609 3610 if (IsSext && Inputs32Bit) 3611 return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl); 3612 else if (Inputs32Bit) 3613 return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl); 3614 else if (IsSext) 3615 return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl); 3616 return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl); 3617 } 3618 3619 } // end anonymous namespace 3620 3621 bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) { 3622 if (N->getValueType(0) != MVT::i32 && 3623 N->getValueType(0) != MVT::i64) 3624 return false; 3625 3626 // This optimization will emit code that assumes 64-bit registers 3627 // so we don't want to run it in 32-bit mode. Also don't run it 3628 // on functions that are not to be optimized. 3629 if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64()) 3630 return false; 3631 3632 switch (N->getOpcode()) { 3633 default: break; 3634 case ISD::ZERO_EXTEND: 3635 case ISD::SIGN_EXTEND: 3636 case ISD::AND: 3637 case ISD::OR: 3638 case ISD::XOR: { 3639 IntegerCompareEliminator ICmpElim(CurDAG, this); 3640 if (SDNode *New = ICmpElim.Select(N)) { 3641 ReplaceNode(N, New); 3642 return true; 3643 } 3644 } 3645 } 3646 return false; 3647 } 3648 3649 bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) { 3650 if (N->getValueType(0) != MVT::i32 && 3651 N->getValueType(0) != MVT::i64) 3652 return false; 3653 3654 if (!UseBitPermRewriter) 3655 return false; 3656 3657 switch (N->getOpcode()) { 3658 default: break; 3659 case ISD::ROTL: 3660 case ISD::SHL: 3661 case ISD::SRL: 3662 case ISD::AND: 3663 case ISD::OR: { 3664 BitPermutationSelector BPS(CurDAG); 3665 if (SDNode *New = BPS.Select(N)) { 3666 ReplaceNode(N, New); 3667 return true; 3668 } 3669 return false; 3670 } 3671 } 3672 3673 return false; 3674 } 3675 3676 /// SelectCC - Select a comparison of the specified values with the specified 3677 /// 
condition code, returning the CR# of the expression. 3678 SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, 3679 const SDLoc &dl) { 3680 // Always select the LHS. 3681 unsigned Opc; 3682 3683 if (LHS.getValueType() == MVT::i32) { 3684 unsigned Imm; 3685 if (CC == ISD::SETEQ || CC == ISD::SETNE) { 3686 if (isInt32Immediate(RHS, Imm)) { 3687 // SETEQ/SETNE comparison with 16-bit immediate, fold it. 3688 if (isUInt<16>(Imm)) 3689 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS, 3690 getI32Imm(Imm & 0xFFFF, dl)), 3691 0); 3692 // If this is a 16-bit signed immediate, fold it. 3693 if (isInt<16>((int)Imm)) 3694 return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS, 3695 getI32Imm(Imm & 0xFFFF, dl)), 3696 0); 3697 3698 // For non-equality comparisons, the default code would materialize the 3699 // constant, then compare against it, like this: 3700 // lis r2, 4660 3701 // ori r2, r2, 22136 3702 // cmpw cr0, r3, r2 3703 // Since we are just comparing for equality, we can emit this instead: 3704 // xoris r0,r3,0x1234 3705 // cmplwi cr0,r0,0x5678 3706 // beq cr0,L6 3707 SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS, 3708 getI32Imm(Imm >> 16, dl)), 0); 3709 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor, 3710 getI32Imm(Imm & 0xFFFF, dl)), 0); 3711 } 3712 Opc = PPC::CMPLW; 3713 } else if (ISD::isUnsignedIntSetCC(CC)) { 3714 if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm)) 3715 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS, 3716 getI32Imm(Imm & 0xFFFF, dl)), 0); 3717 Opc = PPC::CMPLW; 3718 } else { 3719 int16_t SImm; 3720 if (isIntS16Immediate(RHS, SImm)) 3721 return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS, 3722 getI32Imm((int)SImm & 0xFFFF, 3723 dl)), 3724 0); 3725 Opc = PPC::CMPW; 3726 } 3727 } else if (LHS.getValueType() == MVT::i64) { 3728 uint64_t Imm; 3729 if (CC == ISD::SETEQ || CC == ISD::SETNE) { 3730 if 
(isInt64Immediate(RHS.getNode(), Imm)) { 3731 // SETEQ/SETNE comparison with 16-bit immediate, fold it. 3732 if (isUInt<16>(Imm)) 3733 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS, 3734 getI32Imm(Imm & 0xFFFF, dl)), 3735 0); 3736 // If this is a 16-bit signed immediate, fold it. 3737 if (isInt<16>(Imm)) 3738 return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS, 3739 getI32Imm(Imm & 0xFFFF, dl)), 3740 0); 3741 3742 // For non-equality comparisons, the default code would materialize the 3743 // constant, then compare against it, like this: 3744 // lis r2, 4660 3745 // ori r2, r2, 22136 3746 // cmpd cr0, r3, r2 3747 // Since we are just comparing for equality, we can emit this instead: 3748 // xoris r0,r3,0x1234 3749 // cmpldi cr0,r0,0x5678 3750 // beq cr0,L6 3751 if (isUInt<32>(Imm)) { 3752 SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS, 3753 getI64Imm(Imm >> 16, dl)), 0); 3754 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor, 3755 getI64Imm(Imm & 0xFFFF, dl)), 3756 0); 3757 } 3758 } 3759 Opc = PPC::CMPLD; 3760 } else if (ISD::isUnsignedIntSetCC(CC)) { 3761 if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm)) 3762 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS, 3763 getI64Imm(Imm & 0xFFFF, dl)), 0); 3764 Opc = PPC::CMPLD; 3765 } else { 3766 int16_t SImm; 3767 if (isIntS16Immediate(RHS, SImm)) 3768 return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS, 3769 getI64Imm(SImm & 0xFFFF, dl)), 3770 0); 3771 Opc = PPC::CMPD; 3772 } 3773 } else if (LHS.getValueType() == MVT::f32) { 3774 if (PPCSubTarget->hasSPE()) { 3775 switch (CC) { 3776 default: 3777 case ISD::SETEQ: 3778 case ISD::SETNE: 3779 Opc = PPC::EFSCMPEQ; 3780 break; 3781 case ISD::SETLT: 3782 case ISD::SETGE: 3783 case ISD::SETOLT: 3784 case ISD::SETOGE: 3785 case ISD::SETULT: 3786 case ISD::SETUGE: 3787 Opc = PPC::EFSCMPLT; 3788 break; 3789 case ISD::SETGT: 3790 case ISD::SETLE: 3791 case 
ISD::SETOGT: 3792 case ISD::SETOLE: 3793 case ISD::SETUGT: 3794 case ISD::SETULE: 3795 Opc = PPC::EFSCMPGT; 3796 break; 3797 } 3798 } else 3799 Opc = PPC::FCMPUS; 3800 } else if (LHS.getValueType() == MVT::f64) { 3801 if (PPCSubTarget->hasSPE()) { 3802 switch (CC) { 3803 default: 3804 case ISD::SETEQ: 3805 case ISD::SETNE: 3806 Opc = PPC::EFDCMPEQ; 3807 break; 3808 case ISD::SETLT: 3809 case ISD::SETGE: 3810 case ISD::SETOLT: 3811 case ISD::SETOGE: 3812 case ISD::SETULT: 3813 case ISD::SETUGE: 3814 Opc = PPC::EFDCMPLT; 3815 break; 3816 case ISD::SETGT: 3817 case ISD::SETLE: 3818 case ISD::SETOGT: 3819 case ISD::SETOLE: 3820 case ISD::SETUGT: 3821 case ISD::SETULE: 3822 Opc = PPC::EFDCMPGT; 3823 break; 3824 } 3825 } else 3826 Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD; 3827 } else { 3828 assert(LHS.getValueType() == MVT::f128 && "Unknown vt!"); 3829 assert(PPCSubTarget->hasVSX() && "__float128 requires VSX"); 3830 Opc = PPC::XSCMPUQP; 3831 } 3832 return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0); 3833 } 3834 3835 static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) { 3836 switch (CC) { 3837 case ISD::SETUEQ: 3838 case ISD::SETONE: 3839 case ISD::SETOLE: 3840 case ISD::SETOGE: 3841 llvm_unreachable("Should be lowered by legalize!"); 3842 default: llvm_unreachable("Unknown condition!"); 3843 case ISD::SETOEQ: 3844 case ISD::SETEQ: return PPC::PRED_EQ; 3845 case ISD::SETUNE: 3846 case ISD::SETNE: return PPC::PRED_NE; 3847 case ISD::SETOLT: 3848 case ISD::SETLT: return PPC::PRED_LT; 3849 case ISD::SETULE: 3850 case ISD::SETLE: return PPC::PRED_LE; 3851 case ISD::SETOGT: 3852 case ISD::SETGT: return PPC::PRED_GT; 3853 case ISD::SETUGE: 3854 case ISD::SETGE: return PPC::PRED_GE; 3855 case ISD::SETO: return PPC::PRED_NU; 3856 case ISD::SETUO: return PPC::PRED_UN; 3857 // These two are invalid for floating point. Assume we have int. 
3858 case ISD::SETULT: return PPC::PRED_LT; 3859 case ISD::SETUGT: return PPC::PRED_GT; 3860 } 3861 } 3862 3863 /// getCRIdxForSetCC - Return the index of the condition register field 3864 /// associated with the SetCC condition, and whether or not the field is 3865 /// treated as inverted. That is, lt = 0; ge = 0 inverted. 3866 static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) { 3867 Invert = false; 3868 switch (CC) { 3869 default: llvm_unreachable("Unknown condition!"); 3870 case ISD::SETOLT: 3871 case ISD::SETLT: return 0; // Bit #0 = SETOLT 3872 case ISD::SETOGT: 3873 case ISD::SETGT: return 1; // Bit #1 = SETOGT 3874 case ISD::SETOEQ: 3875 case ISD::SETEQ: return 2; // Bit #2 = SETOEQ 3876 case ISD::SETUO: return 3; // Bit #3 = SETUO 3877 case ISD::SETUGE: 3878 case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE 3879 case ISD::SETULE: 3880 case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE 3881 case ISD::SETUNE: 3882 case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE 3883 case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO 3884 case ISD::SETUEQ: 3885 case ISD::SETOGE: 3886 case ISD::SETOLE: 3887 case ISD::SETONE: 3888 llvm_unreachable("Invalid branch code: should be expanded by legalize"); 3889 // These are invalid for floating point. Assume integer. 3890 case ISD::SETULT: return 0; 3891 case ISD::SETUGT: return 1; 3892 } 3893 } 3894 3895 // getVCmpInst: return the vector compare instruction for the specified 3896 // vector type and condition code. Since this is for altivec specific code, 3897 // only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32). 3898 static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, 3899 bool HasVSX, bool &Swap, bool &Negate) { 3900 Swap = false; 3901 Negate = false; 3902 3903 if (VecVT.isFloatingPoint()) { 3904 /* Handle some cases by swapping input operands. 
*/ 3905 switch (CC) { 3906 case ISD::SETLE: CC = ISD::SETGE; Swap = true; break; 3907 case ISD::SETLT: CC = ISD::SETGT; Swap = true; break; 3908 case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break; 3909 case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break; 3910 case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break; 3911 case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break; 3912 default: break; 3913 } 3914 /* Handle some cases by negating the result. */ 3915 switch (CC) { 3916 case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break; 3917 case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break; 3918 case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break; 3919 case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break; 3920 default: break; 3921 } 3922 /* We have instructions implementing the remaining cases. */ 3923 switch (CC) { 3924 case ISD::SETEQ: 3925 case ISD::SETOEQ: 3926 if (VecVT == MVT::v4f32) 3927 return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP; 3928 else if (VecVT == MVT::v2f64) 3929 return PPC::XVCMPEQDP; 3930 break; 3931 case ISD::SETGT: 3932 case ISD::SETOGT: 3933 if (VecVT == MVT::v4f32) 3934 return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP; 3935 else if (VecVT == MVT::v2f64) 3936 return PPC::XVCMPGTDP; 3937 break; 3938 case ISD::SETGE: 3939 case ISD::SETOGE: 3940 if (VecVT == MVT::v4f32) 3941 return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP; 3942 else if (VecVT == MVT::v2f64) 3943 return PPC::XVCMPGEDP; 3944 break; 3945 default: 3946 break; 3947 } 3948 llvm_unreachable("Invalid floating-point vector compare condition"); 3949 } else { 3950 /* Handle some cases by swapping input operands. */ 3951 switch (CC) { 3952 case ISD::SETGE: CC = ISD::SETLE; Swap = true; break; 3953 case ISD::SETLT: CC = ISD::SETGT; Swap = true; break; 3954 case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break; 3955 case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break; 3956 default: break; 3957 } 3958 /* Handle some cases by negating the result. 
*/ 3959 switch (CC) { 3960 case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break; 3961 case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break; 3962 case ISD::SETLE: CC = ISD::SETGT; Negate = true; break; 3963 case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break; 3964 default: break; 3965 } 3966 /* We have instructions implementing the remaining cases. */ 3967 switch (CC) { 3968 case ISD::SETEQ: 3969 case ISD::SETUEQ: 3970 if (VecVT == MVT::v16i8) 3971 return PPC::VCMPEQUB; 3972 else if (VecVT == MVT::v8i16) 3973 return PPC::VCMPEQUH; 3974 else if (VecVT == MVT::v4i32) 3975 return PPC::VCMPEQUW; 3976 else if (VecVT == MVT::v2i64) 3977 return PPC::VCMPEQUD; 3978 break; 3979 case ISD::SETGT: 3980 if (VecVT == MVT::v16i8) 3981 return PPC::VCMPGTSB; 3982 else if (VecVT == MVT::v8i16) 3983 return PPC::VCMPGTSH; 3984 else if (VecVT == MVT::v4i32) 3985 return PPC::VCMPGTSW; 3986 else if (VecVT == MVT::v2i64) 3987 return PPC::VCMPGTSD; 3988 break; 3989 case ISD::SETUGT: 3990 if (VecVT == MVT::v16i8) 3991 return PPC::VCMPGTUB; 3992 else if (VecVT == MVT::v8i16) 3993 return PPC::VCMPGTUH; 3994 else if (VecVT == MVT::v4i32) 3995 return PPC::VCMPGTUW; 3996 else if (VecVT == MVT::v2i64) 3997 return PPC::VCMPGTUD; 3998 break; 3999 default: 4000 break; 4001 } 4002 llvm_unreachable("Invalid integer vector compare condition"); 4003 } 4004 } 4005 4006 bool PPCDAGToDAGISel::trySETCC(SDNode *N) { 4007 SDLoc dl(N); 4008 unsigned Imm; 4009 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 4010 EVT PtrVT = 4011 CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout()); 4012 bool isPPC64 = (PtrVT == MVT::i64); 4013 4014 if (!PPCSubTarget->useCRBits() && 4015 isInt32Immediate(N->getOperand(1), Imm)) { 4016 // We can codegen setcc op, imm very efficiently compared to a brcond. 4017 // Check for those cases here. 
4018 // setcc op, 0 4019 if (Imm == 0) { 4020 SDValue Op = N->getOperand(0); 4021 switch (CC) { 4022 default: break; 4023 case ISD::SETEQ: { 4024 Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0); 4025 SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl), 4026 getI32Imm(31, dl) }; 4027 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 4028 return true; 4029 } 4030 case ISD::SETNE: { 4031 if (isPPC64) break; 4032 SDValue AD = 4033 SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, 4034 Op, getI32Imm(~0U, dl)), 0); 4035 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1)); 4036 return true; 4037 } 4038 case ISD::SETLT: { 4039 SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl), 4040 getI32Imm(31, dl) }; 4041 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 4042 return true; 4043 } 4044 case ISD::SETGT: { 4045 SDValue T = 4046 SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0); 4047 T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0); 4048 SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl), 4049 getI32Imm(31, dl) }; 4050 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 4051 return true; 4052 } 4053 } 4054 } else if (Imm == ~0U) { // setcc op, -1 4055 SDValue Op = N->getOperand(0); 4056 switch (CC) { 4057 default: break; 4058 case ISD::SETEQ: 4059 if (isPPC64) break; 4060 Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, 4061 Op, getI32Imm(1, dl)), 0); 4062 CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, 4063 SDValue(CurDAG->getMachineNode(PPC::LI, dl, 4064 MVT::i32, 4065 getI32Imm(0, dl)), 4066 0), Op.getValue(1)); 4067 return true; 4068 case ISD::SETNE: { 4069 if (isPPC64) break; 4070 Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0); 4071 SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, 4072 Op, getI32Imm(~0U, dl)); 4073 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op, 
4074 SDValue(AD, 1)); 4075 return true; 4076 } 4077 case ISD::SETLT: { 4078 SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op, 4079 getI32Imm(1, dl)), 0); 4080 SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD, 4081 Op), 0); 4082 SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl), 4083 getI32Imm(31, dl) }; 4084 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 4085 return true; 4086 } 4087 case ISD::SETGT: { 4088 SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl), 4089 getI32Imm(31, dl) }; 4090 Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); 4091 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl)); 4092 return true; 4093 } 4094 } 4095 } 4096 } 4097 4098 SDValue LHS = N->getOperand(0); 4099 SDValue RHS = N->getOperand(1); 4100 4101 // Altivec Vector compare instructions do not set any CR register by default and 4102 // vector compare operations return the same type as the operands. 4103 if (LHS.getValueType().isVector()) { 4104 if (PPCSubTarget->hasQPX() || PPCSubTarget->hasSPE()) 4105 return false; 4106 4107 EVT VecVT = LHS.getValueType(); 4108 bool Swap, Negate; 4109 unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC, 4110 PPCSubTarget->hasVSX(), Swap, Negate); 4111 if (Swap) 4112 std::swap(LHS, RHS); 4113 4114 EVT ResVT = VecVT.changeVectorElementTypeToInteger(); 4115 if (Negate) { 4116 SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0); 4117 CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? 
PPC::XXLNOR : PPC::VNOR, 4118 ResVT, VCmp, VCmp); 4119 return true; 4120 } 4121 4122 CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS); 4123 return true; 4124 } 4125 4126 if (PPCSubTarget->useCRBits()) 4127 return false; 4128 4129 bool Inv; 4130 unsigned Idx = getCRIdxForSetCC(CC, Inv); 4131 SDValue CCReg = SelectCC(LHS, RHS, CC, dl); 4132 SDValue IntCR; 4133 4134 // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that 4135 // The correct compare instruction is already set by SelectCC() 4136 if (PPCSubTarget->hasSPE() && LHS.getValueType().isFloatingPoint()) { 4137 Idx = 1; 4138 } 4139 4140 // Force the ccreg into CR7. 4141 SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32); 4142 4143 SDValue InFlag(nullptr, 0); // Null incoming flag value. 4144 CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg, 4145 InFlag).getValue(1); 4146 4147 IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg, 4148 CCReg), 0); 4149 4150 SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl), 4151 getI32Imm(31, dl), getI32Imm(31, dl) }; 4152 if (!Inv) { 4153 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 4154 return true; 4155 } 4156 4157 // Get the specified bit. 
4158 SDValue Tmp = 4159 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); 4160 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl)); 4161 return true; 4162 } 4163 4164 /// Does this node represent a load/store node whose address can be represented 4165 /// with a register plus an immediate that's a multiple of \p Val: 4166 bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const { 4167 LoadSDNode *LDN = dyn_cast<LoadSDNode>(N); 4168 StoreSDNode *STN = dyn_cast<StoreSDNode>(N); 4169 SDValue AddrOp; 4170 if (LDN) 4171 AddrOp = LDN->getOperand(1); 4172 else if (STN) 4173 AddrOp = STN->getOperand(2); 4174 4175 // If the address points a frame object or a frame object with an offset, 4176 // we need to check the object alignment. 4177 short Imm = 0; 4178 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>( 4179 AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) : 4180 AddrOp)) { 4181 // If op0 is a frame index that is under aligned, we can't do it either, 4182 // because it is translated to r31 or r1 + slot + offset. We won't know the 4183 // slot number until the stack frame is finalized. 4184 const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo(); 4185 unsigned SlotAlign = MFI.getObjectAlignment(FI->getIndex()); 4186 if ((SlotAlign % Val) != 0) 4187 return false; 4188 4189 // If we have an offset, we need further check on the offset. 4190 if (AddrOp.getOpcode() != ISD::ADD) 4191 return true; 4192 } 4193 4194 if (AddrOp.getOpcode() == ISD::ADD) 4195 return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val); 4196 4197 // If the address comes from the outside, the offset will be zero. 4198 return AddrOp.getOpcode() == ISD::CopyFromReg; 4199 } 4200 4201 void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { 4202 // Transfer memoperands. 
4203 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 4204 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp}); 4205 } 4206 4207 static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG, 4208 bool &NeedSwapOps, bool &IsUnCmp) { 4209 4210 assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here."); 4211 4212 SDValue LHS = N->getOperand(0); 4213 SDValue RHS = N->getOperand(1); 4214 SDValue TrueRes = N->getOperand(2); 4215 SDValue FalseRes = N->getOperand(3); 4216 ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes); 4217 if (!TrueConst) 4218 return false; 4219 4220 assert((N->getSimpleValueType(0) == MVT::i64 || 4221 N->getSimpleValueType(0) == MVT::i32) && 4222 "Expecting either i64 or i32 here."); 4223 4224 // We are looking for any of: 4225 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1) 4226 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1) 4227 // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq) 4228 // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq) 4229 int64_t TrueResVal = TrueConst->getSExtValue(); 4230 if ((TrueResVal < -1 || TrueResVal > 1) || 4231 (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) || 4232 (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) || 4233 (TrueResVal == 0 && 4234 (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ))) 4235 return false; 4236 4237 bool InnerIsSel = FalseRes.getOpcode() == ISD::SELECT_CC; 4238 SDValue SetOrSelCC = InnerIsSel ? FalseRes : FalseRes.getOperand(0); 4239 if (SetOrSelCC.getOpcode() != ISD::SETCC && 4240 SetOrSelCC.getOpcode() != ISD::SELECT_CC) 4241 return false; 4242 4243 // Without this setb optimization, the outer SELECT_CC will be manually 4244 // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass 4245 // transforms pseudo instruction to isel instruction. 
When there are more than 4246 // one use for result like zext/sext, with current optimization we only see 4247 // isel is replaced by setb but can't see any significant gain. Since 4248 // setb has longer latency than original isel, we should avoid this. Another 4249 // point is that setb requires comparison always kept, it can break the 4250 // opportunity to get the comparison away if we have in future. 4251 if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse())) 4252 return false; 4253 4254 SDValue InnerLHS = SetOrSelCC.getOperand(0); 4255 SDValue InnerRHS = SetOrSelCC.getOperand(1); 4256 ISD::CondCode InnerCC = 4257 cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get(); 4258 // If the inner comparison is a select_cc, make sure the true/false values are 4259 // 1/-1 and canonicalize it if needed. 4260 if (InnerIsSel) { 4261 ConstantSDNode *SelCCTrueConst = 4262 dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2)); 4263 ConstantSDNode *SelCCFalseConst = 4264 dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3)); 4265 if (!SelCCTrueConst || !SelCCFalseConst) 4266 return false; 4267 int64_t SelCCTVal = SelCCTrueConst->getSExtValue(); 4268 int64_t SelCCFVal = SelCCFalseConst->getSExtValue(); 4269 // The values must be -1/1 (requiring a swap) or 1/-1. 4270 if (SelCCTVal == -1 && SelCCFVal == 1) { 4271 std::swap(InnerLHS, InnerRHS); 4272 } else if (SelCCTVal != 1 || SelCCFVal != -1) 4273 return false; 4274 } 4275 4276 // Canonicalize unsigned case 4277 if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) { 4278 IsUnCmp = true; 4279 InnerCC = (InnerCC == ISD::SETULT) ? 
ISD::SETLT : ISD::SETGT; 4280 } 4281 4282 bool InnerSwapped = false; 4283 if (LHS == InnerRHS && RHS == InnerLHS) 4284 InnerSwapped = true; 4285 else if (LHS != InnerLHS || RHS != InnerRHS) 4286 return false; 4287 4288 switch (CC) { 4289 // (select_cc lhs, rhs, 0, \ 4290 // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq) 4291 case ISD::SETEQ: 4292 if (!InnerIsSel) 4293 return false; 4294 if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT) 4295 return false; 4296 NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped; 4297 break; 4298 4299 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt) 4300 // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt) 4301 // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt) 4302 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt) 4303 // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt) 4304 // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt) 4305 case ISD::SETULT: 4306 if (!IsUnCmp && InnerCC != ISD::SETNE) 4307 return false; 4308 IsUnCmp = true; 4309 LLVM_FALLTHROUGH; 4310 case ISD::SETLT: 4311 if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) || 4312 (InnerCC == ISD::SETLT && InnerSwapped)) 4313 NeedSwapOps = (TrueResVal == 1); 4314 else 4315 return false; 4316 break; 4317 4318 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt) 4319 // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt) 4320 // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt) 4321 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt) 4322 // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt) 4323 // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt) 4324 case ISD::SETUGT: 4325 if (!IsUnCmp && InnerCC != ISD::SETNE) 4326 return false; 4327 IsUnCmp = true; 4328 LLVM_FALLTHROUGH; 4329 case ISD::SETGT: 4330 if (InnerCC == 
ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||
        (InnerCC == ISD::SETGT && InnerSwapped))
      NeedSwapOps = (TrueResVal == -1);
    else
      return false;
    break;

  default:
    return false;
  }

  LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
  LLVM_DEBUG(N->dump());

  return true;
}

// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
//
// Overview of the control flow:
//  - A node that already carries a machine opcode only gets its node id reset
//    to -1 and is otherwise left alone.
//  - The node is then offered to tryBitPermutation() and tryIntCompareInGPR();
//    either may fully select it.
//  - Otherwise the switch below hand-selects the opcodes that need custom
//    handling. A case that `return`s has replaced/selected N itself; a case
//    that `break`s (and every opcode not listed) ends up in SelectCode(N).
void PPCDAGToDAGISel::Select(SDNode *N) {
  SDLoc dl(N);
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  // In case any misguided DAG-level optimizations form an ADD with a
  // TargetConstant operand, crash here instead of miscompiling (by selecting
  // an r+r add instead of some kind of r+i add).
  if (N->getOpcode() == ISD::ADD &&
      N->getOperand(1).getOpcode() == ISD::TargetConstant)
    llvm_unreachable("Invalid ADD with TargetConstant operand");

  // Try matching complex bit permutations before doing anything else.
  if (tryBitPermutation(N))
    return;

  // Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
  if (tryIntCompareInGPR(N))
    return;

  switch (N->getOpcode()) {
  default: break;

  case ISD::Constant:
    // Only 64-bit constants are materialized by hand here.
    if (N->getValueType(0) == MVT::i64) {
      ReplaceNode(N, selectI64Imm(CurDAG, N));
      return;
    }
    break;

  case ISD::SETCC:
    if (trySETCC(N))
      return;
    break;
  // These nodes will be transformed into GETtlsADDR32 node, which
  // later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT
  case PPCISD::ADDI_TLSLD_L_ADDR:
  case PPCISD::ADDI_TLSGD_L_ADDR: {
    const Module *Mod = MF->getFunction().getParent();
    // Only relevant for 32-bit pointers, secure PLT, ELF, and a PIC level
    // other than SmallPIC; everything else goes straight to SelectCode.
    if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
        !PPCSubTarget->isSecurePlt() || !PPCSubTarget->isTargetELF() ||
        Mod->getPICLevel() == PICLevel::SmallPIC)
      break;
    // Attach global base pointer on GETtlsADDR32 node in order to
    // generate secure plt code for TLS symbols.
    // (The call is made for its side effect; the node itself is still
    // selected by SelectCode after the break.)
    getGlobalBaseReg();
  } break;
  case PPCISD::CALL: {
    if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
        !TM.isPositionIndependent() || !PPCSubTarget->isSecurePlt() ||
        !PPCSubTarget->isTargetELF())
      break;

    SDValue Op = N->getOperand(1);

    // For callees flagged MO_PLT, request the global base register; the call
    // node itself is still selected by SelectCode after the break below.
    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
      if (GA->getTargetFlags() == PPCII::MO_PLT)
        getGlobalBaseReg();
    }
    else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
      if (ES->getTargetFlags() == PPCII::MO_PLT)
        getGlobalBaseReg();
    }
  }
    break;

  case PPCISD::GlobalBaseReg:
    ReplaceNode(N, getGlobalBaseReg());
    return;

  case ISD::FrameIndex:
    selectFrameIndex(N, N);
    return;

  case PPCISD::MFOCRF: {
    // Operand 1 is forwarded unchanged as the last operand of MFOCRF.
    SDValue InFlag = N->getOperand(1);
    ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
                                          N->getOperand(0), InFlag));
    return;
  }

  case PPCISD::READ_TIME_BASE:
    // ReadTB produces two i32 results plus a chain.
    ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
                                          MVT::Other, N->getOperand(0)));
    return;

  case PPCISD::SRA_ADDZE: {
    // Selected as a shift-right-algebraic-immediate (SRADI / SRAWI) followed
    // by ADDZE8 / ADDZE; the shift's glue result (value #1) is fed to the
    // ADDZE as its second operand.
    SDValue N0 = N->getOperand(0);
    SDValue ShiftAmt =
      CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
                                  getConstantIntValue(), dl,
                                  N->getValueType(0));
    if (N->getValueType(0) == MVT::i64) {
      SDNode *Op =
        CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
                               N0, ShiftAmt);
      CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0),
                           SDValue(Op, 1));
      return;
    } else {
      assert(N->getValueType(0) == MVT::i32 &&
             "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
      SDNode *Op =
        CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
                               N0, ShiftAmt);
      CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0),
                           SDValue(Op, 1));
      return;
    }
  }

  case ISD::STORE: {
    // Change TLS initial-exec D-form stores to X-form stores.
    StoreSDNode *ST = cast<StoreSDNode>(N);
    if (EnableTLSOpt && PPCSubTarget->isELFv2ABI() &&
        ST->getAddressingMode() != ISD::PRE_INC)
      if (tryTLSXFormStore(ST))
        return;
    break;
  }
  case ISD::LOAD: {
    // Handle preincrement loads.
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT LoadedVT = LD->getMemoryVT();

    // Normal loads are handled by code generated from the .td file.
    if (LD->getAddressingMode() != ISD::PRE_INC) {
      // Change TLS initial-exec D-form loads to X-form loads.
      if (EnableTLSOpt && PPCSubTarget->isELFv2ABI())
        if (tryTLSXFormLoad(LD))
          return;
      break;
    }

    SDValue Offset = LD->getOffset();
    if (Offset.getOpcode() == ISD::TargetConstant ||
        Offset.getOpcode() == ISD::TargetGlobalAddress) {
      // The offset is a TargetConstant / TargetGlobalAddress: pick the
      // update-form opcode without the X suffix. The machine node's operands
      // are ordered { Offset, Base, Chain }.

      unsigned Opcode;
      bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
      if (LD->getValueType(0) != MVT::i64) {
        // Handle PPC32 integer and normal FP loads.
        assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
        switch (LoadedVT.getSimpleVT().SimpleTy) {
          default: llvm_unreachable("Invalid PPC load type!");
          case MVT::f64: Opcode = PPC::LFDU; break;
          case MVT::f32: Opcode = PPC::LFSU; break;
          case MVT::i32: Opcode = PPC::LWZU; break;
          case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
          case MVT::i1:
          case MVT::i8:  Opcode = PPC::LBZU; break;
        }
      } else {
        // Result type is i64: use the "8" variants of the opcodes.
        assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
        assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
        switch (LoadedVT.getSimpleVT().SimpleTy) {
          default: llvm_unreachable("Invalid PPC load type!");
          case MVT::i64: Opcode = PPC::LDU; break;
          case MVT::i32: Opcode = PPC::LWZU8; break;
          case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
          case MVT::i1:
          case MVT::i8:  Opcode = PPC::LBZU8; break;
        }
      }

      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[] = { Offset, Base, Chain };
      // Results: the loaded value, a pointer-typed value, and the chain.
      SDNode *MN = CurDAG->getMachineNode(
          Opcode, dl, LD->getValueType(0),
          PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
      transferMemOperands(N, MN);
      ReplaceNode(N, MN);
      return;
    } else {
      // Any other offset node: pick the *UX opcode. Note the operand order
      // differs from the branch above: { Base, Offset, Chain }.
      unsigned Opcode;
      bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
      if (LD->getValueType(0) != MVT::i64) {
        // Handle PPC32 integer and normal FP loads.
        assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
        switch (LoadedVT.getSimpleVT().SimpleTy) {
          default: llvm_unreachable("Invalid PPC load type!");
          case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX
          case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX
          case MVT::f64: Opcode = PPC::LFDUX; break;
          case MVT::f32: Opcode = PPC::LFSUX; break;
          case MVT::i32: Opcode = PPC::LWZUX; break;
          case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
          case MVT::i1:
          case MVT::i8:  Opcode = PPC::LBZUX; break;
        }
      } else {
        // Result type is i64. Unlike the branch above, a sign-extending i32
        // load is accepted here (LWAUX).
        assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
        assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
               "Invalid sext update load");
        switch (LoadedVT.getSimpleVT().SimpleTy) {
          default: llvm_unreachable("Invalid PPC load type!");
          case MVT::i64: Opcode = PPC::LDUX; break;
          case MVT::i32: Opcode = isSExt ? PPC::LWAUX  : PPC::LWZUX8; break;
          case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
          case MVT::i1:
          case MVT::i8:  Opcode = PPC::LBZUX8; break;
        }
      }

      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[] = { Base, Offset, Chain };
      SDNode *MN = CurDAG->getMachineNode(
          Opcode, dl, LD->getValueType(0),
          PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
      transferMemOperands(N, MN);
      ReplaceNode(N, MN);
      return;
    }
  }

  case ISD::AND: {
    unsigned Imm, Imm2, SH, MB, ME;
    uint64_t Imm64;

    // If this is an and of a value rotated between 0 and 31 bits and then and'd
    // with a mask, emit rlwinm
    if (isInt32Immediate(N->getOperand(1), Imm) &&
        isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
      SDValue Val = N->getOperand(0).getOperand(0);
      SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
                        getI32Imm(ME, dl) };
      CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
      return;
    }
    // If this is just a masked value where the input is not handled above, and
    // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
    if (isInt32Immediate(N->getOperand(1), Imm) &&
        isRunOfOnes(Imm, MB, ME) &&
        N->getOperand(0).getOpcode() != ISD::ROTL) {
      SDValue Val = N->getOperand(0);
      SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl),
                        getI32Imm(ME, dl) };
      CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
      return;
    }
    // If this is a 64-bit zero-extension mask, emit rldicl.
    if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
        isMask_64(Imm64)) {
      SDValue Val = N->getOperand(0);
      MB = 64 - countTrailingOnes(Imm64);
      SH = 0;

      // Look through an any_extend of a constant logical right shift: the
      // shift's input is placed into an IMPLICIT_DEF of the extended type via
      // INSERT_SUBREG (sub-register index 1) and the shift is folded into the
      // rotate amount.
      if (Val.getOpcode() == ISD::ANY_EXTEND) {
        auto Op0 = Val.getOperand(0);
        if ( Op0.getOpcode() == ISD::SRL &&
           isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {

           auto ResultType = Val.getNode()->getValueType(0);
           auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
                                               ResultType);
           SDValue IDVal (ImDef, 0);

           Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
                         ResultType, IDVal, Op0.getOperand(0),
                         getI32Imm(1, dl)), 0);
           SH = 64 - Imm;
        }
      }

      // If the operand is a logical right shift, we can fold it into this
      // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
      // for n <= mb. The right shift is really a left rotate followed by a
      // mask, and this mask is a more-restrictive sub-mask of the mask implied
      // by the shift.
      if (Val.getOpcode() == ISD::SRL &&
          isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
        assert(Imm < 64 && "Illegal shift amount");
        Val = Val.getOperand(0);
        SH = 64 - Imm;
      }

      SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
      CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
      return;
    }
    // If this is a negated 64-bit zero-extension mask,
    // i.e. the immediate is a sequence of ones from the most significant side
    // and all zeros for the remainder, we should use rldicr.
    if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
        isMask_64(~Imm64)) {
      SDValue Val = N->getOperand(0);
      // NOTE(review): the variable is called MB, but here it is simply the
      // third operand handed to RLDICR (63 minus the number of trailing
      // zeros of the mask).
      MB = 63 - countTrailingOnes(~Imm64);
      SH = 0;
      SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
      CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
      return;
    }

    // AND X, 0 -> 0, not "rlwinm 32".
    if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
      ReplaceUses(SDValue(N, 0), N->getOperand(1));
      return;
    }
    // ISD::OR doesn't get all the bitfield insertion fun.
    // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
    // bitfield insert.
    if (isInt32Immediate(N->getOperand(1), Imm) &&
        N->getOperand(0).getOpcode() == ISD::OR &&
        isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
      // The idea here is to check whether this is equivalent to:
      //   (c1 & m) | (x & ~m)
      // where m is a run-of-ones mask. The logic here is that, for each bit in
      // c1 and c2:
      //  - if both are 1, then the output will be 1.
      //  - if both are 0, then the output will be 0.
      //  - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
      //    come from x.
      //  - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
      //    be 0.
      //  If that last condition is never the case, then we can form m from the
      //  bits that are the same between c1 and c2.
      // Note: in this block Imm is c2 (the AND mask) and Imm2 is c1 (the OR
      // constant); the MB/ME declared here shadow the outer ones.
      unsigned MB, ME;
      if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) {
        SDValue Ops[] = { N->getOperand(0).getOperand(0),
                            N->getOperand(0).getOperand(1),
                            getI32Imm(0, dl), getI32Imm(MB, dl),
                            getI32Imm(ME, dl) };
        ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
        return;
      }
    }

    // Other cases are autogenerated.
    break;
  }
  case ISD::OR: {
    if (N->getValueType(0) == MVT::i32)
      if (tryBitfieldInsert(N))
        return;

    int16_t Imm;
    if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
        isIntS16Immediate(N->getOperand(1), Imm)) {
      KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0));

      // If this is equivalent to an add, then we can fold it with the
      // FrameIndex calculation.
      // (That is: every bit set in Imm is known to be zero in the LHS.)
      if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
        selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
        return;
      }
    }

    // OR with a 32-bit immediate can be handled by ori + oris
    // without creating an immediate in a GPR.
    uint64_t Imm64 = 0;
    bool IsPPC64 = PPCSubTarget->isPPC64();
    if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
        (Imm64 & ~0xFFFFFFFFuLL) == 0) {
      // If ImmHi (ImmLo) is zero, only one ori (oris) is generated later;
      // only the case where both halves are non-zero is handled here.
      uint64_t ImmHi = Imm64 >> 16;
      uint64_t ImmLo = Imm64 & 0xFFFF;
      if (ImmHi != 0 && ImmLo != 0) {
        SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
                                            N->getOperand(0),
                                            getI16Imm(ImmLo, dl));
        SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
        CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1);
        return;
      }
    }

    // Other cases are autogenerated.
    break;
  }
  case ISD::XOR: {
    // XOR with a 32-bit immediate can be handled by xori + xoris
    // without creating an immediate in a GPR.
    uint64_t Imm64 = 0;
    bool IsPPC64 = PPCSubTarget->isPPC64();
    if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
        (Imm64 & ~0xFFFFFFFFuLL) == 0) {
      // If ImmHi (ImmLo) is zero, only one xori (xoris) is generated later;
      // only the case where both halves are non-zero is handled here.
      uint64_t ImmHi = Imm64 >> 16;
      uint64_t ImmLo = Imm64 & 0xFFFF;
      if (ImmHi != 0 && ImmLo != 0) {
        SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
                                            N->getOperand(0),
                                            getI16Imm(ImmLo, dl));
        SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
        CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1);
        return;
      }
    }

    break;
  }
  case ISD::ADD: {
    // Fold (add FrameIndex, simm16) into the frame-index selection.
    int16_t Imm;
    if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
        isIntS16Immediate(N->getOperand(1), Imm)) {
      selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
      return;
    }

    break;
  }
  case ISD::SHL: {
    // (shl (and x, imm), amt) that isRotateAndMask accepts becomes one rlwinm.
    unsigned Imm, SH, MB, ME;
    if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
        isRotateAndMask(N, Imm, true, SH, MB, ME)) {
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          getI32Imm(SH, dl), getI32Imm(MB, dl),
                          getI32Imm(ME, dl) };
      CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
      return;
    }

    // Other cases are autogenerated.
    break;
  }
  case ISD::SRL: {
    // Same folding as the SHL case above, for a logical right shift.
    unsigned Imm, SH, MB, ME;
    if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
        isRotateAndMask(N, Imm, true, SH, MB, ME)) {
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          getI32Imm(SH, dl), getI32Imm(MB, dl),
                          getI32Imm(ME, dl) };
      CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
      return;
    }

    // Other cases are autogenerated.
    break;
  }
  // FIXME: Remove this once the ANDI glue bug is fixed:
  case PPCISD::ANDIo_1_EQ_BIT:
  case PPCISD::ANDIo_1_GT_BIT: {
    if (!ANDIGlueBug)
      break;

    EVT InVT = N->getOperand(0).getValueType();
    assert((InVT == MVT::i64 || InVT == MVT::i32) &&
           "Invalid input type for ANDIo_1_EQ_BIT");

    // Emit ANDIo / ANDIo8 of the input with 1, then extract the requested CR0
    // sub-register (eq or gt); the extract is tied to the ANDIo by its glue
    // result.
    unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDIo8 : PPC::ANDIo;
    SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
                                        N->getOperand(0),
                                        CurDAG->getTargetConstant(1, dl, InVT)),
                 0);
    SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
    SDValue SRIdxVal =
      CurDAG->getTargetConstant(N->getOpcode() == PPCISD::ANDIo_1_EQ_BIT ?
                                PPC::sub_eq : PPC::sub_gt, dl, MVT::i32);

    CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,
                         SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);
    return;
  }
  case ISD::SELECT_CC: {
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
    EVT PtrVT =
        CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
    bool isPPC64 = (PtrVT == MVT::i64);

    // If this is a select of i1 operands, we'll pattern match it.
    if (PPCSubTarget->useCRBits() &&
        N->getOperand(0).getValueType() == MVT::i1)
      break;

    if (PPCSubTarget->isISA3_0() && PPCSubTarget->isPPC64()) {
      bool NeedSwapOps = false;
      bool IsUnCmp = false;
      if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {
        SDValue LHS = N->getOperand(0);
        SDValue RHS = N->getOperand(1);
        if (NeedSwapOps)
          std::swap(LHS, RHS);

        // Make use of SelectCC to generate the comparison to set CR bits, for
        // equality comparisons having one literal operand, SelectCC probably
        // doesn't need to materialize the whole literal and just use xoris to
        // check it first, it leads the following comparison result can't
        // exactly represent GT/LT relationship. So to avoid this we specify
        // SETGT/SETUGT here instead of SETEQ.
        SDValue GenCC =
            SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);
        CurDAG->SelectNodeTo(
            N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB,
            N->getValueType(0), GenCC);
        NumP9Setb++;
        return;
      }
    }

    // Handle the setcc cases here.  select_cc lhs, 0, 1, 0, cc
    // (Only the SETNE form on 32-bit targets is matched: it becomes
    // ADDIC lhs, -1 followed by SUBFE, linked through the ADDIC's glue.)
    if (!isPPC64)
      if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
        if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
          if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
            if (N1C->isNullValue() && N3C->isNullValue() &&
                N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
                // FIXME: Implement this optzn for PPC64.
                N->getValueType(0) == MVT::i32) {
              SDNode *Tmp =
                CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                       N->getOperand(0), getI32Imm(~0U, dl));
              CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),
                                   N->getOperand(0), SDValue(Tmp, 1));
              return;
            }

    SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);

    if (N->getValueType(0) == MVT::i1) {
      // An i1 select is: (c & t) | (!c & f).
      bool Inv;
      unsigned Idx = getCRIdxForSetCC(CC, Inv);

      // Map the CR bit index onto the matching CR sub-register.
      unsigned SRI;
      switch (Idx) {
      default: llvm_unreachable("Invalid CC index");
      case 0: SRI = PPC::sub_lt; break;
      case 1: SRI = PPC::sub_gt; break;
      case 2: SRI = PPC::sub_eq; break;
      case 3: SRI = PPC::sub_un; break;
      }

      SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);

      // CRNOR of a bit with itself yields its complement.
      SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
                                              CCBit, CCBit), 0);
      SDValue C =    Inv ? NotCCBit : CCBit,
              NotC = Inv ? CCBit    : NotCCBit;

      SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
                                           C, N->getOperand(2)), 0);
      SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
                                              NotC, N->getOperand(3)), 0);

      CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
      return;
    }

    unsigned BROpc = getPredicateForSetCC(CC);

    // Choose the SELECT_CC_* pseudo from the result type and the available
    // subtarget features; the order of the checks below matters.
    unsigned SelectCCOp;
    if (N->getValueType(0) == MVT::i32)
      SelectCCOp = PPC::SELECT_CC_I4;
    else if (N->getValueType(0) == MVT::i64)
      SelectCCOp = PPC::SELECT_CC_I8;
    else if (N->getValueType(0) == MVT::f32) {
      if (PPCSubTarget->hasP8Vector())
        SelectCCOp = PPC::SELECT_CC_VSSRC;
      else if (PPCSubTarget->hasSPE())
        SelectCCOp = PPC::SELECT_CC_SPE4;
      else
        SelectCCOp = PPC::SELECT_CC_F4;
    } else if (N->getValueType(0) == MVT::f64) {
      if (PPCSubTarget->hasVSX())
        SelectCCOp = PPC::SELECT_CC_VSFRC;
      else if (PPCSubTarget->hasSPE())
        SelectCCOp = PPC::SELECT_CC_SPE;
      else
        SelectCCOp = PPC::SELECT_CC_F8;
    } else if (N->getValueType(0) == MVT::f128)
      SelectCCOp = PPC::SELECT_CC_F16;
    else if (PPCSubTarget->hasSPE())
      SelectCCOp = PPC::SELECT_CC_SPE;
    else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
      SelectCCOp = PPC::SELECT_CC_QFRC;
    else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
      SelectCCOp = PPC::SELECT_CC_QSRC;
    else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
      SelectCCOp = PPC::SELECT_CC_QBRC;
    else if (N->getValueType(0) == MVT::v2f64 ||
             N->getValueType(0) == MVT::v2i64)
      SelectCCOp = PPC::SELECT_CC_VSRC;
    else
      SelectCCOp = PPC::SELECT_CC_VRRC;

    SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
                        getI32Imm(BROpc, dl) };
    CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
    return;
  }
  case ISD::VECTOR_SHUFFLE:
    if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
                                  N->getValueType(0) == MVT::v2i64)) {
      ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

      // Mask elements 0-1 refer to operand 0, elements 2-3 to operand 1.
      SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
              Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
      unsigned DM[2];

      // DM[i] is the doubleword index within the chosen operand: 0 for mask
      // values <= 0 (this includes negative, i.e. undefined, elements) and 2,
      // 1 otherwise.
      for (int i = 0; i < 2; ++i)
        if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
          DM[i] = 0;
        else
          DM[i] = 1;

      // A splat of doubleword 0 of a scalar_to_vector of a suitable load is
      // selected directly as LXVDSX on the load's address.
      if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
          Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
          isa<LoadSDNode>(Op1.getOperand(0))) {
        LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
        SDValue Base, Offset;

        if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
            (LD->getMemoryVT() == MVT::f64 ||
             LD->getMemoryVT() == MVT::i64) &&
            SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
          SDValue Chain = LD->getChain();
          SDValue Ops[] = { Base, Offset, Chain };
          MachineMemOperand *MemOp = LD->getMemOperand();
          SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
                                              N->getValueType(0), Ops);
          CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp});
          return;
        }
      }

      // For little endian, we must swap the input operands and adjust
      // the mask elements (reverse and invert them).
      if (PPCSubTarget->isLittleEndian()) {
        std::swap(Op1, Op2);
        unsigned tmp = DM[0];
        DM[0] = 1 - DM[1];
        DM[1] = 1 - tmp;
      }

      SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
                                              MVT::i32);
      SDValue Ops[] = { Op1, Op2, DMV };
      CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
      return;
    }

    break;
  case PPCISD::BDNZ:
  case PPCISD::BDZ: {
    bool IsPPC64 = PPCSubTarget->isPPC64();
    // The machine node takes the node's operand 1 first and operand 0 last.
    SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
    CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ
                                ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
                                : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
                         MVT::Other, Ops);
    return;
  }
  case PPCISD::COND_BRANCH: {
    // Op #0 is the Chain.
    // Op #1 is the PPC::PRED_* number.
    // Op #2 is the CR#
    // Op #3 is the Dest MBB
    // Op #4 is the Flag.
    // Prevent PPC::PRED_* from being selected into LI.
    unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
    if (EnableBranchHint)
      PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(3));

    SDValue Pred = getI32Imm(PCC, dl);
    SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
      N->getOperand(0), N->getOperand(4) };
    CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
    return;
  }
  case ISD::BR_CC: {
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
    unsigned PCC = getPredicateForSetCC(CC);

    if (N->getOperand(2).getValueType() == MVT::i1) {
      // Comparing two i1 values: compute the condition with a single
      // CR-logical instruction and branch on that bit with BC.
      unsigned Opc;
      bool Swap;
      switch (PCC) {
      default: llvm_unreachable("Unexpected Boolean-operand predicate");
      case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true;  break;
      case PPC::PRED_LE: Opc = PPC::CRORC;  Swap = true;  break;
      case PPC::PRED_EQ: Opc = PPC::CREQV;  Swap = false; break;
      case PPC::PRED_GE: Opc = PPC::CRORC;  Swap = false; break;
      case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
      case PPC::PRED_NE: Opc = PPC::CRXOR;  Swap = false; break;
      }

      // A signed comparison of i1 values produces the opposite result to an
      // unsigned one if the condition code includes less-than or greater-than.
      // This is because 1 is the most negative signed i1 number and the most
      // positive unsigned i1 number. The CR-logical operations used for such
      // comparisons are non-commutative so for signed comparisons vs. unsigned
      // ones, the input operands just need to be swapped.
      if (ISD::isSignedIntSetCC(CC))
        Swap = !Swap;

      SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
                                             N->getOperand(Swap ? 3 : 2),
                                             N->getOperand(Swap ? 2 : 3)), 0);
      CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4),
                           N->getOperand(0));
      return;
    }

    if (EnableBranchHint)
      PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(4));

    SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
    SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
                        N->getOperand(4), N->getOperand(0) };
    CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
    return;
  }
  case ISD::BRIND: {
    // FIXME: Should custom lower this.
    SDValue Chain = N->getOperand(0);
    SDValue Target = N->getOperand(1);
    // NOTE: despite its name, Reg holds the branch *opcode* (BCTR / BCTR8)
    // that N is selected to; Opc is the MTCTR / MTCTR8 emitted first, whose
    // glue result becomes the branch's only operand.
    unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
    unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
    Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
                                           Chain), 0);
    CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
    return;
  }
  case PPCISD::TOC_ENTRY: {
    assert ((PPCSubTarget->isPPC64() || PPCSubTarget->isSVR4ABI()) &&
            "Only supported for 64-bit ABI and 32-bit SVR4");
    // 32-bit SVR4: a single LWZtoc of (GA, operand 1).
    if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) {
      SDValue GA = N->getOperand(0);
      SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA,
                                          N->getOperand(1));
      transferMemOperands(N, MN);
      ReplaceNode(N, MN);
      return;
    }

    // For medium and large code model, we generate two instructions as
    // described below.  Otherwise we allow SelectCodeCommon to handle this,
    // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA.
    CodeModel::Model CModel = TM.getCodeModel();
    if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
      break;

    // The first source operand is a TargetGlobalAddress or a TargetJumpTable.
    // If it must be toc-referenced according to PPCSubTarget, we generate:
    //   LDtocL(@sym, ADDIStocHA(%x2, @sym))
    // Otherwise we generate:
    //   ADDItocL(ADDIStocHA(%x2, @sym), @sym)
    SDValue GA = N->getOperand(0);
    SDValue TOCbase = N->getOperand(1);
    SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
                                         TOCbase, GA);
    if (PPCLowering->isAccessedAsGotIndirect(GA)) {
      // If it is accessed as got-indirect, we need an extra LD to load
      // the address.
      SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
                                          SDValue(Tmp, 0));
      transferMemOperands(N, MN);
      ReplaceNode(N, MN);
      return;
    }

    // Build the address relative to the TOC-pointer.
    ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
                                          SDValue(Tmp, 0), GA));
    return;
  }
  case PPCISD::PPC32_PICGOT:
    // Generate a PIC-safe GOT reference.
    assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() &&
      "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
    CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,
                         PPCLowering->getPointerTy(CurDAG->getDataLayout()),
                         MVT::i32);
    return;

  case PPCISD::VADD_SPLAT: {
    // This expands into one of three sequences, depending on whether
    // the first operand is odd or even, positive or negative.
    assert(isa<ConstantSDNode>(N->getOperand(0)) &&
           isa<ConstantSDNode>(N->getOperand(1)) &&
           "Invalid operand on VADD_SPLAT!");

    int Elt     = N->getConstantOperandVal(0);
    int EltSize = N->getConstantOperandVal(1);
    // Opc1 = splat-immediate, Opc2 = add, Opc3 = subtract, all for the
    // element width given by EltSize (in bytes).
    unsigned Opc1, Opc2, Opc3;
    EVT VT;

    if (EltSize == 1) {
      Opc1 = PPC::VSPLTISB;
      Opc2 = PPC::VADDUBM;
      Opc3 = PPC::VSUBUBM;
      VT = MVT::v16i8;
    } else if (EltSize == 2) {
      Opc1 = PPC::VSPLTISH;
      Opc2 = PPC::VADDUHM;
      Opc3 = PPC::VSUBUHM;
      VT = MVT::v8i16;
    } else {
      assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
      Opc1 = PPC::VSPLTISW;
      Opc2 = PPC::VADDUWM;
      Opc3 = PPC::VSUBUWM;
      VT = MVT::v4i32;
    }

    if ((Elt & 1) == 0) {
      // Elt is even, in the range [-32,-18] + [16,30].
      //
      // Convert: VADD_SPLAT elt, size
      // Into:    tmp = VSPLTIS[BHW] elt
      //          VADDU[BHW]M tmp, tmp
      // Where:   [BHW] = B for size = 1, H for size = 2, W for size = 4
      // (The value actually splatted below is Elt >> 1, i.e. half of Elt,
      // which the add of tmp with itself then doubles.)
      SDValue EltVal = getI32Imm(Elt >> 1, dl);
      SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
      SDValue TmpVal = SDValue(Tmp, 0);
      ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal));
      return;
    } else if (Elt > 0) {
      // Elt is odd and positive, in the range [17,31].
      //
      // Convert: VADD_SPLAT elt, size
      // Into:    tmp1 = VSPLTIS[BHW] elt-16
      //          tmp2 = VSPLTIS[BHW] -16
      //          VSUBU[BHW]M tmp1, tmp2
      SDValue EltVal = getI32Imm(Elt - 16, dl);
      SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
      EltVal = getI32Imm(-16, dl);
      SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
      ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
                                            SDValue(Tmp2, 0)));
      return;
    } else {
      // Elt is odd and negative, in the range [-31,-17].
      //
      // Convert: VADD_SPLAT elt, size
      // Into:    tmp1 = VSPLTIS[BHW] elt+16
      //          tmp2 = VSPLTIS[BHW] -16
      //          VADDU[BHW]M tmp1, tmp2
      SDValue EltVal = getI32Imm(Elt + 16, dl);
      SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
      EltVal = getI32Imm(-16, dl);
      SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
      ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
                                            SDValue(Tmp2, 0)));
      return;
    }
  }
  }

  // Everything not fully handled above goes to the autogenerated selector.
  SelectCode(N);
}

// If the target supports the cmpb instruction, do the idiom recognition here.
// We don't do this as a DAG combine because we don't want to do it as nodes
// are being combined (because we might miss part of the eventual idiom). We
// don't want to do it during instruction selection because we want to reuse
// the logic for lowering the masking operations already part of the
// instruction selector.
SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
  SDLoc dl(N);

  assert(N->getOpcode() == ISD::OR &&
         "Only OR nodes are supported for CMPB");

  SDValue Res;
  if (!PPCSubTarget->hasCMPB())
    return Res;

  if (N->getValueType(0) != MVT::i32 &&
      N->getValueType(0) != MVT::i64)
    return Res;

  EVT VT = N->getValueType(0);

  SDValue RHS, LHS;
  bool BytesFound[8] = {false, false, false, false, false, false, false, false};
  uint64_t Mask = 0, Alt = 0;

  auto IsByteSelectCC = [this](SDValue O, unsigned &b,
                               uint64_t &Mask, uint64_t &Alt,
                               SDValue &LHS, SDValue &RHS) {
    if (O.getOpcode() != ISD::SELECT_CC)
      return false;
    ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get();

    if (!isa<ConstantSDNode>(O.getOperand(2)) ||
        !isa<ConstantSDNode>(O.getOperand(3)))
      return false;

    uint64_t PM = O.getConstantOperandVal(2);
    uint64_t PAlt = O.getConstantOperandVal(3);
    for (b = 0; b
< 8; ++b) { 5236 uint64_t Mask = UINT64_C(0xFF) << (8*b); 5237 if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt) 5238 break; 5239 } 5240 5241 if (b == 8) 5242 return false; 5243 Mask |= PM; 5244 Alt |= PAlt; 5245 5246 if (!isa<ConstantSDNode>(O.getOperand(1)) || 5247 O.getConstantOperandVal(1) != 0) { 5248 SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1); 5249 if (Op0.getOpcode() == ISD::TRUNCATE) 5250 Op0 = Op0.getOperand(0); 5251 if (Op1.getOpcode() == ISD::TRUNCATE) 5252 Op1 = Op1.getOperand(0); 5253 5254 if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL && 5255 Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ && 5256 isa<ConstantSDNode>(Op0.getOperand(1))) { 5257 5258 unsigned Bits = Op0.getValueSizeInBits(); 5259 if (b != Bits/8-1) 5260 return false; 5261 if (Op0.getConstantOperandVal(1) != Bits-8) 5262 return false; 5263 5264 LHS = Op0.getOperand(0); 5265 RHS = Op1.getOperand(0); 5266 return true; 5267 } 5268 5269 // When we have small integers (i16 to be specific), the form present 5270 // post-legalization uses SETULT in the SELECT_CC for the 5271 // higher-order byte, depending on the fact that the 5272 // even-higher-order bytes are known to all be zero, for example: 5273 // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult 5274 // (so when the second byte is the same, because all higher-order 5275 // bits from bytes 3 and 4 are known to be zero, the result of the 5276 // xor can be at most 255) 5277 if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT && 5278 isa<ConstantSDNode>(O.getOperand(1))) { 5279 5280 uint64_t ULim = O.getConstantOperandVal(1); 5281 if (ULim != (UINT64_C(1) << b*8)) 5282 return false; 5283 5284 // Now we need to make sure that the upper bytes are known to be 5285 // zero. 
5286 unsigned Bits = Op0.getValueSizeInBits(); 5287 if (!CurDAG->MaskedValueIsZero( 5288 Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8))) 5289 return false; 5290 5291 LHS = Op0.getOperand(0); 5292 RHS = Op0.getOperand(1); 5293 return true; 5294 } 5295 5296 return false; 5297 } 5298 5299 if (CC != ISD::SETEQ) 5300 return false; 5301 5302 SDValue Op = O.getOperand(0); 5303 if (Op.getOpcode() == ISD::AND) { 5304 if (!isa<ConstantSDNode>(Op.getOperand(1))) 5305 return false; 5306 if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b))) 5307 return false; 5308 5309 SDValue XOR = Op.getOperand(0); 5310 if (XOR.getOpcode() == ISD::TRUNCATE) 5311 XOR = XOR.getOperand(0); 5312 if (XOR.getOpcode() != ISD::XOR) 5313 return false; 5314 5315 LHS = XOR.getOperand(0); 5316 RHS = XOR.getOperand(1); 5317 return true; 5318 } else if (Op.getOpcode() == ISD::SRL) { 5319 if (!isa<ConstantSDNode>(Op.getOperand(1))) 5320 return false; 5321 unsigned Bits = Op.getValueSizeInBits(); 5322 if (b != Bits/8-1) 5323 return false; 5324 if (Op.getConstantOperandVal(1) != Bits-8) 5325 return false; 5326 5327 SDValue XOR = Op.getOperand(0); 5328 if (XOR.getOpcode() == ISD::TRUNCATE) 5329 XOR = XOR.getOperand(0); 5330 if (XOR.getOpcode() != ISD::XOR) 5331 return false; 5332 5333 LHS = XOR.getOperand(0); 5334 RHS = XOR.getOperand(1); 5335 return true; 5336 } 5337 5338 return false; 5339 }; 5340 5341 SmallVector<SDValue, 8> Queue(1, SDValue(N, 0)); 5342 while (!Queue.empty()) { 5343 SDValue V = Queue.pop_back_val(); 5344 5345 for (const SDValue &O : V.getNode()->ops()) { 5346 unsigned b = 0; 5347 uint64_t M = 0, A = 0; 5348 SDValue OLHS, ORHS; 5349 if (O.getOpcode() == ISD::OR) { 5350 Queue.push_back(O); 5351 } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) { 5352 if (!LHS) { 5353 LHS = OLHS; 5354 RHS = ORHS; 5355 BytesFound[b] = true; 5356 Mask |= M; 5357 Alt |= A; 5358 } else if ((LHS == ORHS && RHS == OLHS) || 5359 (RHS == ORHS && LHS == OLHS)) { 5360 BytesFound[b] = true; 5361 Mask |= 
M; 5362 Alt |= A; 5363 } else { 5364 return Res; 5365 } 5366 } else { 5367 return Res; 5368 } 5369 } 5370 } 5371 5372 unsigned LastB = 0, BCnt = 0; 5373 for (unsigned i = 0; i < 8; ++i) 5374 if (BytesFound[LastB]) { 5375 ++BCnt; 5376 LastB = i; 5377 } 5378 5379 if (!LastB || BCnt < 2) 5380 return Res; 5381 5382 // Because we'll be zero-extending the output anyway if don't have a specific 5383 // value for each input byte (via the Mask), we can 'anyext' the inputs. 5384 if (LHS.getValueType() != VT) { 5385 LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT); 5386 RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT); 5387 } 5388 5389 Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS); 5390 5391 bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1); 5392 if (NonTrivialMask && !Alt) { 5393 // Res = Mask & CMPB 5394 Res = CurDAG->getNode(ISD::AND, dl, VT, Res, 5395 CurDAG->getConstant(Mask, dl, VT)); 5396 } else if (Alt) { 5397 // Res = (CMPB & Mask) | (~CMPB & Alt) 5398 // Which, as suggested here: 5399 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge 5400 // can be written as: 5401 // Res = Alt ^ ((Alt ^ Mask) & CMPB) 5402 // useful because the (Alt ^ Mask) can be pre-computed. 5403 Res = CurDAG->getNode(ISD::AND, dl, VT, Res, 5404 CurDAG->getConstant(Mask ^ Alt, dl, VT)); 5405 Res = CurDAG->getNode(ISD::XOR, dl, VT, Res, 5406 CurDAG->getConstant(Alt, dl, VT)); 5407 } 5408 5409 return Res; 5410 } 5411 5412 // When CR bit registers are enabled, an extension of an i1 variable to a i32 5413 // or i64 value is lowered in terms of a SELECT_I[48] operation, and thus 5414 // involves constant materialization of a 0 or a 1 or both. If the result of 5415 // the extension is then operated upon by some operator that can be constant 5416 // folded with a constant 0 or 1, and that constant can be materialized using 5417 // only one instruction (like a zero or one), then we should fold in those 5418 // operations with the select. 
5419 void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) { 5420 if (!PPCSubTarget->useCRBits()) 5421 return; 5422 5423 if (N->getOpcode() != ISD::ZERO_EXTEND && 5424 N->getOpcode() != ISD::SIGN_EXTEND && 5425 N->getOpcode() != ISD::ANY_EXTEND) 5426 return; 5427 5428 if (N->getOperand(0).getValueType() != MVT::i1) 5429 return; 5430 5431 if (!N->hasOneUse()) 5432 return; 5433 5434 SDLoc dl(N); 5435 EVT VT = N->getValueType(0); 5436 SDValue Cond = N->getOperand(0); 5437 SDValue ConstTrue = 5438 CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT); 5439 SDValue ConstFalse = CurDAG->getConstant(0, dl, VT); 5440 5441 do { 5442 SDNode *User = *N->use_begin(); 5443 if (User->getNumOperands() != 2) 5444 break; 5445 5446 auto TryFold = [this, N, User, dl](SDValue Val) { 5447 SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1); 5448 SDValue O0 = UserO0.getNode() == N ? Val : UserO0; 5449 SDValue O1 = UserO1.getNode() == N ? Val : UserO1; 5450 5451 return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl, 5452 User->getValueType(0), 5453 O0.getNode(), O1.getNode()); 5454 }; 5455 5456 // FIXME: When the semantics of the interaction between select and undef 5457 // are clearly defined, it may turn out to be unnecessary to break here. 5458 SDValue TrueRes = TryFold(ConstTrue); 5459 if (!TrueRes || TrueRes.isUndef()) 5460 break; 5461 SDValue FalseRes = TryFold(ConstFalse); 5462 if (!FalseRes || FalseRes.isUndef()) 5463 break; 5464 5465 // For us to materialize these using one instruction, we must be able to 5466 // represent them as signed 16-bit integers. 5467 uint64_t True = cast<ConstantSDNode>(TrueRes)->getZExtValue(), 5468 False = cast<ConstantSDNode>(FalseRes)->getZExtValue(); 5469 if (!isInt<16>(True) || !isInt<16>(False)) 5470 break; 5471 5472 // We can replace User with a new SELECT node, and try again to see if we 5473 // can fold the select with its user. 
5474 Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes); 5475 N = User; 5476 ConstTrue = TrueRes; 5477 ConstFalse = FalseRes; 5478 } while (N->hasOneUse()); 5479 } 5480 5481 void PPCDAGToDAGISel::PreprocessISelDAG() { 5482 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); 5483 5484 bool MadeChange = false; 5485 while (Position != CurDAG->allnodes_begin()) { 5486 SDNode *N = &*--Position; 5487 if (N->use_empty()) 5488 continue; 5489 5490 SDValue Res; 5491 switch (N->getOpcode()) { 5492 default: break; 5493 case ISD::OR: 5494 Res = combineToCMPB(N); 5495 break; 5496 } 5497 5498 if (!Res) 5499 foldBoolExts(Res, N); 5500 5501 if (Res) { 5502 LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: "); 5503 LLVM_DEBUG(N->dump(CurDAG)); 5504 LLVM_DEBUG(dbgs() << "\nNew: "); 5505 LLVM_DEBUG(Res.getNode()->dump(CurDAG)); 5506 LLVM_DEBUG(dbgs() << "\n"); 5507 5508 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); 5509 MadeChange = true; 5510 } 5511 } 5512 5513 if (MadeChange) 5514 CurDAG->RemoveDeadNodes(); 5515 } 5516 5517 /// PostprocessISelDAG - Perform some late peephole optimizations 5518 /// on the DAG representation. 5519 void PPCDAGToDAGISel::PostprocessISelDAG() { 5520 // Skip peepholes at -O0. 5521 if (TM.getOptLevel() == CodeGenOpt::None) 5522 return; 5523 5524 PeepholePPC64(); 5525 PeepholeCROps(); 5526 PeepholePPC64ZExt(); 5527 } 5528 5529 // Check if all users of this node will become isel where the second operand 5530 // is the constant zero. If this is so, and if we can negate the condition, 5531 // then we can flip the true and false operands. This will allow the zero to 5532 // be folded with the isel so that we don't need to materialize a register 5533 // containing zero. 
bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
       UI != UE; ++UI) {
    SDNode *User = *UI;
    // Every user must be a SELECT_I4 / SELECT_I8 machine node ...
    if (!User->isMachineOpcode())
      return false;
    if (User->getMachineOpcode() != PPC::SELECT_I4 &&
        User->getMachineOpcode() != PPC::SELECT_I8)
      return false;

    // ... whose operand 2 is an LI / LI8 machine node ...
    SDNode *Op2 = User->getOperand(2).getNode();
    if (!Op2->isMachineOpcode())
      return false;

    if (Op2->getMachineOpcode() != PPC::LI &&
        Op2->getMachineOpcode() != PPC::LI8)
      return false;

    // ... loading the constant zero.
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2->getOperand(0));
    if (!C)
      return false;

    if (!C->isNullValue())
      return false;
  }

  return true;
}

// Replace every user of N (each must be a SELECT_I4 or SELECT_I8 machine
// node) with a copy whose operands 1 and 2 are exchanged. The condition
// operand (operand 0) is left as it is; the caller is responsible for
// replacing N with its complement.
void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
  // Collect the users before replacing any of them (presumably so the use
  // list is not modified while it is being iterated).
  SmallVector<SDNode *, 4> ToReplace;
  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
       UI != UE; ++UI) {
    SDNode *User = *UI;
    assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
            User->getMachineOpcode() == PPC::SELECT_I8) &&
           "Must have all select users");
    ToReplace.push_back(User);
  }

  for (SmallVector<SDNode *, 4>::iterator UI = ToReplace.begin(),
       UE = ToReplace.end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    // Same opcode, type and condition; operands 1 and 2 swapped.
    SDNode *ResNode =
      CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
                             User->getValueType(0), User->getOperand(0),
                             User->getOperand(2),
                             User->getOperand(1));

    LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
    LLVM_DEBUG(User->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\nNew: ");
    LLVM_DEBUG(ResNode->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\n");

    ReplaceUses(User, ResNode);
  }
}

// Peephole-simplify the CR-bit logical machine nodes (CRAND, CRNAND, CROR,
// CRXOR, CRNOR, CREQV, CRANDC, CRORC) and the SELECT_* / BC / BCn nodes that
// consume a CR bit, when an operand is produced by CRSET, CRUNSET, or a
// CRNOR with two identical operands (a negation). All nodes are scanned
// repeatedly, removing dead nodes after each scan that changed something,
// until a scan makes no replacement.
void PPCDAGToDAGISel::PeepholeCROps() {
  bool IsModified;
  do {
    IsModified = false;
    for (SDNode &Node : CurDAG->allnodes()) {
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
      if (!MachineNode || MachineNode->use_empty())
        continue;
      // ResNode stays equal to MachineNode unless a simplification applies.
      SDNode *ResNode = MachineNode;

      // Op1* describe operand 0 and Op2* describe operand 1:
      //   Set   - produced by CRSET
      //   Unset - produced by CRUNSET
      //   Not   - produced by CRNOR with both operands identical
      bool Op1Set = false, Op1Unset = false,
           Op1Not = false,
           Op2Set = false, Op2Unset = false,
           Op2Not = false;

      // First switch: classify the operands. The binary CR operations
      // classify operand 1 and then fall through to classify operand 0
      // together with the selects and branches.
      unsigned Opcode = MachineNode->getMachineOpcode();
      switch (Opcode) {
      default: break;
      case PPC::CRAND:
      case PPC::CRNAND:
      case PPC::CROR:
      case PPC::CRXOR:
      case PPC::CRNOR:
      case PPC::CREQV:
      case PPC::CRANDC:
      case PPC::CRORC: {
        SDValue Op = MachineNode->getOperand(1);
        if (Op.isMachineOpcode()) {
          if (Op.getMachineOpcode() == PPC::CRSET)
            Op2Set = true;
          else if (Op.getMachineOpcode() == PPC::CRUNSET)
            Op2Unset = true;
          else if (Op.getMachineOpcode() == PPC::CRNOR &&
                   Op.getOperand(0) == Op.getOperand(1))
            Op2Not = true;
        }
        LLVM_FALLTHROUGH;
      }
      case PPC::BC:
      case PPC::BCn:
      case PPC::SELECT_I4:
      case PPC::SELECT_I8:
      case PPC::SELECT_F4:
      case PPC::SELECT_F8:
      case PPC::SELECT_QFRC:
      case PPC::SELECT_QSRC:
      case PPC::SELECT_QBRC:
      case PPC::SELECT_SPE:
      case PPC::SELECT_SPE4:
      case PPC::SELECT_VRRC:
      case PPC::SELECT_VSFRC:
      case PPC::SELECT_VSSRC:
      case PPC::SELECT_VSRC: {
        SDValue Op = MachineNode->getOperand(0);
        if (Op.isMachineOpcode()) {
          if (Op.getMachineOpcode() == PPC::CRSET)
            Op1Set = true;
          else if (Op.getMachineOpcode() == PPC::CRUNSET)
            Op1Unset = true;
          else if (Op.getMachineOpcode() == PPC::CRNOR &&
                   Op.getOperand(0) == Op.getOperand(1))
            Op1Not = true;
        }
        }
        break;
      }

      // Second switch: pick the replacement. Within each case the tests are
      // ordered by priority; the first one that applies wins. The final
      // AllUsersSelectZero alternative emits the complemented operation and
      // requests that the arms of all select users be swapped.
      bool SelectSwap = false;
      switch (Opcode) {
      default: break;
      case PPC::CRAND:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // x & x = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Set)
          // 1 & y = y
          ResNode = MachineNode->getOperand(1).getNode();
        else if (Op2Set)
          // x & 1 = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Unset || Op2Unset)
          // x & 0 = 0 & y = 0
          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Not)
          // ~x & y = andc(y, x)
          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(0).
                                             getOperand(0));
        else if (Op2Not)
          // x & ~y = andc(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1).
                                             getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1));
          SelectSwap = true;
        }
        break;
      case PPC::CRNAND:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // nand(x, x) -> nor(x, x)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(0));
        else if (Op1Set)
          // nand(1, y) -> nor(y, y)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(1));
        else if (Op2Set)
          // nand(x, 1) -> nor(x, x)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(0));
        else if (Op1Unset || Op2Unset)
          // nand(x, 0) = nand(0, y) = 1
          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Not)
          // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0).
                                                      getOperand(0),
                                           MachineNode->getOperand(1));
        else if (Op2Not)
          // nand(x, ~y) = ~x | y = orc(y, x)
          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1).
                                                      getOperand(0),
                                           MachineNode->getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1));
          SelectSwap = true;
        }
        break;
      case PPC::CROR:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // x | x = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Set || Op2Set)
          // x | 1 = 1 | y = 1
          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Unset)
          // 0 | y = y
          ResNode = MachineNode->getOperand(1).getNode();
        else if (Op2Unset)
          // x | 0 = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Not)
          // ~x | y = orc(y, x)
          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(0).
                                             getOperand(0));
        else if (Op2Not)
          // x | ~y = orc(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1).
                                             getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1));
          SelectSwap = true;
        }
        break;
      case PPC::CRXOR:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // xor(x, x) = 0
          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Set)
          // xor(1, y) -> nor(y, y)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(1));
        else if (Op2Set)
          // xor(x, 1) -> nor(x, x)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(0));
        else if (Op1Unset)
          // xor(0, y) = y
          ResNode = MachineNode->getOperand(1).getNode();
        else if (Op2Unset)
          // xor(x, 0) = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Not)
          // xor(~x, y) = eqv(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0).
                                                      getOperand(0),
                                           MachineNode->getOperand(1));
        else if (Op2Not)
          // xor(x, ~y) = eqv(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1).
                                             getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1));
          SelectSwap = true;
        }
        break;
      case PPC::CRNOR:
        // Note: nor(x, x) is the negation idiom itself, so there is no
        // identical-operand simplification for this opcode.
        if (Op1Set || Op2Set)
          // nor(1, y) -> 0
          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Unset)
          // nor(0, y) = ~y -> nor(y, y)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(1));
        else if (Op2Unset)
          // nor(x, 0) = ~x
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(0));
        else if (Op1Not)
          // nor(~x, y) = andc(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0).
                                                      getOperand(0),
                                           MachineNode->getOperand(1));
        else if (Op2Not)
          // nor(x, ~y) = andc(y, x)
          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1).
                                                      getOperand(0),
                                           MachineNode->getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1));
          SelectSwap = true;
        }
        break;
      case PPC::CREQV:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // eqv(x, x) = 1
          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Set)
          // eqv(1, y) = y
          ResNode = MachineNode->getOperand(1).getNode();
        else if (Op2Set)
          // eqv(x, 1) = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Unset)
          // eqv(0, y) = ~y -> nor(y, y)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(1));
        else if (Op2Unset)
          // eqv(x, 0) = ~x
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(0));
        else if (Op1Not)
          // eqv(~x, y) = xor(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0).
                                                      getOperand(0),
                                           MachineNode->getOperand(1));
        else if (Op2Not)
          // eqv(x, ~y) = xor(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1).
                                             getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1));
          SelectSwap = true;
        }
        break;
      case PPC::CRANDC:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // andc(x, x) = 0
          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Set)
          // andc(1, y) = ~y
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(1));
        else if (Op1Unset || Op2Set)
          // andc(0, y) = andc(x, 1) = 0
          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op2Unset)
          // andc(x, 0) = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Not)
          // andc(~x, y) = ~(x | y) = nor(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0).
                                                      getOperand(0),
                                           MachineNode->getOperand(1));
        else if (Op2Not)
          // andc(x, ~y) = x & y
          ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1).
                                             getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          // The complement of andc(x, y) is orc(y, x), hence the swapped
          // operand order.
          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(0));
          SelectSwap = true;
        }
        break;
      case PPC::CRORC:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // orc(x, x) = 1
          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Set || Op2Unset)
          // orc(1, y) = orc(x, 0) = 1
          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op2Set)
          // orc(x, 1) = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Unset)
          // orc(0, y) = ~y
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(1));
        else if (Op1Not)
          // orc(~x, y) = ~(x & y) = nand(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0).
                                                      getOperand(0),
                                           MachineNode->getOperand(1));
        else if (Op2Not)
          // orc(x, ~y) = x | y
          ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1).
                                             getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          // The complement of orc(x, y) is andc(y, x), hence the swapped
          // operand order.
          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(0));
          SelectSwap = true;
        }
        break;
      case PPC::SELECT_I4:
      case PPC::SELECT_I8:
      case PPC::SELECT_F4:
      case PPC::SELECT_F8:
      case PPC::SELECT_QFRC:
      case PPC::SELECT_QSRC:
      case PPC::SELECT_QBRC:
      case PPC::SELECT_SPE:
      case PPC::SELECT_SPE4:
      case PPC::SELECT_VRRC:
      case PPC::SELECT_VSFRC:
      case PPC::SELECT_VSSRC:
      case PPC::SELECT_VSRC:
        // A constant condition picks operand 1 (set) or operand 2 (unset);
        // a negated condition is stripped by swapping operands 1 and 2.
        if (Op1Set)
          ResNode = MachineNode->getOperand(1).getNode();
        else if (Op1Unset)
          ResNode = MachineNode->getOperand(2).getNode();
        else if (Op1Not)
          ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(),
                                           SDLoc(MachineNode),
                                           MachineNode->getValueType(0),
                                           MachineNode->getOperand(0).
                                             getOperand(0),
                                           MachineNode->getOperand(2),
                                           MachineNode->getOperand(1));
        break;
      case PPC::BC:
      case PPC::BCn:
        // A branch on a negated condition becomes the opposite branch
        // opcode on the un-negated condition.
        if (Op1Not)
          ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :
                                                               PPC::BC,
                                           SDLoc(MachineNode),
                                           MVT::Other,
                                           MachineNode->getOperand(0).
                                             getOperand(0),
                                           MachineNode->getOperand(1),
                                           MachineNode->getOperand(2));
        // FIXME: Handle Op1Set, Op1Unset here too.
        break;
      }

      // If we're inverting this node because it is used only by selects that
      // we'd like to swap, then swap the selects before the node replacement.
      if (SelectSwap)
        SwapAllSelectUsers(MachineNode);

      if (ResNode != MachineNode) {
        LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
        LLVM_DEBUG(MachineNode->dump(CurDAG));
        LLVM_DEBUG(dbgs() << "\nNew: ");
        LLVM_DEBUG(ResNode->dump(CurDAG));
        LLVM_DEBUG(dbgs() << "\n");

        ReplaceUses(MachineNode, ResNode);
        IsModified = true;
      }
    }
    if (IsModified)
      CurDAG->RemoveDeadNodes();
  } while (IsModified);
}

// Gather the set of 32-bit operations that are known to have their
// higher-order 32 bits zero, where ToPromote contains all such operations.
//
// Returns true when Op32 is such an operation. ToPromote is only added to on
// a true return: it then receives Op32's node plus every operand node that
// was looked through to establish the property.
static bool PeepholePPC64ZExtGather(SDValue Op32,
                                    SmallPtrSetImpl<SDNode *> &ToPromote) {
  if (!Op32.isMachineOpcode())
    return false;

  // First, check for the "frontier" instructions (those that will clear the
  // higher-order 32 bits.

  // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
  // around. If it does not, then these instructions will clear the
  // higher-order bits.
  if ((Op32.getMachineOpcode() == PPC::RLWINM ||
       Op32.getMachineOpcode() == PPC::RLWNM) &&
      Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) {
    ToPromote.insert(Op32.getNode());
    return true;
  }

  // SLW and SRW always clear the higher-order bits.
  if (Op32.getMachineOpcode() == PPC::SLW ||
      Op32.getMachineOpcode() == PPC::SRW) {
    ToPromote.insert(Op32.getNode());
    return true;
  }

  // For LI and LIS, we need the immediate to be positive (so that it is not
  // sign extended).
  if (Op32.getMachineOpcode() == PPC::LI ||
      Op32.getMachineOpcode() == PPC::LIS) {
    // isUInt<15> holds only for immediates below 2^15.
    if (!isUInt<15>(Op32.getConstantOperandVal(0)))
      return false;

    ToPromote.insert(Op32.getNode());
    return true;
  }

  // LHBRX and LWBRX always clear the higher-order bits.
  if (Op32.getMachineOpcode() == PPC::LHBRX ||
      Op32.getMachineOpcode() == PPC::LWBRX) {
    ToPromote.insert(Op32.getNode());
    return true;
  }

  // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended.
  if (Op32.getMachineOpcode() == PPC::CNTLZW ||
      Op32.getMachineOpcode() == PPC::CNTTZW) {
    ToPromote.insert(Op32.getNode());
    return true;
  }

  // Next, check for those instructions we can look through.
  // Each of these collects into a local set first and merges it into
  // ToPromote only once the whole check has succeeded.

  // Assuming the mask does not wrap around, then the higher-order bits are
  // taken directly from the first operand.
  if (Op32.getMachineOpcode() == PPC::RLWIMI &&
      Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) {
    SmallPtrSet<SDNode *, 16> ToPromote1;
    if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
      return false;

    ToPromote.insert(Op32.getNode());
    ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
    return true;
  }

  // For OR, the higher-order bits are zero if that is true for both operands.
  // For SELECT_I4, the same is true (but the relevant operand numbers are
  // shifted by 1).
  if (Op32.getMachineOpcode() == PPC::OR ||
      Op32.getMachineOpcode() == PPC::SELECT_I4) {
    unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0;
    SmallPtrSet<SDNode *, 16> ToPromote1;
    if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1))
      return false;
    if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1))
      return false;

    ToPromote.insert(Op32.getNode());
    ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
    return true;
  }

  // For ORI and ORIS, we need the higher-order bits of the first operand to be
  // zero, and also for the constant to be positive (so that it is not sign
  // extended).
  if (Op32.getMachineOpcode() == PPC::ORI ||
      Op32.getMachineOpcode() == PPC::ORIS) {
    SmallPtrSet<SDNode *, 16> ToPromote1;
    if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
      return false;
    if (!isUInt<15>(Op32.getConstantOperandVal(1)))
      return false;

    ToPromote.insert(Op32.getNode());
    ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
    return true;
  }

  // The higher-order bits of AND are zero if that is true for at least one of
  // the operands.
  if (Op32.getMachineOpcode() == PPC::AND) {
    SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2;
    bool Op0OK =
      PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
    bool Op1OK =
      PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2);
    if (!Op0OK && !Op1OK)
      return false;

    ToPromote.insert(Op32.getNode());

    // Only the nodes gathered for an operand that qualified are promoted.
    if (Op0OK)
      ToPromote.insert(ToPromote1.begin(), ToPromote1.end());

    if (Op1OK)
      ToPromote.insert(ToPromote2.begin(), ToPromote2.end());

    return true;
  }

  // For ANDI and ANDIS, the higher-order bits are zero if either that is true
  // of the first operand, or if the second operand is positive (so that it is
  // not sign extended).
  if (Op32.getMachineOpcode() == PPC::ANDIo ||
      Op32.getMachineOpcode() == PPC::ANDISo) {
    SmallPtrSet<SDNode *, 16> ToPromote1;
    bool Op0OK =
      PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
    bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1));
    if (!Op0OK && !Op1OK)
      return false;

    ToPromote.insert(Op32.getNode());

    if (Op0OK)
      ToPromote.insert(ToPromote1.begin(), ToPromote1.end());

    return true;
  }

  return false;
}

// On PPC64, remove i32 -> i64 zero extensions whose 32-bit input is already
// known (via PeepholePPC64ZExtGather) to have zero high-order bits, by
// morphing the contributing 32-bit nodes into their 64-bit counterparts and
// replacing the extension with the promoted input.
void PPCDAGToDAGISel::PeepholePPC64ZExt() {
  if (!PPCSubTarget->isPPC64())
    return;

  // When we zero-extend from i32 to i64, we use a pattern like this:
  // def : Pat<(i64 (zext i32:$in)),
  //           (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
  //                   0, 32)>;
  // There are several 32-bit shift/rotate instructions, however, that will
  // clear the higher-order bits of their output, rendering the RLDICL
  // unnecessary. When that happens, we remove it here, and redefine the
  // relevant 32-bit operation to be a 64-bit operation.

  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    // Match RLDICL (INSERT_SUBREG (IMPLICIT_DEF), $in, sub_32), 0, 32 where
    // the INSERT_SUBREG has no other user.
    if (N->getMachineOpcode() != PPC::RLDICL)
      continue;

    if (N->getConstantOperandVal(1) != 0 ||
        N->getConstantOperandVal(2) != 32)
      continue;

    SDValue ISR = N->getOperand(0);
    if (!ISR.isMachineOpcode() ||
        ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG)
      continue;

    if (!ISR.hasOneUse())
      continue;

    if (ISR.getConstantOperandVal(2) != PPC::sub_32)
      continue;

    SDValue IDef = ISR.getOperand(0);
    if (!IDef.isMachineOpcode() ||
        IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF)
      continue;

    // We now know that we're looking at a canonical i32 -> i64 zext. See if we
    // can get rid of it.

    SDValue Op32 = ISR->getOperand(1);
    if (!Op32.isMachineOpcode())
      continue;

    // There are some 32-bit instructions that always clear the high-order 32
    // bits, there are also some instructions (like AND) that we can look
    // through.
    SmallPtrSet<SDNode *, 16> ToPromote;
    if (!PeepholePPC64ZExtGather(Op32, ToPromote))
      continue;

    // If the ToPromote set contains nodes that have uses outside of the set
    // (except for the original INSERT_SUBREG), then abort the transformation.
    bool OutsideUse = false;
    for (SDNode *PN : ToPromote) {
      for (SDNode *UN : PN->uses()) {
        if (!ToPromote.count(UN) && UN != ISR.getNode()) {
          OutsideUse = true;
          break;
        }
      }

      if (OutsideUse)
        break;
    }
    if (OutsideUse)
      continue;

    MadeChange = true;

    // We now know that this zero extension can be removed by promoting to
    // nodes in ToPromote to 64-bit operations, where for operations in the
    // frontier of the set, we need to insert INSERT_SUBREGs for their
    // operands.
    for (SDNode *PN : ToPromote) {
      // Every opcode that PeepholePPC64ZExtGather can insert must have an
      // entry in this table.
      unsigned NewOpcode;
      switch (PN->getMachineOpcode()) {
      default:
        llvm_unreachable("Don't know the 64-bit variant of this instruction");
      case PPC::RLWINM: NewOpcode = PPC::RLWINM8; break;
      case PPC::RLWNM: NewOpcode = PPC::RLWNM8; break;
      case PPC::SLW: NewOpcode = PPC::SLW8; break;
      case PPC::SRW: NewOpcode = PPC::SRW8; break;
      case PPC::LI: NewOpcode = PPC::LI8; break;
      case PPC::LIS: NewOpcode = PPC::LIS8; break;
      case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break;
      case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break;
      case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break;
      case PPC::CNTTZW: NewOpcode = PPC::CNTTZW8; break;
      case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break;
      case PPC::OR: NewOpcode = PPC::OR8; break;
      case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
      case PPC::ORI: NewOpcode = PPC::ORI8; break;
      case PPC::ORIS: NewOpcode = PPC::ORIS8; break;
      case PPC::AND: NewOpcode = PPC::AND8; break;
      case PPC::ANDIo: NewOpcode = PPC::ANDIo8; break;
      case PPC::ANDISo: NewOpcode = PPC::ANDISo8; break;
      }

      // Note: During the replacement process, the nodes will be in an
      // inconsistent state (some instructions will have operands with values
      // of the wrong type). Once done, however, everything should be right
      // again.

      // Non-constant i32 operands that are not themselves being promoted are
      // wrapped in a fresh INSERT_SUBREG built from the original one's
      // operand 0 and subregister index; all other operands are reused.
      SmallVector<SDValue, 4> Ops;
      for (const SDValue &V : PN->ops()) {
        if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 &&
            !isa<ConstantSDNode>(V)) {
          SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) };
          SDNode *ReplOp =
            CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V),
                                   ISR.getNode()->getVTList(), ReplOpOps);
          Ops.push_back(SDValue(ReplOp, 0));
        } else {
          Ops.push_back(V);
        }
      }

      // Because all to-be-promoted nodes only have users that are other
      // promoted nodes (or the original INSERT_SUBREG), we can safely replace
      // the i32 result value type with i64.

      SmallVector<EVT, 2> NewVTs;
      SDVTList VTs = PN->getVTList();
      for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i)
        if (VTs.VTs[i] == MVT::i32)
          NewVTs.push_back(MVT::i64);
        else
          NewVTs.push_back(VTs.VTs[i]);

      LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: ");
      LLVM_DEBUG(PN->dump(CurDAG));

      // Morph PN in place to the 64-bit opcode with the new types/operands.
      CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops);

      LLVM_DEBUG(dbgs() << "\nNew: ");
      LLVM_DEBUG(PN->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\n");
    }

    // Now we replace the original zero extend and its associated INSERT_SUBREG
    // with the value feeding the INSERT_SUBREG (which has now been promoted to
    // return an i64).

    LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: ");
    LLVM_DEBUG(N->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\nNew: ");
    LLVM_DEBUG(Op32.getNode()->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\n");

    ReplaceUses(N, Op32.getNode());
  }

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}

void PPCDAGToDAGISel::PeepholePPC64() {
  // These optimizations are currently supported only for 64-bit SVR4.
6328 if (PPCSubTarget->isDarwin() || !PPCSubTarget->isPPC64()) 6329 return; 6330 6331 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); 6332 6333 while (Position != CurDAG->allnodes_begin()) { 6334 SDNode *N = &*--Position; 6335 // Skip dead nodes and any non-machine opcodes. 6336 if (N->use_empty() || !N->isMachineOpcode()) 6337 continue; 6338 6339 unsigned FirstOp; 6340 unsigned StorageOpcode = N->getMachineOpcode(); 6341 bool RequiresMod4Offset = false; 6342 6343 switch (StorageOpcode) { 6344 default: continue; 6345 6346 case PPC::LWA: 6347 case PPC::LD: 6348 case PPC::DFLOADf64: 6349 case PPC::DFLOADf32: 6350 RequiresMod4Offset = true; 6351 LLVM_FALLTHROUGH; 6352 case PPC::LBZ: 6353 case PPC::LBZ8: 6354 case PPC::LFD: 6355 case PPC::LFS: 6356 case PPC::LHA: 6357 case PPC::LHA8: 6358 case PPC::LHZ: 6359 case PPC::LHZ8: 6360 case PPC::LWZ: 6361 case PPC::LWZ8: 6362 FirstOp = 0; 6363 break; 6364 6365 case PPC::STD: 6366 case PPC::DFSTOREf64: 6367 case PPC::DFSTOREf32: 6368 RequiresMod4Offset = true; 6369 LLVM_FALLTHROUGH; 6370 case PPC::STB: 6371 case PPC::STB8: 6372 case PPC::STFD: 6373 case PPC::STFS: 6374 case PPC::STH: 6375 case PPC::STH8: 6376 case PPC::STW: 6377 case PPC::STW8: 6378 FirstOp = 1; 6379 break; 6380 } 6381 6382 // If this is a load or store with a zero offset, or within the alignment, 6383 // we may be able to fold an add-immediate into the memory operation. 6384 // The check against alignment is below, as it can't occur until we check 6385 // the arguments to N 6386 if (!isa<ConstantSDNode>(N->getOperand(FirstOp))) 6387 continue; 6388 6389 SDValue Base = N->getOperand(FirstOp + 1); 6390 if (!Base.isMachineOpcode()) 6391 continue; 6392 6393 unsigned Flags = 0; 6394 bool ReplaceFlags = true; 6395 6396 // When the feeding operation is an add-immediate of some sort, 6397 // determine whether we need to add relocation information to the 6398 // target flags on the immediate operand when we fold it into the 6399 // load instruction. 
6400 // 6401 // For something like ADDItocL, the relocation information is 6402 // inferred from the opcode; when we process it in the AsmPrinter, 6403 // we add the necessary relocation there. A load, though, can receive 6404 // relocation from various flavors of ADDIxxx, so we need to carry 6405 // the relocation information in the target flags. 6406 switch (Base.getMachineOpcode()) { 6407 default: continue; 6408 6409 case PPC::ADDI8: 6410 case PPC::ADDI: 6411 // In some cases (such as TLS) the relocation information 6412 // is already in place on the operand, so copying the operand 6413 // is sufficient. 6414 ReplaceFlags = false; 6415 // For these cases, the immediate may not be divisible by 4, in 6416 // which case the fold is illegal for DS-form instructions. (The 6417 // other cases provide aligned addresses and are always safe.) 6418 if (RequiresMod4Offset && 6419 (!isa<ConstantSDNode>(Base.getOperand(1)) || 6420 Base.getConstantOperandVal(1) % 4 != 0)) 6421 continue; 6422 break; 6423 case PPC::ADDIdtprelL: 6424 Flags = PPCII::MO_DTPREL_LO; 6425 break; 6426 case PPC::ADDItlsldL: 6427 Flags = PPCII::MO_TLSLD_LO; 6428 break; 6429 case PPC::ADDItocL: 6430 Flags = PPCII::MO_TOC_LO; 6431 break; 6432 } 6433 6434 SDValue ImmOpnd = Base.getOperand(1); 6435 6436 // On PPC64, the TOC base pointer is guaranteed by the ABI only to have 6437 // 8-byte alignment, and so we can only use offsets less than 8 (otherwise, 6438 // we might have needed different @ha relocation values for the offset 6439 // pointers). 
6440 int MaxDisplacement = 7; 6441 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) { 6442 const GlobalValue *GV = GA->getGlobal(); 6443 MaxDisplacement = std::min((int) GV->getAlignment() - 1, MaxDisplacement); 6444 } 6445 6446 bool UpdateHBase = false; 6447 SDValue HBase = Base.getOperand(0); 6448 6449 int Offset = N->getConstantOperandVal(FirstOp); 6450 if (ReplaceFlags) { 6451 if (Offset < 0 || Offset > MaxDisplacement) { 6452 // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only 6453 // one use, then we can do this for any offset, we just need to also 6454 // update the offset (i.e. the symbol addend) on the addis also. 6455 if (Base.getMachineOpcode() != PPC::ADDItocL) 6456 continue; 6457 6458 if (!HBase.isMachineOpcode() || 6459 HBase.getMachineOpcode() != PPC::ADDIStocHA) 6460 continue; 6461 6462 if (!Base.hasOneUse() || !HBase.hasOneUse()) 6463 continue; 6464 6465 SDValue HImmOpnd = HBase.getOperand(1); 6466 if (HImmOpnd != ImmOpnd) 6467 continue; 6468 6469 UpdateHBase = true; 6470 } 6471 } else { 6472 // If we're directly folding the addend from an addi instruction, then: 6473 // 1. In general, the offset on the memory access must be zero. 6474 // 2. If the addend is a constant, then it can be combined with a 6475 // non-zero offset, but only if the result meets the encoding 6476 // requirements. 6477 if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) { 6478 Offset += C->getSExtValue(); 6479 6480 if (RequiresMod4Offset && (Offset % 4) != 0) 6481 continue; 6482 6483 if (!isInt<16>(Offset)) 6484 continue; 6485 6486 ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd), 6487 ImmOpnd.getValueType()); 6488 } else if (Offset != 0) { 6489 continue; 6490 } 6491 } 6492 6493 // We found an opportunity. Reverse the operands from the add 6494 // immediate and substitute them into the load or store. If 6495 // needed, update the target flags for the immediate operand to 6496 // reflect the necessary relocation information. 
6497 LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: "); 6498 LLVM_DEBUG(Base->dump(CurDAG)); 6499 LLVM_DEBUG(dbgs() << "\nN: "); 6500 LLVM_DEBUG(N->dump(CurDAG)); 6501 LLVM_DEBUG(dbgs() << "\n"); 6502 6503 // If the relocation information isn't already present on the 6504 // immediate operand, add it now. 6505 if (ReplaceFlags) { 6506 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) { 6507 SDLoc dl(GA); 6508 const GlobalValue *GV = GA->getGlobal(); 6509 // We can't perform this optimization for data whose alignment 6510 // is insufficient for the instruction encoding. 6511 if (GV->getAlignment() < 4 && 6512 (RequiresMod4Offset || (Offset % 4) != 0)) { 6513 LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n"); 6514 continue; 6515 } 6516 ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags); 6517 } else if (ConstantPoolSDNode *CP = 6518 dyn_cast<ConstantPoolSDNode>(ImmOpnd)) { 6519 const Constant *C = CP->getConstVal(); 6520 ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, 6521 CP->getAlignment(), 6522 Offset, Flags); 6523 } 6524 } 6525 6526 if (FirstOp == 1) // Store 6527 (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd, 6528 Base.getOperand(0), N->getOperand(3)); 6529 else // Load 6530 (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0), 6531 N->getOperand(2)); 6532 6533 if (UpdateHBase) 6534 (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0), 6535 ImmOpnd); 6536 6537 // The add-immediate may now be dead, in which case remove it. 6538 if (Base.getNode()->use_empty()) 6539 CurDAG->RemoveDeadNode(Base.getNode()); 6540 } 6541 } 6542 6543 /// createPPCISelDag - This pass converts a legalized DAG into a 6544 /// PowerPC-specific DAG, ready for instruction scheduling. 6545 /// 6546 FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM, 6547 CodeGenOpt::Level OptLevel) { 6548 return new PPCDAGToDAGISel(TM, OptLevel); 6549 } 6550