//===- InferAddressSpace.cpp - --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// CUDA C/C++ includes memory space designation as variable type qualifiers
// (such as __device__ and __shared__). Knowing the space of a memory access
// allows CUDA compilers to emit faster PTX loads and stores. For example, a
// load from shared memory can be translated to `ld.shared`, which is roughly
// 10% faster than a generic `ld` on an NVIDIA Tesla K40c.
//
// Unfortunately, type qualifiers only apply to variable declarations, so CUDA
// compilers must infer the memory space of an address expression from
// type-qualified variables.
//
// LLVM IR uses non-zero (so-called specific) address spaces to represent memory
// spaces (e.g. addrspace(3) means shared memory). The Clang frontend
// places only type-qualified variables in specific address spaces, and then
// conservatively `addrspacecast`s each type-qualified variable to addrspace(0)
// (the so-called generic address space) for other instructions to use.
//
// For example, Clang translates the following CUDA code
//   __shared__ float a[10];
//   float v = a[i];
// to
//   %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
//   %1 = gep [10 x float], [10 x float]* %0, i64 0, i64 %i
//   %v = load float, float* %1 ; emits ld.f32
// @a is in addrspace(3) since it's type-qualified, but its use from %1 is
// redirected to %0 (the generic version of @a).
//
// The optimization implemented in this file propagates specific address spaces
// from type-qualified variable declarations to their users. For example, it
// optimizes the above IR to
//   %1 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
//   %v = load float addrspace(3)* %1 ; emits ld.shared.f32
// propagating the addrspace(3) from @a to %1. As a result, the NVPTX
// codegen is able to emit ld.shared.f32 for %v.
//
// Address space inference works in two steps. First, it uses a data-flow
// analysis to infer as many generic pointers as possible to point to only one
// specific address space. In the above example, it can prove that %1 only
// points to addrspace(3). This algorithm was published in
//   CUDA: Compiling and optimizing for a GPU platform
//   Chakrabarti, Grover, Aarts, Kong, Kudlur, Lin, Marathe, Murphy, Wang
//   ICCS 2012
//
// Then, address space inference replaces all refinable generic pointers with
// equivalent specific pointers.
//
// The major challenge of implementing this optimization is handling PHINodes,
// which may create loops in the data flow graph. This brings two complications.
//
// First, the data flow analysis in Step 1 needs to be circular. For example,
//     %generic.input = addrspacecast float addrspace(3)* %input to float*
//   loop:
//     %y = phi [ %generic.input, %y2 ]
//     %y2 = getelementptr %y, 1
//     %v = load %y2
//     br ..., label %loop, ...
// proving %y specific requires proving both %generic.input and %y2 specific,
// but proving %y2 specific circles back to %y.
// To address this complication,
// the data flow analysis operates on a lattice:
//   uninitialized > specific address spaces > generic.
// All address expressions (our implementation considers phi, bitcast,
// addrspacecast, getelementptr, and select) start with the uninitialized
// address space. The monotone transfer function moves the address space of a
// pointer down a lattice path from uninitialized to specific and then to
// generic. A join operation of two different specific address spaces pushes the
// expression down to the generic address space. The analysis completes once it
// reaches a fixed point.
//
// Second, IR rewriting in Step 2 also needs to be circular. For example,
// converting %y to addrspace(3) requires the compiler to know the converted
// %y2, but converting %y2 needs the converted %y. To address this complication,
// we break these cycles using "undef" placeholders. When converting an
// instruction `I` to a new address space, if its operand `Op` is not converted
// yet, we let `I` temporarily use `undef` and fix all the uses of undef later.
// For instance, our algorithm first converts %y to
//   %y' = phi float addrspace(3)* [ %input, undef ]
// Then, it converts %y2 to
//   %y2' = getelementptr %y', 1
// Finally, it fixes the undef in %y' so that
//   %y' = phi float addrspace(3)* [ %input, %y2' ]
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <cassert>
#include <iterator>
#include <limits>
#include <utility>
#include <vector>

#define DEBUG_TYPE "infer-address-spaces"

using namespace llvm;

static const unsigned UninitializedAddressSpace =
    std::numeric_limits<unsigned>::max();

namespace {

using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>;

/// InferAddressSpaces
class InferAddressSpaces : public FunctionPass {
  const TargetTransformInfo *TTI = nullptr;

  /// Target-specific (flat) address space whose uses should be replaced if
  /// possible.
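  /// This is typically the target's generic/flat address space (e.g.
  /// addrspace(0) on NVPTX and AMDGPU), whose pointers may address memory in
  /// any specific address space.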
  unsigned FlatAddrSpace = 0;

public:
  static char ID;

  InferAddressSpaces() :
    FunctionPass(ID), FlatAddrSpace(UninitializedAddressSpace) {}
  InferAddressSpaces(unsigned AS) : FunctionPass(ID), FlatAddrSpace(AS) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<TargetTransformInfoWrapperPass>();
  }

  bool runOnFunction(Function &F) override;

private:
  // Returns the new address space of V if updated; otherwise, returns None.
  Optional<unsigned>
  updateAddressSpace(const Value &V,
                     const ValueToAddrSpaceMapTy &InferredAddrSpace) const;

  // Tries to infer the specific address space of each address expression in
  // Postorder.
  void inferAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,
                          ValueToAddrSpaceMapTy *InferredAddrSpace) const;

  bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const;

  // Changes the flat address expressions in function F to point to specific
  // address spaces if InferredAddrSpace says so. Postorder is the postorder of
  // all flat expressions in the use-def graph of function F.
  bool rewriteWithNewAddressSpaces(
      const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
      const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const;

  void appendsFlatAddressExpressionToPostorderStack(
      Value *V, std::vector<std::pair<Value *, bool>> &PostorderStack,
      DenseSet<Value *> &Visited) const;

  bool rewriteIntrinsicOperands(IntrinsicInst *II,
                                Value *OldV, Value *NewV) const;
  void collectRewritableIntrinsicOperands(
      IntrinsicInst *II,
      std::vector<std::pair<Value *, bool>> &PostorderStack,
      DenseSet<Value *> &Visited) const;

  std::vector<WeakTrackingVH> collectFlatAddressExpressions(Function &F) const;

  Value *cloneValueWithNewAddressSpace(
      Value *V, unsigned NewAddrSpace,
      const ValueToValueMapTy &ValueWithNewAddrSpace,
      SmallVectorImpl<const Use *> *UndefUsesToFix) const;
  unsigned joinAddressSpaces(unsigned AS1, unsigned AS2) const;
};

} // end anonymous namespace

char InferAddressSpaces::ID = 0;

namespace llvm {

void initializeInferAddressSpacesPass(PassRegistry &);

} // end namespace llvm

INITIALIZE_PASS(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
                false, false)

// Returns true if V is an address expression.
// TODO: Currently, we consider only phi, bitcast, addrspacecast,
// getelementptr, and select operators.
static bool isAddressExpression(const Value &V) {
  if (!isa<Operator>(V))
    return false;

  const Operator &Op = cast<Operator>(V);
  switch (Op.getOpcode()) {
  case Instruction::PHI:
    assert(Op.getType()->isPointerTy());
    return true;
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast:
  case Instruction::GetElementPtr:
    return true;
  case Instruction::Select:
    return Op.getType()->isPointerTy();
  default:
    return false;
  }
}

// Returns the pointer operands of V.
//
// Precondition: V is an address expression.
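//
// For example (illustrative IR): `%s = select i1 %c, float* %p, float* %q`
// yields {%p, %q}; a bitcast, addrspacecast, or getelementptr yields its single
// source pointer; and a phi yields all of its incoming values.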
static SmallVector<Value *, 2> getPointerOperands(const Value &V) {
  const Operator &Op = cast<Operator>(V);
  switch (Op.getOpcode()) {
  case Instruction::PHI: {
    auto IncomingValues = cast<PHINode>(Op).incoming_values();
    return SmallVector<Value *, 2>(IncomingValues.begin(),
                                   IncomingValues.end());
  }
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast:
  case Instruction::GetElementPtr:
    return {Op.getOperand(0)};
  case Instruction::Select:
    return {Op.getOperand(1), Op.getOperand(2)};
  default:
    llvm_unreachable("Unexpected instruction type.");
  }
}

// TODO: Move logic to TTI?
bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II,
                                                  Value *OldV,
                                                  Value *NewV) const {
  Module *M = II->getParent()->getParent()->getParent();

  switch (II->getIntrinsicID()) {
  case Intrinsic::objectsize: {
    Type *DestTy = II->getType();
    Type *SrcTy = NewV->getType();
    Function *NewDecl =
        Intrinsic::getDeclaration(M, II->getIntrinsicID(), {DestTy, SrcTy});
    II->setArgOperand(0, NewV);
    II->setCalledFunction(NewDecl);
    return true;
  }
  default:
    return TTI->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
  }
}

void InferAddressSpaces::collectRewritableIntrinsicOperands(
    IntrinsicInst *II, std::vector<std::pair<Value *, bool>> &PostorderStack,
    DenseSet<Value *> &Visited) const {
  auto IID = II->getIntrinsicID();
  switch (IID) {
  case Intrinsic::objectsize:
    appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
                                                 PostorderStack, Visited);
    break;
  default:
    SmallVector<int, 2> OpIndexes;
    if (TTI->collectFlatAddressOperands(OpIndexes, IID)) {
      for (int Idx : OpIndexes) {
        appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(Idx),
                                                     PostorderStack, Visited);
      }
    }
    break;
  }
}

// If V is an unvisited flat address expression, appends V to PostorderStack
// and marks it as visited.
void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack(
    Value *V, std::vector<std::pair<Value *, bool>> &PostorderStack,
    DenseSet<Value *> &Visited) const {
  assert(V->getType()->isPointerTy());

  // Generic addressing expressions may be hidden in nested constant
  // expressions.
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
    // TODO: Look in non-address parts, like icmp operands.
    if (isAddressExpression(*CE) && Visited.insert(CE).second)
      PostorderStack.push_back(std::make_pair(CE, false));

    return;
  }

  if (isAddressExpression(*V) &&
      V->getType()->getPointerAddressSpace() == FlatAddrSpace) {
    if (Visited.insert(V).second) {
      PostorderStack.push_back(std::make_pair(V, false));

      Operator *Op = cast<Operator>(V);
      for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) {
        if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op->getOperand(I))) {
          if (isAddressExpression(*CE) && Visited.insert(CE).second)
            PostorderStack.emplace_back(CE, false);
        }
      }
    }
  }
}

// Returns all flat address expressions in function F. The elements are ordered
// in postorder.
std::vector<WeakTrackingVH>
InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
  // This function implements a non-recursive postorder traversal of a partial
  // use-def graph of function F.
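  //
  // Each stack entry pairs a value with a flag recording whether its pointer
  // operands have already been pushed; a value is appended to the resulting
  // postorder only once that flag is set (a two-visit DFS sketch of the
  // traversal below).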
  std::vector<std::pair<Value *, bool>> PostorderStack;
  // The set of visited expressions.
  DenseSet<Value *> Visited;

  auto PushPtrOperand = [&](Value *Ptr) {
    appendsFlatAddressExpressionToPostorderStack(Ptr, PostorderStack,
                                                 Visited);
  };

  // Look at operations that may be interesting to accelerate by moving to a
  // known address space. We mainly aim at loads and stores, but pure
  // addressing calculations may also benefit.
  for (Instruction &I : instructions(F)) {
    if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
      if (!GEP->getType()->isVectorTy())
        PushPtrOperand(GEP->getPointerOperand());
    } else if (auto *LI = dyn_cast<LoadInst>(&I))
      PushPtrOperand(LI->getPointerOperand());
    else if (auto *SI = dyn_cast<StoreInst>(&I))
      PushPtrOperand(SI->getPointerOperand());
    else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
      PushPtrOperand(RMW->getPointerOperand());
    else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
      PushPtrOperand(CmpX->getPointerOperand());
    else if (auto *MI = dyn_cast<MemIntrinsic>(&I)) {
      // For memset/memcpy/memmove, any pointer operand can be replaced.
      PushPtrOperand(MI->getRawDest());

      // Handle 2nd operand for memcpy/memmove.
      if (auto *MTI = dyn_cast<MemTransferInst>(MI))
        PushPtrOperand(MTI->getRawSource());
    } else if (auto *II = dyn_cast<IntrinsicInst>(&I))
      collectRewritableIntrinsicOperands(II, PostorderStack, Visited);
    else if (ICmpInst *Cmp = dyn_cast<ICmpInst>(&I)) {
      // FIXME: Handle vectors of pointers
      if (Cmp->getOperand(0)->getType()->isPointerTy()) {
        PushPtrOperand(Cmp->getOperand(0));
        PushPtrOperand(Cmp->getOperand(1));
      }
    } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
      if (!ASC->getType()->isVectorTy())
        PushPtrOperand(ASC->getPointerOperand());
    }
  }

  std::vector<WeakTrackingVH> Postorder; // The resultant postorder.
  while (!PostorderStack.empty()) {
    Value *TopVal = PostorderStack.back().first;
    // If the operands of the expression on the top are already explored,
    // adds that expression to the resultant postorder.
    if (PostorderStack.back().second) {
      if (TopVal->getType()->getPointerAddressSpace() == FlatAddrSpace)
        Postorder.push_back(TopVal);
      PostorderStack.pop_back();
      continue;
    }
    // Otherwise, adds its operands to the stack and explores them.
    PostorderStack.back().second = true;
    for (Value *PtrOperand : getPointerOperands(*TopVal)) {
      appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack,
                                                   Visited);
    }
  }
  return Postorder;
}

// A helper function for cloneInstructionWithNewAddressSpace. Returns the clone
// of OperandUse.get() in the new address space. If the clone is not ready yet,
// returns an undef in the new address space as a placeholder.
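//
// For example (illustrative IR): when cloning
//   %y2 = getelementptr float, float* %y, i64 1
// into addrspace(3) before %y itself has been cloned, the operand becomes a
// temporary `float addrspace(3)* undef`, and the use is recorded in
// UndefUsesToFix so the caller can patch it once the clone of %y exists.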
static Value *operandWithNewAddressSpaceOrCreateUndef(
    const Use &OperandUse, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace,
    SmallVectorImpl<const Use *> *UndefUsesToFix) {
  Value *Operand = OperandUse.get();

  Type *NewPtrTy =
      Operand->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);

  if (Constant *C = dyn_cast<Constant>(Operand))
    return ConstantExpr::getAddrSpaceCast(C, NewPtrTy);

  if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand))
    return NewOperand;

  UndefUsesToFix->push_back(&OperandUse);
  return UndefValue::get(NewPtrTy);
}

// Returns a clone of `I` with its operands converted to those specified in
// ValueWithNewAddrSpace. Due to potential cycles in the data flow graph, an
// operand whose address space needs to be modified might not exist in
// ValueWithNewAddrSpace. In that case, uses undef as a placeholder operand and
// adds that operand use to UndefUsesToFix so that the caller can fix them
// later.
//
// Note that we do not necessarily clone `I`, e.g., if it is an addrspacecast
// from a pointer whose type already matches. Therefore, this function returns a
// Value* instead of an Instruction*.
static Value *cloneInstructionWithNewAddressSpace(
    Instruction *I, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace,
    SmallVectorImpl<const Use *> *UndefUsesToFix) {
  Type *NewPtrType =
      I->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);

  if (I->getOpcode() == Instruction::AddrSpaceCast) {
    Value *Src = I->getOperand(0);
    // Because `I` is flat, the source address space must be specific.
    // Therefore, the inferred address space must be the source space, according
    // to our algorithm.
    assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
    if (Src->getType() != NewPtrType)
      return new BitCastInst(Src, NewPtrType);
    return Src;
  }

  // Computes the converted pointer operands.
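  // Non-pointer operands get a nullptr placeholder so that NewPointerOperands
  // stays index-aligned with I's operand list; only the pointer slots are read
  // in the switch below.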
  SmallVector<Value *, 4> NewPointerOperands;
  for (const Use &OperandUse : I->operands()) {
    if (!OperandUse.get()->getType()->isPointerTy())
      NewPointerOperands.push_back(nullptr);
    else
      NewPointerOperands.push_back(operandWithNewAddressSpaceOrCreateUndef(
          OperandUse, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix));
  }

  switch (I->getOpcode()) {
  case Instruction::BitCast:
    return new BitCastInst(NewPointerOperands[0], NewPtrType);
  case Instruction::PHI: {
    assert(I->getType()->isPointerTy());
    PHINode *PHI = cast<PHINode>(I);
    PHINode *NewPHI = PHINode::Create(NewPtrType, PHI->getNumIncomingValues());
    for (unsigned Index = 0; Index < PHI->getNumIncomingValues(); ++Index) {
      unsigned OperandNo = PHINode::getOperandNumForIncomingValue(Index);
      NewPHI->addIncoming(NewPointerOperands[OperandNo],
                          PHI->getIncomingBlock(Index));
    }
    return NewPHI;
  }
  case Instruction::GetElementPtr: {
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
    GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
        GEP->getSourceElementType(), NewPointerOperands[0],
        SmallVector<Value *, 4>(GEP->idx_begin(), GEP->idx_end()));
    NewGEP->setIsInBounds(GEP->isInBounds());
    return NewGEP;
  }
  case Instruction::Select:
    assert(I->getType()->isPointerTy());
    return SelectInst::Create(I->getOperand(0), NewPointerOperands[1],
                              NewPointerOperands[2], "", nullptr, I);
  default:
    llvm_unreachable("Unexpected opcode");
  }
}

// Similar to cloneInstructionWithNewAddressSpace, returns a clone of the
// constant expression `CE` with its operands replaced as specified in
// ValueWithNewAddrSpace.
static Value *cloneConstantExprWithNewAddressSpace(
    ConstantExpr *CE, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace) {
  Type *TargetType =
      CE->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);

  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
    // Because CE is flat, the source address space must be specific.
    // Therefore, the inferred address space must be the source space according
    // to our algorithm.
    assert(CE->getOperand(0)->getType()->getPointerAddressSpace() ==
           NewAddrSpace);
    return ConstantExpr::getBitCast(CE->getOperand(0), TargetType);
  }

  if (CE->getOpcode() == Instruction::BitCast) {
    if (Value *NewOperand = ValueWithNewAddrSpace.lookup(CE->getOperand(0)))
      return ConstantExpr::getBitCast(cast<Constant>(NewOperand), TargetType);
    return ConstantExpr::getAddrSpaceCast(CE, TargetType);
  }

  if (CE->getOpcode() == Instruction::Select) {
    Constant *Src0 = CE->getOperand(1);
    Constant *Src1 = CE->getOperand(2);
    if (Src0->getType()->getPointerAddressSpace() ==
        Src1->getType()->getPointerAddressSpace()) {

      return ConstantExpr::getSelect(
          CE->getOperand(0), ConstantExpr::getAddrSpaceCast(Src0, TargetType),
          ConstantExpr::getAddrSpaceCast(Src1, TargetType));
    }
  }

  // Computes the operands of the new constant expression.
  bool IsNew = false;
  SmallVector<Constant *, 4> NewOperands;
  for (unsigned Index = 0; Index < CE->getNumOperands(); ++Index) {
    Constant *Operand = CE->getOperand(Index);
    // If the address space of `Operand` needs to be modified, the new operand
    // with the new address space should already be in ValueWithNewAddrSpace
    // because (1) the constant expressions we consider (i.e. addrspacecast,
    // bitcast, and getelementptr) do not incur cycles in the data flow graph
    // and (2) this function is called on constant expressions in postorder.
    if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand)) {
      IsNew = true;
      NewOperands.push_back(cast<Constant>(NewOperand));
      continue;
    }
    if (auto CExpr = dyn_cast<ConstantExpr>(Operand))
      if (Value *NewOperand = cloneConstantExprWithNewAddressSpace(
              CExpr, NewAddrSpace, ValueWithNewAddrSpace)) {
        IsNew = true;
        NewOperands.push_back(cast<Constant>(NewOperand));
        continue;
      }
    // Otherwise, reuses the old operand.
    NewOperands.push_back(Operand);
  }

  // If !IsNew, we would replace the Value with itself. However, replaced values
  // are assumed to be wrapped in an addrspacecast later, so drop it now.
  if (!IsNew)
    return nullptr;

  if (CE->getOpcode() == Instruction::GetElementPtr) {
    // Needs to specify the source type while constructing a getelementptr
    // constant expression.
    return CE->getWithOperands(
        NewOperands, TargetType, /*OnlyIfReduced=*/false,
        NewOperands[0]->getType()->getPointerElementType());
  }

  return CE->getWithOperands(NewOperands, TargetType);
}

// Returns a clone of the value `V`, with its operands replaced as specified in
// ValueWithNewAddrSpace. This function is called on every flat address
// expression whose address space needs to be modified, in postorder.
//
// See cloneInstructionWithNewAddressSpace for the meaning of UndefUsesToFix.
Value *InferAddressSpaces::cloneValueWithNewAddressSpace(
    Value *V, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace,
    SmallVectorImpl<const Use *> *UndefUsesToFix) const {
  // All values in Postorder are flat address expressions.
  assert(isAddressExpression(*V) &&
         V->getType()->getPointerAddressSpace() == FlatAddrSpace);

  if (Instruction *I = dyn_cast<Instruction>(V)) {
    Value *NewV = cloneInstructionWithNewAddressSpace(
        I, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix);
    if (Instruction *NewI = dyn_cast<Instruction>(NewV)) {
      if (NewI->getParent() == nullptr) {
        NewI->insertBefore(I);
        NewI->takeName(I);
      }
    }
    return NewV;
  }

  return cloneConstantExprWithNewAddressSpace(
      cast<ConstantExpr>(V), NewAddrSpace, ValueWithNewAddrSpace);
}

// Defines the join operation on the address space lattice (see the file header
// comments).
unsigned InferAddressSpaces::joinAddressSpaces(unsigned AS1,
                                               unsigned AS2) const {
  if (AS1 == FlatAddrSpace || AS2 == FlatAddrSpace)
    return FlatAddrSpace;

  if (AS1 == UninitializedAddressSpace)
    return AS2;
  if (AS2 == UninitializedAddressSpace)
    return AS1;

  // The join of two different specific address spaces is flat.
  return (AS1 == AS2) ? AS1 : FlatAddrSpace;
}

bool InferAddressSpaces::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

  if (FlatAddrSpace == UninitializedAddressSpace) {
    FlatAddrSpace = TTI->getFlatAddressSpace();
    if (FlatAddrSpace == UninitializedAddressSpace)
      return false;
  }

  // Collects all flat address expressions in postorder.
  std::vector<WeakTrackingVH> Postorder = collectFlatAddressExpressions(F);

  // Runs a data-flow analysis to refine the address spaces of every expression
  // in Postorder.
  ValueToAddrSpaceMapTy InferredAddrSpace;
  inferAddressSpaces(Postorder, &InferredAddrSpace);

  // Changes the address spaces of the flat address expressions that are
  // inferred to point to a specific address space.
  return rewriteWithNewAddressSpaces(*TTI, Postorder, InferredAddrSpace, &F);
}

// Constants need to be tracked through RAUW to handle cases with nested
// constant expressions, so wrap values in WeakTrackingVH.
void InferAddressSpaces::inferAddressSpaces(
    ArrayRef<WeakTrackingVH> Postorder,
    ValueToAddrSpaceMapTy *InferredAddrSpace) const {
  SetVector<Value *> Worklist(Postorder.begin(), Postorder.end());
  // Initially, all expressions are in the uninitialized address space.
  for (Value *V : Postorder)
    (*InferredAddrSpace)[V] = UninitializedAddressSpace;

  while (!Worklist.empty()) {
    Value *V = Worklist.pop_back_val();

    // Tries to update the address space of V according to the address spaces
    // of its operands.
    LLVM_DEBUG(dbgs() << "Updating the address space of\n  " << *V << '\n');
    Optional<unsigned> NewAS = updateAddressSpace(*V, *InferredAddrSpace);
    if (!NewAS.hasValue())
      continue;
    // If any update is made, adds V's users to the worklist because their
    // address spaces may also need to be updated.
    LLVM_DEBUG(dbgs() << "  to " << NewAS.getValue() << '\n');
    (*InferredAddrSpace)[V] = NewAS.getValue();

    for (Value *User : V->users()) {
      // Skip if User is already in the worklist.
      if (Worklist.count(User))
        continue;

      auto Pos = InferredAddrSpace->find(User);
      // Our algorithm only updates the address spaces of flat address
      // expressions, which are those in InferredAddrSpace.
      if (Pos == InferredAddrSpace->end())
        continue;

      // Function updateAddressSpace moves the address space down a lattice
      // path. Therefore, nothing to do if User is already inferred as flat (the
      // bottom element in the lattice).
      if (Pos->second == FlatAddrSpace)
        continue;

      Worklist.insert(User);
    }
  }
}

Optional<unsigned> InferAddressSpaces::updateAddressSpace(
    const Value &V, const ValueToAddrSpaceMapTy &InferredAddrSpace) const {
  assert(InferredAddrSpace.count(&V));

  // The new inferred address space equals the join of the address spaces
  // of all its pointer operands.
  unsigned NewAS = UninitializedAddressSpace;

  const Operator &Op = cast<Operator>(V);
  if (Op.getOpcode() == Instruction::Select) {
    Value *Src0 = Op.getOperand(1);
    Value *Src1 = Op.getOperand(2);

    auto I = InferredAddrSpace.find(Src0);
    unsigned Src0AS = (I != InferredAddrSpace.end()) ?
      I->second : Src0->getType()->getPointerAddressSpace();

    auto J = InferredAddrSpace.find(Src1);
    unsigned Src1AS = (J != InferredAddrSpace.end()) ?
      J->second : Src1->getType()->getPointerAddressSpace();

    auto *C0 = dyn_cast<Constant>(Src0);
    auto *C1 = dyn_cast<Constant>(Src1);

    // If one of the inputs is a constant, we may be able to do a constant
    // addrspacecast of it. Defer inferring the address space until the input
    // address space is known.
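    //
    // For example (illustrative IR): for `select i1 %c, float* %p, float* null`,
    // we return None until %p's address space is inferred; once it is known,
    // the null constant can simply be addrspacecast into that space.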
    if ((C1 && Src0AS == UninitializedAddressSpace) ||
        (C0 && Src1AS == UninitializedAddressSpace))
      return None;

    if (C0 && isSafeToCastConstAddrSpace(C0, Src1AS))
      NewAS = Src1AS;
    else if (C1 && isSafeToCastConstAddrSpace(C1, Src0AS))
      NewAS = Src0AS;
    else
      NewAS = joinAddressSpaces(Src0AS, Src1AS);
  } else {
    for (Value *PtrOperand : getPointerOperands(V)) {
      auto I = InferredAddrSpace.find(PtrOperand);
      unsigned OperandAS = I != InferredAddrSpace.end() ?
        I->second : PtrOperand->getType()->getPointerAddressSpace();

      // join(flat, *) = flat. So we can break if NewAS is already flat.
      NewAS = joinAddressSpaces(NewAS, OperandAS);
      if (NewAS == FlatAddrSpace)
        break;
    }
  }

  unsigned OldAS = InferredAddrSpace.lookup(&V);
  assert(OldAS != FlatAddrSpace);
  if (OldAS == NewAS)
    return None;
  return NewAS;
}

/// Returns true if \p U is the pointer operand of a memory instruction with
/// a single pointer operand that can have its address space changed by simply
/// mutating the use to a new value. If the memory instruction is volatile,
/// return true only if the target allows the memory instruction to be volatile
/// in the new address space.
static bool isSimplePointerUseValidToReplace(const TargetTransformInfo &TTI,
                                             Use &U, unsigned AddrSpace) {
  User *Inst = U.getUser();
  unsigned OpNo = U.getOperandNo();
  bool VolatileIsAllowed = false;
  if (auto *I = dyn_cast<Instruction>(Inst))
    VolatileIsAllowed = TTI.hasVolatileVariant(I, AddrSpace);

  if (auto *LI = dyn_cast<LoadInst>(Inst))
    return OpNo == LoadInst::getPointerOperandIndex() &&
           (VolatileIsAllowed || !LI->isVolatile());

  if (auto *SI = dyn_cast<StoreInst>(Inst))
    return OpNo == StoreInst::getPointerOperandIndex() &&
           (VolatileIsAllowed || !SI->isVolatile());

  if (auto *RMW = dyn_cast<AtomicRMWInst>(Inst))
    return OpNo == AtomicRMWInst::getPointerOperandIndex() &&
           (VolatileIsAllowed || !RMW->isVolatile());

  if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst))
    return OpNo == AtomicCmpXchgInst::getPointerOperandIndex() &&
           (VolatileIsAllowed || !CmpX->isVolatile());

  return false;
}

/// Update memory intrinsic uses that require more complex processing than
/// simple memory instructions. These require re-mangling and may have multiple
/// pointer operands.
static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI, Value *OldV,
                                     Value *NewV) {
  IRBuilder<> B(MI);
  MDNode *TBAA = MI->getMetadata(LLVMContext::MD_tbaa);
  MDNode *ScopeMD = MI->getMetadata(LLVMContext::MD_alias_scope);
  MDNode *NoAliasMD = MI->getMetadata(LLVMContext::MD_noalias);

  if (auto *MSI = dyn_cast<MemSetInst>(MI)) {
    B.CreateMemSet(NewV, MSI->getValue(), MSI->getLength(),
                   MaybeAlign(MSI->getDestAlignment()),
                   false, // isVolatile
                   TBAA, ScopeMD, NoAliasMD);
  } else if (auto *MTI = dyn_cast<MemTransferInst>(MI)) {
    Value *Src = MTI->getRawSource();
    Value *Dest = MTI->getRawDest();

    // Be careful in case this is a self-to-self copy.
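    // (For instance, a memcpy whose source and destination are both the
    // pointer being replaced; in that case both operands must be redirected
    // to NewV.)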
    if (Src == OldV)
      Src = NewV;

    if (Dest == OldV)
      Dest = NewV;

    if (isa<MemCpyInst>(MTI)) {
      MDNode *TBAAStruct = MTI->getMetadata(LLVMContext::MD_tbaa_struct);
      B.CreateMemCpy(Dest, MTI->getDestAlign(), Src, MTI->getSourceAlign(),
                     MTI->getLength(),
                     false, // isVolatile
                     TBAA, TBAAStruct, ScopeMD, NoAliasMD);
    } else {
      assert(isa<MemMoveInst>(MTI));
      B.CreateMemMove(Dest, MTI->getDestAlign(), Src, MTI->getSourceAlign(),
                      MTI->getLength(),
                      false, // isVolatile
                      TBAA, ScopeMD, NoAliasMD);
    }
  } else
    llvm_unreachable("unhandled MemIntrinsic");

  MI->eraseFromParent();
  return true;
}

// Returns true if it is OK to change the address space of constant \p C with
// a ConstantExpr addrspacecast.
bool InferAddressSpaces::isSafeToCastConstAddrSpace(Constant *C,
                                                    unsigned NewAS) const {
  assert(NewAS != UninitializedAddressSpace);

  unsigned SrcAS = C->getType()->getPointerAddressSpace();
  if (SrcAS == NewAS || isa<UndefValue>(C))
    return true;

  // Prevent illegal casts between different non-flat address spaces.
  if (SrcAS != FlatAddrSpace && NewAS != FlatAddrSpace)
    return false;

  if (isa<ConstantPointerNull>(C))
    return true;

  if (auto *Op = dyn_cast<Operator>(C)) {
    // If we already have a constant addrspacecast, it should be safe to cast it
    // off.
    if (Op->getOpcode() == Instruction::AddrSpaceCast)
      return isSafeToCastConstAddrSpace(cast<Constant>(Op->getOperand(0)),
                                        NewAS);

    if (Op->getOpcode() == Instruction::IntToPtr &&
        Op->getType()->getPointerAddressSpace() == FlatAddrSpace)
      return true;
  }

  return false;
}

static Value::use_iterator skipToNextUser(Value::use_iterator I,
                                          Value::use_iterator End) {
  User *CurUser = I->getUser();
  ++I;

  while (I != End && I->getUser() == CurUser)
    ++I;

  return I;
}

bool InferAddressSpaces::rewriteWithNewAddressSpaces(
    const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
    const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
  // For each address expression to be modified, creates a clone of it with its
  // pointer operands converted to the new address space. Since the pointer
  // operands are converted, the clone is naturally in the new address space by
  // construction.
  ValueToValueMapTy ValueWithNewAddrSpace;
  SmallVector<const Use *, 32> UndefUsesToFix;
  for (Value *V : Postorder) {
    unsigned NewAddrSpace = InferredAddrSpace.lookup(V);
    if (V->getType()->getPointerAddressSpace() != NewAddrSpace) {
      ValueWithNewAddrSpace[V] = cloneValueWithNewAddressSpace(
          V, NewAddrSpace, ValueWithNewAddrSpace, &UndefUsesToFix);
    }
  }

  if (ValueWithNewAddrSpace.empty())
    return false;

  // Fixes all the undef uses generated by cloneInstructionWithNewAddressSpace.
  for (const Use *UndefUse : UndefUsesToFix) {
    User *V = UndefUse->getUser();
    User *NewV = cast<User>(ValueWithNewAddrSpace.lookup(V));
    unsigned OperandNo = UndefUse->getOperandNo();
    assert(isa<UndefValue>(NewV->getOperand(OperandNo)));
    NewV->setOperand(OperandNo, ValueWithNewAddrSpace.lookup(UndefUse->get()));
  }

  SmallVector<Instruction *, 16> DeadInstructions;

  // Replaces the uses of the old address expressions with the new ones.
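  //
  // For example (illustrative IR, continuing the file-header example): the use
  // of the flat GEP %1 in
  //   %v = load float, float* %1
  // is redirected to the addrspace(3) clone of %1, after which the flat GEP
  // becomes dead and is cleaned up below.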
  for (const WeakTrackingVH &WVH : Postorder) {
    assert(WVH && "value was unexpectedly deleted");
    Value *V = WVH;
    Value *NewV = ValueWithNewAddrSpace.lookup(V);
    if (NewV == nullptr)
      continue;

    LLVM_DEBUG(dbgs() << "Replacing the uses of " << *V << "\n  with\n  "
                      << *NewV << '\n');

    if (Constant *C = dyn_cast<Constant>(V)) {
      Constant *Replace = ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
                                                         C->getType());
      if (C != Replace) {
        LLVM_DEBUG(dbgs() << "Inserting replacement const cast: " << Replace
                          << ": " << *Replace << '\n');
        C->replaceAllUsesWith(Replace);
        V = Replace;
      }
    }

    Value::use_iterator I, E, Next;
    for (I = V->use_begin(), E = V->use_end(); I != E; ) {
      Use &U = *I;

      // Some users may see the same pointer operand in multiple operands. Skip
      // ahead to the next user.
      I = skipToNextUser(I, E);

      if (isSimplePointerUseValidToReplace(
              TTI, U, V->getType()->getPointerAddressSpace())) {
        // If V is used as the pointer operand of a compatible memory operation,
        // sets the pointer operand to NewV. This replacement does not change
        // the element type, so the resultant load/store is still valid.
        U.set(NewV);
        continue;
      }

      User *CurUser = U.getUser();
      // Handle more complex cases like intrinsics that need to be remangled.
      if (auto *MI = dyn_cast<MemIntrinsic>(CurUser)) {
        if (!MI->isVolatile() && handleMemIntrinsicPtrUse(MI, V, NewV))
          continue;
      }

      if (auto *II = dyn_cast<IntrinsicInst>(CurUser)) {
        if (rewriteIntrinsicOperands(II, V, NewV))
          continue;
      }

      if (isa<Instruction>(CurUser)) {
        if (ICmpInst *Cmp = dyn_cast<ICmpInst>(CurUser)) {
          // If we can infer that both pointers are in the same addrspace,
          // transform e.g.
          //   %cmp = icmp eq float* %p, %q
          // into
          //   %cmp = icmp eq float addrspace(3)* %new_p, %new_q

          unsigned NewAS = NewV->getType()->getPointerAddressSpace();
          int SrcIdx = U.getOperandNo();
          int OtherIdx = (SrcIdx == 0) ? 1 : 0;
          Value *OtherSrc = Cmp->getOperand(OtherIdx);

          if (Value *OtherNewV = ValueWithNewAddrSpace.lookup(OtherSrc)) {
            if (OtherNewV->getType()->getPointerAddressSpace() == NewAS) {
              Cmp->setOperand(OtherIdx, OtherNewV);
              Cmp->setOperand(SrcIdx, NewV);
              continue;
            }
          }

          // Even if the type mismatches, we can cast the constant.
          if (auto *KOtherSrc = dyn_cast<Constant>(OtherSrc)) {
            if (isSafeToCastConstAddrSpace(KOtherSrc, NewAS)) {
              Cmp->setOperand(SrcIdx, NewV);
              Cmp->setOperand(OtherIdx,
                              ConstantExpr::getAddrSpaceCast(KOtherSrc,
                                                             NewV->getType()));
              continue;
            }
          }
        }

        if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(CurUser)) {
          unsigned NewAS = NewV->getType()->getPointerAddressSpace();
          if (ASC->getDestAddressSpace() == NewAS) {
            if (ASC->getType()->getPointerElementType() !=
                NewV->getType()->getPointerElementType()) {
              NewV = CastInst::Create(Instruction::BitCast, NewV,
                                      ASC->getType(), "", ASC);
            }
            ASC->replaceAllUsesWith(NewV);
            DeadInstructions.push_back(ASC);
            continue;
          }
        }

        // Otherwise, replaces the use with flat(NewV).
        if (Instruction *Inst = dyn_cast<Instruction>(V)) {
          // Don't create a copy of the original addrspacecast.
          if (U == V && isa<AddrSpaceCastInst>(V))
            continue;

          BasicBlock::iterator InsertPos = std::next(Inst->getIterator());
          while (isa<PHINode>(InsertPos))
            ++InsertPos;
          U.set(new AddrSpaceCastInst(NewV, V->getType(), "", &*InsertPos));
        } else {
          U.set(ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
                                               V->getType()));
        }
      }
    }

    if (V->use_empty()) {
      if (Instruction *I = dyn_cast<Instruction>(V))
        DeadInstructions.push_back(I);
    }
  }

  for (Instruction *I : DeadInstructions)
    RecursivelyDeleteTriviallyDeadInstructions(I);

  return true;
}

FunctionPass *llvm::createInferAddressSpacesPass(unsigned AddressSpace) {
  return new InferAddressSpaces(AddressSpace);
}