//===-- NVPTXTargetTransformInfo.cpp - NVPTX specific TTI ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "NVPTXTargetTransformInfo.h"
#include "NVPTXUtilities.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/Support/Debug.h"
#include <optional>
using namespace llvm;

#define DEBUG_TYPE "NVPTXtti"

// Whether the given intrinsic reads threadIdx.x/y/z.
static bool readsThreadIndex(const IntrinsicInst *II) {
  switch (II->getIntrinsicID()) {
  default: return false;
  case Intrinsic::nvvm_read_ptx_sreg_tid_x:
  case Intrinsic::nvvm_read_ptx_sreg_tid_y:
  case Intrinsic::nvvm_read_ptx_sreg_tid_z:
    return true;
  }
}

static bool readsLaneId(const IntrinsicInst *II) {
  return II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_laneid;
}

// Whether the given intrinsic is an atomic instruction in PTX.
static bool isNVVMAtomic(const IntrinsicInst *II) {
  switch (II->getIntrinsicID()) {
  default: return false;
  case Intrinsic::nvvm_atomic_load_inc_32:
  case Intrinsic::nvvm_atomic_load_dec_32:

  case Intrinsic::nvvm_atomic_add_gen_f_cta:
  case Intrinsic::nvvm_atomic_add_gen_f_sys:
  case Intrinsic::nvvm_atomic_add_gen_i_cta:
  case Intrinsic::nvvm_atomic_add_gen_i_sys:
  case Intrinsic::nvvm_atomic_and_gen_i_cta:
  case Intrinsic::nvvm_atomic_and_gen_i_sys:
  case Intrinsic::nvvm_atomic_cas_gen_i_cta:
  case Intrinsic::nvvm_atomic_cas_gen_i_sys:
  case Intrinsic::nvvm_atomic_dec_gen_i_cta:
  case Intrinsic::nvvm_atomic_dec_gen_i_sys:
  case Intrinsic::nvvm_atomic_inc_gen_i_cta:
  case Intrinsic::nvvm_atomic_inc_gen_i_sys:
  case Intrinsic::nvvm_atomic_max_gen_i_cta:
  case Intrinsic::nvvm_atomic_max_gen_i_sys:
  case Intrinsic::nvvm_atomic_min_gen_i_cta:
  case Intrinsic::nvvm_atomic_min_gen_i_sys:
  case Intrinsic::nvvm_atomic_or_gen_i_cta:
  case Intrinsic::nvvm_atomic_or_gen_i_sys:
  case Intrinsic::nvvm_atomic_exch_gen_i_cta:
  case Intrinsic::nvvm_atomic_exch_gen_i_sys:
  case Intrinsic::nvvm_atomic_xor_gen_i_cta:
  case Intrinsic::nvvm_atomic_xor_gen_i_sys:
    return true;
  }
}

bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) {
  // Without inter-procedural analysis, we conservatively assume that arguments
  // to __device__ functions are divergent.
  if (const Argument *Arg = dyn_cast<Argument>(V))
    return !isKernelFunction(*Arg->getParent());

  if (const Instruction *I = dyn_cast<Instruction>(V)) {
    // Without pointer analysis, we conservatively assume values loaded from
    // generic or local address space are divergent.
    if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
      unsigned AS = LI->getPointerAddressSpace();
      return AS == ADDRESS_SPACE_GENERIC || AS == ADDRESS_SPACE_LOCAL;
    }
    // Atomic instructions may cause divergence. Atomic instructions are
    // executed sequentially across all threads in a warp.
    // Therefore, an earlier executed thread may see different memory inputs
    // than a later executed thread. For example, suppose *a = 0 initially.
    //
    //   atom.global.add.s32 d, [a], 1
    //
    // returns 0 for the first thread that enters the critical region, and 1
    // for the second thread.
    if (I->isAtomic())
      return true;
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      // Instructions that read threadIdx or the lane ID are obviously
      // divergent.
      if (readsThreadIndex(II) || readsLaneId(II))
        return true;
      // Handle the NVPTX atomic intrinsics that cannot be represented as an
      // atomic IR instruction.
      if (isNVVMAtomic(II))
        return true;
    }
    // Conservatively consider the return value of function calls as divergent.
    // We could analyze callees with bodies more precisely using
    // inter-procedural analysis.
    if (isa<CallInst>(I))
      return true;
  }

  return false;
}

// Convert NVVM intrinsics to target-generic LLVM code where possible.
static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II,
                                          InstCombiner &IC) {
  // Each NVVM intrinsic we can simplify can be replaced with one of:
  //
  //  * an LLVM intrinsic,
  //  * an LLVM cast operation,
  //  * an LLVM binary operation, or
  //  * ad-hoc LLVM IR for the particular operation.

  // Some transformations are only valid when the module's
  // flush-denormals-to-zero (ftz) setting is true/false, whereas other
  // transformations are valid regardless of the module's ftz setting.
  enum FtzRequirementTy {
    FTZ_Any,       // Any ftz setting is ok.
    FTZ_MustBeOn,  // Transformation is valid only if ftz is on.
    FTZ_MustBeOff, // Transformation is valid only if ftz is off.
  };
  // Classes of NVVM intrinsics that can't be replaced one-to-one with a
  // target-generic intrinsic, cast op, or binary op but that we can
  // nonetheless simplify.
  enum SpecialCase {
    SPC_Reciprocal,
  };

  // SimplifyAction is a poor-man's variant (plus an additional flag) that
  // represents how to replace an NVVM intrinsic with target-generic LLVM IR.
  struct SimplifyAction {
    // Invariant: At most one of these Optionals has a value.
    std::optional<Intrinsic::ID> IID;
    std::optional<Instruction::CastOps> CastOp;
    std::optional<Instruction::BinaryOps> BinaryOp;
    std::optional<SpecialCase> Special;

    FtzRequirementTy FtzRequirement = FTZ_Any;
    // Denormal handling is guarded by different attributes depending on the
    // type (denormal-fp-math vs. denormal-fp-math-f32), so take note of
    // half types.
    bool IsHalfTy = false;

    SimplifyAction() = default;

    SimplifyAction(Intrinsic::ID IID, FtzRequirementTy FtzReq,
                   bool IsHalfTy = false)
        : IID(IID), FtzRequirement(FtzReq), IsHalfTy(IsHalfTy) {}

    // Cast operations don't have anything to do with FTZ, so we skip that
    // argument.
    SimplifyAction(Instruction::CastOps CastOp) : CastOp(CastOp) {}

    SimplifyAction(Instruction::BinaryOps BinaryOp, FtzRequirementTy FtzReq)
        : BinaryOp(BinaryOp), FtzRequirement(FtzReq) {}

    SimplifyAction(SpecialCase Special, FtzRequirementTy FtzReq)
        : Special(Special), FtzRequirement(FtzReq) {}
  };

  // Try to generate a SimplifyAction describing how to replace our
  // IntrinsicInstr with target-generic LLVM IR.
  const SimplifyAction Action = [II]() -> SimplifyAction {
    switch (II->getIntrinsicID()) {
    // NVVM intrinsics that map directly to LLVM intrinsics.
    case Intrinsic::nvvm_ceil_d:
      return {Intrinsic::ceil, FTZ_Any};
    case Intrinsic::nvvm_ceil_f:
      return {Intrinsic::ceil, FTZ_MustBeOff};
    case Intrinsic::nvvm_ceil_ftz_f:
      return {Intrinsic::ceil, FTZ_MustBeOn};
    case Intrinsic::nvvm_fabs_d:
      return {Intrinsic::fabs, FTZ_Any};
    case Intrinsic::nvvm_fabs_f:
      return {Intrinsic::fabs, FTZ_MustBeOff};
    case Intrinsic::nvvm_fabs_ftz_f:
      return {Intrinsic::fabs, FTZ_MustBeOn};
    case Intrinsic::nvvm_floor_d:
      return {Intrinsic::floor, FTZ_Any};
    case Intrinsic::nvvm_floor_f:
      return {Intrinsic::floor, FTZ_MustBeOff};
    case Intrinsic::nvvm_floor_ftz_f:
      return {Intrinsic::floor, FTZ_MustBeOn};
    case Intrinsic::nvvm_fma_rn_d:
      return {Intrinsic::fma, FTZ_Any};
    case Intrinsic::nvvm_fma_rn_f:
      return {Intrinsic::fma, FTZ_MustBeOff};
    case Intrinsic::nvvm_fma_rn_ftz_f:
      return {Intrinsic::fma, FTZ_MustBeOn};
    case Intrinsic::nvvm_fma_rn_f16:
      return {Intrinsic::fma, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fma_rn_ftz_f16:
      return {Intrinsic::fma, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fma_rn_f16x2:
      return {Intrinsic::fma, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fma_rn_ftz_f16x2:
      return {Intrinsic::fma, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fma_rn_bf16:
      return {Intrinsic::fma, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fma_rn_ftz_bf16:
      return {Intrinsic::fma, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fma_rn_bf16x2:
      return {Intrinsic::fma, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fma_rn_ftz_bf16x2:
      return {Intrinsic::fma, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fmax_d:
      return {Intrinsic::maxnum, FTZ_Any};
    case Intrinsic::nvvm_fmax_f:
      return {Intrinsic::maxnum, FTZ_MustBeOff};
    case Intrinsic::nvvm_fmax_ftz_f:
      return {Intrinsic::maxnum, FTZ_MustBeOn};
    case Intrinsic::nvvm_fmax_nan_f:
      return {Intrinsic::maximum, FTZ_MustBeOff};
    case Intrinsic::nvvm_fmax_ftz_nan_f:
      return {Intrinsic::maximum, FTZ_MustBeOn};
    case Intrinsic::nvvm_fmax_f16:
      return {Intrinsic::maxnum, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fmax_ftz_f16:
      return {Intrinsic::maxnum, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fmax_f16x2:
      return {Intrinsic::maxnum, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fmax_ftz_f16x2:
      return {Intrinsic::maxnum, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fmax_nan_f16:
      return {Intrinsic::maximum, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fmax_ftz_nan_f16:
      return {Intrinsic::maximum, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fmax_nan_f16x2:
      return {Intrinsic::maximum, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fmax_ftz_nan_f16x2:
      return {Intrinsic::maximum, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fmin_d:
      return {Intrinsic::minnum, FTZ_Any};
    case Intrinsic::nvvm_fmin_f:
      return {Intrinsic::minnum, FTZ_MustBeOff};
    case Intrinsic::nvvm_fmin_ftz_f:
      return {Intrinsic::minnum, FTZ_MustBeOn};
    case Intrinsic::nvvm_fmin_nan_f:
      return {Intrinsic::minimum, FTZ_MustBeOff};
    case Intrinsic::nvvm_fmin_ftz_nan_f:
      return {Intrinsic::minimum, FTZ_MustBeOn};
    case Intrinsic::nvvm_fmin_f16:
      return {Intrinsic::minnum, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fmin_ftz_f16:
      return {Intrinsic::minnum, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fmin_f16x2:
      return {Intrinsic::minnum, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fmin_ftz_f16x2:
      return {Intrinsic::minnum, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fmin_nan_f16:
      return {Intrinsic::minimum, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fmin_ftz_nan_f16:
      return {Intrinsic::minimum, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fmin_nan_f16x2:
      return {Intrinsic::minimum, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fmin_ftz_nan_f16x2:
      return {Intrinsic::minimum, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_round_d:
      return {Intrinsic::round, FTZ_Any};
    case Intrinsic::nvvm_round_f:
      return {Intrinsic::round, FTZ_MustBeOff};
    case Intrinsic::nvvm_round_ftz_f:
      return {Intrinsic::round, FTZ_MustBeOn};
    case Intrinsic::nvvm_sqrt_rn_d:
      return {Intrinsic::sqrt, FTZ_Any};
    case Intrinsic::nvvm_sqrt_f:
      // nvvm_sqrt_f is a special case. For most intrinsics, foo_ftz_f is the
      // ftz version, and foo_f is the non-ftz version. But nvvm_sqrt_f adopts
      // the ftz-ness of the surrounding code. sqrt_rn_f and sqrt_rn_ftz_f are
      // the versions with explicit ftz-ness.
      return {Intrinsic::sqrt, FTZ_Any};
    case Intrinsic::nvvm_sqrt_rn_f:
      return {Intrinsic::sqrt, FTZ_MustBeOff};
    case Intrinsic::nvvm_sqrt_rn_ftz_f:
      return {Intrinsic::sqrt, FTZ_MustBeOn};
    case Intrinsic::nvvm_trunc_d:
      return {Intrinsic::trunc, FTZ_Any};
    case Intrinsic::nvvm_trunc_f:
      return {Intrinsic::trunc, FTZ_MustBeOff};
    case Intrinsic::nvvm_trunc_ftz_f:
      return {Intrinsic::trunc, FTZ_MustBeOn};

    // NVVM intrinsics that map to LLVM cast operations.
    //
    // Note that llvm's target-generic conversion operators correspond to the
    // rz (round to zero) versions of the nvvm conversion intrinsics, even
    // though most everything else here uses the rn (round to nearest even)
    // nvvm ops.
    case Intrinsic::nvvm_d2i_rz:
    case Intrinsic::nvvm_f2i_rz:
    case Intrinsic::nvvm_d2ll_rz:
    case Intrinsic::nvvm_f2ll_rz:
      return {Instruction::FPToSI};
    case Intrinsic::nvvm_d2ui_rz:
    case Intrinsic::nvvm_f2ui_rz:
    case Intrinsic::nvvm_d2ull_rz:
    case Intrinsic::nvvm_f2ull_rz:
      return {Instruction::FPToUI};
    case Intrinsic::nvvm_i2d_rz:
    case Intrinsic::nvvm_i2f_rz:
    case Intrinsic::nvvm_ll2d_rz:
    case Intrinsic::nvvm_ll2f_rz:
      return {Instruction::SIToFP};
    case Intrinsic::nvvm_ui2d_rz:
    case Intrinsic::nvvm_ui2f_rz:
    case Intrinsic::nvvm_ull2d_rz:
    case Intrinsic::nvvm_ull2f_rz:
      return {Instruction::UIToFP};

    // NVVM intrinsics that map to LLVM binary ops.
    case Intrinsic::nvvm_add_rn_d:
      return {Instruction::FAdd, FTZ_Any};
    case Intrinsic::nvvm_add_rn_f:
      return {Instruction::FAdd, FTZ_MustBeOff};
    case Intrinsic::nvvm_add_rn_ftz_f:
      return {Instruction::FAdd, FTZ_MustBeOn};
    case Intrinsic::nvvm_mul_rn_d:
      return {Instruction::FMul, FTZ_Any};
    case Intrinsic::nvvm_mul_rn_f:
      return {Instruction::FMul, FTZ_MustBeOff};
    case Intrinsic::nvvm_mul_rn_ftz_f:
      return {Instruction::FMul, FTZ_MustBeOn};
    case Intrinsic::nvvm_div_rn_d:
      return {Instruction::FDiv, FTZ_Any};
    case Intrinsic::nvvm_div_rn_f:
      return {Instruction::FDiv, FTZ_MustBeOff};
    case Intrinsic::nvvm_div_rn_ftz_f:
      return {Instruction::FDiv, FTZ_MustBeOn};

    // The remainder of cases are NVVM intrinsics that map to LLVM idioms, but
    // need special handling.
    //
    // We seem to be missing intrinsics for rcp.approx.{ftz.}f32, which is
    // just as well.
    case Intrinsic::nvvm_rcp_rn_d:
      return {SPC_Reciprocal, FTZ_Any};
    case Intrinsic::nvvm_rcp_rn_f:
      return {SPC_Reciprocal, FTZ_MustBeOff};
    case Intrinsic::nvvm_rcp_rn_ftz_f:
      return {SPC_Reciprocal, FTZ_MustBeOn};

    // We do not currently simplify intrinsics that give an approximate
    // answer. These include:
    //
    //   - nvvm_cos_approx_{f,ftz_f}
    //   - nvvm_ex2_approx_{d,f,ftz_f}
    //   - nvvm_lg2_approx_{d,f,ftz_f}
    //   - nvvm_sin_approx_{f,ftz_f}
    //   - nvvm_sqrt_approx_{f,ftz_f}
    //   - nvvm_rsqrt_approx_{d,f,ftz_f}
    //   - nvvm_div_approx_{ftz_d,ftz_f,f}
    //   - nvvm_rcp_approx_ftz_d
    //
    // Ideally we'd encode them as e.g. "fast call @llvm.cos", where "fast"
    // means that fastmath is enabled in the intrinsic. Unfortunately only
    // binary operators (currently) have a fastmath bit in SelectionDAG, so
    // this information gets lost and we can't select on it.
    //
    // TODO: div and rcp are lowered to a binary op, so in theory we could
    // lower them to "fast fdiv".

    default:
      return {};
    }
  }();

  // If Action.FtzRequirement is not satisfied by the module's ftz state, we
  // can bail out now. (Notice that in the case that IID is not an NVVM
  // intrinsic, we don't have to look up any module metadata, as
  // FtzRequirement will be FTZ_Any.)
  if (Action.FtzRequirement != FTZ_Any) {
    // FIXME: Broken for f64
    DenormalMode Mode = II->getFunction()->getDenormalMode(
        Action.IsHalfTy ? APFloat::IEEEhalf() : APFloat::IEEEsingle());
    bool FtzEnabled = Mode.Output == DenormalMode::PreserveSign;

    if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn))
      return nullptr;
  }

  // Simplify to target-generic intrinsic.
  if (Action.IID) {
    SmallVector<Value *, 4> Args(II->args());
    // All the target-generic intrinsics currently of interest to us have one
    // type argument, equal to that of the nvvm intrinsic's argument.
    Type *Tys[] = {II->getArgOperand(0)->getType()};
    return CallInst::Create(
        Intrinsic::getDeclaration(II->getModule(), *Action.IID, Tys), Args);
  }

  // Simplify to target-generic binary op.
  if (Action.BinaryOp)
    return BinaryOperator::Create(*Action.BinaryOp, II->getArgOperand(0),
                                  II->getArgOperand(1), II->getName());

  // Simplify to target-generic cast op.
  if (Action.CastOp)
    return CastInst::Create(*Action.CastOp, II->getArgOperand(0),
                            II->getType(), II->getName());

  // All that's left are the special cases.
  if (!Action.Special)
    return nullptr;

  switch (*Action.Special) {
  case SPC_Reciprocal:
    // Simplify reciprocal.
    return BinaryOperator::Create(
        Instruction::FDiv, ConstantFP::get(II->getArgOperand(0)->getType(), 1),
        II->getArgOperand(0), II->getName());
  }
  llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
}

std::optional<Instruction *>
NVPTXTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  if (Instruction *I = simplifyNvvmIntrinsic(&II, IC)) {
    return I;
  }
  return std::nullopt;
}

InstructionCost NVPTXTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
    TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
    ArrayRef<const Value *> Args, const Instruction *CxtI) {
  // Legalize the type.
  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  switch (ISD) {
  default:
    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                         Op2Info);
  case ISD::ADD:
  case ISD::MUL:
  case ISD::XOR:
  case ISD::OR:
  case ISD::AND:
    // The machine code (SASS) simulates an i64 with two i32. Therefore, we
    // estimate that arithmetic operations on i64 are twice as expensive as
    // those on types that can fit into one machine register.
    if (LT.second.SimpleTy == MVT::i64)
      return 2 * LT.first;
    // Delegate other cases to the basic TTI.
    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                         Op2Info);
  }
}

void NVPTXTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                           TTI::UnrollingPreferences &UP,
                                           OptimizationRemarkEmitter *ORE) {
  BaseT::getUnrollingPreferences(L, SE, UP, ORE);

  // Enable partial unrolling and runtime unrolling, but reduce the threshold.
  // This partially unrolls small loops, which are often unrolled by the PTX
  // to SASS compiler anyway; unrolling them earlier can be beneficial.
  UP.Partial = UP.Runtime = true;
  UP.PartialThreshold = UP.Threshold / 4;
}

void NVPTXTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                         TTI::PeelingPreferences &PP) {
  BaseT::getPeelingPreferences(L, SE, PP);
}